1//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SelectionDAG::LegalizeVectors method.
10//
11// The vector legalizer looks for vector operations which might need to be
12// scalarized and legalizes them. This is a separate step from Legalize because
13// scalarizing can introduce illegal types. For example, suppose we have an
14// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
15// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
16// operation, which introduces nodes with the illegal type i64 which must be
17// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
18// the operation must be unrolled, which introduces nodes with the illegal
19// type i8 which must be promoted.
20//
21// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
22// or operations that happen to take a vector which are custom-lowered;
23// the legalization for such operations never produces nodes
24// with illegal types, so it's okay to put off legalizing them until
25// SelectionDAG::Legalize runs.
26//
27//===----------------------------------------------------------------------===//
28
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/Analysis/TargetLibraryInfo.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/ISDOpcodes.h"
34#include "llvm/CodeGen/SelectionDAG.h"
35#include "llvm/CodeGen/SelectionDAGNodes.h"
36#include "llvm/CodeGen/TargetLowering.h"
37#include "llvm/CodeGen/ValueTypes.h"
38#include "llvm/CodeGenTypes/MachineValueType.h"
39#include "llvm/IR/DataLayout.h"
40#include "llvm/Support/Casting.h"
41#include "llvm/Support/Compiler.h"
42#include "llvm/Support/Debug.h"
43#include "llvm/Support/ErrorHandling.h"
44#include <cassert>
45#include <cstdint>
46#include <iterator>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "legalizevectorops"
52
53namespace {
54
55class VectorLegalizer {
56 SelectionDAG& DAG;
57 const TargetLowering &TLI;
58 bool Changed = false; // Keep track of whether anything changed
59
60 /// For nodes that are of legal width, and that have more than one use, this
61 /// map indicates what regularized operand to use. This allows us to avoid
62 /// legalizing the same thing more than once.
63 SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
64
65 /// Adds a node to the translation cache.
66 void AddLegalizedOperand(SDValue From, SDValue To) {
67 LegalizedNodes.insert(KV: std::make_pair(x&: From, y&: To));
68 // If someone requests legalization of the new node, return itself.
69 if (From != To)
70 LegalizedNodes.insert(KV: std::make_pair(x&: To, y&: To));
71 }
72
73 /// Legalizes the given node.
74 SDValue LegalizeOp(SDValue Op);
75
76 /// Assuming the node is legal, "legalize" the results.
77 SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
78
79 /// Make sure Results are legal and update the translation cache.
80 SDValue RecursivelyLegalizeResults(SDValue Op,
81 MutableArrayRef<SDValue> Results);
82
83 /// Wrapper to interface LowerOperation with a vector of Results.
84 /// Returns false if the target wants to use default expansion. Otherwise
85 /// returns true. If return is true and the Results are empty, then the
86 /// target wants to keep the input node as is.
87 bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
88
89 /// Implements unrolling a VSETCC.
90 SDValue UnrollVSETCC(SDNode *Node);
91
92 /// Implement expand-based legalization of vector operations.
93 ///
94 /// This is just a high-level routine to dispatch to specific code paths for
95 /// operations to legalize them.
96 void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);
97
98 /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
99 /// FP_TO_SINT isn't legal.
100 void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
101
102 /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
103 /// SINT_TO_FLOAT and SHR on vectors isn't legal.
104 void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
105
106 /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
107 SDValue ExpandSEXTINREG(SDNode *Node);
108
109 /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
110 ///
111 /// Shuffles the low lanes of the operand into place and bitcasts to the proper
112 /// type. The contents of the bits in the extended part of each element are
113 /// undef.
114 SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
115
116 /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
117 ///
118 /// Shuffles the low lanes of the operand into place, bitcasts to the proper
119 /// type, then shifts left and arithmetic shifts right to introduce a sign
120 /// extension.
121 SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
122
123 /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
124 ///
125 /// Shuffles the low lanes of the operand into place and blends zeros into
126 /// the remaining lanes, finally bitcasting to the proper type.
127 SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
128
129 /// Expand bswap of vectors into a shuffle if legal.
130 SDValue ExpandBSWAP(SDNode *Node);
131
132 /// Implement vselect in terms of XOR, AND, OR when blend is not
133 /// supported by the target.
134 SDValue ExpandVSELECT(SDNode *Node);
 /// Expansion entry points for VP (vector-predicated) operations; see the
 /// corresponding definitions for the exact lowerings used.
135 SDValue ExpandVP_SELECT(SDNode *Node);
136 SDValue ExpandVP_MERGE(SDNode *Node);
137 SDValue ExpandVP_REM(SDNode *Node);
138 SDValue ExpandVP_FNEG(SDNode *Node);
139 SDValue ExpandVP_FABS(SDNode *Node);
140 SDValue ExpandVP_FCOPYSIGN(SDNode *Node);
141 SDValue ExpandLOOP_DEPENDENCE_MASK(SDNode *N);
142 SDValue ExpandSELECT(SDNode *Node);
 /// Expansion entry points for loads/stores and the remaining scalar-style
 /// vector operations (definitions are outside this chunk).
143 std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
144 SDValue ExpandStore(SDNode *N);
145 SDValue ExpandFNEG(SDNode *Node);
146 SDValue ExpandFABS(SDNode *Node);
147 SDValue ExpandFCOPYSIGN(SDNode *Node);
148 void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
149 void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
150 SDValue ExpandBITREVERSE(SDNode *Node);
151 void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
152 void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
153 void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
154 void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
155 void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
156 void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
157
 /// Try to expand \p Node as a call to the vector math library routine \p LC.
158 bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
159 SmallVectorImpl<SDValue> &Results);
160
 /// Unrolling fallback for strict FP operations.
161 void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
162
163 /// Implements vector promotion.
164 ///
165 /// This is essentially just bitcasting the operands to a different type and
166 /// bitcasting the result back to the original type.
167 void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);
168
169 /// Implements [SU]INT_TO_FP vector promotion.
170 ///
171 /// This is a [zs]ext of the input operand to a larger integer type.
172 void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
173
174 /// Implements FP_TO_[SU]INT vector promotion of the result type.
175 ///
176 /// It is promoted to a larger integer type. The result is then
177 /// truncated back to the original type.
178 void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
179
180 /// Implements vector setcc operation promotion.
181 ///
182 /// All vector operands are promoted to a vector type with larger element
183 /// type.
184 void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
185
 /// Promote a strict FP operation by widening its vector operands/result.
186 void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
187
188 /// Calculate the reduction using a type of higher precision and round the
189 /// result to match the original type. Setting NonArithmetic signifies the
190 /// rounding of the result does not affect its value.
191 void PromoteFloatVECREDUCE(SDNode *Node, SmallVectorImpl<SDValue> &Results,
192 bool NonArithmetic);
193
 /// Promote VECTOR_COMPRESS by widening (integer) or bitcasting (same size).
194 void PromoteVECTOR_COMPRESS(SDNode *Node, SmallVectorImpl<SDValue> &Results);
195
196public:
197 VectorLegalizer(SelectionDAG& dag) :
198 DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
199
200 /// Legalize the vector operations in the DAG; returns true if anything
 /// changed (see the Changed flag above).
201 bool Run();
202};
203
204} // end anonymous namespace
205
206bool VectorLegalizer::Run() {
207 // Before we start legalizing vector nodes, check if there are any vectors.
208 bool HasVectors = false;
 // NOTE(review): comparing against std::next(std::prev(allnodes_end())) is
 // equivalent to comparing against allnodes_end() itself; E pins the current
 // last node up front — confirm the intent before simplifying.
209 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
210 E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I) {
211 // Check if the values of the nodes contain vectors. We don't need to check
212 // the operands because we are going to check their values at some point.
213 HasVectors = llvm::any_of(Range: I->values(), P: [](EVT T) { return T.isVector(); });
214
215 // If we found a vector node we can start the legalization.
216 if (HasVectors)
217 break;
218 }
219
220 // If this basic block has no vectors then no need to legalize vectors.
221 if (!HasVectors)
222 return false;
223
224 // The legalize process is inherently a bottom-up recursive process (users
225 // legalize their uses before themselves). Given infinite stack space, we
226 // could just start legalizing on the root and traverse the whole graph. In
227 // practice however, this causes us to run out of stack space on large basic
228 // blocks. To avoid this problem, compute an ordering of the nodes where each
229 // node is only legalized after all of its operands are legalized.
230 DAG.AssignTopologicalOrder();
 // Legalize every node present at this point. E is fixed before the loop, so
 // nodes appended by LegalizeOp during iteration are not revisited here; they
 // are legalized recursively as they are created (RecursivelyLegalizeResults).
231 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
232 E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I)
233 LegalizeOp(Op: SDValue(&*I, 0));
234
235 // Finally, it's possible the root changed. Get the new root.
236 SDValue OldRoot = DAG.getRoot();
237 assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
238 DAG.setRoot(LegalizedNodes[OldRoot]);
239
240 LegalizedNodes.clear();
241
242 // Remove dead nodes now.
243 DAG.RemoveDeadNodes();
244
245 return Changed;
246}
247
248SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
249 assert(Op->getNumValues() == Result->getNumValues() &&
250 "Unexpected number of results");
251 // Generic legalization: just pass the operand through.
252 for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
253 AddLegalizedOperand(From: Op.getValue(R: i), To: SDValue(Result, i));
254 return SDValue(Result, Op.getResNo());
255}
256
257SDValue
258VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
259 MutableArrayRef<SDValue> Results) {
260 assert(Results.size() == Op->getNumValues() &&
261 "Unexpected number of results");
262 // Make sure that the generated code is itself legal.
263 for (unsigned i = 0, e = Results.size(); i != e; ++i) {
264 Results[i] = LegalizeOp(Op: Results[i]);
265 AddLegalizedOperand(From: Op.getValue(R: i), To: Results[i]);
266 }
267
268 return Results[Op.getResNo()];
269}
270
271SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
272 // Note that LegalizeOp may be reentered even from single-use nodes, which
273 // means that we always must cache transformed nodes.
274 DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Val: Op);
275 if (I != LegalizedNodes.end()) return I->second;
276
277 // Legalize the operands
278 SmallVector<SDValue, 8> Ops;
279 for (const SDValue &Oper : Op->op_values())
280 Ops.push_back(Elt: LegalizeOp(Op: Oper));
281
 // Rebuild the node with the legalized operands; this may CSE to a different
 // node, so everything below works on Node rather than Op.
282 SDNode *Node = DAG.UpdateNodeOperands(N: Op.getNode(), Ops);
283
 // Nodes with neither vector results nor vector operands need no
 // vector-specific legalization; record them as legal and return.
284 bool HasVectorValueOrOp =
285 llvm::any_of(Range: Node->values(), P: [](EVT T) { return T.isVector(); }) ||
286 llvm::any_of(Range: Node->op_values(),
287 P: [](SDValue O) { return O.getValueType().isVector(); });
288 if (!HasVectorValueOrOp)
289 return TranslateLegalizeResults(Op, Result: Node);
290
 // Query the target for the action to take for this node. ValVT is only used
 // by the strict-FP cases below.
291 TargetLowering::LegalizeAction Action = TargetLowering::Legal;
292 EVT ValVT;
293 switch (Op.getOpcode()) {
294 default:
295 return TranslateLegalizeResults(Op, Result: Node);
296 case ISD::LOAD: {
297 LoadSDNode *LD = cast<LoadSDNode>(Val: Node);
298 ISD::LoadExtType ExtType = LD->getExtensionType();
299 EVT LoadedVT = LD->getMemoryVT();
 // Only extending vector loads need a legality check here.
300 if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
301 Action = TLI.getLoadExtAction(ExtType, ValVT: LD->getValueType(ResNo: 0), MemVT: LoadedVT);
302 break;
303 }
304 case ISD::STORE: {
305 StoreSDNode *ST = cast<StoreSDNode>(Val: Node);
306 EVT StVT = ST->getMemoryVT();
307 MVT ValVT = ST->getValue().getSimpleValueType();
 // Only truncating vector stores need a legality check here.
308 if (StVT.isVector() && ST->isTruncatingStore())
309 Action = TLI.getTruncStoreAction(ValVT, MemVT: StVT);
310 break;
311 }
312 case ISD::MERGE_VALUES:
313 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0));
314 // This operation lies about being legal: when it claims to be legal,
315 // it should actually be expanded.
316 if (Action == TargetLowering::Legal)
317 Action = TargetLowering::Expand;
318 break;
319#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
320 case ISD::STRICT_##DAGN:
321#include "llvm/IR/ConstrainedOps.def"
 // All STRICT_* opcodes funnel here. Pick the type that determines legality:
 // the result type by default, the (integer) source type for int-to-fp.
322 ValVT = Node->getValueType(ResNo: 0);
323 if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
324 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
325 ValVT = Node->getOperand(Num: 1).getValueType();
326 if (Op.getOpcode() == ISD::STRICT_FSETCC ||
327 Op.getOpcode() == ISD::STRICT_FSETCCS) {
328 MVT OpVT = Node->getOperand(Num: 1).getSimpleValueType();
329 ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 3))->get();
330 Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT);
331 if (Action == TargetLowering::Legal)
332 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT);
333 } else {
334 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: ValVT);
335 }
336 // If we're asked to expand a strict vector floating-point operation,
337 // by default we're going to simply unroll it. That is usually the
338 // best approach, except in the case where the resulting strict (scalar)
339 // operations would themselves use the fallback mutation to non-strict.
340 // In that specific case, just do the fallback on the vector op.
341 if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
342 TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: ValVT) ==
343 TargetLowering::Legal) {
344 EVT EltVT = ValVT.getVectorElementType();
345 if (TLI.getOperationAction(Op: Node->getOpcode(), VT: EltVT)
346 == TargetLowering::Expand &&
347 TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: EltVT)
348 == TargetLowering::Legal)
349 Action = TargetLowering::Legal;
350 }
351 break;
 // Plain (non-strict) ops whose legality is keyed on the result type.
352 case ISD::ADD:
353 case ISD::SUB:
354 case ISD::MUL:
355 case ISD::MULHS:
356 case ISD::MULHU:
357 case ISD::SDIV:
358 case ISD::UDIV:
359 case ISD::SREM:
360 case ISD::UREM:
361 case ISD::SDIVREM:
362 case ISD::UDIVREM:
363 case ISD::FADD:
364 case ISD::FSUB:
365 case ISD::FMUL:
366 case ISD::FDIV:
367 case ISD::FREM:
368 case ISD::AND:
369 case ISD::OR:
370 case ISD::XOR:
371 case ISD::SHL:
372 case ISD::SRA:
373 case ISD::SRL:
374 case ISD::FSHL:
375 case ISD::FSHR:
376 case ISD::ROTL:
377 case ISD::ROTR:
378 case ISD::ABS:
379 case ISD::ABDS:
380 case ISD::ABDU:
381 case ISD::AVGCEILS:
382 case ISD::AVGCEILU:
383 case ISD::AVGFLOORS:
384 case ISD::AVGFLOORU:
385 case ISD::BSWAP:
386 case ISD::BITREVERSE:
387 case ISD::CTLZ:
388 case ISD::CTTZ:
389 case ISD::CTLZ_ZERO_UNDEF:
390 case ISD::CTTZ_ZERO_UNDEF:
391 case ISD::CTPOP:
392 case ISD::CLMUL:
393 case ISD::CLMULH:
394 case ISD::CLMULR:
395 case ISD::SELECT:
396 case ISD::VSELECT:
397 case ISD::SELECT_CC:
398 case ISD::ZERO_EXTEND:
399 case ISD::ANY_EXTEND:
400 case ISD::TRUNCATE:
401 case ISD::SIGN_EXTEND:
402 case ISD::FP_TO_SINT:
403 case ISD::FP_TO_UINT:
404 case ISD::FNEG:
405 case ISD::FABS:
406 case ISD::FMINNUM:
407 case ISD::FMAXNUM:
408 case ISD::FMINNUM_IEEE:
409 case ISD::FMAXNUM_IEEE:
410 case ISD::FMINIMUM:
411 case ISD::FMAXIMUM:
412 case ISD::FMINIMUMNUM:
413 case ISD::FMAXIMUMNUM:
414 case ISD::FCOPYSIGN:
415 case ISD::FSQRT:
416 case ISD::FSIN:
417 case ISD::FCOS:
418 case ISD::FTAN:
419 case ISD::FASIN:
420 case ISD::FACOS:
421 case ISD::FATAN:
422 case ISD::FATAN2:
423 case ISD::FSINH:
424 case ISD::FCOSH:
425 case ISD::FTANH:
426 case ISD::FLDEXP:
427 case ISD::FPOWI:
428 case ISD::FPOW:
429 case ISD::FCBRT:
430 case ISD::FLOG:
431 case ISD::FLOG2:
432 case ISD::FLOG10:
433 case ISD::FEXP:
434 case ISD::FEXP2:
435 case ISD::FEXP10:
436 case ISD::FCEIL:
437 case ISD::FTRUNC:
438 case ISD::FRINT:
439 case ISD::FNEARBYINT:
440 case ISD::FROUND:
441 case ISD::FROUNDEVEN:
442 case ISD::FFLOOR:
443 case ISD::FP_ROUND:
444 case ISD::FP_EXTEND:
445 case ISD::FPTRUNC_ROUND:
446 case ISD::FMA:
447 case ISD::SIGN_EXTEND_INREG:
448 case ISD::ANY_EXTEND_VECTOR_INREG:
449 case ISD::SIGN_EXTEND_VECTOR_INREG:
450 case ISD::ZERO_EXTEND_VECTOR_INREG:
451 case ISD::SMIN:
452 case ISD::SMAX:
453 case ISD::UMIN:
454 case ISD::UMAX:
455 case ISD::SMUL_LOHI:
456 case ISD::UMUL_LOHI:
457 case ISD::SADDO:
458 case ISD::UADDO:
459 case ISD::SSUBO:
460 case ISD::USUBO:
461 case ISD::SMULO:
462 case ISD::UMULO:
463 case ISD::CONVERT_FROM_ARBITRARY_FP:
464 case ISD::FCANONICALIZE:
465 case ISD::FFREXP:
466 case ISD::FMODF:
467 case ISD::FSINCOS:
468 case ISD::FSINCOSPI:
469 case ISD::SADDSAT:
470 case ISD::UADDSAT:
471 case ISD::SSUBSAT:
472 case ISD::USUBSAT:
473 case ISD::SSHLSAT:
474 case ISD::USHLSAT:
475 case ISD::FP_TO_SINT_SAT:
476 case ISD::FP_TO_UINT_SAT:
477 case ISD::MGATHER:
478 case ISD::VECTOR_COMPRESS:
479 case ISD::SCMP:
480 case ISD::UCMP:
481 case ISD::LOOP_DEPENDENCE_WAR_MASK:
482 case ISD::LOOP_DEPENDENCE_RAW_MASK:
483 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0));
484 break;
 // Fixed-point ops: legality additionally depends on the scale operand.
485 case ISD::SMULFIX:
486 case ISD::SMULFIXSAT:
487 case ISD::UMULFIX:
488 case ISD::UMULFIXSAT:
489 case ISD::SDIVFIX:
490 case ISD::SDIVFIXSAT:
491 case ISD::UDIVFIX:
492 case ISD::UDIVFIXSAT: {
493 unsigned Scale = Node->getConstantOperandVal(Num: 2);
494 Action = TLI.getFixedPointOperationAction(Op: Node->getOpcode(),
495 VT: Node->getValueType(ResNo: 0), Scale);
496 break;
497 }
 // Ops whose legality is keyed on the first operand's (vector) type, e.g.
 // conversions and reductions whose result is scalar.
498 case ISD::LROUND:
499 case ISD::LLROUND:
500 case ISD::LRINT:
501 case ISD::LLRINT:
502 case ISD::SINT_TO_FP:
503 case ISD::UINT_TO_FP:
504 case ISD::VECREDUCE_ADD:
505 case ISD::VECREDUCE_MUL:
506 case ISD::VECREDUCE_AND:
507 case ISD::VECREDUCE_OR:
508 case ISD::VECREDUCE_XOR:
509 case ISD::VECREDUCE_SMAX:
510 case ISD::VECREDUCE_SMIN:
511 case ISD::VECREDUCE_UMAX:
512 case ISD::VECREDUCE_UMIN:
513 case ISD::VECREDUCE_FADD:
514 case ISD::VECREDUCE_FMAX:
515 case ISD::VECREDUCE_FMAXIMUM:
516 case ISD::VECREDUCE_FMIN:
517 case ISD::VECREDUCE_FMINIMUM:
518 case ISD::VECREDUCE_FMUL:
519 case ISD::VECTOR_FIND_LAST_ACTIVE:
520 Action = TLI.getOperationAction(Op: Node->getOpcode(),
521 VT: Node->getOperand(Num: 0).getValueType());
522 break;
 // Sequential reductions: operand 0 is the scalar start value, the vector is
 // operand 1, so key legality on that instead.
523 case ISD::VECREDUCE_SEQ_FADD:
524 case ISD::VECREDUCE_SEQ_FMUL:
525 Action = TLI.getOperationAction(Op: Node->getOpcode(),
526 VT: Node->getOperand(Num: 1).getValueType());
527 break;
528 case ISD::SETCC: {
 // The condition code must be legal for the comparison type before the
 // operation itself is considered.
529 MVT OpVT = Node->getOperand(Num: 0).getSimpleValueType();
530 ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 2))->get();
531 Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT);
532 if (Action == TargetLowering::Legal)
533 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT);
534 break;
535 }
536 case ISD::PARTIAL_REDUCE_UMLA:
537 case ISD::PARTIAL_REDUCE_SMLA:
538 case ISD::PARTIAL_REDUCE_SUMLA:
539 case ISD::PARTIAL_REDUCE_FMLA:
540 Action =
541 TLI.getPartialReduceMLAAction(Opc: Op.getOpcode(), AccVT: Node->getValueType(ResNo: 0),
542 InputVT: Node->getOperand(Num: 1).getValueType());
543 break;
544
545#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \
546 case ISD::VPID: { \
547 EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
548 : Node->getOperand(LEGALPOS).getValueType(); \
549 if (ISD::VPID == ISD::VP_SETCC) { \
550 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
551 Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \
552 if (Action != TargetLowering::Legal) \
553 break; \
554 } \
555 /* Defer non-vector results to LegalizeDAG. */ \
556 if (!Node->getValueType(0).isVector() && \
557 Node->getValueType(0) != MVT::Other) { \
558 Action = TargetLowering::Legal; \
559 break; \
560 } \
561 Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
562 } break;
563#include "llvm/IR/VPIntrinsics.def"
564 }
565
566 LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
567
 // Carry out the chosen action. ResultVals stays empty when the node is kept
 // as-is (Legal, or Custom lowering that kept the node).
568 SmallVector<SDValue, 8> ResultVals;
569 switch (Action) {
570 default: llvm_unreachable("This action is not supported yet!");
571 case TargetLowering::Promote:
572 assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
573 "This action is not supported yet!");
574 LLVM_DEBUG(dbgs() << "Promoting\n");
575 Promote(Node, Results&: ResultVals);
576 assert(!ResultVals.empty() && "No results for promotion?");
577 break;
578 case TargetLowering::Legal:
579 LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
580 break;
581 case TargetLowering::Custom:
582 LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
583 if (LowerOperationWrapper(N: Node, Results&: ResultVals))
584 break;
585 LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
 // Custom lowering declined; fall back to the generic expansion.
586 [[fallthrough]];
587 case TargetLowering::Expand:
588 LLVM_DEBUG(dbgs() << "Expanding\n");
589 Expand(Node, Results&: ResultVals);
590 break;
591 }
592
 // No replacement values: the node itself is the legal result.
593 if (ResultVals.empty())
594 return TranslateLegalizeResults(Op, Result: Node);
595
596 Changed = true;
597 return RecursivelyLegalizeResults(Op, Results: ResultVals);
598}
599
600// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
601// merge them somehow?
602bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
603 SmallVectorImpl<SDValue> &Results) {
604 SDValue Res = TLI.LowerOperation(Op: SDValue(Node, 0), DAG);
605
606 if (!Res.getNode())
607 return false;
608
609 if (Res == SDValue(Node, 0))
610 return true;
611
612 // If the original node has one result, take the return value from
613 // LowerOperation as is. It might not be result number 0.
614 if (Node->getNumValues() == 1) {
615 Results.push_back(Elt: Res);
616 return true;
617 }
618
619 // If the original node has multiple results, then the return node should
620 // have the same number of results.
621 assert((Node->getNumValues() == Res->getNumValues()) &&
622 "Lowering returned the wrong number of results!");
623
624 // Places new result values base on N result number.
625 for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
626 Results.push_back(Elt: Res.getValue(R: I));
627
628 return true;
629}
630
631void VectorLegalizer::PromoteSETCC(SDNode *Node,
632 SmallVectorImpl<SDValue> &Results) {
633 MVT VecVT = Node->getOperand(Num: 0).getSimpleValueType();
634 MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT);
635
636 unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
637
638 SDLoc DL(Node);
639 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
640
641 Operands[0] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 0));
642 Operands[1] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 1));
643 Operands[2] = Node->getOperand(Num: 2);
644
645 if (Node->getOpcode() == ISD::VP_SETCC) {
646 Operands[3] = Node->getOperand(Num: 3); // mask
647 Operands[4] = Node->getOperand(Num: 4); // evl
648 }
649
650 SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL, VT: Node->getSimpleValueType(ResNo: 0),
651 Ops: Operands, Flags: Node->getFlags());
652
653 Results.push_back(Elt: Res);
654}
655
656void VectorLegalizer::PromoteSTRICT(SDNode *Node,
657 SmallVectorImpl<SDValue> &Results) {
 // Operand 0 is the chain; operand 1 carries the vector type to promote.
658 MVT VecVT = Node->getOperand(Num: 1).getSimpleValueType();
659 MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT);
660
661 assert(VecVT.isFloatingPoint());
662
663 SDLoc DL(Node);
664 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
665 SmallVector<SDValue, 2> Chains;
666
 // STRICT_FP_EXTEND every vector operand (skipping a VP mask, if any) and
 // collect each extension's output chain.
667 for (unsigned j = 1; j != Node->getNumOperands(); ++j)
668 if (Node->getOperand(Num: j).getValueType().isVector() &&
669 !(ISD::isVPOpcode(Opcode: Node->getOpcode()) &&
670 ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j)) // Skip mask operand.
671 {
672 // promote the vector operand.
673 SDValue Ext =
674 DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL, ResultTys: {NewVecVT, MVT::Other},
675 Ops: {Node->getOperand(Num: 0), Node->getOperand(Num: j)});
676 Operands[j] = Ext.getValue(R: 0);
677 Chains.push_back(Elt: Ext.getValue(R: 1));
678 } else
679 Operands[j] = Node->getOperand(Num: j); // Skip no vector operand.
680
681 SDVTList VTs = DAG.getVTList(VT1: NewVecVT, VT2: Node->getValueType(ResNo: 1));
682
 // Merge the extension chains into the operation's incoming chain operand.
683 Operands[0] = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: Chains);
684
685 SDValue Res =
686 DAG.getNode(Opcode: Node->getOpcode(), DL, VTList: VTs, Ops: Operands, Flags: Node->getFlags());
687
 // Round the widened result back to the original type, threading the strict
 // chain (Res value 1) through the round.
688 SDValue Round =
689 DAG.getNode(Opcode: ISD::STRICT_FP_ROUND, DL, ResultTys: {VecVT, MVT::Other},
690 Ops: {Res.getValue(R: 1), Res.getValue(R: 0),
691 DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)});
692
 // Result 0 is the value, result 1 the outgoing chain.
693 Results.push_back(Elt: Round.getValue(R: 0));
694 Results.push_back(Elt: Round.getValue(R: 1));
695}
696
697void VectorLegalizer::PromoteFloatVECREDUCE(SDNode *Node,
698 SmallVectorImpl<SDValue> &Results,
699 bool NonArithmetic) {
700 MVT OpVT = Node->getOperand(Num: 0).getSimpleValueType();
701 assert(OpVT.isFloatingPoint() && "Expected floating point reduction!");
702 MVT NewOpVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: OpVT);
703
704 SDLoc DL(Node);
705 SDValue NewOp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NewOpVT, Operand: Node->getOperand(Num: 0));
706 SDValue Rdx =
707 DAG.getNode(Opcode: Node->getOpcode(), DL, VT: NewOpVT.getVectorElementType(), Operand: NewOp,
708 Flags: Node->getFlags());
709 SDValue Res =
710 DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Node->getValueType(ResNo: 0), N1: Rdx,
711 N2: DAG.getIntPtrConstant(Val: NonArithmetic, DL, /*isTarget=*/true));
712 Results.push_back(Elt: Res);
713}
714
715void VectorLegalizer::PromoteVECTOR_COMPRESS(
716 SDNode *Node, SmallVectorImpl<SDValue> &Results) {
717 SDLoc DL(Node);
718 EVT VT = Node->getValueType(ResNo: 0);
719 MVT PromotedVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VT.getSimpleVT());
720 assert((VT.isInteger() || VT.getSizeInBits() == PromotedVT.getSizeInBits()) &&
721 "Only integer promotion or bitcasts between types is supported");
722
723 SDValue Vec = Node->getOperand(Num: 0);
724 SDValue Mask = Node->getOperand(Num: 1);
725 SDValue Passthru = Node->getOperand(Num: 2);
726 if (VT.isInteger()) {
727 Vec = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: PromotedVT, Operand: Vec);
728 Mask = TLI.promoteTargetBoolean(DAG, Bool: Mask, ValVT: PromotedVT);
729 Passthru = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: PromotedVT, Operand: Passthru);
730 } else {
731 Vec = DAG.getBitcast(VT: PromotedVT, V: Vec);
732 Passthru = DAG.getBitcast(VT: PromotedVT, V: Passthru);
733 }
734
735 SDValue Result =
736 DAG.getNode(Opcode: ISD::VECTOR_COMPRESS, DL, VT: PromotedVT, N1: Vec, N2: Mask, N3: Passthru);
737 Result = VT.isInteger() ? DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Result)
738 : DAG.getBitcast(VT, V: Result);
739 Results.push_back(Elt: Result);
740}
741
742void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
743 // For a few operations there is a specific concept for promotion based on
744 // the operand's type.
745 switch (Node->getOpcode()) {
746 case ISD::SINT_TO_FP:
747 case ISD::UINT_TO_FP:
748 case ISD::STRICT_SINT_TO_FP:
749 case ISD::STRICT_UINT_TO_FP:
750 // "Promote" the operation by extending the operand.
751 PromoteINT_TO_FP(Node, Results);
752 return;
753 case ISD::FP_TO_UINT:
754 case ISD::FP_TO_SINT:
755 case ISD::STRICT_FP_TO_UINT:
756 case ISD::STRICT_FP_TO_SINT:
757 // Promote the operation by extending the operand.
758 PromoteFP_TO_INT(Node, Results);
759 return;
760 case ISD::VP_SETCC:
761 case ISD::SETCC:
762 // Promote the operation by extending the operand.
763 PromoteSETCC(Node, Results);
764 return;
765 case ISD::STRICT_FADD:
766 case ISD::STRICT_FSUB:
767 case ISD::STRICT_FMUL:
768 case ISD::STRICT_FDIV:
769 case ISD::STRICT_FSQRT:
770 case ISD::STRICT_FMA:
771 PromoteSTRICT(Node, Results);
772 return;
773 case ISD::VECREDUCE_FADD:
774 PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/false);
775 return;
776 case ISD::VECREDUCE_FMAX:
777 case ISD::VECREDUCE_FMAXIMUM:
778 case ISD::VECREDUCE_FMIN:
779 case ISD::VECREDUCE_FMINIMUM:
 // Min/max reductions pick an existing element, so the final rounding is
 // non-arithmetic (it cannot change the value).
780 PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/true);
781 return;
782 case ISD::VECTOR_COMPRESS:
783 PromoteVECTOR_COMPRESS(Node, Results);
784 return;
785
786 case ISD::FP_ROUND:
787 case ISD::FP_EXTEND:
788 // These operations are used to do promotion so they can't be promoted
789 // themselves.
790 llvm_unreachable("Don't know how to promote this operation!");
791 case ISD::VP_FABS:
792 case ISD::VP_FCOPYSIGN:
793 case ISD::VP_FNEG:
794 // Promoting fabs, fneg, and fcopysign changes their semantics.
795 llvm_unreachable("These operations should not be promoted");
796 }
797
798 // There are currently two cases of vector promotion:
799 // 1) Bitcasting a vector of integers to a different type to a vector of the
800 // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
801 // 2) Extending a vector of floats to a vector of the same number of larger
802 // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
803 assert(Node->getNumValues() == 1 &&
804 "Can't promote a vector with multiple results!");
805 MVT VT = Node->getSimpleValueType(ResNo: 0);
806 MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
807 SDLoc dl(Node);
808 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
809
 // Convert each vector operand to the promoted type: FP-element vectors are
 // widened with (VP_)FP_EXTEND, all other vectors are bitcast; VP masks and
 // scalar operands pass through unchanged.
810 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
811 // Do not promote the mask operand of a VP OP.
812 bool SkipPromote = ISD::isVPOpcode(Opcode: Node->getOpcode()) &&
813 ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j;
814 if (Node->getOperand(Num: j).getValueType().isVector() && !SkipPromote)
815 if (Node->getOperand(Num: j)
816 .getValueType()
817 .getVectorElementType()
818 .isFloatingPoint() &&
819 NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
820 if (ISD::isVPOpcode(Opcode: Node->getOpcode())) {
821 unsigned EVLIdx =
822 *ISD::getVPExplicitVectorLengthIdx(Opcode: Node->getOpcode());
823 unsigned MaskIdx = *ISD::getVPMaskIdx(Opcode: Node->getOpcode());
824 Operands[j] =
825 DAG.getNode(Opcode: ISD::VP_FP_EXTEND, DL: dl, VT: NVT, N1: Node->getOperand(Num: j),
826 N2: Node->getOperand(Num: MaskIdx), N3: Node->getOperand(Num: EVLIdx));
827 } else {
828 Operands[j] =
829 DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
830 }
831 else
832 Operands[j] = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
833 else
834 Operands[j] = Node->getOperand(Num: j);
835 }
836
837 SDValue Res =
838 DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: NVT, Ops: Operands, Flags: Node->getFlags());
839
 // Convert the promoted result back to the original type: FP results are
 // rounded back down (the VP form threads mask/EVL), others are bitcast.
840 if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
841 (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
842 NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
843 if (ISD::isVPOpcode(Opcode: Node->getOpcode())) {
844 unsigned EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Opcode: Node->getOpcode());
845 unsigned MaskIdx = *ISD::getVPMaskIdx(Opcode: Node->getOpcode());
846 Res = DAG.getNode(Opcode: ISD::VP_FP_ROUND, DL: dl, VT, N1: Res,
847 N2: Node->getOperand(Num: MaskIdx), N3: Node->getOperand(Num: EVLIdx));
848 } else {
849 Res = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT, N1: Res,
850 N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
851 }
852 else
853 Res = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Res);
854
855 Results.push_back(Elt: Res);
856}
857
// Promote the integer input of a [SU]INT_TO_FP (or strict variant) to a wider
// integer vector type, then perform the conversion at the promoted type. The
// result type of the node is unchanged; only the source operand is widened.
void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  // INT_TO_FP operations may require the input operand be promoted even
  // when the type is otherwise legal.
  bool IsStrict = Node->isStrictFPOpcode();
  // For strict ops operand 0 is the chain; the vector source is operand 1.
  MVT VT = Node->getOperand(Num: IsStrict ? 1 : 0).getSimpleValueType();
  MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
  assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
         "Vectors have different number of elements!");

  SDLoc dl(Node);
  SmallVector<SDValue, 4> Operands(Node->getNumOperands());

  // Unsigned conversions zero-extend the source; signed conversions
  // sign-extend, preserving the numeric value in the wider lanes.
  unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
                  Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
                     ? ISD::ZERO_EXTEND
                     : ISD::SIGN_EXTEND;
  for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
    // Only vector operands are extended; scalars (e.g. the chain) pass through.
    if (Node->getOperand(Num: j).getValueType().isVector())
      Operands[j] = DAG.getNode(Opcode: Opc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
    else
      Operands[j] = Node->getOperand(Num: j);
  }

  if (IsStrict) {
    // Strict nodes produce {result, chain}; forward both.
    SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL: dl,
                              ResultTys: {Node->getValueType(ResNo: 0), MVT::Other}, Ops: Operands);
    Results.push_back(Elt: Res);
    Results.push_back(Elt: Res.getValue(R: 1));
    return;
  }

  SDValue Res =
      DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: Node->getValueType(ResNo: 0), Ops: Operands);
  Results.push_back(Elt: Res);
}
894
// For FP_TO_INT we promote the result type to a vector type with wider
// elements and then truncate the result. This is different from the default
// PromoteVector which uses bitcast to promote thus assuming that the
// promoted vector type has the same overall size.
void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  MVT VT = Node->getSimpleValueType(ResNo: 0);
  MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
  bool IsStrict = Node->isStrictFPOpcode();
  assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
         "Vectors have different number of elements!");

  unsigned NewOpc = Node->getOpcode();
  // Change FP_TO_UINT to FP_TO_SINT if possible.
  // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
  if (NewOpc == ISD::FP_TO_UINT &&
      TLI.isOperationLegalOrCustom(Op: ISD::FP_TO_SINT, VT: NVT))
    NewOpc = ISD::FP_TO_SINT;

  if (NewOpc == ISD::STRICT_FP_TO_UINT &&
      TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FP_TO_SINT, VT: NVT))
    NewOpc = ISD::STRICT_FP_TO_SINT;

  SDLoc dl(Node);
  SDValue Promoted, Chain;
  if (IsStrict) {
    // Strict variant carries a chain in operand 0 and produces one too.
    Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, ResultTys: {NVT, MVT::Other},
                           Ops: {Node->getOperand(Num: 0), Node->getOperand(Num: 1)});
    Chain = Promoted.getValue(R: 1);
  } else
    Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: 0));

  // Assert that the converted value fits in the original type. If it doesn't
  // (eg: because the value being converted is too big), then the result of the
  // original operation was undefined anyway, so the assert is still correct.
  // Note: this keys off the ORIGINAL opcode, so a FP_TO_UINT converted to
  // FP_TO_SINT above still gets AssertZext semantics.
  if (Node->getOpcode() == ISD::FP_TO_UINT ||
      Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
    NewOpc = ISD::AssertZext;
  else
    NewOpc = ISD::AssertSext;

  Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, N1: Promoted,
                         N2: DAG.getValueType(VT.getScalarType()));
  Promoted = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Promoted);
  Results.push_back(Elt: Promoted);
  if (IsStrict)
    Results.push_back(Elt: Chain);
}
943
944std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
945 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
946 return TLI.scalarizeVectorLoad(LD, DAG);
947}
948
949SDValue VectorLegalizer::ExpandStore(SDNode *N) {
950 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
951 SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
952 return TF;
953}
954
// Expand a vector operation the target marked Expand into simpler DAG
// operations. Each case attempts a dedicated expansion; a case that fails
// (returns an empty SDValue) breaks out of the switch and falls through to
// the generic unroll at the bottom. One entry is pushed onto Results per
// value produced by Node.
void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  switch (Node->getOpcode()) {
  case ISD::LOAD: {
    std::pair<SDValue, SDValue> Tmp = ExpandLoad(N: Node);
    Results.push_back(Elt: Tmp.first);
    Results.push_back(Elt: Tmp.second);
    return;
  }
  case ISD::STORE:
    Results.push_back(Elt: ExpandStore(N: Node));
    return;
  case ISD::MERGE_VALUES:
    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
      Results.push_back(Elt: Node->getOperand(Num: i));
    return;
  case ISD::SIGN_EXTEND_INREG:
    if (SDValue Expanded = ExpandSEXTINREG(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::ANY_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandANY_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandSIGN_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandZERO_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::BSWAP:
    if (SDValue Expanded = ExpandBSWAP(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_BSWAP:
    Results.push_back(Elt: TLI.expandVPBSWAP(N: Node, DAG));
    return;
  case ISD::VSELECT:
    if (SDValue Expanded = ExpandVSELECT(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_SELECT:
    if (SDValue Expanded = ExpandVP_SELECT(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_SREM:
  case ISD::VP_UREM:
    if (SDValue Expanded = ExpandVP_REM(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_FNEG:
    if (SDValue Expanded = ExpandVP_FNEG(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_FABS:
    if (SDValue Expanded = ExpandVP_FABS(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_FCOPYSIGN:
    if (SDValue Expanded = ExpandVP_FCOPYSIGN(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SELECT:
    if (SDValue Expanded = ExpandSELECT(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SELECT_CC: {
    // Scalable vectors can't be unrolled, so lower to SETCC + SELECT instead.
    if (Node->getValueType(ResNo: 0).isScalableVector()) {
      EVT CondVT = TLI.getSetCCResultType(
          DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
      SDValue SetCC =
          DAG.getNode(Opcode: ISD::SETCC, DL: SDLoc(Node), VT: CondVT, N1: Node->getOperand(Num: 0),
                      N2: Node->getOperand(Num: 1), N3: Node->getOperand(Num: 4));
      Results.push_back(Elt: DAG.getSelect(DL: SDLoc(Node), VT: Node->getValueType(ResNo: 0), Cond: SetCC,
                                       LHS: Node->getOperand(Num: 2),
                                       RHS: Node->getOperand(Num: 3)));
      return;
    }
    break;
  }
  case ISD::FP_TO_UINT:
    ExpandFP_TO_UINT(Node, Results);
    return;
  case ISD::UINT_TO_FP:
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  case ISD::FNEG:
    if (SDValue Expanded = ExpandFNEG(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FABS:
    if (SDValue Expanded = ExpandFABS(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FCOPYSIGN:
    if (SDValue Expanded = ExpandFCOPYSIGN(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FSUB:
    ExpandFSUB(Node, Results);
    return;
  case ISD::SETCC:
  case ISD::VP_SETCC:
    ExpandSETCC(Node, Results);
    return;
  case ISD::ABS:
    if (SDValue Expanded = TLI.expandABS(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::ABDS:
  case ISD::ABDU:
    if (SDValue Expanded = TLI.expandABD(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
    if (SDValue Expanded = TLI.expandAVG(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::BITREVERSE:
    if (SDValue Expanded = ExpandBITREVERSE(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_BITREVERSE:
    if (SDValue Expanded = TLI.expandVPBITREVERSE(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTPOP:
    if (SDValue Expanded = TLI.expandCTPOP(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTPOP:
    if (SDValue Expanded = TLI.expandVPCTPOP(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTLZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTLZ:
  case ISD::VP_CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTLZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTTZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTTZ:
  case ISD::VP_CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTTZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FSHL:
  case ISD::VP_FSHL:
  case ISD::FSHR:
  case ISD::VP_FSHR:
    if (SDValue Expanded = TLI.expandFunnelShift(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CLMUL:
  case ISD::CLMULR:
  case ISD::CLMULH:
    if (SDValue Expanded = TLI.expandCLMUL(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    if (SDValue Expanded = TLI.expandROT(N: Node, AllowVectorOps: false /*AllowVectorOps*/, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
    if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
    Results.push_back(Elt: TLI.expandFMINIMUM_FMAXIMUM(N: Node, DAG));
    return;
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM:
    Results.push_back(Elt: TLI.expandFMINIMUMNUM_FMAXIMUMNUM(N: Node, DAG));
    return;
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
    if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::UADDO:
  case ISD::USUBO:
    ExpandUADDSUBO(Node, Results);
    return;
  case ISD::SADDO:
  case ISD::SSUBO:
    ExpandSADDSUBO(Node, Results);
    return;
  case ISD::UMULO:
  case ISD::SMULO:
    ExpandMULO(Node, Results);
    return;
  case ISD::USUBSAT:
  case ISD::SSUBSAT:
  case ISD::UADDSAT:
  case ISD::SADDSAT:
    if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::USHLSAT:
  case ISD::SSHLSAT:
    if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    // Expand the FP_TO_*INT_SAT if it is scalable to prevent it from unrolling
    // below.
    if (Node->getValueType(ResNo: 0).isScalableVector()) {
      if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(N: Node, DAG)) {
        Results.push_back(Elt: Expanded);
        return;
      }
    }
    break;
  case ISD::SMULFIX:
  case ISD::UMULFIX:
    if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SMULFIXSAT:
  case ISD::UMULFIXSAT:
    // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
    // why. Maybe it results in worse codegen compared to the unroll for some
    // targets? This should probably be investigated. And if we still prefer to
    // unroll an explanation could be helpful.
    break;
  case ISD::SDIVFIX:
  case ISD::UDIVFIX:
    ExpandFixedPointDiv(Node, Results);
    return;
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIXSAT:
    break;
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ExpandStrictFPOp(Node, Results);
    return;
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAXIMUM:
  case ISD::VECREDUCE_FMINIMUM:
    Results.push_back(Elt: TLI.expandVecReduce(Node, DAG));
    return;
  case ISD::PARTIAL_REDUCE_UMLA:
  case ISD::PARTIAL_REDUCE_SMLA:
  case ISD::PARTIAL_REDUCE_SUMLA:
  case ISD::PARTIAL_REDUCE_FMLA:
    Results.push_back(Elt: TLI.expandPartialReduceMLA(Node, DAG));
    return;
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_SEQ_FMUL:
    Results.push_back(Elt: TLI.expandVecReduceSeq(Node, DAG));
    return;
  case ISD::SREM:
  case ISD::UREM:
    ExpandREM(Node, Results);
    return;
  case ISD::VP_MERGE:
    if (SDValue Expanded = ExpandVP_MERGE(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FREM: {
    // Try a vector math-library call before falling back to unrolling.
    RTLIB::Libcall LC = RTLIB::getREM(VT: Node->getValueType(ResNo: 0));
    if (tryExpandVecMathCall(Node, LC, Results))
      return;

    break;
  }
  case ISD::FSINCOS:
  case ISD::FSINCOSPI: {
    EVT VT = Node->getValueType(ResNo: 0);
    RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
                            ? RTLIB::getSINCOS(RetVT: VT)
                            : RTLIB::getSINCOSPI(RetVT: VT);
    if (LC != RTLIB::UNKNOWN_LIBCALL &&
        TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results))
      return;

    // TODO: Try to see if there's a narrower call available to use before
    // scalarizing.
    break;
  }
  case ISD::FPOW: {
    RTLIB::Libcall LC = RTLIB::getPOW(RetVT: Node->getValueType(ResNo: 0));
    if (tryExpandVecMathCall(Node, LC, Results))
      return;

    // TODO: Try to see if there's a narrower call available to use before
    // scalarizing.
    break;
  }
  case ISD::FCBRT: {
    RTLIB::Libcall LC = RTLIB::getCBRT(RetVT: Node->getValueType(ResNo: 0));
    if (tryExpandVecMathCall(Node, LC, Results))
      return;

    // TODO: Try to see if there's a narrower call available to use before
    // scalarizing.
    break;
  }
  case ISD::FMODF: {
    EVT VT = Node->getValueType(ResNo: 0);
    RTLIB::Libcall LC = RTLIB::getMODF(VT);
    if (LC != RTLIB::UNKNOWN_LIBCALL &&
        TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results,
                                          /*CallRetResNo=*/0))
      return;
    break;
  }
  case ISD::VECTOR_COMPRESS:
    Results.push_back(Elt: TLI.expandVECTOR_COMPRESS(Node, DAG));
    return;
  case ISD::VECTOR_FIND_LAST_ACTIVE:
    Results.push_back(Elt: TLI.expandVectorFindLastActive(N: Node, DAG));
    return;
  case ISD::SCMP:
  case ISD::UCMP:
    Results.push_back(Elt: TLI.expandCMP(Node, DAG));
    return;
  case ISD::LOOP_DEPENDENCE_WAR_MASK:
  case ISD::LOOP_DEPENDENCE_RAW_MASK:
    Results.push_back(Elt: ExpandLOOP_DEPENDENCE_MASK(N: Node));
    return;

  case ISD::FADD:
  case ISD::FMUL:
  case ISD::FMA:
  case ISD::FDIV:
  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FNEARBYINT:
  case ISD::FRINT:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::FTRUNC:
  case ISD::FSQRT:
    if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  }

  // No dedicated expansion succeeded: scalarize (unroll) the operation.
  SDValue Unrolled = DAG.UnrollVectorOp(N: Node);
  if (Node->getNumValues() == 1) {
    Results.push_back(Elt: Unrolled);
  } else {
    assert(Node->getNumValues() == Unrolled->getNumValues() &&
           "VectorLegalizer Expand returned wrong number of results!");
    for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
      Results.push_back(Elt: Unrolled.getValue(R: I));
  }
}
1397
// Expand a SELECT whose condition is a scalar but whose operands are vectors.
// Returns an empty SDValue (caller unrolls) when the required bitwise ops or
// splat construction are unavailable.
SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
  // Lower a select instruction where the condition is a scalar and the
  // operands are vectors. Lower this select to VSELECT and implement it
  // using XOR AND OR. The selector bit is broadcasted.
  EVT VT = Node->getValueType(ResNo: 0);
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(Num: 0);
  SDValue Op1 = Node->getOperand(Num: 1);
  SDValue Op2 = Node->getOperand(Num: 2);

  assert(VT.isVector() && !Mask.getValueType().isVector()
         && Op1.getValueType() == Op2.getValueType() && "Invalid type");

  // If we can't even use the basic vector operations of
  // AND,OR,XOR, we will have to scalarize the op.
  // Notice that the operation may be 'promoted' which means that it is
  // 'bitcasted' to another type which is handled.
  // Also, we need to be able to construct a splat vector using either
  // BUILD_VECTOR or SPLAT_VECTOR.
  // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
  // BUILD_VECTOR?
  if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(Op: VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
                                                        : ISD::SPLAT_VECTOR,
                             VT) == TargetLowering::Expand)
    return SDValue();

  // Generate a mask operand.
  EVT MaskTy = VT.changeVectorElementTypeToInteger();

  // What is the size of each element in the vector mask.
  EVT BitTy = MaskTy.getScalarType();

  // Turn the scalar condition into an all-ones / all-zeros element value.
  Mask = DAG.getSelect(DL, VT: BitTy, Cond: Mask, LHS: DAG.getAllOnesConstant(DL, VT: BitTy),
                       RHS: DAG.getConstant(Val: 0, DL, VT: BitTy));

  // Broadcast the mask so that the entire vector is all one or all zero.
  Mask = DAG.getSplat(VT: MaskTy, DL, Op: Mask);

  // Bitcast the operands to be the same type as the mask.
  // This is needed when we select between FP types because
  // the mask is a vector of integers.
  Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op1);
  Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op2);

  SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT: MaskTy);

  // result = (Op1 & Mask) | (Op2 & ~Mask)
  Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op1, N2: Mask);
  Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op2, N2: NotMask);
  SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MaskTy, N1: Op1, N2: Op2);
  return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val);
}
1453
1454SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
1455 EVT VT = Node->getValueType(ResNo: 0);
1456
1457 // Make sure that the SRA and SHL instructions are available.
1458 if (TLI.getOperationAction(Op: ISD::SRA, VT) == TargetLowering::Expand ||
1459 TLI.getOperationAction(Op: ISD::SHL, VT) == TargetLowering::Expand)
1460 return SDValue();
1461
1462 SDLoc DL(Node);
1463 EVT OrigTy = cast<VTSDNode>(Val: Node->getOperand(Num: 1))->getVT();
1464
1465 unsigned BW = VT.getScalarSizeInBits();
1466 unsigned OrigBW = OrigTy.getScalarSizeInBits();
1467 SDValue ShiftSz = DAG.getConstant(Val: BW - OrigBW, DL, VT);
1468
1469 SDValue Op = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Node->getOperand(Num: 0), N2: ShiftSz);
1470 return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Op, N2: ShiftSz);
1471}
1472
// Generically expand a vector anyext in register to a shuffle of the relevant
// lanes into the appropriate locations, with other lanes left undef.
SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  int NumElements = VT.getVectorNumElements();
  SDValue Src = Node->getOperand(Num: 0);
  EVT SrcVT = Src.getValueType();
  int NumSrcElements = SrcVT.getVectorNumElements();

  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
  // into a larger vector type.
  if (SrcVT.bitsLE(VT)) {
    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
           "ANY_EXTEND_VECTOR_INREG vector size mismatch");
    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
    SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(),
                             NumElements: NumSrcElements);
    Src = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: SrcVT), SubVec: Src, Idx: 0);
  }

  // Build a base mask of undef shuffles.
  SmallVector<int, 16> ShuffleMask;
  ShuffleMask.resize(N: NumSrcElements, NV: -1);

  // Place the extended lanes into the correct locations.
  // On big-endian targets the payload lives in the highest-addressed narrow
  // lane of each wide element, hence the EndianOffset adjustment.
  int ExtLaneScale = NumSrcElements / NumElements;
  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
  for (int i = 0; i < NumElements; ++i)
    ShuffleMask[i * ExtLaneScale + EndianOffset] = i;

  return DAG.getNode(
      Opcode: ISD::BITCAST, DL, VT,
      Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Src, N2: DAG.getPOISON(VT: SrcVT), Mask: ShuffleMask));
}
1508
1509SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
1510 SDLoc DL(Node);
1511 EVT VT = Node->getValueType(ResNo: 0);
1512 SDValue Src = Node->getOperand(Num: 0);
1513 EVT SrcVT = Src.getValueType();
1514
1515 // First build an any-extend node which can be legalized above when we
1516 // recurse through it.
1517 SDValue Op = DAG.getNode(Opcode: ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Operand: Src);
1518
1519 // Now we need sign extend. Do this by shifting the elements. Even if these
1520 // aren't legal operations, they have a better chance of being legalized
1521 // without full scalarization than the sign extension does.
1522 unsigned EltWidth = VT.getScalarSizeInBits();
1523 unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
1524 SDValue ShiftAmount = DAG.getConstant(Val: EltWidth - SrcEltWidth, DL, VT);
1525 return DAG.getNode(Opcode: ISD::SRA, DL, VT,
1526 N1: DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Op, N2: ShiftAmount),
1527 N2: ShiftAmount);
1528}
1529
// Generically expand a vector zext in register to a shuffle of the relevant
// lanes into the appropriate locations, a blend of zero into the high bits,
// and a bitcast to the wider element type.
SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(ResNo: 0);
  int NumElements = VT.getVectorNumElements();
  SDValue Src = Node->getOperand(Num: 0);
  EVT SrcVT = Src.getValueType();
  int NumSrcElements = SrcVT.getVectorNumElements();

  // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
  // into a larger vector type.
  if (SrcVT.bitsLE(VT)) {
    assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
           "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
    NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
    SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(),
                             NumElements: NumSrcElements);
    Src = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: SrcVT), SubVec: Src, Idx: 0);
  }

  // Build up a zero vector to blend into this one.
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SrcVT);

  // Shuffle the incoming lanes into the correct position, and pull all other
  // lanes from the zero vector. The mask starts as the identity over the zero
  // vector, then source lanes (offset by NumSrcElements, i.e. the second
  // shuffle operand) are written into the payload positions.
  auto ShuffleMask = llvm::to_vector<16>(Range: llvm::seq<int>(Begin: 0, End: NumSrcElements));

  int ExtLaneScale = NumSrcElements / NumElements;
  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
  for (int i = 0; i < NumElements; ++i)
    ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;

  return DAG.getNode(Opcode: ISD::BITCAST, DL, VT,
                     Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Zero, N2: Src, Mask: ShuffleMask));
}
1567
1568static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
1569 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
1570 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
1571 for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
1572 ShuffleMask.push_back(Elt: (I * ScalarSizeInBytes) + J);
1573}
1574
// Expand vector BSWAP: prefer a byte shuffle when the target supports the
// mask, otherwise the generic shift/mask expansion when the bit ops are
// available; return an empty SDValue to tell the caller to unroll.
SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
  EVT VT = Node->getValueType(ResNo: 0);

  // Scalable vectors can't use shuffle expansion.
  if (VT.isScalableVector())
    return TLI.expandBSWAP(N: Node, DAG);

  // Generate a byte wise shuffle mask for the BSWAP.
  SmallVector<int, 16> ShuffleMask;
  createBSWAPShuffleMask(VT, ShuffleMask);
  EVT ByteVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: ShuffleMask.size());

  // Only emit a shuffle if the mask is legal.
  if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
    SDLoc DL(Node);
    SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0));
    Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getPOISON(VT: ByteVT),
                              Mask: ShuffleMask);
    return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
  }

  // If we have the appropriate vector bit operations, it is better to use them
  // than unrolling and expanding each component.
  if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) &&
      TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT))
    return TLI.expandBSWAP(N: Node, DAG);

  // Otherwise let the caller unroll.
  return SDValue();
}
1607
// Expand vector BITREVERSE. Tries, in order: the generic expansion for
// scalable vectors, unrolling when the scalar op is available, a BSWAP byte
// shuffle + byte-vector BITREVERSE, the generic shift/mask expansion, and
// finally (empty return) full unrolling by the caller.
SDValue VectorLegalizer::ExpandBITREVERSE(SDNode *Node) {
  EVT VT = Node->getValueType(ResNo: 0);

  // We can't unroll or use shuffles for scalable vectors.
  if (VT.isScalableVector())
    return TLI.expandBITREVERSE(N: Node, DAG);

  // If we have the scalar operation, it's probably cheaper to unroll it.
  if (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: VT.getScalarType()))
    return SDValue();

  // If the vector element width is a whole number of bytes, test if its legal
  // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
  // vector. This greatly reduces the number of bit shifts necessary.
  unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
  if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
    SmallVector<int, 16> BSWAPMask;
    createBSWAPShuffleMask(VT, ShuffleMask&: BSWAPMask);

    EVT ByteVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: BSWAPMask.size());
    if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
        (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: ByteVT) ||
         (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT: ByteVT) &&
          TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT: ByteVT) &&
          TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: ByteVT) &&
          TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: ByteVT)))) {
      SDLoc DL(Node);
      SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0));
      Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getPOISON(VT: ByteVT),
                                Mask: BSWAPMask);
      Op = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT: ByteVT, Operand: Op);
      Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
      return Op;
    }
  }

  // If we have the appropriate vector bit operations, it is better to use them
  // than unrolling and expanding each component.
  if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) &&
      TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) &&
      TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT))
    return TLI.expandBITREVERSE(N: Node, DAG);

  // Otherwise unroll.
  return SDValue();
}
1655
// Expand VSELECT as (Op1 & Mask) | (Op2 & ~Mask). Returns an empty SDValue
// (caller unrolls) when the bitwise ops are missing, the boolean contents
// make masking unsafe, or the mask and operand sizes disagree.
SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
  // Implement VSELECT in terms of XOR, AND, OR
  // on platforms which do not support blend natively.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(Num: 0);
  SDValue Op1 = Node->getOperand(Num: 1);
  SDValue Op2 = Node->getOperand(Num: 2);

  EVT VT = Mask.getValueType();

  // If we can't even use the basic vector operations of
  // AND,OR,XOR, we will have to scalarize the op.
  // Notice that the operation may be 'promoted' which means that it is
  // 'bitcasted' to another type which is handled.
  if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand)
    return SDValue();

  // This operation also isn't safe with AND, OR, XOR when the boolean type is
  // 0/1 and the select operands aren't also booleans, as we need an all-ones
  // vector constant to mask with.
  // FIXME: Sign extend 1 to all ones if that's legal on the target.
  auto BoolContents = TLI.getBooleanContents(Type: Op1.getValueType());
  if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
      !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
        Op1.getValueType().getVectorElementType() == MVT::i1))
    return SDValue();

  // If the mask and the type are different sizes, unroll the vector op. This
  // can occur when getSetCCResultType returns something that is different in
  // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
  if (VT.getSizeInBits() != Op1.getValueSizeInBits())
    return SDValue();

  // Bitcast the operands to be the same type as the mask.
  // This is needed when we select between FP types because
  // the mask is a vector of integers.
  Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op1);
  Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op2);

  SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT);

  // result = (Op1 & Mask) | (Op2 & ~Mask)
  Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op1, N2: Mask);
  Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op2, N2: NotMask);
  SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Op1, N2: Op2);
  return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val);
}
1705
1706SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
1707 // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
1708 // do not support it natively.
1709 SDLoc DL(Node);
1710
1711 SDValue Mask = Node->getOperand(Num: 0);
1712 SDValue Op1 = Node->getOperand(Num: 1);
1713 SDValue Op2 = Node->getOperand(Num: 2);
1714 SDValue EVL = Node->getOperand(Num: 3);
1715
1716 EVT VT = Mask.getValueType();
1717
1718 // If we can't even use the basic vector operations of
1719 // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
1720 if (TLI.getOperationAction(Op: ISD::VP_AND, VT) == TargetLowering::Expand ||
1721 TLI.getOperationAction(Op: ISD::VP_XOR, VT) == TargetLowering::Expand ||
1722 TLI.getOperationAction(Op: ISD::VP_OR, VT) == TargetLowering::Expand)
1723 return SDValue();
1724
1725 // This operation also isn't safe when the operands aren't also booleans.
1726 if (Op1.getValueType().getVectorElementType() != MVT::i1)
1727 return SDValue();
1728
1729 SDValue Ones = DAG.getAllOnesConstant(DL, VT);
1730 SDValue NotMask = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT, N1: Mask, N2: Ones, N3: Ones, N4: EVL);
1731
1732 Op1 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op1, N2: Mask, N3: Ones, N4: EVL);
1733 Op2 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op2, N2: NotMask, N3: Ones, N4: EVL);
1734 return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: Op1, N2: Op2, N3: Ones, N4: EVL);
1735}
1736
SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
  // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
  // indices less than the EVL/pivot are true. Combine that with the original
  // mask for a full-length mask. Use a full-length VSELECT to select between
  // the true and false values.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(Num: 0);
  SDValue Op1 = Node->getOperand(Num: 1);
  SDValue Op2 = Node->getOperand(Num: 2);
  SDValue EVL = Node->getOperand(Num: 3);

  EVT MaskVT = Mask.getValueType();
  bool IsFixedLen = MaskVT.isFixedLengthVector();

  // Integer vector with the mask's element count, whose lanes will hold the
  // per-lane indices compared against the splatted EVL below.
  EVT EVLVecVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EVL.getValueType(),
                                  EC: MaskVT.getVectorElementCount());

  // If we can't construct the EVL mask efficiently, it's better to unroll.
  // Fixed-length vectors need BUILD_VECTOR for the step sequence; scalable
  // vectors need both STEP_VECTOR and SPLAT_VECTOR.
  if ((IsFixedLen &&
       !TLI.isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: EVLVecVT)) ||
      (!IsFixedLen &&
       (!TLI.isOperationLegalOrCustom(Op: ISD::STEP_VECTOR, VT: EVLVecVT) ||
        !TLI.isOperationLegalOrCustom(Op: ISD::SPLAT_VECTOR, VT: EVLVecVT))))
    return SDValue();

  // If using a SETCC would result in a different type than the mask type,
  // unroll.
  if (TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
                             VT: EVLVecVT) != MaskVT)
    return SDValue();

  // EVLMask[i] = (i < EVL): lanes at or beyond the pivot become false.
  SDValue StepVec = DAG.getStepVector(DL, ResVT: EVLVecVT);
  SDValue SplatEVL = DAG.getSplat(VT: EVLVecVT, DL, Op: EVL);
  SDValue EVLMask =
      DAG.getSetCC(DL, VT: MaskVT, LHS: StepVec, RHS: SplatEVL, Cond: ISD::CondCode::SETULT);

  // AND with the original mask and select with an ordinary full-length VSELECT.
  SDValue FullMask = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskVT, N1: Mask, N2: EVLMask);
  return DAG.getSelect(DL, VT: Node->getValueType(ResNo: 0), Cond: FullMask, LHS: Op1, RHS: Op2);
}
1777
1778SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
1779 // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
1780 EVT VT = Node->getValueType(ResNo: 0);
1781
1782 unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
1783
1784 if (!TLI.isOperationLegalOrCustom(Op: DivOpc, VT) ||
1785 !TLI.isOperationLegalOrCustom(Op: ISD::VP_MUL, VT) ||
1786 !TLI.isOperationLegalOrCustom(Op: ISD::VP_SUB, VT))
1787 return SDValue();
1788
1789 SDLoc DL(Node);
1790
1791 SDValue Dividend = Node->getOperand(Num: 0);
1792 SDValue Divisor = Node->getOperand(Num: 1);
1793 SDValue Mask = Node->getOperand(Num: 2);
1794 SDValue EVL = Node->getOperand(Num: 3);
1795
1796 // X % Y -> X-X/Y*Y
1797 SDValue Div = DAG.getNode(Opcode: DivOpc, DL, VT, N1: Dividend, N2: Divisor, N3: Mask, N4: EVL);
1798 SDValue Mul = DAG.getNode(Opcode: ISD::VP_MUL, DL, VT, N1: Divisor, N2: Div, N3: Mask, N4: EVL);
1799 return DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: Dividend, N2: Mul, N3: Mask, N4: EVL);
1800}
1801
1802SDValue VectorLegalizer::ExpandVP_FNEG(SDNode *Node) {
1803 EVT VT = Node->getValueType(ResNo: 0);
1804 EVT IntVT = VT.changeVectorElementTypeToInteger();
1805
1806 if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_XOR, VT: IntVT))
1807 return SDValue();
1808
1809 SDValue Mask = Node->getOperand(Num: 1);
1810 SDValue EVL = Node->getOperand(Num: 2);
1811
1812 SDLoc DL(Node);
1813 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
1814 SDValue SignMask = DAG.getConstant(
1815 Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1816 SDValue Xor = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: IntVT, N1: Cast, N2: SignMask, N3: Mask, N4: EVL);
1817 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Xor);
1818}
1819
1820SDValue VectorLegalizer::ExpandVP_FABS(SDNode *Node) {
1821 EVT VT = Node->getValueType(ResNo: 0);
1822 EVT IntVT = VT.changeVectorElementTypeToInteger();
1823
1824 if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_AND, VT: IntVT))
1825 return SDValue();
1826
1827 SDValue Mask = Node->getOperand(Num: 1);
1828 SDValue EVL = Node->getOperand(Num: 2);
1829
1830 SDLoc DL(Node);
1831 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
1832 SDValue ClearSignMask = DAG.getConstant(
1833 Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1834 SDValue ClearSign =
1835 DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Cast, N2: ClearSignMask, N3: Mask, N4: EVL);
1836 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: ClearSign);
1837}
1838
1839SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) {
1840 EVT VT = Node->getValueType(ResNo: 0);
1841
1842 if (VT != Node->getOperand(Num: 1).getValueType())
1843 return SDValue();
1844
1845 EVT IntVT = VT.changeVectorElementTypeToInteger();
1846 if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_AND, VT: IntVT) ||
1847 !TLI.isOperationLegalOrCustom(Op: ISD::VP_XOR, VT: IntVT))
1848 return SDValue();
1849
1850 SDValue Mask = Node->getOperand(Num: 2);
1851 SDValue EVL = Node->getOperand(Num: 3);
1852
1853 SDLoc DL(Node);
1854 SDValue Mag = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
1855 SDValue Sign = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 1));
1856
1857 SDValue SignMask = DAG.getConstant(
1858 Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1859 SDValue SignBit =
1860 DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Sign, N2: SignMask, N3: Mask, N4: EVL);
1861
1862 SDValue ClearSignMask = DAG.getConstant(
1863 Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1864 SDValue ClearedSign =
1865 DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Mag, N2: ClearSignMask, N3: Mask, N4: EVL);
1866
1867 SDValue CopiedSign = DAG.getNode(Opcode: ISD::VP_OR, DL, VT: IntVT, N1: ClearedSign, N2: SignBit,
1868 N3: Mask, N4: EVL, Flags: SDNodeFlags::Disjoint);
1869
1870 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: CopiedSign);
1871}
1872
SDValue VectorLegalizer::ExpandLOOP_DEPENDENCE_MASK(SDNode *N) {
  // Expand LOOP_DEPENDENCE_{WAR,RAW}_MASK into pointer arithmetic plus a
  // GET_ACTIVE_LANE_MASK that enables only the lanes proven not to alias.
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue SourceValue = N->getOperand(Num: 0);
  SDValue SinkValue = N->getOperand(Num: 1);
  SDValue EltSizeInBytes = N->getOperand(Num: 2);

  // Note: The lane offset is scalable if the mask is scalable.
  ElementCount LaneOffsetEC =
      ElementCount::get(MinVal: N->getConstantOperandVal(Num: 3), Scalable: VT.isScalableVT());

  EVT PtrVT = SourceValue->getValueType(ResNo: 0);
  bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;

  // Take the difference between the pointers and divide it by the element
  // size, to see how many lanes separate them.
  SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: PtrVT, N1: SinkValue, N2: SourceValue);
  if (IsReadAfterWrite)
    Diff = DAG.getNode(Opcode: ISD::ABS, DL, VT: PtrVT, Operand: Diff);
  Diff = DAG.getNode(Opcode: ISD::SDIV, DL, VT: PtrVT, N1: Diff, N2: EltSizeInBytes);

  // The pointers do not alias if:
  // * Diff <= 0 (WAR_MASK)
  // * Diff == 0 (RAW_MASK)
  EVT CmpVT =
      TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: PtrVT);
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT: PtrVT);
  SDValue Cmp = DAG.getSetCC(DL, VT: CmpVT, LHS: Diff, RHS: Zero,
                             Cond: IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);

  // The pointers do not alias if:
  // Lane + LaneOffset < Diff (WAR/RAW_MASK)
  SDValue LaneOffset = DAG.getElementCount(DL, VT: PtrVT, EC: LaneOffsetEC);
  // When the no-alias comparison holds, use -1 as the trip-count bound so the
  // resulting lane mask is all-active; otherwise bound it by Diff.
  SDValue MaskN =
      DAG.getSelect(DL, VT: PtrVT, Cond: Cmp, LHS: DAG.getConstant(Val: -1, DL, VT: PtrVT), RHS: Diff);

  return DAG.getNode(Opcode: ISD::GET_ACTIVE_LANE_MASK, DL, VT, N1: LaneOffset, N2: MaskN);
}
1911
1912void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
1913 SmallVectorImpl<SDValue> &Results) {
1914 // Attempt to expand using TargetLowering.
1915 SDValue Result, Chain;
1916 if (TLI.expandFP_TO_UINT(N: Node, Result, Chain, DAG)) {
1917 Results.push_back(Elt: Result);
1918 if (Node->isStrictFPOpcode())
1919 Results.push_back(Elt: Chain);
1920 return;
1921 }
1922
1923 // Otherwise go ahead and unroll.
1924 if (Node->isStrictFPOpcode()) {
1925 UnrollStrictFPOp(Node, Results);
1926 return;
1927 }
1928
1929 Results.push_back(Elt: DAG.UnrollVectorOp(N: Node));
1930}
1931
void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
                                          SmallVectorImpl<SDValue> &Results) {
  // Expand [STRICT_]UINT_TO_FP. After trying the generic TargetLowering
  // expansion, the fallback splits the source into two half-width pieces,
  // converts each with SINT_TO_FP (a half-width value can never appear
  // negative), and recombines them as fHI * 2^(BW/2) + fLO.
  bool IsStrict = Node->isStrictFPOpcode();
  unsigned OpNo = IsStrict ? 1 : 0;
  SDValue Src = Node->getOperand(Num: OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(ResNo: 0);
  SDLoc DL(Node);

  // Attempt to expand using TargetLowering.
  SDValue Result;
  SDValue Chain;
  if (TLI.expandUINT_TO_FP(N: Node, Result, Chain, DAG)) {
    Results.push_back(Elt: Result);
    if (IsStrict)
      Results.push_back(Elt: Chain);
    return;
  }

  // Make sure that the SINT_TO_FP and SRL instructions are available.
  if (((!IsStrict && TLI.getOperationAction(Op: ISD::SINT_TO_FP, VT: SrcVT) ==
                         TargetLowering::Expand) ||
       (IsStrict && TLI.getOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: SrcVT) ==
                        TargetLowering::Expand)) ||
      TLI.getOperationAction(Op: ISD::SRL, VT: SrcVT) == TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }

    Results.push_back(Elt: DAG.UnrollVectorOp(N: Node));
    return;
  }

  unsigned BW = SrcVT.getScalarSizeInBits();
  assert((BW == 64 || BW == 32) &&
         "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");

  // If STRICT_/FMUL is not supported by the target (in case of f16) replace the
  // UINT_TO_FP with a larger float and round to the smaller type
  if ((!IsStrict && !TLI.isOperationLegalOrCustom(Op: ISD::FMUL, VT: DstVT)) ||
      (IsStrict && !TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FMUL, VT: DstVT))) {
    EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64;
    SDValue UIToFP;
    SDValue Result;
    SDValue TargetZero = DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true);
    EVT FloatVecVT = SrcVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: FPVT);
    if (IsStrict) {
      UIToFP = DAG.getNode(Opcode: ISD::STRICT_UINT_TO_FP, DL, ResultTys: {FloatVecVT, MVT::Other},
                           Ops: {Node->getOperand(Num: 0), Src});
      Result = DAG.getNode(Opcode: ISD::STRICT_FP_ROUND, DL, ResultTys: {DstVT, MVT::Other},
                           Ops: {Node->getOperand(Num: 0), UIToFP, TargetZero});
      Results.push_back(Elt: Result);
      Results.push_back(Elt: Result.getValue(R: 1));
    } else {
      UIToFP = DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: FloatVecVT, Operand: Src);
      Result = DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: DstVT, N1: UIToFP, N2: TargetZero);
      Results.push_back(Elt: Result);
    }

    return;
  }

  SDValue HalfWord = DAG.getConstant(Val: BW / 2, DL, VT: SrcVT);

  // Constants to clear the upper part of the word.
  // Notice that we can also use SHL+SHR, but using a constant is slightly
  // faster on x86.
  uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
  SDValue HalfWordMask = DAG.getConstant(Val: HWMask, DL, VT: SrcVT);

  // Two to the power of half-word-size.
  SDValue TWOHW = DAG.getConstantFP(Val: 1ULL << (BW / 2), DL, VT: DstVT);

  // Clear upper part of LO, lower HI
  SDValue HI = DAG.getNode(Opcode: ISD::SRL, DL, VT: SrcVT, N1: Src, N2: HalfWord);
  SDValue LO = DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src, N2: HalfWordMask);

  if (IsStrict) {
    // Convert hi and lo to floats
    // Convert the hi part back to the upper values
    // TODO: Can any fast-math-flags be set on these nodes?
    SDValue fHI = DAG.getNode(Opcode: ISD::STRICT_SINT_TO_FP, DL, ResultTys: {DstVT, MVT::Other},
                              Ops: {Node->getOperand(Num: 0), HI});
    fHI = DAG.getNode(Opcode: ISD::STRICT_FMUL, DL, ResultTys: {DstVT, MVT::Other},
                      Ops: {fHI.getValue(R: 1), fHI, TWOHW});
    SDValue fLO = DAG.getNode(Opcode: ISD::STRICT_SINT_TO_FP, DL, ResultTys: {DstVT, MVT::Other},
                              Ops: {Node->getOperand(Num: 0), LO});

    // Merge the two conversion chains before the final add.
    SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: fHI.getValue(R: 1),
                             N2: fLO.getValue(R: 1));

    // Add the two halves
    SDValue Result =
        DAG.getNode(Opcode: ISD::STRICT_FADD, DL, ResultTys: {DstVT, MVT::Other}, Ops: {TF, fHI, fLO});

    Results.push_back(Elt: Result);
    Results.push_back(Elt: Result.getValue(R: 1));
    return;
  }

  // Convert hi and lo to floats
  // Convert the hi part back to the upper values
  // TODO: Can any fast-math-flags be set on these nodes?
  SDValue fHI = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: DstVT, Operand: HI);
  fHI = DAG.getNode(Opcode: ISD::FMUL, DL, VT: DstVT, N1: fHI, N2: TWOHW);
  SDValue fLO = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: DstVT, Operand: LO);

  // Add the two halves
  Results.push_back(Elt: DAG.getNode(Opcode: ISD::FADD, DL, VT: DstVT, N1: fHI, N2: fLO));
}
2043
2044SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
2045 EVT VT = Node->getValueType(ResNo: 0);
2046 EVT IntVT = VT.changeVectorElementTypeToInteger();
2047
2048 if (!TLI.isOperationLegalOrCustom(Op: ISD::XOR, VT: IntVT))
2049 return SDValue();
2050
2051 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
2052 if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) &&
2053 !VT.isScalableVector())
2054 return SDValue();
2055
2056 SDLoc DL(Node);
2057 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
2058 SDValue SignMask = DAG.getConstant(
2059 Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
2060 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL, VT: IntVT, N1: Cast, N2: SignMask);
2061 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Xor);
2062}
2063
2064SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
2065 EVT VT = Node->getValueType(ResNo: 0);
2066 EVT IntVT = VT.changeVectorElementTypeToInteger();
2067
2068 if (!TLI.isOperationLegalOrCustom(Op: ISD::AND, VT: IntVT))
2069 return SDValue();
2070
2071 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
2072 if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) &&
2073 !VT.isScalableVector())
2074 return SDValue();
2075
2076 SDLoc DL(Node);
2077 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
2078 SDValue ClearSignMask = DAG.getConstant(
2079 Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
2080 SDValue ClearedSign = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Cast, N2: ClearSignMask);
2081 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: ClearedSign);
2082}
2083
SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
  // Expand FCOPYSIGN via integer bit operations: isolate the sign operand's
  // sign bit, clear the magnitude operand's sign bit, and OR them together.
  EVT VT = Node->getValueType(ResNo: 0);
  EVT IntVT = VT.changeVectorElementTypeToInteger();

  // Require matching operand types and legal/custom AND + OR on the integer
  // type; otherwise let the caller handle the node some other way.
  if (VT != Node->getOperand(Num: 1).getValueType() ||
      !TLI.isOperationLegalOrCustom(Op: ISD::AND, VT: IntVT) ||
      !TLI.isOperationLegalOrCustom(Op: ISD::OR, VT: IntVT))
    return SDValue();

  // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
  if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) &&
      !VT.isScalableVector())
    return SDValue();

  SDLoc DL(Node);
  SDValue Mag = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
  SDValue Sign = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 1));

  // Extract the sign bit of the sign operand.
  SDValue SignMask = DAG.getConstant(
      Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
  SDValue SignBit = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Sign, N2: SignMask);

  // Clear the sign bit of the magnitude operand.
  SDValue ClearSignMask = DAG.getConstant(
      Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
  SDValue ClearedSign = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Mag, N2: ClearSignMask);

  // The two pieces share no set bits, hence the Disjoint flag on the OR.
  SDValue CopiedSign = DAG.getNode(Opcode: ISD::OR, DL, VT: IntVT, N1: ClearedSign, N2: SignBit,
                                   Flags: SDNodeFlags::Disjoint);

  return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: CopiedSign);
}
2115
2116void VectorLegalizer::ExpandFSUB(SDNode *Node,
2117 SmallVectorImpl<SDValue> &Results) {
2118 // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
2119 // we can defer this to operation legalization where it will be lowered as
2120 // a+(-b).
2121 EVT VT = Node->getValueType(ResNo: 0);
2122 if (TLI.isOperationLegalOrCustom(Op: ISD::FNEG, VT) &&
2123 TLI.isOperationLegalOrCustom(Op: ISD::FADD, VT))
2124 return; // Defer to LegalizeDAG
2125
2126 if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
2127 Results.push_back(Elt: Expanded);
2128 return;
2129 }
2130
2131 SDValue Tmp = DAG.UnrollVectorOp(N: Node);
2132 Results.push_back(Elt: Tmp);
2133}
2134
void VectorLegalizer::ExpandSETCC(SDNode *Node,
                                  SmallVectorImpl<SDValue> &Results) {
  // Expand [VP_|STRICT_F]SETCC whose condition code needs legalization,
  // either by rewriting the condition code (possibly inverting or swapping
  // operands) or, failing that, by unrolling / lowering to SELECT_CC.
  bool NeedInvert = false;
  bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
  bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
                  Node->getOpcode() == ISD::STRICT_FSETCCS;
  bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
  // Strict nodes carry a chain as operand 0, shifting the others by one.
  unsigned Offset = IsStrict ? 1 : 0;

  SDValue Chain = IsStrict ? Node->getOperand(Num: 0) : SDValue();
  SDValue LHS = Node->getOperand(Num: 0 + Offset);
  SDValue RHS = Node->getOperand(Num: 1 + Offset);
  SDValue CC = Node->getOperand(Num: 2 + Offset);

  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();

  // If the condition code itself does not need expansion, scalarize the
  // whole comparison instead.
  if (TLI.getCondCodeAction(CC: CCCode, VT: OpVT) != TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }
    Results.push_back(Elt: UnrollVSETCC(Node));
    return;
  }

  SDValue Mask, EVL;
  if (IsVP) {
    Mask = Node->getOperand(Num: 3 + Offset);
    EVL = Node->getOperand(Num: 4 + Offset);
  }

  SDLoc dl(Node);
  // May rewrite LHS/RHS/CC in place and set NeedInvert; returns false only
  // when the condition code is completely illegal for this type.
  bool Legalized =
      TLI.LegalizeSetCCCondCode(DAG, VT: Node->getValueType(ResNo: 0), LHS, RHS, CC, Mask,
                                EVL, NeedInvert, dl, Chain, IsSignaling);

  if (Legalized) {
    // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
    // condition code, create a new SETCC node.
    if (CC.getNode()) {
      if (IsStrict) {
        LHS = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VTList: Node->getVTList(),
                          Ops: {Chain, LHS, RHS, CC}, Flags: Node->getFlags());
        Chain = LHS.getValue(R: 1);
      } else if (IsVP) {
        LHS = DAG.getNode(Opcode: ISD::VP_SETCC, DL: dl, VT: Node->getValueType(ResNo: 0),
                          Ops: {LHS, RHS, CC, Mask, EVL}, Flags: Node->getFlags());
      } else {
        LHS = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: Node->getValueType(ResNo: 0), N1: LHS, N2: RHS, N3: CC,
                          Flags: Node->getFlags());
      }
    }

    // If we expanded the SETCC by inverting the condition code, then wrap
    // the existing SETCC in a NOT to restore the intended condition.
    if (NeedInvert) {
      if (!IsVP)
        LHS = DAG.getLogicalNOT(DL: dl, Val: LHS, VT: LHS->getValueType(ResNo: 0));
      else
        LHS = DAG.getVPLogicalNOT(DL: dl, Val: LHS, Mask, EVL, VT: LHS->getValueType(ResNo: 0));
    }
  } else {
    assert(!IsStrict && "Don't know how to expand for strict nodes.");

    // Otherwise, SETCC for the given comparison type must be completely
    // illegal; expand it into a SELECT_CC.
    EVT VT = Node->getValueType(ResNo: 0);
    LHS = DAG.getNode(Opcode: ISD::SELECT_CC, DL: dl, VT, N1: LHS, N2: RHS,
                      N3: DAG.getBoolConstant(V: true, DL: dl, VT, OpVT: LHS.getValueType()),
                      N4: DAG.getBoolConstant(V: false, DL: dl, VT, OpVT: LHS.getValueType()),
                      N5: CC, Flags: Node->getFlags());
  }

  Results.push_back(Elt: LHS);
  if (IsStrict)
    Results.push_back(Elt: Chain);
}
2213
2214void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
2215 SmallVectorImpl<SDValue> &Results) {
2216 SDValue Result, Overflow;
2217 TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
2218 Results.push_back(Elt: Result);
2219 Results.push_back(Elt: Overflow);
2220}
2221
2222void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
2223 SmallVectorImpl<SDValue> &Results) {
2224 SDValue Result, Overflow;
2225 TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
2226 Results.push_back(Elt: Result);
2227 Results.push_back(Elt: Overflow);
2228}
2229
2230void VectorLegalizer::ExpandMULO(SDNode *Node,
2231 SmallVectorImpl<SDValue> &Results) {
2232 SDValue Result, Overflow;
2233 if (!TLI.expandMULO(Node, Result, Overflow, DAG))
2234 std::tie(args&: Result, args&: Overflow) = DAG.UnrollVectorOverflowOp(N: Node);
2235
2236 Results.push_back(Elt: Result);
2237 Results.push_back(Elt: Overflow);
2238}
2239
2240void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
2241 SmallVectorImpl<SDValue> &Results) {
2242 SDNode *N = Node;
2243 if (SDValue Expanded = TLI.expandFixedPointDiv(Opcode: N->getOpcode(), dl: SDLoc(N),
2244 LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Scale: N->getConstantOperandVal(Num: 2), DAG))
2245 Results.push_back(Elt: Expanded);
2246}
2247
2248void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
2249 SmallVectorImpl<SDValue> &Results) {
2250 if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
2251 ExpandUINT_TO_FLOAT(Node, Results);
2252 return;
2253 }
2254 if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
2255 ExpandFP_TO_UINT(Node, Results);
2256 return;
2257 }
2258
2259 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2260 Node->getOpcode() == ISD::STRICT_FSETCCS) {
2261 ExpandSETCC(Node, Results);
2262 return;
2263 }
2264
2265 UnrollStrictFPOp(Node, Results);
2266}
2267
2268void VectorLegalizer::ExpandREM(SDNode *Node,
2269 SmallVectorImpl<SDValue> &Results) {
2270 assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
2271 "Expected REM node");
2272
2273 SDValue Result;
2274 if (!TLI.expandREM(Node, Result, DAG))
2275 Result = DAG.UnrollVectorOp(N: Node);
2276 Results.push_back(Elt: Result);
2277}
2278
2279// Try to expand libm nodes into vector math routine calls. Callers provide the
2280// LibFunc equivalent of the passed in Node, which is used to lookup mappings
2281// within TargetLibraryInfo. The only mappings considered are those where the
2282// result and all operands are the same vector type. While predicated nodes are
2283// not supported, we will emit calls to masked routines by passing in an all
2284// true mask.
bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
                                           SmallVectorImpl<SDValue> &Results) {
  // Chain must be propagated but currently strict fp operations are down
  // converted to their none strict counterpart.
  assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!");

  // Resolve the libcall to a concrete implementation; bail if this target has
  // no vector math routine for it.
  RTLIB::LibcallImpl LCImpl = DAG.getLibcalls().getLibcallImpl(Call: LC);
  if (LCImpl == RTLIB::Unsupported)
    return false;

  EVT VT = Node->getValueType(ResNo: 0);
  const RTLIB::RuntimeLibcallsInfo &RTLCI = TLI.getRuntimeLibcallsInfo();
  LLVMContext &Ctx = *DAG.getContext();

  auto [FuncTy, FuncAttrs] = RTLCI.getFunctionTy(
      Ctx, TT: DAG.getSubtarget().getTargetTriple(), DL: DAG.getDataLayout(), LibcallImpl: LCImpl);

  SDLoc DL(Node);
  TargetLowering::ArgListTy Args;

  // Masked routines take one extra trailing i1-vector parameter.
  bool HasMaskArg = RTLCI.hasVectorMaskArgument(Impl: LCImpl);

  // Sanity check just in case function has unexpected parameters.
  assert(FuncTy->getNumParams() == Node->getNumOperands() + HasMaskArg &&
         EVT::getEVT(FuncTy->getReturnType(), true) == VT &&
         "mismatch in value type and call signature type");

  for (unsigned I = 0, E = FuncTy->getNumParams(); I != E; ++I) {
    Type *ParamTy = FuncTy->getParamType(i: I);

    if (HasMaskArg && I == E - 1) {
      // Last parameter of a masked routine: pass an all-true mask, since the
      // node itself is unpredicated.
      assert(cast<VectorType>(ParamTy)->getElementType()->isIntegerTy(1) &&
             "unexpected vector mask type");
      EVT MaskVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: Ctx, VT);
      Args.emplace_back(args: DAG.getBoolConstant(V: true, DL, VT: MaskVT, OpVT: VT),
                        args: MaskVT.getTypeForEVT(Context&: Ctx));

    } else {
      // Ordinary parameter: forward the node operand unchanged.
      SDValue Op = Node->getOperand(Num: I);
      assert(Op.getValueType() == EVT::getEVT(ParamTy, true) &&
             "mismatch in value type and call argument type");
      Args.emplace_back(args&: Op, args&: ParamTy);
    }
  }

  // Emit a call to the vector function.
  SDValue Callee =
      DAG.getExternalSymbol(LCImpl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
  CallingConv::ID CC = RTLCI.getLibcallImplCallingConv(Call: LCImpl);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CC, ResultType: FuncTy->getReturnType(), Target: Callee, ArgsList: std::move(Args));

  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
  Results.push_back(Elt: CallResult.first);
  return true;
}
2344
void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  // Scalarize a strict FP vector op: perform the operation one element at a
  // time, then rebuild the vector result and merge the per-element chains
  // with a TokenFactor.
  EVT VT = Node->getValueType(ResNo: 0);
  EVT EltVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();
  unsigned NumOpers = Node->getNumOperands();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Strict FSETCC(S) produces its scalar result in the target's setcc result
  // type, not in the vector's element type.
  EVT TmpEltVT = EltVT;
  if (Node->getOpcode() == ISD::STRICT_FSETCC ||
      Node->getOpcode() == ISD::STRICT_FSETCCS)
    TmpEltVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(),
                                      Context&: *DAG.getContext(), VT: TmpEltVT);

  EVT ValueVTs[] = {TmpEltVT, MVT::Other};
  SDValue Chain = Node->getOperand(Num: 0);
  SDLoc dl(Node);

  SmallVector<SDValue, 32> OpValues;
  SmallVector<SDValue, 32> OpChains;
  for (unsigned i = 0; i < NumElems; ++i) {
    SmallVector<SDValue, 4> Opers;
    SDValue Idx = DAG.getVectorIdxConstant(Val: i, DL: dl);

    // The Chain is the first operand.
    Opers.push_back(Elt: Chain);

    // Now process the remaining operands.
    for (unsigned j = 1; j < NumOpers; ++j) {
      SDValue Oper = Node->getOperand(Num: j);
      EVT OperVT = Oper.getValueType();

      // Extract element i from vector operands; scalar operands (e.g.
      // rounding-mode arguments) are passed through as-is.
      if (OperVT.isVector())
        Oper = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl,
                           VT: OperVT.getVectorElementType(), N1: Oper, N2: Idx);

      Opers.push_back(Elt: Oper);
    }

    SDValue ScalarOp = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, ResultTys: ValueVTs, Ops: Opers);
    SDValue ScalarResult = ScalarOp.getValue(R: 0);
    SDValue ScalarChain = ScalarOp.getValue(R: 1);

    // For comparisons, widen the boolean result to the element type as an
    // all-ones/all-zeros value.
    if (Node->getOpcode() == ISD::STRICT_FSETCC ||
        Node->getOpcode() == ISD::STRICT_FSETCCS)
      ScalarResult = DAG.getSelect(DL: dl, VT: EltVT, Cond: ScalarResult,
                                   LHS: DAG.getAllOnesConstant(DL: dl, VT: EltVT),
                                   RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT));

    OpValues.push_back(Elt: ScalarResult);
    OpChains.push_back(Elt: ScalarChain);
  }

  SDValue Result = DAG.getBuildVector(VT, DL: dl, Ops: OpValues);
  SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: OpChains);

  Results.push_back(Elt: Result);
  Results.push_back(Elt: NewChain);
}
2404
SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
  // Scalarize a vector SETCC: compare each pair of elements individually and
  // rebuild the result vector with all-ones/all-zeros elements.
  EVT VT = Node->getValueType(ResNo: 0);
  unsigned NumElems = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDValue LHS = Node->getOperand(Num: 0);
  SDValue RHS = Node->getOperand(Num: 1);
  SDValue CC = Node->getOperand(Num: 2);
  EVT TmpEltVT = LHS.getValueType().getVectorElementType();
  SDLoc dl(Node);
  SmallVector<SDValue, 8> Ops(NumElems);
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue LHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: LHS,
                                  N2: DAG.getVectorIdxConstant(Val: i, DL: dl));
    SDValue RHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: RHS,
                                  N2: DAG.getVectorIdxConstant(Val: i, DL: dl));
    // FIXME: We should use i1 setcc + boolext here, but it causes regressions.
    Ops[i] = DAG.getNode(Opcode: ISD::SETCC, DL: dl,
                         VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(),
                                                 Context&: *DAG.getContext(), VT: TmpEltVT),
                         N1: LHSElem, N2: RHSElem, N3: CC);
    // Widen the scalar boolean to the element type using the target's
    // boolean-contents convention.
    Ops[i] = DAG.getSelect(DL: dl, VT: EltVT, Cond: Ops[i],
                           LHS: DAG.getBoolConstant(V: true, DL: dl, VT: EltVT, OpVT: VT),
                           RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT));
  }
  return DAG.getBuildVector(VT, DL: dl, Ops);
}
2431
2432bool SelectionDAG::LegalizeVectors() {
2433 return VectorLegalizer(*this).Run();
2434}
2435