1//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SelectionDAG::LegalizeVectors method.
10//
11// The vector legalizer looks for vector operations which might need to be
12// scalarized and legalizes them. This is a separate step from Legalize because
13// scalarizing can introduce illegal types. For example, suppose we have an
14// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
15// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
16// operation, which introduces nodes with the illegal type i64 which must be
17// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
18// the operation must be unrolled, which introduces nodes with the illegal
19// type i8 which must be promoted.
20//
21// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
22// or operations that happen to take a vector which are custom-lowered;
23// the legalization for such operations never produces nodes
24// with illegal types, so it's okay to put off legalizing them until
25// SelectionDAG::Legalize runs.
26//
27//===----------------------------------------------------------------------===//
28
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/Analysis/TargetLibraryInfo.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/ISDOpcodes.h"
34#include "llvm/CodeGen/SelectionDAG.h"
35#include "llvm/CodeGen/SelectionDAGNodes.h"
36#include "llvm/CodeGen/TargetLowering.h"
37#include "llvm/CodeGen/ValueTypes.h"
38#include "llvm/CodeGenTypes/MachineValueType.h"
39#include "llvm/IR/DataLayout.h"
40#include "llvm/Support/Casting.h"
41#include "llvm/Support/Compiler.h"
42#include "llvm/Support/Debug.h"
43#include "llvm/Support/ErrorHandling.h"
44#include <cassert>
45#include <cstdint>
46#include <iterator>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "legalizevectorops"
52
53namespace {
54
55class VectorLegalizer {
56 SelectionDAG& DAG;
57 const TargetLowering &TLI;
58 bool Changed = false; // Keep track of whether anything changed
59
60 /// For nodes that are of legal width, and that have more than one use, this
61 /// map indicates what regularized operand to use. This allows us to avoid
62 /// legalizing the same thing more than once.
63 SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
64
65 /// Adds a node to the translation cache.
66 void AddLegalizedOperand(SDValue From, SDValue To) {
67 LegalizedNodes.insert(KV: std::make_pair(x&: From, y&: To));
68 // If someone requests legalization of the new node, return itself.
69 if (From != To)
70 LegalizedNodes.insert(KV: std::make_pair(x&: To, y&: To));
71 }
72
73 /// Legalizes the given node.
74 SDValue LegalizeOp(SDValue Op);
75
76 /// Assuming the node is legal, "legalize" the results.
77 SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
78
79 /// Make sure Results are legal and update the translation cache.
80 SDValue RecursivelyLegalizeResults(SDValue Op,
81 MutableArrayRef<SDValue> Results);
82
83 /// Wrapper to interface LowerOperation with a vector of Results.
84 /// Returns false if the target wants to use default expansion. Otherwise
85 /// returns true. If return is true and the Results are empty, then the
86 /// target wants to keep the input node as is.
87 bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
88
89 /// Implements unrolling a VSETCC.
90 SDValue UnrollVSETCC(SDNode *Node);
91
92 /// Implement expand-based legalization of vector operations.
93 ///
94 /// This is just a high-level routine to dispatch to specific code paths for
95 /// operations to legalize them.
96 void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);
97
98 /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
99 /// FP_TO_SINT isn't legal.
100 void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
101
102 /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
103 /// SINT_TO_FLOAT and SHR on vectors isn't legal.
104 void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
105
106 /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
107 SDValue ExpandSEXTINREG(SDNode *Node);
108
109 /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
110 ///
111 /// Shuffles the low lanes of the operand into place and bitcasts to the proper
112 /// type. The contents of the bits in the extended part of each element are
113 /// undef.
114 SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
115
116 /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
117 ///
118 /// Shuffles the low lanes of the operand into place, bitcasts to the proper
119 /// type, then shifts left and arithmetic shifts right to introduce a sign
120 /// extension.
121 SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
122
123 /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
124 ///
125 /// Shuffles the low lanes of the operand into place and blends zeros into
126 /// the remaining lanes, finally bitcasting to the proper type.
127 SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
128
129 /// Expand bswap of vectors into a shuffle if legal.
130 SDValue ExpandBSWAP(SDNode *Node);
131
132 /// Implement vselect in terms of XOR, AND, OR when blend is not
133 /// supported by the target.
134 SDValue ExpandVSELECT(SDNode *Node);
135 SDValue ExpandVP_SELECT(SDNode *Node);
136 SDValue ExpandVP_MERGE(SDNode *Node);
137 SDValue ExpandVP_REM(SDNode *Node);
138 SDValue ExpandVP_FNEG(SDNode *Node);
139 SDValue ExpandVP_FABS(SDNode *Node);
140 SDValue ExpandVP_FCOPYSIGN(SDNode *Node);
141 SDValue ExpandLOOP_DEPENDENCE_MASK(SDNode *N);
142 SDValue ExpandSELECT(SDNode *Node);
143 std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
144 SDValue ExpandStore(SDNode *N);
145 SDValue ExpandFNEG(SDNode *Node);
146 SDValue ExpandFABS(SDNode *Node);
147 SDValue ExpandFCOPYSIGN(SDNode *Node);
148 void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
149 void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
150 SDValue ExpandBITREVERSE(SDNode *Node);
151 void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
152 void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
153 void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
154 void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
155 void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
156 void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
157
158 bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
159 SmallVectorImpl<SDValue> &Results);
160
161 void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
162
163 /// Implements vector promotion.
164 ///
165 /// This is essentially just bitcasting the operands to a different type and
166 /// bitcasting the result back to the original type.
167 void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);
168
169 /// Implements [SU]INT_TO_FP vector promotion.
170 ///
171 /// This is a [zs]ext of the input operand to a larger integer type.
172 void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
173
174 /// Implements FP_TO_[SU]INT vector promotion of the result type.
175 ///
176 /// It is promoted to a larger integer type. The result is then
177 /// truncated back to the original type.
178 void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
179
180 /// Implements vector setcc operation promotion.
181 ///
182 /// All vector operands are promoted to a vector type with larger element
183 /// type.
184 void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
185
186 void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
187
188 /// Calculate the reduction using a type of higher precision and round the
189 /// result to match the original type. Setting NonArithmetic signifies the
190 /// rounding of the result does not affect its value.
191 void PromoteFloatVECREDUCE(SDNode *Node, SmallVectorImpl<SDValue> &Results,
192 bool NonArithmetic);
193
194 void PromoteVECTOR_COMPRESS(SDNode *Node, SmallVectorImpl<SDValue> &Results);
195
196public:
197 VectorLegalizer(SelectionDAG& dag) :
198 DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
199
200 /// Begin legalizer the vector operations in the DAG.
201 bool Run();
202};
203
204} // end anonymous namespace
205
206bool VectorLegalizer::Run() {
207 // Before we start legalizing vector nodes, check if there are any vectors.
208 bool HasVectors = false;
209 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
210 E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I) {
211 // Check if the values of the nodes contain vectors. We don't need to check
212 // the operands because we are going to check their values at some point.
213 HasVectors = llvm::any_of(Range: I->values(), P: [](EVT T) { return T.isVector(); });
214
215 // If we found a vector node we can start the legalization.
216 if (HasVectors)
217 break;
218 }
219
220 // If this basic block has no vectors then no need to legalize vectors.
221 if (!HasVectors)
222 return false;
223
224 // The legalize process is inherently a bottom-up recursive process (users
225 // legalize their uses before themselves). Given infinite stack space, we
226 // could just start legalizing on the root and traverse the whole graph. In
227 // practice however, this causes us to run out of stack space on large basic
228 // blocks. To avoid this problem, compute an ordering of the nodes where each
229 // node is only legalized after all of its operands are legalized.
230 DAG.AssignTopologicalOrder();
231 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
232 E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I)
233 LegalizeOp(Op: SDValue(&*I, 0));
234
235 // Finally, it's possible the root changed. Get the new root.
236 SDValue OldRoot = DAG.getRoot();
237 assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
238 DAG.setRoot(LegalizedNodes[OldRoot]);
239
240 LegalizedNodes.clear();
241
242 // Remove dead nodes now.
243 DAG.RemoveDeadNodes();
244
245 return Changed;
246}
247
248SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
249 assert(Op->getNumValues() == Result->getNumValues() &&
250 "Unexpected number of results");
251 // Generic legalization: just pass the operand through.
252 for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
253 AddLegalizedOperand(From: Op.getValue(R: i), To: SDValue(Result, i));
254 return SDValue(Result, Op.getResNo());
255}
256
257SDValue
258VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
259 MutableArrayRef<SDValue> Results) {
260 assert(Results.size() == Op->getNumValues() &&
261 "Unexpected number of results");
262 // Make sure that the generated code is itself legal.
263 for (unsigned i = 0, e = Results.size(); i != e; ++i) {
264 Results[i] = LegalizeOp(Op: Results[i]);
265 AddLegalizedOperand(From: Op.getValue(R: i), To: Results[i]);
266 }
267
268 return Results[Op.getResNo()];
269}
270
271SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
272 // Note that LegalizeOp may be reentered even from single-use nodes, which
273 // means that we always must cache transformed nodes.
274 DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Val: Op);
275 if (I != LegalizedNodes.end()) return I->second;
276
277 // Legalize the operands
278 SmallVector<SDValue, 8> Ops;
279 for (const SDValue &Oper : Op->op_values())
280 Ops.push_back(Elt: LegalizeOp(Op: Oper));
281
282 SDNode *Node = DAG.UpdateNodeOperands(N: Op.getNode(), Ops);
283
284 bool HasVectorValueOrOp =
285 llvm::any_of(Range: Node->values(), P: [](EVT T) { return T.isVector(); }) ||
286 llvm::any_of(Range: Node->op_values(),
287 P: [](SDValue O) { return O.getValueType().isVector(); });
288 if (!HasVectorValueOrOp)
289 return TranslateLegalizeResults(Op, Result: Node);
290
291 TargetLowering::LegalizeAction Action = TargetLowering::Legal;
292 EVT ValVT;
293 switch (Op.getOpcode()) {
294 default:
295 return TranslateLegalizeResults(Op, Result: Node);
296 case ISD::LOAD: {
297 LoadSDNode *LD = cast<LoadSDNode>(Val: Node);
298 ISD::LoadExtType ExtType = LD->getExtensionType();
299 EVT LoadedVT = LD->getMemoryVT();
300 if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
301 Action = TLI.getLoadExtAction(ExtType, ValVT: LD->getValueType(ResNo: 0), MemVT: LoadedVT);
302 break;
303 }
304 case ISD::STORE: {
305 StoreSDNode *ST = cast<StoreSDNode>(Val: Node);
306 EVT StVT = ST->getMemoryVT();
307 MVT ValVT = ST->getValue().getSimpleValueType();
308 if (StVT.isVector() && ST->isTruncatingStore())
309 Action = TLI.getTruncStoreAction(ValVT, MemVT: StVT);
310 break;
311 }
312 case ISD::MERGE_VALUES:
313 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0));
314 // This operation lies about being legal: when it claims to be legal,
315 // it should actually be expanded.
316 if (Action == TargetLowering::Legal)
317 Action = TargetLowering::Expand;
318 break;
319#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
320 case ISD::STRICT_##DAGN:
321#include "llvm/IR/ConstrainedOps.def"
322 ValVT = Node->getValueType(ResNo: 0);
323 if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
324 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
325 ValVT = Node->getOperand(Num: 1).getValueType();
326 if (Op.getOpcode() == ISD::STRICT_FSETCC ||
327 Op.getOpcode() == ISD::STRICT_FSETCCS) {
328 MVT OpVT = Node->getOperand(Num: 1).getSimpleValueType();
329 ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 3))->get();
330 Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT);
331 if (Action == TargetLowering::Legal)
332 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT);
333 } else {
334 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: ValVT);
335 }
336 // If we're asked to expand a strict vector floating-point operation,
337 // by default we're going to simply unroll it. That is usually the
338 // best approach, except in the case where the resulting strict (scalar)
339 // operations would themselves use the fallback mutation to non-strict.
340 // In that specific case, just do the fallback on the vector op.
341 if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
342 TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: ValVT) ==
343 TargetLowering::Legal) {
344 EVT EltVT = ValVT.getVectorElementType();
345 if (TLI.getOperationAction(Op: Node->getOpcode(), VT: EltVT)
346 == TargetLowering::Expand &&
347 TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: EltVT)
348 == TargetLowering::Legal)
349 Action = TargetLowering::Legal;
350 }
351 break;
352 case ISD::ADD:
353 case ISD::SUB:
354 case ISD::MUL:
355 case ISD::MULHS:
356 case ISD::MULHU:
357 case ISD::SDIV:
358 case ISD::UDIV:
359 case ISD::SREM:
360 case ISD::UREM:
361 case ISD::SDIVREM:
362 case ISD::UDIVREM:
363 case ISD::FADD:
364 case ISD::FSUB:
365 case ISD::FMUL:
366 case ISD::FDIV:
367 case ISD::FREM:
368 case ISD::AND:
369 case ISD::OR:
370 case ISD::XOR:
371 case ISD::SHL:
372 case ISD::SRA:
373 case ISD::SRL:
374 case ISD::FSHL:
375 case ISD::FSHR:
376 case ISD::ROTL:
377 case ISD::ROTR:
378 case ISD::ABS:
379 case ISD::ABDS:
380 case ISD::ABDU:
381 case ISD::AVGCEILS:
382 case ISD::AVGCEILU:
383 case ISD::AVGFLOORS:
384 case ISD::AVGFLOORU:
385 case ISD::BSWAP:
386 case ISD::BITREVERSE:
387 case ISD::CTLZ:
388 case ISD::CTTZ:
389 case ISD::CTLZ_ZERO_UNDEF:
390 case ISD::CTTZ_ZERO_UNDEF:
391 case ISD::CTPOP:
392 case ISD::SELECT:
393 case ISD::VSELECT:
394 case ISD::SELECT_CC:
395 case ISD::ZERO_EXTEND:
396 case ISD::ANY_EXTEND:
397 case ISD::TRUNCATE:
398 case ISD::SIGN_EXTEND:
399 case ISD::FP_TO_SINT:
400 case ISD::FP_TO_UINT:
401 case ISD::FNEG:
402 case ISD::FABS:
403 case ISD::FMINNUM:
404 case ISD::FMAXNUM:
405 case ISD::FMINNUM_IEEE:
406 case ISD::FMAXNUM_IEEE:
407 case ISD::FMINIMUM:
408 case ISD::FMAXIMUM:
409 case ISD::FMINIMUMNUM:
410 case ISD::FMAXIMUMNUM:
411 case ISD::FCOPYSIGN:
412 case ISD::FSQRT:
413 case ISD::FSIN:
414 case ISD::FCOS:
415 case ISD::FTAN:
416 case ISD::FASIN:
417 case ISD::FACOS:
418 case ISD::FATAN:
419 case ISD::FATAN2:
420 case ISD::FSINH:
421 case ISD::FCOSH:
422 case ISD::FTANH:
423 case ISD::FLDEXP:
424 case ISD::FPOWI:
425 case ISD::FPOW:
426 case ISD::FLOG:
427 case ISD::FLOG2:
428 case ISD::FLOG10:
429 case ISD::FEXP:
430 case ISD::FEXP2:
431 case ISD::FEXP10:
432 case ISD::FCEIL:
433 case ISD::FTRUNC:
434 case ISD::FRINT:
435 case ISD::FNEARBYINT:
436 case ISD::FROUND:
437 case ISD::FROUNDEVEN:
438 case ISD::FFLOOR:
439 case ISD::FP_ROUND:
440 case ISD::FP_EXTEND:
441 case ISD::FPTRUNC_ROUND:
442 case ISD::FMA:
443 case ISD::SIGN_EXTEND_INREG:
444 case ISD::ANY_EXTEND_VECTOR_INREG:
445 case ISD::SIGN_EXTEND_VECTOR_INREG:
446 case ISD::ZERO_EXTEND_VECTOR_INREG:
447 case ISD::SMIN:
448 case ISD::SMAX:
449 case ISD::UMIN:
450 case ISD::UMAX:
451 case ISD::SMUL_LOHI:
452 case ISD::UMUL_LOHI:
453 case ISD::SADDO:
454 case ISD::UADDO:
455 case ISD::SSUBO:
456 case ISD::USUBO:
457 case ISD::SMULO:
458 case ISD::UMULO:
459 case ISD::FCANONICALIZE:
460 case ISD::FFREXP:
461 case ISD::FMODF:
462 case ISD::FSINCOS:
463 case ISD::FSINCOSPI:
464 case ISD::SADDSAT:
465 case ISD::UADDSAT:
466 case ISD::SSUBSAT:
467 case ISD::USUBSAT:
468 case ISD::SSHLSAT:
469 case ISD::USHLSAT:
470 case ISD::FP_TO_SINT_SAT:
471 case ISD::FP_TO_UINT_SAT:
472 case ISD::MGATHER:
473 case ISD::VECTOR_COMPRESS:
474 case ISD::SCMP:
475 case ISD::UCMP:
476 case ISD::LOOP_DEPENDENCE_WAR_MASK:
477 case ISD::LOOP_DEPENDENCE_RAW_MASK:
478 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0));
479 break;
480 case ISD::SMULFIX:
481 case ISD::SMULFIXSAT:
482 case ISD::UMULFIX:
483 case ISD::UMULFIXSAT:
484 case ISD::SDIVFIX:
485 case ISD::SDIVFIXSAT:
486 case ISD::UDIVFIX:
487 case ISD::UDIVFIXSAT: {
488 unsigned Scale = Node->getConstantOperandVal(Num: 2);
489 Action = TLI.getFixedPointOperationAction(Op: Node->getOpcode(),
490 VT: Node->getValueType(ResNo: 0), Scale);
491 break;
492 }
493 case ISD::LROUND:
494 case ISD::LLROUND:
495 case ISD::LRINT:
496 case ISD::LLRINT:
497 case ISD::SINT_TO_FP:
498 case ISD::UINT_TO_FP:
499 case ISD::VECREDUCE_ADD:
500 case ISD::VECREDUCE_MUL:
501 case ISD::VECREDUCE_AND:
502 case ISD::VECREDUCE_OR:
503 case ISD::VECREDUCE_XOR:
504 case ISD::VECREDUCE_SMAX:
505 case ISD::VECREDUCE_SMIN:
506 case ISD::VECREDUCE_UMAX:
507 case ISD::VECREDUCE_UMIN:
508 case ISD::VECREDUCE_FADD:
509 case ISD::VECREDUCE_FMAX:
510 case ISD::VECREDUCE_FMAXIMUM:
511 case ISD::VECREDUCE_FMIN:
512 case ISD::VECREDUCE_FMINIMUM:
513 case ISD::VECREDUCE_FMUL:
514 case ISD::VECTOR_FIND_LAST_ACTIVE:
515 Action = TLI.getOperationAction(Op: Node->getOpcode(),
516 VT: Node->getOperand(Num: 0).getValueType());
517 break;
518 case ISD::VECREDUCE_SEQ_FADD:
519 case ISD::VECREDUCE_SEQ_FMUL:
520 Action = TLI.getOperationAction(Op: Node->getOpcode(),
521 VT: Node->getOperand(Num: 1).getValueType());
522 break;
523 case ISD::SETCC: {
524 MVT OpVT = Node->getOperand(Num: 0).getSimpleValueType();
525 ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 2))->get();
526 Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT);
527 if (Action == TargetLowering::Legal)
528 Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT);
529 break;
530 }
531 case ISD::PARTIAL_REDUCE_UMLA:
532 case ISD::PARTIAL_REDUCE_SMLA:
533 case ISD::PARTIAL_REDUCE_SUMLA:
534 case ISD::PARTIAL_REDUCE_FMLA:
535 Action =
536 TLI.getPartialReduceMLAAction(Opc: Op.getOpcode(), AccVT: Node->getValueType(ResNo: 0),
537 InputVT: Node->getOperand(Num: 1).getValueType());
538 break;
539
540#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \
541 case ISD::VPID: { \
542 EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
543 : Node->getOperand(LEGALPOS).getValueType(); \
544 if (ISD::VPID == ISD::VP_SETCC) { \
545 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
546 Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \
547 if (Action != TargetLowering::Legal) \
548 break; \
549 } \
550 /* Defer non-vector results to LegalizeDAG. */ \
551 if (!Node->getValueType(0).isVector() && \
552 Node->getValueType(0) != MVT::Other) { \
553 Action = TargetLowering::Legal; \
554 break; \
555 } \
556 Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
557 } break;
558#include "llvm/IR/VPIntrinsics.def"
559 }
560
561 LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
562
563 SmallVector<SDValue, 8> ResultVals;
564 switch (Action) {
565 default: llvm_unreachable("This action is not supported yet!");
566 case TargetLowering::Promote:
567 assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
568 "This action is not supported yet!");
569 LLVM_DEBUG(dbgs() << "Promoting\n");
570 Promote(Node, Results&: ResultVals);
571 assert(!ResultVals.empty() && "No results for promotion?");
572 break;
573 case TargetLowering::Legal:
574 LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
575 break;
576 case TargetLowering::Custom:
577 LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
578 if (LowerOperationWrapper(N: Node, Results&: ResultVals))
579 break;
580 LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
581 [[fallthrough]];
582 case TargetLowering::Expand:
583 LLVM_DEBUG(dbgs() << "Expanding\n");
584 Expand(Node, Results&: ResultVals);
585 break;
586 }
587
588 if (ResultVals.empty())
589 return TranslateLegalizeResults(Op, Result: Node);
590
591 Changed = true;
592 return RecursivelyLegalizeResults(Op, Results: ResultVals);
593}
594
595// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
596// merge them somehow?
597bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
598 SmallVectorImpl<SDValue> &Results) {
599 SDValue Res = TLI.LowerOperation(Op: SDValue(Node, 0), DAG);
600
601 if (!Res.getNode())
602 return false;
603
604 if (Res == SDValue(Node, 0))
605 return true;
606
607 // If the original node has one result, take the return value from
608 // LowerOperation as is. It might not be result number 0.
609 if (Node->getNumValues() == 1) {
610 Results.push_back(Elt: Res);
611 return true;
612 }
613
614 // If the original node has multiple results, then the return node should
615 // have the same number of results.
616 assert((Node->getNumValues() == Res->getNumValues()) &&
617 "Lowering returned the wrong number of results!");
618
619 // Places new result values base on N result number.
620 for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
621 Results.push_back(Elt: Res.getValue(R: I));
622
623 return true;
624}
625
626void VectorLegalizer::PromoteSETCC(SDNode *Node,
627 SmallVectorImpl<SDValue> &Results) {
628 MVT VecVT = Node->getOperand(Num: 0).getSimpleValueType();
629 MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT);
630
631 unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
632
633 SDLoc DL(Node);
634 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
635
636 Operands[0] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 0));
637 Operands[1] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 1));
638 Operands[2] = Node->getOperand(Num: 2);
639
640 if (Node->getOpcode() == ISD::VP_SETCC) {
641 Operands[3] = Node->getOperand(Num: 3); // mask
642 Operands[4] = Node->getOperand(Num: 4); // evl
643 }
644
645 SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL, VT: Node->getSimpleValueType(ResNo: 0),
646 Ops: Operands, Flags: Node->getFlags());
647
648 Results.push_back(Elt: Res);
649}
650
651void VectorLegalizer::PromoteSTRICT(SDNode *Node,
652 SmallVectorImpl<SDValue> &Results) {
653 MVT VecVT = Node->getOperand(Num: 1).getSimpleValueType();
654 MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT);
655
656 assert(VecVT.isFloatingPoint());
657
658 SDLoc DL(Node);
659 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
660 SmallVector<SDValue, 2> Chains;
661
662 for (unsigned j = 1; j != Node->getNumOperands(); ++j)
663 if (Node->getOperand(Num: j).getValueType().isVector() &&
664 !(ISD::isVPOpcode(Opcode: Node->getOpcode()) &&
665 ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j)) // Skip mask operand.
666 {
667 // promote the vector operand.
668 SDValue Ext =
669 DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL, ResultTys: {NewVecVT, MVT::Other},
670 Ops: {Node->getOperand(Num: 0), Node->getOperand(Num: j)});
671 Operands[j] = Ext.getValue(R: 0);
672 Chains.push_back(Elt: Ext.getValue(R: 1));
673 } else
674 Operands[j] = Node->getOperand(Num: j); // Skip no vector operand.
675
676 SDVTList VTs = DAG.getVTList(VT1: NewVecVT, VT2: Node->getValueType(ResNo: 1));
677
678 Operands[0] = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: Chains);
679
680 SDValue Res =
681 DAG.getNode(Opcode: Node->getOpcode(), DL, VTList: VTs, Ops: Operands, Flags: Node->getFlags());
682
683 SDValue Round =
684 DAG.getNode(Opcode: ISD::STRICT_FP_ROUND, DL, ResultTys: {VecVT, MVT::Other},
685 Ops: {Res.getValue(R: 1), Res.getValue(R: 0),
686 DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)});
687
688 Results.push_back(Elt: Round.getValue(R: 0));
689 Results.push_back(Elt: Round.getValue(R: 1));
690}
691
692void VectorLegalizer::PromoteFloatVECREDUCE(SDNode *Node,
693 SmallVectorImpl<SDValue> &Results,
694 bool NonArithmetic) {
695 MVT OpVT = Node->getOperand(Num: 0).getSimpleValueType();
696 assert(OpVT.isFloatingPoint() && "Expected floating point reduction!");
697 MVT NewOpVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: OpVT);
698
699 SDLoc DL(Node);
700 SDValue NewOp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NewOpVT, Operand: Node->getOperand(Num: 0));
701 SDValue Rdx =
702 DAG.getNode(Opcode: Node->getOpcode(), DL, VT: NewOpVT.getVectorElementType(), Operand: NewOp,
703 Flags: Node->getFlags());
704 SDValue Res =
705 DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Node->getValueType(ResNo: 0), N1: Rdx,
706 N2: DAG.getIntPtrConstant(Val: NonArithmetic, DL, /*isTarget=*/true));
707 Results.push_back(Elt: Res);
708}
709
710void VectorLegalizer::PromoteVECTOR_COMPRESS(
711 SDNode *Node, SmallVectorImpl<SDValue> &Results) {
712 SDLoc DL(Node);
713 EVT VT = Node->getValueType(ResNo: 0);
714 MVT PromotedVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VT.getSimpleVT());
715 assert((VT.isInteger() || VT.getSizeInBits() == PromotedVT.getSizeInBits()) &&
716 "Only integer promotion or bitcasts between types is supported");
717
718 SDValue Vec = Node->getOperand(Num: 0);
719 SDValue Mask = Node->getOperand(Num: 1);
720 SDValue Passthru = Node->getOperand(Num: 2);
721 if (VT.isInteger()) {
722 Vec = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: PromotedVT, Operand: Vec);
723 Mask = TLI.promoteTargetBoolean(DAG, Bool: Mask, ValVT: PromotedVT);
724 Passthru = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: PromotedVT, Operand: Passthru);
725 } else {
726 Vec = DAG.getBitcast(VT: PromotedVT, V: Vec);
727 Passthru = DAG.getBitcast(VT: PromotedVT, V: Passthru);
728 }
729
730 SDValue Result =
731 DAG.getNode(Opcode: ISD::VECTOR_COMPRESS, DL, VT: PromotedVT, N1: Vec, N2: Mask, N3: Passthru);
732 Result = VT.isInteger() ? DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Result)
733 : DAG.getBitcast(VT, V: Result);
734 Results.push_back(Elt: Result);
735}
736
737void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
738 // For a few operations there is a specific concept for promotion based on
739 // the operand's type.
740 switch (Node->getOpcode()) {
741 case ISD::SINT_TO_FP:
742 case ISD::UINT_TO_FP:
743 case ISD::STRICT_SINT_TO_FP:
744 case ISD::STRICT_UINT_TO_FP:
745 // "Promote" the operation by extending the operand.
746 PromoteINT_TO_FP(Node, Results);
747 return;
748 case ISD::FP_TO_UINT:
749 case ISD::FP_TO_SINT:
750 case ISD::STRICT_FP_TO_UINT:
751 case ISD::STRICT_FP_TO_SINT:
752 // Promote the operation by extending the operand.
753 PromoteFP_TO_INT(Node, Results);
754 return;
755 case ISD::VP_SETCC:
756 case ISD::SETCC:
757 // Promote the operation by extending the operand.
758 PromoteSETCC(Node, Results);
759 return;
760 case ISD::STRICT_FADD:
761 case ISD::STRICT_FSUB:
762 case ISD::STRICT_FMUL:
763 case ISD::STRICT_FDIV:
764 case ISD::STRICT_FSQRT:
765 case ISD::STRICT_FMA:
766 PromoteSTRICT(Node, Results);
767 return;
768 case ISD::VECREDUCE_FADD:
769 PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/false);
770 return;
771 case ISD::VECREDUCE_FMAX:
772 case ISD::VECREDUCE_FMAXIMUM:
773 case ISD::VECREDUCE_FMIN:
774 case ISD::VECREDUCE_FMINIMUM:
775 PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/true);
776 return;
777 case ISD::VECTOR_COMPRESS:
778 PromoteVECTOR_COMPRESS(Node, Results);
779 return;
780
781 case ISD::FP_ROUND:
782 case ISD::FP_EXTEND:
783 // These operations are used to do promotion so they can't be promoted
784 // themselves.
785 llvm_unreachable("Don't know how to promote this operation!");
786 case ISD::VP_FABS:
787 case ISD::VP_FCOPYSIGN:
788 case ISD::VP_FNEG:
789 // Promoting fabs, fneg, and fcopysign changes their semantics.
790 llvm_unreachable("These operations should not be promoted");
791 }
792
793 // There are currently two cases of vector promotion:
794 // 1) Bitcasting a vector of integers to a different type to a vector of the
795 // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
796 // 2) Extending a vector of floats to a vector of the same number of larger
797 // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
798 assert(Node->getNumValues() == 1 &&
799 "Can't promote a vector with multiple results!");
800 MVT VT = Node->getSimpleValueType(ResNo: 0);
801 MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
802 SDLoc dl(Node);
803 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
804
805 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
806 // Do not promote the mask operand of a VP OP.
807 bool SkipPromote = ISD::isVPOpcode(Opcode: Node->getOpcode()) &&
808 ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j;
809 if (Node->getOperand(Num: j).getValueType().isVector() && !SkipPromote)
810 if (Node->getOperand(Num: j)
811 .getValueType()
812 .getVectorElementType()
813 .isFloatingPoint() &&
814 NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
815 if (ISD::isVPOpcode(Opcode: Node->getOpcode())) {
816 unsigned EVLIdx =
817 *ISD::getVPExplicitVectorLengthIdx(Opcode: Node->getOpcode());
818 unsigned MaskIdx = *ISD::getVPMaskIdx(Opcode: Node->getOpcode());
819 Operands[j] =
820 DAG.getNode(Opcode: ISD::VP_FP_EXTEND, DL: dl, VT: NVT, N1: Node->getOperand(Num: j),
821 N2: Node->getOperand(Num: MaskIdx), N3: Node->getOperand(Num: EVLIdx));
822 } else {
823 Operands[j] =
824 DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
825 }
826 else
827 Operands[j] = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
828 else
829 Operands[j] = Node->getOperand(Num: j);
830 }
831
832 SDValue Res =
833 DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: NVT, Ops: Operands, Flags: Node->getFlags());
834
835 if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
836 (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
837 NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
838 if (ISD::isVPOpcode(Opcode: Node->getOpcode())) {
839 unsigned EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Opcode: Node->getOpcode());
840 unsigned MaskIdx = *ISD::getVPMaskIdx(Opcode: Node->getOpcode());
841 Res = DAG.getNode(Opcode: ISD::VP_FP_ROUND, DL: dl, VT, N1: Res,
842 N2: Node->getOperand(Num: MaskIdx), N3: Node->getOperand(Num: EVLIdx));
843 } else {
844 Res = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT, N1: Res,
845 N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true));
846 }
847 else
848 Res = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Res);
849
850 Results.push_back(Elt: Res);
851}
852
853void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
854 SmallVectorImpl<SDValue> &Results) {
855 // INT_TO_FP operations may require the input operand be promoted even
856 // when the type is otherwise legal.
857 bool IsStrict = Node->isStrictFPOpcode();
858 MVT VT = Node->getOperand(Num: IsStrict ? 1 : 0).getSimpleValueType();
859 MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
860 assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
861 "Vectors have different number of elements!");
862
863 SDLoc dl(Node);
864 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
865
866 unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
867 Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
868 ? ISD::ZERO_EXTEND
869 : ISD::SIGN_EXTEND;
870 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
871 if (Node->getOperand(Num: j).getValueType().isVector())
872 Operands[j] = DAG.getNode(Opcode: Opc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j));
873 else
874 Operands[j] = Node->getOperand(Num: j);
875 }
876
877 if (IsStrict) {
878 SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL: dl,
879 ResultTys: {Node->getValueType(ResNo: 0), MVT::Other}, Ops: Operands);
880 Results.push_back(Elt: Res);
881 Results.push_back(Elt: Res.getValue(R: 1));
882 return;
883 }
884
885 SDValue Res =
886 DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: Node->getValueType(ResNo: 0), Ops: Operands);
887 Results.push_back(Elt: Res);
888}
889
// For FP_TO_INT we promote the result type to a vector type with wider
// elements and then truncate the result. This is different from the default
// PromoteVector which uses bitcast to promote, thus assuming that the
// promoted vector type has the same overall size.
894void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
895 SmallVectorImpl<SDValue> &Results) {
896 MVT VT = Node->getSimpleValueType(ResNo: 0);
897 MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT);
898 bool IsStrict = Node->isStrictFPOpcode();
899 assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
900 "Vectors have different number of elements!");
901
902 unsigned NewOpc = Node->getOpcode();
903 // Change FP_TO_UINT to FP_TO_SINT if possible.
904 // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
905 if (NewOpc == ISD::FP_TO_UINT &&
906 TLI.isOperationLegalOrCustom(Op: ISD::FP_TO_SINT, VT: NVT))
907 NewOpc = ISD::FP_TO_SINT;
908
909 if (NewOpc == ISD::STRICT_FP_TO_UINT &&
910 TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FP_TO_SINT, VT: NVT))
911 NewOpc = ISD::STRICT_FP_TO_SINT;
912
913 SDLoc dl(Node);
914 SDValue Promoted, Chain;
915 if (IsStrict) {
916 Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, ResultTys: {NVT, MVT::Other},
917 Ops: {Node->getOperand(Num: 0), Node->getOperand(Num: 1)});
918 Chain = Promoted.getValue(R: 1);
919 } else
920 Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: 0));
921
922 // Assert that the converted value fits in the original type. If it doesn't
923 // (eg: because the value being converted is too big), then the result of the
924 // original operation was undefined anyway, so the assert is still correct.
925 if (Node->getOpcode() == ISD::FP_TO_UINT ||
926 Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
927 NewOpc = ISD::AssertZext;
928 else
929 NewOpc = ISD::AssertSext;
930
931 Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, N1: Promoted,
932 N2: DAG.getValueType(VT.getScalarType()));
933 Promoted = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Promoted);
934 Results.push_back(Elt: Promoted);
935 if (IsStrict)
936 Results.push_back(Elt: Chain);
937}
938
939std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
940 LoadSDNode *LD = cast<LoadSDNode>(Val: N);
941 return TLI.scalarizeVectorLoad(LD, DAG);
942}
943
944SDValue VectorLegalizer::ExpandStore(SDNode *N) {
945 StoreSDNode *ST = cast<StoreSDNode>(Val: N);
946 SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
947 return TF;
948}
949
void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  // Dispatch to an opcode-specific expansion. Each case either pushes its
  // expanded result(s) into Results and returns, or breaks out of the
  // switch to fall through to the generic unroll-to-scalars fallback at the
  // bottom of this function.
  switch (Node->getOpcode()) {
  case ISD::LOAD: {
    std::pair<SDValue, SDValue> Tmp = ExpandLoad(N: Node);
    Results.push_back(Elt: Tmp.first);
    Results.push_back(Elt: Tmp.second);
    return;
  }
  case ISD::STORE:
    Results.push_back(Elt: ExpandStore(N: Node));
    return;
  case ISD::MERGE_VALUES:
    // MERGE_VALUES simply forwards each operand as the corresponding result.
    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
      Results.push_back(Elt: Node->getOperand(Num: i));
    return;
  case ISD::SIGN_EXTEND_INREG:
    if (SDValue Expanded = ExpandSEXTINREG(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::ANY_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandANY_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandSIGN_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    Results.push_back(Elt: ExpandZERO_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::BSWAP:
    if (SDValue Expanded = ExpandBSWAP(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_BSWAP:
    Results.push_back(Elt: TLI.expandVPBSWAP(N: Node, DAG));
    return;
  case ISD::VSELECT:
    if (SDValue Expanded = ExpandVSELECT(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_SELECT:
    if (SDValue Expanded = ExpandVP_SELECT(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_SREM:
  case ISD::VP_UREM:
    if (SDValue Expanded = ExpandVP_REM(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_FNEG:
    if (SDValue Expanded = ExpandVP_FNEG(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_FABS:
    if (SDValue Expanded = ExpandVP_FABS(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_FCOPYSIGN:
    if (SDValue Expanded = ExpandVP_FCOPYSIGN(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SELECT:
    if (SDValue Expanded = ExpandSELECT(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SELECT_CC: {
    // For scalable vectors SELECT_CC is split into an explicit SETCC plus a
    // select, since the unroll fallback below cannot handle scalable types.
    if (Node->getValueType(ResNo: 0).isScalableVector()) {
      EVT CondVT = TLI.getSetCCResultType(
          DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0));
      SDValue SetCC =
          DAG.getNode(Opcode: ISD::SETCC, DL: SDLoc(Node), VT: CondVT, N1: Node->getOperand(Num: 0),
                      N2: Node->getOperand(Num: 1), N3: Node->getOperand(Num: 4));
      Results.push_back(Elt: DAG.getSelect(DL: SDLoc(Node), VT: Node->getValueType(ResNo: 0), Cond: SetCC,
                                      LHS: Node->getOperand(Num: 2),
                                      RHS: Node->getOperand(Num: 3)));
      return;
    }
    break;
  }
  case ISD::FP_TO_UINT:
    ExpandFP_TO_UINT(Node, Results);
    return;
  case ISD::UINT_TO_FP:
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  case ISD::FNEG:
    if (SDValue Expanded = ExpandFNEG(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FABS:
    if (SDValue Expanded = ExpandFABS(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FCOPYSIGN:
    if (SDValue Expanded = ExpandFCOPYSIGN(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FSUB:
    ExpandFSUB(Node, Results);
    return;
  case ISD::SETCC:
  case ISD::VP_SETCC:
    ExpandSETCC(Node, Results);
    return;
  case ISD::ABS:
    if (SDValue Expanded = TLI.expandABS(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::ABDS:
  case ISD::ABDU:
    if (SDValue Expanded = TLI.expandABD(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
    if (SDValue Expanded = TLI.expandAVG(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::BITREVERSE:
    if (SDValue Expanded = ExpandBITREVERSE(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_BITREVERSE:
    if (SDValue Expanded = TLI.expandVPBITREVERSE(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTPOP:
    if (SDValue Expanded = TLI.expandCTPOP(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTPOP:
    if (SDValue Expanded = TLI.expandVPCTPOP(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTLZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTLZ:
  case ISD::VP_CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTLZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTTZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::VP_CTTZ:
  case ISD::VP_CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTTZ(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FSHL:
  case ISD::VP_FSHL:
  case ISD::FSHR:
  case ISD::VP_FSHR:
    if (SDValue Expanded = TLI.expandFunnelShift(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::CLMUL:
  case ISD::CLMULR:
  case ISD::CLMULH:
    if (SDValue Expanded = TLI.expandCLMUL(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    if (SDValue Expanded = TLI.expandROT(N: Node, AllowVectorOps: false /*AllowVectorOps*/, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
    if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(N: Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
    Results.push_back(Elt: TLI.expandFMINIMUM_FMAXIMUM(N: Node, DAG));
    return;
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM:
    Results.push_back(Elt: TLI.expandFMINIMUMNUM_FMAXIMUMNUM(N: Node, DAG));
    return;
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
    if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::UADDO:
  case ISD::USUBO:
    ExpandUADDSUBO(Node, Results);
    return;
  case ISD::SADDO:
  case ISD::SSUBO:
    ExpandSADDSUBO(Node, Results);
    return;
  case ISD::UMULO:
  case ISD::SMULO:
    ExpandMULO(Node, Results);
    return;
  case ISD::USUBSAT:
  case ISD::SSUBSAT:
  case ISD::UADDSAT:
  case ISD::SADDSAT:
    if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::USHLSAT:
  case ISD::SSHLSAT:
    if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    // Expand FP_TO_SINT_SAT/FP_TO_UINT_SAT for scalable vectors to prevent
    // it from reaching the unroll fallback below.
    if (Node->getValueType(ResNo: 0).isScalableVector()) {
      if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(N: Node, DAG)) {
        Results.push_back(Elt: Expanded);
        return;
      }
    }
    break;
  case ISD::SMULFIX:
  case ISD::UMULFIX:
    if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::SMULFIXSAT:
  case ISD::UMULFIXSAT:
    // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
    // why. Maybe it results in worse codegen compared to the unroll for some
    // targets? This should probably be investigated. And if we still prefer to
    // unroll an explanation could be helpful.
    break;
  case ISD::SDIVFIX:
  case ISD::UDIVFIX:
    ExpandFixedPointDiv(Node, Results);
    return;
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIXSAT:
    break;
// Every strict FP opcode generated from ConstrainedOps.def is routed to the
// shared strict-FP expansion.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ExpandStrictFPOp(Node, Results);
    return;
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAXIMUM:
  case ISD::VECREDUCE_FMINIMUM:
    Results.push_back(Elt: TLI.expandVecReduce(Node, DAG));
    return;
  case ISD::PARTIAL_REDUCE_UMLA:
  case ISD::PARTIAL_REDUCE_SMLA:
  case ISD::PARTIAL_REDUCE_SUMLA:
  case ISD::PARTIAL_REDUCE_FMLA:
    Results.push_back(Elt: TLI.expandPartialReduceMLA(Node, DAG));
    return;
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_SEQ_FMUL:
    Results.push_back(Elt: TLI.expandVecReduceSeq(Node, DAG));
    return;
  case ISD::SREM:
  case ISD::UREM:
    ExpandREM(Node, Results);
    return;
  case ISD::VP_MERGE:
    if (SDValue Expanded = ExpandVP_MERGE(Node)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  case ISD::FREM: {
    RTLIB::Libcall LC = RTLIB::getREM(VT: Node->getValueType(ResNo: 0));
    if (tryExpandVecMathCall(Node, LC, Results))
      return;

    break;
  }
  case ISD::FSINCOS:
  case ISD::FSINCOSPI: {
    EVT VT = Node->getValueType(ResNo: 0);
    RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
                            ? RTLIB::getSINCOS(RetVT: VT)
                            : RTLIB::getSINCOSPI(RetVT: VT);
    if (LC != RTLIB::UNKNOWN_LIBCALL &&
        TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results))
      return;

    // TODO: Try to see if there's a narrower call available to use before
    // scalarizing.
    break;
  }
  case ISD::FMODF: {
    EVT VT = Node->getValueType(ResNo: 0);
    RTLIB::Libcall LC = RTLIB::getMODF(VT);
    if (LC != RTLIB::UNKNOWN_LIBCALL &&
        TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results,
                                          /*CallRetResNo=*/0))
      return;
    break;
  }
  case ISD::VECTOR_COMPRESS:
    Results.push_back(Elt: TLI.expandVECTOR_COMPRESS(Node, DAG));
    return;
  case ISD::VECTOR_FIND_LAST_ACTIVE:
    Results.push_back(Elt: TLI.expandVectorFindLastActive(N: Node, DAG));
    return;
  case ISD::SCMP:
  case ISD::UCMP:
    Results.push_back(Elt: TLI.expandCMP(Node, DAG));
    return;
  case ISD::LOOP_DEPENDENCE_WAR_MASK:
  case ISD::LOOP_DEPENDENCE_RAW_MASK:
    Results.push_back(Elt: ExpandLOOP_DEPENDENCE_MASK(N: Node));
    return;

  case ISD::FADD:
  case ISD::FMUL:
  case ISD::FMA:
  case ISD::FDIV:
  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FNEARBYINT:
  case ISD::FRINT:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::FTRUNC:
  case ISD::FSQRT:
    if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
      Results.push_back(Elt: Expanded);
      return;
    }
    break;
  }

  // No opcode-specific expansion applied: fall back to unrolling the vector
  // operation into scalar operations on each element.
  SDValue Unrolled = DAG.UnrollVectorOp(N: Node);
  if (Node->getNumValues() == 1) {
    Results.push_back(Elt: Unrolled);
  } else {
    assert(Node->getNumValues() == Unrolled->getNumValues() &&
           "VectorLegalizer Expand returned wrong number of results!");
    for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
      Results.push_back(Elt: Unrolled.getValue(R: I));
  }
}
1374
1375SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
1376 // Lower a select instruction where the condition is a scalar and the
1377 // operands are vectors. Lower this select to VSELECT and implement it
1378 // using XOR AND OR. The selector bit is broadcasted.
1379 EVT VT = Node->getValueType(ResNo: 0);
1380 SDLoc DL(Node);
1381
1382 SDValue Mask = Node->getOperand(Num: 0);
1383 SDValue Op1 = Node->getOperand(Num: 1);
1384 SDValue Op2 = Node->getOperand(Num: 2);
1385
1386 assert(VT.isVector() && !Mask.getValueType().isVector()
1387 && Op1.getValueType() == Op2.getValueType() && "Invalid type");
1388
1389 // If we can't even use the basic vector operations of
1390 // AND,OR,XOR, we will have to scalarize the op.
1391 // Notice that the operation may be 'promoted' which means that it is
1392 // 'bitcasted' to another type which is handled.
1393 // Also, we need to be able to construct a splat vector using either
1394 // BUILD_VECTOR or SPLAT_VECTOR.
1395 // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
1396 // BUILD_VECTOR?
1397 if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand ||
1398 TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand ||
1399 TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand ||
1400 TLI.getOperationAction(Op: VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
1401 : ISD::SPLAT_VECTOR,
1402 VT) == TargetLowering::Expand)
1403 return SDValue();
1404
1405 // Generate a mask operand.
1406 EVT MaskTy = VT.changeVectorElementTypeToInteger();
1407
1408 // What is the size of each element in the vector mask.
1409 EVT BitTy = MaskTy.getScalarType();
1410
1411 Mask = DAG.getSelect(DL, VT: BitTy, Cond: Mask, LHS: DAG.getAllOnesConstant(DL, VT: BitTy),
1412 RHS: DAG.getConstant(Val: 0, DL, VT: BitTy));
1413
1414 // Broadcast the mask so that the entire vector is all one or all zero.
1415 Mask = DAG.getSplat(VT: MaskTy, DL, Op: Mask);
1416
1417 // Bitcast the operands to be the same type as the mask.
1418 // This is needed when we select between FP types because
1419 // the mask is a vector of integers.
1420 Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op1);
1421 Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op2);
1422
1423 SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT: MaskTy);
1424
1425 Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op1, N2: Mask);
1426 Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op2, N2: NotMask);
1427 SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MaskTy, N1: Op1, N2: Op2);
1428 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val);
1429}
1430
1431SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
1432 EVT VT = Node->getValueType(ResNo: 0);
1433
1434 // Make sure that the SRA and SHL instructions are available.
1435 if (TLI.getOperationAction(Op: ISD::SRA, VT) == TargetLowering::Expand ||
1436 TLI.getOperationAction(Op: ISD::SHL, VT) == TargetLowering::Expand)
1437 return SDValue();
1438
1439 SDLoc DL(Node);
1440 EVT OrigTy = cast<VTSDNode>(Val: Node->getOperand(Num: 1))->getVT();
1441
1442 unsigned BW = VT.getScalarSizeInBits();
1443 unsigned OrigBW = OrigTy.getScalarSizeInBits();
1444 SDValue ShiftSz = DAG.getConstant(Val: BW - OrigBW, DL, VT);
1445
1446 SDValue Op = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Node->getOperand(Num: 0), N2: ShiftSz);
1447 return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Op, N2: ShiftSz);
1448}
1449
1450// Generically expand a vector anyext in register to a shuffle of the relevant
1451// lanes into the appropriate locations, with other lanes left undef.
1452SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
1453 SDLoc DL(Node);
1454 EVT VT = Node->getValueType(ResNo: 0);
1455 int NumElements = VT.getVectorNumElements();
1456 SDValue Src = Node->getOperand(Num: 0);
1457 EVT SrcVT = Src.getValueType();
1458 int NumSrcElements = SrcVT.getVectorNumElements();
1459
1460 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1461 // into a larger vector type.
1462 if (SrcVT.bitsLE(VT)) {
1463 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1464 "ANY_EXTEND_VECTOR_INREG vector size mismatch");
1465 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1466 SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(),
1467 NumElements: NumSrcElements);
1468 Src = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: SrcVT), SubVec: Src, Idx: 0);
1469 }
1470
1471 // Build a base mask of undef shuffles.
1472 SmallVector<int, 16> ShuffleMask;
1473 ShuffleMask.resize(N: NumSrcElements, NV: -1);
1474
1475 // Place the extended lanes into the correct locations.
1476 int ExtLaneScale = NumSrcElements / NumElements;
1477 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1478 for (int i = 0; i < NumElements; ++i)
1479 ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
1480
1481 return DAG.getNode(
1482 Opcode: ISD::BITCAST, DL, VT,
1483 Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Src, N2: DAG.getPOISON(VT: SrcVT), Mask: ShuffleMask));
1484}
1485
1486SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
1487 SDLoc DL(Node);
1488 EVT VT = Node->getValueType(ResNo: 0);
1489 SDValue Src = Node->getOperand(Num: 0);
1490 EVT SrcVT = Src.getValueType();
1491
1492 // First build an any-extend node which can be legalized above when we
1493 // recurse through it.
1494 SDValue Op = DAG.getNode(Opcode: ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Operand: Src);
1495
1496 // Now we need sign extend. Do this by shifting the elements. Even if these
1497 // aren't legal operations, they have a better chance of being legalized
1498 // without full scalarization than the sign extension does.
1499 unsigned EltWidth = VT.getScalarSizeInBits();
1500 unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
1501 SDValue ShiftAmount = DAG.getConstant(Val: EltWidth - SrcEltWidth, DL, VT);
1502 return DAG.getNode(Opcode: ISD::SRA, DL, VT,
1503 N1: DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Op, N2: ShiftAmount),
1504 N2: ShiftAmount);
1505}
1506
1507// Generically expand a vector zext in register to a shuffle of the relevant
1508// lanes into the appropriate locations, a blend of zero into the high bits,
1509// and a bitcast to the wider element type.
1510SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
1511 SDLoc DL(Node);
1512 EVT VT = Node->getValueType(ResNo: 0);
1513 int NumElements = VT.getVectorNumElements();
1514 SDValue Src = Node->getOperand(Num: 0);
1515 EVT SrcVT = Src.getValueType();
1516 int NumSrcElements = SrcVT.getVectorNumElements();
1517
1518 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1519 // into a larger vector type.
1520 if (SrcVT.bitsLE(VT)) {
1521 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1522 "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
1523 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1524 SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(),
1525 NumElements: NumSrcElements);
1526 Src = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: SrcVT), SubVec: Src, Idx: 0);
1527 }
1528
1529 // Build up a zero vector to blend into this one.
1530 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SrcVT);
1531
1532 // Shuffle the incoming lanes into the correct position, and pull all other
1533 // lanes from the zero vector.
1534 auto ShuffleMask = llvm::to_vector<16>(Range: llvm::seq<int>(Begin: 0, End: NumSrcElements));
1535
1536 int ExtLaneScale = NumSrcElements / NumElements;
1537 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1538 for (int i = 0; i < NumElements; ++i)
1539 ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
1540
1541 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT,
1542 Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Zero, N2: Src, Mask: ShuffleMask));
1543}
1544
1545static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
1546 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
1547 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
1548 for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
1549 ShuffleMask.push_back(Elt: (I * ScalarSizeInBytes) + J);
1550}
1551
1552SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
1553 EVT VT = Node->getValueType(ResNo: 0);
1554
1555 // Scalable vectors can't use shuffle expansion.
1556 if (VT.isScalableVector())
1557 return TLI.expandBSWAP(N: Node, DAG);
1558
1559 // Generate a byte wise shuffle mask for the BSWAP.
1560 SmallVector<int, 16> ShuffleMask;
1561 createBSWAPShuffleMask(VT, ShuffleMask);
1562 EVT ByteVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: ShuffleMask.size());
1563
1564 // Only emit a shuffle if the mask is legal.
1565 if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
1566 SDLoc DL(Node);
1567 SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0));
1568 Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getPOISON(VT: ByteVT),
1569 Mask: ShuffleMask);
1570 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
1571 }
1572
1573 // If we have the appropriate vector bit operations, it is better to use them
1574 // than unrolling and expanding each component.
1575 if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) &&
1576 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
1577 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) &&
1578 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT))
1579 return TLI.expandBSWAP(N: Node, DAG);
1580
1581 // Otherwise let the caller unroll.
1582 return SDValue();
1583}
1584
1585SDValue VectorLegalizer::ExpandBITREVERSE(SDNode *Node) {
1586 EVT VT = Node->getValueType(ResNo: 0);
1587
1588 // We can't unroll or use shuffles for scalable vectors.
1589 if (VT.isScalableVector())
1590 return TLI.expandBITREVERSE(N: Node, DAG);
1591
1592 // If we have the scalar operation, it's probably cheaper to unroll it.
1593 if (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: VT.getScalarType()))
1594 return SDValue();
1595
1596 // If the vector element width is a whole number of bytes, test if its legal
1597 // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
1598 // vector. This greatly reduces the number of bit shifts necessary.
1599 unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
1600 if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
1601 SmallVector<int, 16> BSWAPMask;
1602 createBSWAPShuffleMask(VT, ShuffleMask&: BSWAPMask);
1603
1604 EVT ByteVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: BSWAPMask.size());
1605 if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
1606 (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: ByteVT) ||
1607 (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT: ByteVT) &&
1608 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT: ByteVT) &&
1609 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: ByteVT) &&
1610 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: ByteVT)))) {
1611 SDLoc DL(Node);
1612 SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0));
1613 Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getPOISON(VT: ByteVT),
1614 Mask: BSWAPMask);
1615 Op = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT: ByteVT, Operand: Op);
1616 Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
1617 return Op;
1618 }
1619 }
1620
1621 // If we have the appropriate vector bit operations, it is better to use them
1622 // than unrolling and expanding each component.
1623 if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) &&
1624 TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) &&
1625 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) &&
1626 TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT))
1627 return TLI.expandBITREVERSE(N: Node, DAG);
1628
1629 // Otherwise unroll.
1630 return SDValue();
1631}
1632
1633SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
1634 // Implement VSELECT in terms of XOR, AND, OR
1635 // on platforms which do not support blend natively.
1636 SDLoc DL(Node);
1637
1638 SDValue Mask = Node->getOperand(Num: 0);
1639 SDValue Op1 = Node->getOperand(Num: 1);
1640 SDValue Op2 = Node->getOperand(Num: 2);
1641
1642 EVT VT = Mask.getValueType();
1643
1644 // If we can't even use the basic vector operations of
1645 // AND,OR,XOR, we will have to scalarize the op.
1646 // Notice that the operation may be 'promoted' which means that it is
1647 // 'bitcasted' to another type which is handled.
1648 if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand ||
1649 TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand ||
1650 TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand)
1651 return SDValue();
1652
1653 // This operation also isn't safe with AND, OR, XOR when the boolean type is
1654 // 0/1 and the select operands aren't also booleans, as we need an all-ones
1655 // vector constant to mask with.
1656 // FIXME: Sign extend 1 to all ones if that's legal on the target.
1657 auto BoolContents = TLI.getBooleanContents(Type: Op1.getValueType());
1658 if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
1659 !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
1660 Op1.getValueType().getVectorElementType() == MVT::i1))
1661 return SDValue();
1662
1663 // If the mask and the type are different sizes, unroll the vector op. This
1664 // can occur when getSetCCResultType returns something that is different in
1665 // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
1666 if (VT.getSizeInBits() != Op1.getValueSizeInBits())
1667 return SDValue();
1668
1669 // Bitcast the operands to be the same type as the mask.
1670 // This is needed when we select between FP types because
1671 // the mask is a vector of integers.
1672 Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op1);
1673 Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op2);
1674
1675 SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT);
1676
1677 Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op1, N2: Mask);
1678 Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op2, N2: NotMask);
1679 SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Op1, N2: Op2);
1680 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val);
1681}
1682
1683SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
1684 // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
1685 // do not support it natively.
1686 SDLoc DL(Node);
1687
1688 SDValue Mask = Node->getOperand(Num: 0);
1689 SDValue Op1 = Node->getOperand(Num: 1);
1690 SDValue Op2 = Node->getOperand(Num: 2);
1691 SDValue EVL = Node->getOperand(Num: 3);
1692
1693 EVT VT = Mask.getValueType();
1694
1695 // If we can't even use the basic vector operations of
1696 // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
1697 if (TLI.getOperationAction(Op: ISD::VP_AND, VT) == TargetLowering::Expand ||
1698 TLI.getOperationAction(Op: ISD::VP_XOR, VT) == TargetLowering::Expand ||
1699 TLI.getOperationAction(Op: ISD::VP_OR, VT) == TargetLowering::Expand)
1700 return SDValue();
1701
1702 // This operation also isn't safe when the operands aren't also booleans.
1703 if (Op1.getValueType().getVectorElementType() != MVT::i1)
1704 return SDValue();
1705
1706 SDValue Ones = DAG.getAllOnesConstant(DL, VT);
1707 SDValue NotMask = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT, N1: Mask, N2: Ones, N3: Ones, N4: EVL);
1708
1709 Op1 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op1, N2: Mask, N3: Ones, N4: EVL);
1710 Op2 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op2, N2: NotMask, N3: Ones, N4: EVL);
1711 return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: Op1, N2: Op2, N3: Ones, N4: EVL);
1712}
1713
SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
  // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
  // indices less than the EVL/pivot are true. Combine that with the original
  // mask for a full-length mask. Use a full-length VSELECT to select between
  // the true and false values.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(Num: 0);
  SDValue Op1 = Node->getOperand(Num: 1);
  SDValue Op2 = Node->getOperand(Num: 2);
  SDValue EVL = Node->getOperand(Num: 3);

  EVT MaskVT = Mask.getValueType();
  bool IsFixedLen = MaskVT.isFixedLengthVector();

  // Vector type with one EVL-typed element per mask lane; used to build the
  // step vector and EVL splat that are compared to form the EVL mask.
  EVT EVLVecVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EVL.getValueType(),
                                  EC: MaskVT.getVectorElementCount());

  // If we can't construct the EVL mask efficiently, it's better to unroll.
  if ((IsFixedLen &&
       !TLI.isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: EVLVecVT)) ||
      (!IsFixedLen &&
       (!TLI.isOperationLegalOrCustom(Op: ISD::STEP_VECTOR, VT: EVLVecVT) ||
        !TLI.isOperationLegalOrCustom(Op: ISD::SPLAT_VECTOR, VT: EVLVecVT))))
    return SDValue();

  // If using a SETCC would result in a different type than the mask type,
  // unroll.
  if (TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(),
                             VT: EVLVecVT) != MaskVT)
    return SDValue();

  // Lane i is active iff i < EVL: compare <0, 1, 2, ...> against splat(EVL).
  SDValue StepVec = DAG.getStepVector(DL, ResVT: EVLVecVT);
  SDValue SplatEVL = DAG.getSplat(VT: EVLVecVT, DL, Op: EVL);
  SDValue EVLMask =
      DAG.getSetCC(DL, VT: MaskVT, LHS: StepVec, RHS: SplatEVL, Cond: ISD::CondCode::SETULT);

  // A lane selects Op1 only if it is both below EVL and set in the input mask.
  SDValue FullMask = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskVT, N1: Mask, N2: EVLMask);
  return DAG.getSelect(DL, VT: Node->getValueType(ResNo: 0), Cond: FullMask, LHS: Op1, RHS: Op2);
}
1754
1755SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
1756 // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
1757 EVT VT = Node->getValueType(ResNo: 0);
1758
1759 unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
1760
1761 if (!TLI.isOperationLegalOrCustom(Op: DivOpc, VT) ||
1762 !TLI.isOperationLegalOrCustom(Op: ISD::VP_MUL, VT) ||
1763 !TLI.isOperationLegalOrCustom(Op: ISD::VP_SUB, VT))
1764 return SDValue();
1765
1766 SDLoc DL(Node);
1767
1768 SDValue Dividend = Node->getOperand(Num: 0);
1769 SDValue Divisor = Node->getOperand(Num: 1);
1770 SDValue Mask = Node->getOperand(Num: 2);
1771 SDValue EVL = Node->getOperand(Num: 3);
1772
1773 // X % Y -> X-X/Y*Y
1774 SDValue Div = DAG.getNode(Opcode: DivOpc, DL, VT, N1: Dividend, N2: Divisor, N3: Mask, N4: EVL);
1775 SDValue Mul = DAG.getNode(Opcode: ISD::VP_MUL, DL, VT, N1: Divisor, N2: Div, N3: Mask, N4: EVL);
1776 return DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: Dividend, N2: Mul, N3: Mask, N4: EVL);
1777}
1778
1779SDValue VectorLegalizer::ExpandVP_FNEG(SDNode *Node) {
1780 EVT VT = Node->getValueType(ResNo: 0);
1781 EVT IntVT = VT.changeVectorElementTypeToInteger();
1782
1783 if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_XOR, VT: IntVT))
1784 return SDValue();
1785
1786 SDValue Mask = Node->getOperand(Num: 1);
1787 SDValue EVL = Node->getOperand(Num: 2);
1788
1789 SDLoc DL(Node);
1790 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
1791 SDValue SignMask = DAG.getConstant(
1792 Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1793 SDValue Xor = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: IntVT, N1: Cast, N2: SignMask, N3: Mask, N4: EVL);
1794 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Xor);
1795}
1796
1797SDValue VectorLegalizer::ExpandVP_FABS(SDNode *Node) {
1798 EVT VT = Node->getValueType(ResNo: 0);
1799 EVT IntVT = VT.changeVectorElementTypeToInteger();
1800
1801 if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_AND, VT: IntVT))
1802 return SDValue();
1803
1804 SDValue Mask = Node->getOperand(Num: 1);
1805 SDValue EVL = Node->getOperand(Num: 2);
1806
1807 SDLoc DL(Node);
1808 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
1809 SDValue ClearSignMask = DAG.getConstant(
1810 Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1811 SDValue ClearSign =
1812 DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Cast, N2: ClearSignMask, N3: Mask, N4: EVL);
1813 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: ClearSign);
1814}
1815
1816SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) {
1817 EVT VT = Node->getValueType(ResNo: 0);
1818
1819 if (VT != Node->getOperand(Num: 1).getValueType())
1820 return SDValue();
1821
1822 EVT IntVT = VT.changeVectorElementTypeToInteger();
1823 if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_AND, VT: IntVT) ||
1824 !TLI.isOperationLegalOrCustom(Op: ISD::VP_XOR, VT: IntVT))
1825 return SDValue();
1826
1827 SDValue Mask = Node->getOperand(Num: 2);
1828 SDValue EVL = Node->getOperand(Num: 3);
1829
1830 SDLoc DL(Node);
1831 SDValue Mag = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
1832 SDValue Sign = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 1));
1833
1834 SDValue SignMask = DAG.getConstant(
1835 Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1836 SDValue SignBit =
1837 DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Sign, N2: SignMask, N3: Mask, N4: EVL);
1838
1839 SDValue ClearSignMask = DAG.getConstant(
1840 Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
1841 SDValue ClearedSign =
1842 DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Mag, N2: ClearSignMask, N3: Mask, N4: EVL);
1843
1844 SDValue CopiedSign = DAG.getNode(Opcode: ISD::VP_OR, DL, VT: IntVT, N1: ClearedSign, N2: SignBit,
1845 N3: Mask, N4: EVL, Flags: SDNodeFlags::Disjoint);
1846
1847 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: CopiedSign);
1848}
1849
// Expand LOOP_DEPENDENCE_{RAW,WAR}_MASK into pointer arithmetic plus
// GET_ACTIVE_LANE_MASK: compute how many lanes separate the two pointers and
// build a mask enabling only the lanes that cannot alias.
SDValue VectorLegalizer::ExpandLOOP_DEPENDENCE_MASK(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  SDValue SourceValue = N->getOperand(Num: 0);
  SDValue SinkValue = N->getOperand(Num: 1);
  SDValue EltSizeInBytes = N->getOperand(Num: 2);

  // Note: The lane offset is scalable if the mask is scalable.
  ElementCount LaneOffsetEC =
      ElementCount::get(MinVal: N->getConstantOperandVal(Num: 3), Scalable: VT.isScalableVT());

  EVT PtrVT = SourceValue->getValueType(ResNo: 0);
  bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;

  // Take the difference between the pointers and divide it by the element
  // size, to see how many lanes separate them.
  SDValue Diff = DAG.getNode(Opcode: ISD::SUB, DL, VT: PtrVT, N1: SinkValue, N2: SourceValue);
  if (IsReadAfterWrite)
    Diff = DAG.getNode(Opcode: ISD::ABS, DL, VT: PtrVT, Operand: Diff);
  Diff = DAG.getNode(Opcode: ISD::SDIV, DL, VT: PtrVT, N1: Diff, N2: EltSizeInBytes);

  // The pointers do not alias if:
  // * Diff <= 0 (WAR_MASK)
  // * Diff == 0 (RAW_MASK)
  EVT CmpVT =
      TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: PtrVT);
  SDValue Zero = DAG.getConstant(Val: 0, DL, VT: PtrVT);
  SDValue Cmp = DAG.getSetCC(DL, VT: CmpVT, LHS: Diff, RHS: Zero,
                             Cond: IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);

  // The pointers do not alias if:
  // Lane + LaneOffset < Diff (WAR/RAW_MASK)
  SDValue LaneOffset = DAG.getElementCount(DL, VT: PtrVT, EC: LaneOffsetEC);
  // If the no-alias comparison held, use -1 (all lanes active) as the trip
  // count for the lane mask; otherwise limit active lanes to Diff.
  SDValue MaskN =
      DAG.getSelect(DL, VT: PtrVT, Cond: Cmp, LHS: DAG.getConstant(Val: -1, DL, VT: PtrVT), RHS: Diff);

  return DAG.getNode(Opcode: ISD::GET_ACTIVE_LANE_MASK, DL, VT, N1: LaneOffset, N2: MaskN);
}
1888
1889void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
1890 SmallVectorImpl<SDValue> &Results) {
1891 // Attempt to expand using TargetLowering.
1892 SDValue Result, Chain;
1893 if (TLI.expandFP_TO_UINT(N: Node, Result, Chain, DAG)) {
1894 Results.push_back(Elt: Result);
1895 if (Node->isStrictFPOpcode())
1896 Results.push_back(Elt: Chain);
1897 return;
1898 }
1899
1900 // Otherwise go ahead and unroll.
1901 if (Node->isStrictFPOpcode()) {
1902 UnrollStrictFPOp(Node, Results);
1903 return;
1904 }
1905
1906 Results.push_back(Elt: DAG.UnrollVectorOp(N: Node));
1907}
1908
// Expand [STRICT_]UINT_TO_FP. Preference order: the generic TargetLowering
// expansion; scalarization when SINT_TO_FP/SRL are unavailable; conversion
// via a wider float type when FMUL is unavailable; otherwise a two-half
// decomposition built from signed conversions.
void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
                                          SmallVectorImpl<SDValue> &Results) {
  bool IsStrict = Node->isStrictFPOpcode();
  // Strict nodes carry the chain as operand 0, shifting the value operand.
  unsigned OpNo = IsStrict ? 1 : 0;
  SDValue Src = Node->getOperand(Num: OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(ResNo: 0);
  SDLoc DL(Node);

  // Attempt to expand using TargetLowering.
  SDValue Result;
  SDValue Chain;
  if (TLI.expandUINT_TO_FP(N: Node, Result, Chain, DAG)) {
    Results.push_back(Elt: Result);
    if (IsStrict)
      Results.push_back(Elt: Chain);
    return;
  }

  // Make sure that the SINT_TO_FP and SRL instructions are available.
  if (((!IsStrict && TLI.getOperationAction(Op: ISD::SINT_TO_FP, VT: SrcVT) ==
                         TargetLowering::Expand) ||
       (IsStrict && TLI.getOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: SrcVT) ==
                        TargetLowering::Expand)) ||
      TLI.getOperationAction(Op: ISD::SRL, VT: SrcVT) == TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }

    Results.push_back(Elt: DAG.UnrollVectorOp(N: Node));
    return;
  }

  unsigned BW = SrcVT.getScalarSizeInBits();
  assert((BW == 64 || BW == 32) &&
         "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");

  // If STRICT_/FMUL is not supported by the target (in case of f16) replace the
  // UINT_TO_FP with a larger float and round to the smaller type
  if ((!IsStrict && !TLI.isOperationLegalOrCustom(Op: ISD::FMUL, VT: DstVT)) ||
      (IsStrict && !TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FMUL, VT: DstVT))) {
    // Convert in an f32/f64 vector matching the source width, then FP_ROUND
    // down to DstVT.
    EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64;
    SDValue UIToFP;
    SDValue Result;
    SDValue TargetZero = DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true);
    EVT FloatVecVT = SrcVT.changeVectorElementType(Context&: *DAG.getContext(), EltVT: FPVT);
    if (IsStrict) {
      UIToFP = DAG.getNode(Opcode: ISD::STRICT_UINT_TO_FP, DL, ResultTys: {FloatVecVT, MVT::Other},
                           Ops: {Node->getOperand(Num: 0), Src});
      Result = DAG.getNode(Opcode: ISD::STRICT_FP_ROUND, DL, ResultTys: {DstVT, MVT::Other},
                           Ops: {Node->getOperand(Num: 0), UIToFP, TargetZero});
      Results.push_back(Elt: Result);
      Results.push_back(Elt: Result.getValue(R: 1));
    } else {
      UIToFP = DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: FloatVecVT, Operand: Src);
      Result = DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: DstVT, N1: UIToFP, N2: TargetZero);
      Results.push_back(Elt: Result);
    }

    return;
  }

  SDValue HalfWord = DAG.getConstant(Val: BW / 2, DL, VT: SrcVT);

  // Constants to clear the upper part of the word.
  // Notice that we can also use SHL+SHR, but using a constant is slightly
  // faster on x86.
  uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
  SDValue HalfWordMask = DAG.getConstant(Val: HWMask, DL, VT: SrcVT);

  // Two to the power of half-word-size.
  SDValue TWOHW = DAG.getConstantFP(Val: 1ULL << (BW / 2), DL, VT: DstVT);

  // Clear upper part of LO, lower HI
  SDValue HI = DAG.getNode(Opcode: ISD::SRL, DL, VT: SrcVT, N1: Src, N2: HalfWord);
  SDValue LO = DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src, N2: HalfWordMask);

  if (IsStrict) {
    // Convert hi and lo to floats
    // Convert the hi part back to the upper values
    // TODO: Can any fast-math-flags be set on these nodes?
    SDValue fHI = DAG.getNode(Opcode: ISD::STRICT_SINT_TO_FP, DL, ResultTys: {DstVT, MVT::Other},
                              Ops: {Node->getOperand(Num: 0), HI});
    fHI = DAG.getNode(Opcode: ISD::STRICT_FMUL, DL, ResultTys: {DstVT, MVT::Other},
                      Ops: {fHI.getValue(R: 1), fHI, TWOHW});
    SDValue fLO = DAG.getNode(Opcode: ISD::STRICT_SINT_TO_FP, DL, ResultTys: {DstVT, MVT::Other},
                              Ops: {Node->getOperand(Num: 0), LO});

    // Merge the chains of the two half conversions before the final add.
    SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: fHI.getValue(R: 1),
                             N2: fLO.getValue(R: 1));

    // Add the two halves
    SDValue Result =
        DAG.getNode(Opcode: ISD::STRICT_FADD, DL, ResultTys: {DstVT, MVT::Other}, Ops: {TF, fHI, fLO});

    Results.push_back(Elt: Result);
    Results.push_back(Elt: Result.getValue(R: 1));
    return;
  }

  // Convert hi and lo to floats
  // Convert the hi part back to the upper values
  // TODO: Can any fast-math-flags be set on these nodes?
  SDValue fHI = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: DstVT, Operand: HI);
  fHI = DAG.getNode(Opcode: ISD::FMUL, DL, VT: DstVT, N1: fHI, N2: TWOHW);
  SDValue fLO = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: DstVT, Operand: LO);

  // Add the two halves
  Results.push_back(Elt: DAG.getNode(Opcode: ISD::FADD, DL, VT: DstVT, N1: fHI, N2: fLO));
}
2020
2021SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
2022 EVT VT = Node->getValueType(ResNo: 0);
2023 EVT IntVT = VT.changeVectorElementTypeToInteger();
2024
2025 if (!TLI.isOperationLegalOrCustom(Op: ISD::XOR, VT: IntVT))
2026 return SDValue();
2027
2028 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
2029 if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) &&
2030 !VT.isScalableVector())
2031 return SDValue();
2032
2033 SDLoc DL(Node);
2034 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
2035 SDValue SignMask = DAG.getConstant(
2036 Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
2037 SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL, VT: IntVT, N1: Cast, N2: SignMask);
2038 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Xor);
2039}
2040
2041SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
2042 EVT VT = Node->getValueType(ResNo: 0);
2043 EVT IntVT = VT.changeVectorElementTypeToInteger();
2044
2045 if (!TLI.isOperationLegalOrCustom(Op: ISD::AND, VT: IntVT))
2046 return SDValue();
2047
2048 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
2049 if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) &&
2050 !VT.isScalableVector())
2051 return SDValue();
2052
2053 SDLoc DL(Node);
2054 SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
2055 SDValue ClearSignMask = DAG.getConstant(
2056 Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
2057 SDValue ClearedSign = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Cast, N2: ClearSignMask);
2058 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: ClearedSign);
2059}
2060
2061SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
2062 EVT VT = Node->getValueType(ResNo: 0);
2063 EVT IntVT = VT.changeVectorElementTypeToInteger();
2064
2065 if (VT != Node->getOperand(Num: 1).getValueType() ||
2066 !TLI.isOperationLegalOrCustom(Op: ISD::AND, VT: IntVT) ||
2067 !TLI.isOperationLegalOrCustom(Op: ISD::OR, VT: IntVT))
2068 return SDValue();
2069
2070 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
2071 if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) &&
2072 !VT.isScalableVector())
2073 return SDValue();
2074
2075 SDLoc DL(Node);
2076 SDValue Mag = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0));
2077 SDValue Sign = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 1));
2078
2079 SDValue SignMask = DAG.getConstant(
2080 Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
2081 SDValue SignBit = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Sign, N2: SignMask);
2082
2083 SDValue ClearSignMask = DAG.getConstant(
2084 Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT);
2085 SDValue ClearedSign = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Mag, N2: ClearSignMask);
2086
2087 SDValue CopiedSign = DAG.getNode(Opcode: ISD::OR, DL, VT: IntVT, N1: ClearedSign, N2: SignBit,
2088 Flags: SDNodeFlags::Disjoint);
2089
2090 return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: CopiedSign);
2091}
2092
2093void VectorLegalizer::ExpandFSUB(SDNode *Node,
2094 SmallVectorImpl<SDValue> &Results) {
2095 // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
2096 // we can defer this to operation legalization where it will be lowered as
2097 // a+(-b).
2098 EVT VT = Node->getValueType(ResNo: 0);
2099 if (TLI.isOperationLegalOrCustom(Op: ISD::FNEG, VT) &&
2100 TLI.isOperationLegalOrCustom(Op: ISD::FADD, VT))
2101 return; // Defer to LegalizeDAG
2102
2103 if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
2104 Results.push_back(Elt: Expanded);
2105 return;
2106 }
2107
2108 SDValue Tmp = DAG.UnrollVectorOp(N: Node);
2109 Results.push_back(Elt: Tmp);
2110}
2111
// Expand a vector [STRICT_F|VP_]SETCC whose condition code is not natively
// supported, by legalizing the condition code (possibly swapping operands or
// inverting the predicate) or, failing that, lowering to SELECT_CC.
void VectorLegalizer::ExpandSETCC(SDNode *Node,
                                  SmallVectorImpl<SDValue> &Results) {
  bool NeedInvert = false;
  bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
  bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
                  Node->getOpcode() == ISD::STRICT_FSETCCS;
  bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
  // Strict nodes carry the chain as operand 0, shifting the remaining
  // operands by one.
  unsigned Offset = IsStrict ? 1 : 0;

  SDValue Chain = IsStrict ? Node->getOperand(Num: 0) : SDValue();
  SDValue LHS = Node->getOperand(Num: 0 + Offset);
  SDValue RHS = Node->getOperand(Num: 1 + Offset);
  SDValue CC = Node->getOperand(Num: 2 + Offset);

  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get();

  // If the condition code itself does not need expanding, the SETCC operation
  // must be what required expansion, so scalarize it.
  if (TLI.getCondCodeAction(CC: CCCode, VT: OpVT) != TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }
    Results.push_back(Elt: UnrollVSETCC(Node));
    return;
  }

  SDValue Mask, EVL;
  if (IsVP) {
    Mask = Node->getOperand(Num: 3 + Offset);
    EVL = Node->getOperand(Num: 4 + Offset);
  }

  SDLoc dl(Node);
  // May rewrite LHS/RHS/CC in place (e.g. swap operands, change predicate)
  // and sets NeedInvert when the result must be logically negated.
  bool Legalized =
      TLI.LegalizeSetCCCondCode(DAG, VT: Node->getValueType(ResNo: 0), LHS, RHS, CC, Mask,
                                EVL, NeedInvert, dl, Chain, IsSignaling);

  if (Legalized) {
    // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
    // condition code, create a new SETCC node.
    if (CC.getNode()) {
      if (IsStrict) {
        LHS = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VTList: Node->getVTList(),
                          Ops: {Chain, LHS, RHS, CC}, Flags: Node->getFlags());
        Chain = LHS.getValue(R: 1);
      } else if (IsVP) {
        LHS = DAG.getNode(Opcode: ISD::VP_SETCC, DL: dl, VT: Node->getValueType(ResNo: 0),
                          Ops: {LHS, RHS, CC, Mask, EVL}, Flags: Node->getFlags());
      } else {
        LHS = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: Node->getValueType(ResNo: 0), N1: LHS, N2: RHS, N3: CC,
                          Flags: Node->getFlags());
      }
    }

    // If we expanded the SETCC by inverting the condition code, then wrap
    // the existing SETCC in a NOT to restore the intended condition.
    if (NeedInvert) {
      if (!IsVP)
        LHS = DAG.getLogicalNOT(DL: dl, Val: LHS, VT: LHS->getValueType(ResNo: 0));
      else
        LHS = DAG.getVPLogicalNOT(DL: dl, Val: LHS, Mask, EVL, VT: LHS->getValueType(ResNo: 0));
    }
  } else {
    assert(!IsStrict && "Don't know how to expand for strict nodes.");

    // Otherwise, SETCC for the given comparison type must be completely
    // illegal; expand it into a SELECT_CC.
    EVT VT = Node->getValueType(ResNo: 0);
    LHS = DAG.getNode(Opcode: ISD::SELECT_CC, DL: dl, VT, N1: LHS, N2: RHS,
                      N3: DAG.getBoolConstant(V: true, DL: dl, VT, OpVT: LHS.getValueType()),
                      N4: DAG.getBoolConstant(V: false, DL: dl, VT, OpVT: LHS.getValueType()),
                      N5: CC, Flags: Node->getFlags());
  }

  Results.push_back(Elt: LHS);
  if (IsStrict)
    Results.push_back(Elt: Chain);
}
2190
2191void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
2192 SmallVectorImpl<SDValue> &Results) {
2193 SDValue Result, Overflow;
2194 TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
2195 Results.push_back(Elt: Result);
2196 Results.push_back(Elt: Overflow);
2197}
2198
2199void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
2200 SmallVectorImpl<SDValue> &Results) {
2201 SDValue Result, Overflow;
2202 TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
2203 Results.push_back(Elt: Result);
2204 Results.push_back(Elt: Overflow);
2205}
2206
2207void VectorLegalizer::ExpandMULO(SDNode *Node,
2208 SmallVectorImpl<SDValue> &Results) {
2209 SDValue Result, Overflow;
2210 if (!TLI.expandMULO(Node, Result, Overflow, DAG))
2211 std::tie(args&: Result, args&: Overflow) = DAG.UnrollVectorOverflowOp(N: Node);
2212
2213 Results.push_back(Elt: Result);
2214 Results.push_back(Elt: Overflow);
2215}
2216
2217void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
2218 SmallVectorImpl<SDValue> &Results) {
2219 SDNode *N = Node;
2220 if (SDValue Expanded = TLI.expandFixedPointDiv(Opcode: N->getOpcode(), dl: SDLoc(N),
2221 LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Scale: N->getConstantOperandVal(Num: 2), DAG))
2222 Results.push_back(Elt: Expanded);
2223}
2224
2225void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
2226 SmallVectorImpl<SDValue> &Results) {
2227 if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
2228 ExpandUINT_TO_FLOAT(Node, Results);
2229 return;
2230 }
2231 if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
2232 ExpandFP_TO_UINT(Node, Results);
2233 return;
2234 }
2235
2236 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2237 Node->getOpcode() == ISD::STRICT_FSETCCS) {
2238 ExpandSETCC(Node, Results);
2239 return;
2240 }
2241
2242 UnrollStrictFPOp(Node, Results);
2243}
2244
2245void VectorLegalizer::ExpandREM(SDNode *Node,
2246 SmallVectorImpl<SDValue> &Results) {
2247 assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
2248 "Expected REM node");
2249
2250 SDValue Result;
2251 if (!TLI.expandREM(Node, Result, DAG))
2252 Result = DAG.UnrollVectorOp(N: Node);
2253 Results.push_back(Elt: Result);
2254}
2255
// Try to expand libm nodes into vector math routine calls. Callers provide the
// LibFunc equivalent of the passed in Node, which is used to lookup mappings
// within TargetLibraryInfo. The only mappings considered are those where the
// result and all operands are the same vector type. While predicated nodes are
// not supported, we will emit calls to masked routines by passing in an all
// true mask.
//
// Returns true (and appends the call result to Results) only when a suitable
// vector libcall implementation exists for LC.
bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
                                           SmallVectorImpl<SDValue> &Results) {
  // Chain must be propagated but currently strict fp operations are down
  // converted to their none strict counterpart.
  assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!");

  RTLIB::LibcallImpl LCImpl = DAG.getLibcalls().getLibcallImpl(Call: LC);
  if (LCImpl == RTLIB::Unsupported)
    return false;

  EVT VT = Node->getValueType(ResNo: 0);
  const RTLIB::RuntimeLibcallsInfo &RTLCI = TLI.getRuntimeLibcallsInfo();
  LLVMContext &Ctx = *DAG.getContext();

  // IR-level signature of the chosen libcall implementation, used both to
  // type-check and to marshal the call arguments below.
  auto [FuncTy, FuncAttrs] = RTLCI.getFunctionTy(
      Ctx, TT: DAG.getSubtarget().getTargetTriple(), DL: DAG.getDataLayout(), LibcallImpl: LCImpl);

  SDLoc DL(Node);
  TargetLowering::ArgListTy Args;

  // Masked routines take an extra trailing i1-vector mask parameter.
  bool HasMaskArg = RTLCI.hasVectorMaskArgument(Impl: LCImpl);

  // Sanity check just in case function has unexpected parameters.
  assert(FuncTy->getNumParams() == Node->getNumOperands() + HasMaskArg &&
         EVT::getEVT(FuncTy->getReturnType(), true) == VT &&
         "mismatch in value type and call signature type");

  for (unsigned I = 0, E = FuncTy->getNumParams(); I != E; ++I) {
    Type *ParamTy = FuncTy->getParamType(i: I);

    if (HasMaskArg && I == E - 1) {
      // Last parameter is the mask: pass an all-true mask since the node
      // itself is unpredicated.
      assert(cast<VectorType>(ParamTy)->getElementType()->isIntegerTy(1) &&
             "unexpected vector mask type");
      EVT MaskVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: Ctx, VT);
      Args.emplace_back(args: DAG.getBoolConstant(V: true, DL, VT: MaskVT, OpVT: VT),
                        args: MaskVT.getTypeForEVT(Context&: Ctx));

    } else {
      SDValue Op = Node->getOperand(Num: I);
      assert(Op.getValueType() == EVT::getEVT(ParamTy, true) &&
             "mismatch in value type and call argument type");
      Args.emplace_back(args&: Op, args&: ParamTy);
    }
  }

  // Emit a call to the vector function.
  SDValue Callee =
      DAG.getExternalSymbol(LCImpl, VT: TLI.getPointerTy(DL: DAG.getDataLayout()));
  CallingConv::ID CC = RTLCI.getLibcallImplCallingConv(Call: LCImpl);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CC, ResultType: FuncTy->getReturnType(), Target: Callee, ArgsList: std::move(Args));

  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
  Results.push_back(Elt: CallResult.first);
  return true;
}
2321
// Scalarize a strict FP vector operation: perform the scalar strict op on
// each element, merge the per-element chains with a TokenFactor, and rebuild
// the vector result. Pushes the result vector and the merged chain.
void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  EVT VT = Node->getValueType(ResNo: 0);
  EVT EltVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();
  unsigned NumOpers = Node->getNumOperands();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Strict FSETCC produces a setcc-typed scalar that is widened back to the
  // element type with a select below.
  EVT TmpEltVT = EltVT;
  if (Node->getOpcode() == ISD::STRICT_FSETCC ||
      Node->getOpcode() == ISD::STRICT_FSETCCS)
    TmpEltVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(),
                                      Context&: *DAG.getContext(), VT: TmpEltVT);

  EVT ValueVTs[] = {TmpEltVT, MVT::Other};
  SDValue Chain = Node->getOperand(Num: 0);
  SDLoc dl(Node);

  SmallVector<SDValue, 32> OpValues;
  SmallVector<SDValue, 32> OpChains;
  for (unsigned i = 0; i < NumElems; ++i) {
    SmallVector<SDValue, 4> Opers;
    SDValue Idx = DAG.getVectorIdxConstant(Val: i, DL: dl);

    // The Chain is the first operand.
    Opers.push_back(Elt: Chain);

    // Now process the remaining operands.
    for (unsigned j = 1; j < NumOpers; ++j) {
      SDValue Oper = Node->getOperand(Num: j);
      EVT OperVT = Oper.getValueType();

      // Vector operands are scalarized; scalar operands (e.g. condition
      // codes) are passed through unchanged.
      if (OperVT.isVector())
        Oper = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl,
                           VT: OperVT.getVectorElementType(), N1: Oper, N2: Idx);

      Opers.push_back(Elt: Oper);
    }

    SDValue ScalarOp = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, ResultTys: ValueVTs, Ops: Opers);
    SDValue ScalarResult = ScalarOp.getValue(R: 0);
    SDValue ScalarChain = ScalarOp.getValue(R: 1);

    // Convert the boolean comparison result to the element type: all-ones
    // for true, zero for false.
    if (Node->getOpcode() == ISD::STRICT_FSETCC ||
        Node->getOpcode() == ISD::STRICT_FSETCCS)
      ScalarResult = DAG.getSelect(DL: dl, VT: EltVT, Cond: ScalarResult,
                                   LHS: DAG.getAllOnesConstant(DL: dl, VT: EltVT),
                                   RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT));

    OpValues.push_back(Elt: ScalarResult);
    OpChains.push_back(Elt: ScalarChain);
  }

  SDValue Result = DAG.getBuildVector(VT, DL: dl, Ops: OpValues);
  SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: OpChains);

  Results.push_back(Elt: Result);
  Results.push_back(Elt: NewChain);
}
2381
2382SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
2383 EVT VT = Node->getValueType(ResNo: 0);
2384 unsigned NumElems = VT.getVectorNumElements();
2385 EVT EltVT = VT.getVectorElementType();
2386 SDValue LHS = Node->getOperand(Num: 0);
2387 SDValue RHS = Node->getOperand(Num: 1);
2388 SDValue CC = Node->getOperand(Num: 2);
2389 EVT TmpEltVT = LHS.getValueType().getVectorElementType();
2390 SDLoc dl(Node);
2391 SmallVector<SDValue, 8> Ops(NumElems);
2392 for (unsigned i = 0; i < NumElems; ++i) {
2393 SDValue LHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: LHS,
2394 N2: DAG.getVectorIdxConstant(Val: i, DL: dl));
2395 SDValue RHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: RHS,
2396 N2: DAG.getVectorIdxConstant(Val: i, DL: dl));
2397 // FIXME: We should use i1 setcc + boolext here, but it causes regressions.
2398 Ops[i] = DAG.getNode(Opcode: ISD::SETCC, DL: dl,
2399 VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(),
2400 Context&: *DAG.getContext(), VT: TmpEltVT),
2401 N1: LHSElem, N2: RHSElem, N3: CC);
2402 Ops[i] = DAG.getSelect(DL: dl, VT: EltVT, Cond: Ops[i],
2403 LHS: DAG.getBoolConstant(V: true, DL: dl, VT: EltVT, OpVT: VT),
2404 RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT));
2405 }
2406 return DAG.getBuildVector(VT, DL: dl, Ops);
2407}
2408
// Entry point for the vector-operation legalization pass over this DAG.
// Return value comes from VectorLegalizer::Run — presumably whether any node
// was changed; see that method for the exact contract.
bool SelectionDAG::LegalizeVectors() {
  return VectorLegalizer(*this).Run();
}
2412