1 | //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the SelectionDAG::LegalizeVectors method. |
10 | // |
11 | // The vector legalizer looks for vector operations which might need to be |
12 | // scalarized and legalizes them. This is a separate step from Legalize because |
13 | // scalarizing can introduce illegal types. For example, suppose we have an |
14 | // ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition |
15 | // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the |
16 | // operation, which introduces nodes with the illegal type i64 which must be |
17 | // expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; |
18 | // the operation must be unrolled, which introduces nodes with the illegal |
19 | // type i8 which must be promoted. |
20 | // |
21 | // This does not legalize vector manipulations like ISD::BUILD_VECTOR, |
22 | // or operations that happen to take a vector which are custom-lowered; |
23 | // the legalization for such operations never produces nodes |
24 | // with illegal types, so it's okay to put off legalizing them until |
25 | // SelectionDAG::Legalize runs. |
26 | // |
27 | //===----------------------------------------------------------------------===// |
28 | |
29 | #include "llvm/ADT/DenseMap.h" |
30 | #include "llvm/ADT/SmallVector.h" |
31 | #include "llvm/Analysis/TargetLibraryInfo.h" |
32 | #include "llvm/Analysis/VectorUtils.h" |
33 | #include "llvm/CodeGen/ISDOpcodes.h" |
34 | #include "llvm/CodeGen/SelectionDAG.h" |
35 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
36 | #include "llvm/CodeGen/TargetLowering.h" |
37 | #include "llvm/CodeGen/ValueTypes.h" |
38 | #include "llvm/CodeGenTypes/MachineValueType.h" |
39 | #include "llvm/IR/DataLayout.h" |
40 | #include "llvm/Support/Casting.h" |
41 | #include "llvm/Support/Compiler.h" |
42 | #include "llvm/Support/Debug.h" |
43 | #include "llvm/Support/ErrorHandling.h" |
44 | #include <cassert> |
45 | #include <cstdint> |
46 | #include <iterator> |
47 | #include <utility> |
48 | |
49 | using namespace llvm; |
50 | |
51 | #define DEBUG_TYPE "legalizevectorops" |
52 | |
53 | namespace { |
54 | |
55 | class VectorLegalizer { |
56 | SelectionDAG& DAG; |
57 | const TargetLowering &TLI; |
58 | bool Changed = false; // Keep track of whether anything changed |
59 | |
60 | /// For nodes that are of legal width, and that have more than one use, this |
61 | /// map indicates what regularized operand to use. This allows us to avoid |
62 | /// legalizing the same thing more than once. |
63 | SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; |
64 | |
65 | /// Adds a node to the translation cache. |
66 | void AddLegalizedOperand(SDValue From, SDValue To) { |
67 | LegalizedNodes.insert(KV: std::make_pair(x&: From, y&: To)); |
68 | // If someone requests legalization of the new node, return itself. |
69 | if (From != To) |
70 | LegalizedNodes.insert(KV: std::make_pair(x&: To, y&: To)); |
71 | } |
72 | |
73 | /// Legalizes the given node. |
74 | SDValue LegalizeOp(SDValue Op); |
75 | |
76 | /// Assuming the node is legal, "legalize" the results. |
77 | SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result); |
78 | |
79 | /// Make sure Results are legal and update the translation cache. |
80 | SDValue RecursivelyLegalizeResults(SDValue Op, |
81 | MutableArrayRef<SDValue> Results); |
82 | |
83 | /// Wrapper to interface LowerOperation with a vector of Results. |
84 | /// Returns false if the target wants to use default expansion. Otherwise |
85 | /// returns true. If return is true and the Results are empty, then the |
86 | /// target wants to keep the input node as is. |
87 | bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results); |
88 | |
89 | /// Implements unrolling a VSETCC. |
90 | SDValue UnrollVSETCC(SDNode *Node); |
91 | |
92 | /// Implement expand-based legalization of vector operations. |
93 | /// |
94 | /// This is just a high-level routine to dispatch to specific code paths for |
95 | /// operations to legalize them. |
96 | void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
97 | |
98 | /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if |
99 | /// FP_TO_SINT isn't legal. |
100 | void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
101 | |
102 | /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if |
103 | /// SINT_TO_FLOAT and SHR on vectors isn't legal. |
104 | void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
105 | |
106 | /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. |
107 | SDValue ExpandSEXTINREG(SDNode *Node); |
108 | |
109 | /// Implement expansion for ANY_EXTEND_VECTOR_INREG. |
110 | /// |
111 | /// Shuffles the low lanes of the operand into place and bitcasts to the proper |
112 | /// type. The contents of the bits in the extended part of each element are |
113 | /// undef. |
114 | SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node); |
115 | |
116 | /// Implement expansion for SIGN_EXTEND_VECTOR_INREG. |
117 | /// |
118 | /// Shuffles the low lanes of the operand into place, bitcasts to the proper |
119 | /// type, then shifts left and arithmetic shifts right to introduce a sign |
120 | /// extension. |
121 | SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node); |
122 | |
123 | /// Implement expansion for ZERO_EXTEND_VECTOR_INREG. |
124 | /// |
125 | /// Shuffles the low lanes of the operand into place and blends zeros into |
126 | /// the remaining lanes, finally bitcasting to the proper type. |
127 | SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node); |
128 | |
129 | /// Expand bswap of vectors into a shuffle if legal. |
130 | SDValue ExpandBSWAP(SDNode *Node); |
131 | |
132 | /// Implement vselect in terms of XOR, AND, OR when blend is not |
133 | /// supported by the target. |
134 | SDValue ExpandVSELECT(SDNode *Node); |
135 | SDValue ExpandVP_SELECT(SDNode *Node); |
136 | SDValue ExpandVP_MERGE(SDNode *Node); |
137 | SDValue ExpandVP_REM(SDNode *Node); |
138 | SDValue ExpandSELECT(SDNode *Node); |
139 | std::pair<SDValue, SDValue> ExpandLoad(SDNode *N); |
140 | SDValue ExpandStore(SDNode *N); |
141 | SDValue ExpandFNEG(SDNode *Node); |
142 | void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
143 | void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
144 | void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
145 | void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
146 | void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
147 | void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
148 | void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
149 | void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
150 | void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
151 | |
152 | bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC, |
153 | SmallVectorImpl<SDValue> &Results); |
154 | bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall Call_F32, |
155 | RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, |
156 | RTLIB::Libcall Call_F128, |
157 | RTLIB::Libcall Call_PPCF128, |
158 | SmallVectorImpl<SDValue> &Results); |
159 | |
160 | void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
161 | |
162 | /// Implements vector promotion. |
163 | /// |
164 | /// This is essentially just bitcasting the operands to a different type and |
165 | /// bitcasting the result back to the original type. |
166 | void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
167 | |
168 | /// Implements [SU]INT_TO_FP vector promotion. |
169 | /// |
170 | /// This is a [zs]ext of the input operand to a larger integer type. |
171 | void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
172 | |
173 | /// Implements FP_TO_[SU]INT vector promotion of the result type. |
174 | /// |
175 | /// It is promoted to a larger integer type. The result is then |
176 | /// truncated back to the original type. |
177 | void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
178 | |
179 | /// Implements vector setcc operation promotion. |
180 | /// |
181 | /// All vector operands are promoted to a vector type with larger element |
182 | /// type. |
183 | void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
184 | |
185 | void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
186 | |
187 | public: |
188 | VectorLegalizer(SelectionDAG& dag) : |
189 | DAG(dag), TLI(dag.getTargetLoweringInfo()) {} |
190 | |
191 | /// Begin legalizer the vector operations in the DAG. |
192 | bool Run(); |
193 | }; |
194 | |
195 | } // end anonymous namespace |
196 | |
197 | bool VectorLegalizer::Run() { |
198 | // Before we start legalizing vector nodes, check if there are any vectors. |
199 | bool HasVectors = false; |
200 | for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), |
201 | E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I) { |
202 | // Check if the values of the nodes contain vectors. We don't need to check |
203 | // the operands because we are going to check their values at some point. |
204 | HasVectors = llvm::any_of(Range: I->values(), P: [](EVT T) { return T.isVector(); }); |
205 | |
206 | // If we found a vector node we can start the legalization. |
207 | if (HasVectors) |
208 | break; |
209 | } |
210 | |
211 | // If this basic block has no vectors then no need to legalize vectors. |
212 | if (!HasVectors) |
213 | return false; |
214 | |
215 | // The legalize process is inherently a bottom-up recursive process (users |
216 | // legalize their uses before themselves). Given infinite stack space, we |
217 | // could just start legalizing on the root and traverse the whole graph. In |
218 | // practice however, this causes us to run out of stack space on large basic |
219 | // blocks. To avoid this problem, compute an ordering of the nodes where each |
220 | // node is only legalized after all of its operands are legalized. |
221 | DAG.AssignTopologicalOrder(); |
222 | for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), |
223 | E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I) |
224 | LegalizeOp(Op: SDValue(&*I, 0)); |
225 | |
226 | // Finally, it's possible the root changed. Get the new root. |
227 | SDValue OldRoot = DAG.getRoot(); |
228 | assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?" ); |
229 | DAG.setRoot(LegalizedNodes[OldRoot]); |
230 | |
231 | LegalizedNodes.clear(); |
232 | |
233 | // Remove dead nodes now. |
234 | DAG.RemoveDeadNodes(); |
235 | |
236 | return Changed; |
237 | } |
238 | |
239 | SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) { |
240 | assert(Op->getNumValues() == Result->getNumValues() && |
241 | "Unexpected number of results" ); |
242 | // Generic legalization: just pass the operand through. |
243 | for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i) |
244 | AddLegalizedOperand(From: Op.getValue(R: i), To: SDValue(Result, i)); |
245 | return SDValue(Result, Op.getResNo()); |
246 | } |
247 | |
248 | SDValue |
249 | VectorLegalizer::RecursivelyLegalizeResults(SDValue Op, |
250 | MutableArrayRef<SDValue> Results) { |
251 | assert(Results.size() == Op->getNumValues() && |
252 | "Unexpected number of results" ); |
253 | // Make sure that the generated code is itself legal. |
254 | for (unsigned i = 0, e = Results.size(); i != e; ++i) { |
255 | Results[i] = LegalizeOp(Op: Results[i]); |
256 | AddLegalizedOperand(From: Op.getValue(R: i), To: Results[i]); |
257 | } |
258 | |
259 | return Results[Op.getResNo()]; |
260 | } |
261 | |
262 | SDValue VectorLegalizer::LegalizeOp(SDValue Op) { |
263 | // Note that LegalizeOp may be reentered even from single-use nodes, which |
264 | // means that we always must cache transformed nodes. |
265 | DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Val: Op); |
266 | if (I != LegalizedNodes.end()) return I->second; |
267 | |
268 | // Legalize the operands |
269 | SmallVector<SDValue, 8> Ops; |
270 | for (const SDValue &Oper : Op->op_values()) |
271 | Ops.push_back(Elt: LegalizeOp(Op: Oper)); |
272 | |
273 | SDNode *Node = DAG.UpdateNodeOperands(N: Op.getNode(), Ops); |
274 | |
275 | bool HasVectorValueOrOp = |
276 | llvm::any_of(Range: Node->values(), P: [](EVT T) { return T.isVector(); }) || |
277 | llvm::any_of(Range: Node->op_values(), |
278 | P: [](SDValue O) { return O.getValueType().isVector(); }); |
279 | if (!HasVectorValueOrOp) |
280 | return TranslateLegalizeResults(Op, Result: Node); |
281 | |
282 | TargetLowering::LegalizeAction Action = TargetLowering::Legal; |
283 | EVT ValVT; |
284 | switch (Op.getOpcode()) { |
285 | default: |
286 | return TranslateLegalizeResults(Op, Result: Node); |
287 | case ISD::LOAD: { |
288 | LoadSDNode *LD = cast<LoadSDNode>(Val: Node); |
289 | ISD::LoadExtType ExtType = LD->getExtensionType(); |
290 | EVT LoadedVT = LD->getMemoryVT(); |
291 | if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD) |
292 | Action = TLI.getLoadExtAction(ExtType, ValVT: LD->getValueType(ResNo: 0), MemVT: LoadedVT); |
293 | break; |
294 | } |
295 | case ISD::STORE: { |
296 | StoreSDNode *ST = cast<StoreSDNode>(Val: Node); |
297 | EVT StVT = ST->getMemoryVT(); |
298 | MVT ValVT = ST->getValue().getSimpleValueType(); |
299 | if (StVT.isVector() && ST->isTruncatingStore()) |
300 | Action = TLI.getTruncStoreAction(ValVT, MemVT: StVT); |
301 | break; |
302 | } |
303 | case ISD::MERGE_VALUES: |
304 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0)); |
305 | // This operation lies about being legal: when it claims to be legal, |
306 | // it should actually be expanded. |
307 | if (Action == TargetLowering::Legal) |
308 | Action = TargetLowering::Expand; |
309 | break; |
310 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
311 | case ISD::STRICT_##DAGN: |
312 | #include "llvm/IR/ConstrainedOps.def" |
313 | ValVT = Node->getValueType(ResNo: 0); |
314 | if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP || |
315 | Op.getOpcode() == ISD::STRICT_UINT_TO_FP) |
316 | ValVT = Node->getOperand(Num: 1).getValueType(); |
317 | if (Op.getOpcode() == ISD::STRICT_FSETCC || |
318 | Op.getOpcode() == ISD::STRICT_FSETCCS) { |
319 | MVT OpVT = Node->getOperand(Num: 1).getSimpleValueType(); |
320 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 3))->get(); |
321 | Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT); |
322 | if (Action == TargetLowering::Legal) |
323 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT); |
324 | } else { |
325 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: ValVT); |
326 | } |
327 | // If we're asked to expand a strict vector floating-point operation, |
328 | // by default we're going to simply unroll it. That is usually the |
329 | // best approach, except in the case where the resulting strict (scalar) |
330 | // operations would themselves use the fallback mutation to non-strict. |
331 | // In that specific case, just do the fallback on the vector op. |
332 | if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() && |
333 | TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: ValVT) == |
334 | TargetLowering::Legal) { |
335 | EVT EltVT = ValVT.getVectorElementType(); |
336 | if (TLI.getOperationAction(Op: Node->getOpcode(), VT: EltVT) |
337 | == TargetLowering::Expand && |
338 | TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: EltVT) |
339 | == TargetLowering::Legal) |
340 | Action = TargetLowering::Legal; |
341 | } |
342 | break; |
343 | case ISD::ADD: |
344 | case ISD::SUB: |
345 | case ISD::MUL: |
346 | case ISD::MULHS: |
347 | case ISD::MULHU: |
348 | case ISD::SDIV: |
349 | case ISD::UDIV: |
350 | case ISD::SREM: |
351 | case ISD::UREM: |
352 | case ISD::SDIVREM: |
353 | case ISD::UDIVREM: |
354 | case ISD::FADD: |
355 | case ISD::FSUB: |
356 | case ISD::FMUL: |
357 | case ISD::FDIV: |
358 | case ISD::FREM: |
359 | case ISD::AND: |
360 | case ISD::OR: |
361 | case ISD::XOR: |
362 | case ISD::SHL: |
363 | case ISD::SRA: |
364 | case ISD::SRL: |
365 | case ISD::FSHL: |
366 | case ISD::FSHR: |
367 | case ISD::ROTL: |
368 | case ISD::ROTR: |
369 | case ISD::ABS: |
370 | case ISD::ABDS: |
371 | case ISD::ABDU: |
372 | case ISD::AVGCEILS: |
373 | case ISD::AVGCEILU: |
374 | case ISD::AVGFLOORS: |
375 | case ISD::AVGFLOORU: |
376 | case ISD::BSWAP: |
377 | case ISD::BITREVERSE: |
378 | case ISD::CTLZ: |
379 | case ISD::CTTZ: |
380 | case ISD::CTLZ_ZERO_UNDEF: |
381 | case ISD::CTTZ_ZERO_UNDEF: |
382 | case ISD::CTPOP: |
383 | case ISD::SELECT: |
384 | case ISD::VSELECT: |
385 | case ISD::SELECT_CC: |
386 | case ISD::ZERO_EXTEND: |
387 | case ISD::ANY_EXTEND: |
388 | case ISD::TRUNCATE: |
389 | case ISD::SIGN_EXTEND: |
390 | case ISD::FP_TO_SINT: |
391 | case ISD::FP_TO_UINT: |
392 | case ISD::FNEG: |
393 | case ISD::FABS: |
394 | case ISD::FMINNUM: |
395 | case ISD::FMAXNUM: |
396 | case ISD::FMINNUM_IEEE: |
397 | case ISD::FMAXNUM_IEEE: |
398 | case ISD::FMINIMUM: |
399 | case ISD::FMAXIMUM: |
400 | case ISD::FCOPYSIGN: |
401 | case ISD::FSQRT: |
402 | case ISD::FSIN: |
403 | case ISD::FCOS: |
404 | case ISD::FTAN: |
405 | case ISD::FASIN: |
406 | case ISD::FACOS: |
407 | case ISD::FATAN: |
408 | case ISD::FSINH: |
409 | case ISD::FCOSH: |
410 | case ISD::FTANH: |
411 | case ISD::FLDEXP: |
412 | case ISD::FPOWI: |
413 | case ISD::FPOW: |
414 | case ISD::FLOG: |
415 | case ISD::FLOG2: |
416 | case ISD::FLOG10: |
417 | case ISD::FEXP: |
418 | case ISD::FEXP2: |
419 | case ISD::FEXP10: |
420 | case ISD::FCEIL: |
421 | case ISD::FTRUNC: |
422 | case ISD::FRINT: |
423 | case ISD::FNEARBYINT: |
424 | case ISD::FROUND: |
425 | case ISD::FROUNDEVEN: |
426 | case ISD::FFLOOR: |
427 | case ISD::FP_ROUND: |
428 | case ISD::FP_EXTEND: |
429 | case ISD::FPTRUNC_ROUND: |
430 | case ISD::FMA: |
431 | case ISD::SIGN_EXTEND_INREG: |
432 | case ISD::ANY_EXTEND_VECTOR_INREG: |
433 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
434 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
435 | case ISD::SMIN: |
436 | case ISD::SMAX: |
437 | case ISD::UMIN: |
438 | case ISD::UMAX: |
439 | case ISD::SMUL_LOHI: |
440 | case ISD::UMUL_LOHI: |
441 | case ISD::SADDO: |
442 | case ISD::UADDO: |
443 | case ISD::SSUBO: |
444 | case ISD::USUBO: |
445 | case ISD::SMULO: |
446 | case ISD::UMULO: |
447 | case ISD::FCANONICALIZE: |
448 | case ISD::FFREXP: |
449 | case ISD::SADDSAT: |
450 | case ISD::UADDSAT: |
451 | case ISD::SSUBSAT: |
452 | case ISD::USUBSAT: |
453 | case ISD::SSHLSAT: |
454 | case ISD::USHLSAT: |
455 | case ISD::FP_TO_SINT_SAT: |
456 | case ISD::FP_TO_UINT_SAT: |
457 | case ISD::MGATHER: |
458 | case ISD::VECTOR_COMPRESS: |
459 | case ISD::SCMP: |
460 | case ISD::UCMP: |
461 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0)); |
462 | break; |
463 | case ISD::SMULFIX: |
464 | case ISD::SMULFIXSAT: |
465 | case ISD::UMULFIX: |
466 | case ISD::UMULFIXSAT: |
467 | case ISD::SDIVFIX: |
468 | case ISD::SDIVFIXSAT: |
469 | case ISD::UDIVFIX: |
470 | case ISD::UDIVFIXSAT: { |
471 | unsigned Scale = Node->getConstantOperandVal(Num: 2); |
472 | Action = TLI.getFixedPointOperationAction(Op: Node->getOpcode(), |
473 | VT: Node->getValueType(ResNo: 0), Scale); |
474 | break; |
475 | } |
476 | case ISD::LRINT: |
477 | case ISD::LLRINT: |
478 | case ISD::SINT_TO_FP: |
479 | case ISD::UINT_TO_FP: |
480 | case ISD::VECREDUCE_ADD: |
481 | case ISD::VECREDUCE_MUL: |
482 | case ISD::VECREDUCE_AND: |
483 | case ISD::VECREDUCE_OR: |
484 | case ISD::VECREDUCE_XOR: |
485 | case ISD::VECREDUCE_SMAX: |
486 | case ISD::VECREDUCE_SMIN: |
487 | case ISD::VECREDUCE_UMAX: |
488 | case ISD::VECREDUCE_UMIN: |
489 | case ISD::VECREDUCE_FADD: |
490 | case ISD::VECREDUCE_FMUL: |
491 | case ISD::VECREDUCE_FMAX: |
492 | case ISD::VECREDUCE_FMIN: |
493 | case ISD::VECREDUCE_FMAXIMUM: |
494 | case ISD::VECREDUCE_FMINIMUM: |
495 | Action = TLI.getOperationAction(Op: Node->getOpcode(), |
496 | VT: Node->getOperand(Num: 0).getValueType()); |
497 | break; |
498 | case ISD::VECREDUCE_SEQ_FADD: |
499 | case ISD::VECREDUCE_SEQ_FMUL: |
500 | Action = TLI.getOperationAction(Op: Node->getOpcode(), |
501 | VT: Node->getOperand(Num: 1).getValueType()); |
502 | break; |
503 | case ISD::SETCC: { |
504 | MVT OpVT = Node->getOperand(Num: 0).getSimpleValueType(); |
505 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 2))->get(); |
506 | Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT); |
507 | if (Action == TargetLowering::Legal) |
508 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT); |
509 | break; |
510 | } |
511 | |
512 | #define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \ |
513 | case ISD::VPID: { \ |
514 | EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \ |
515 | : Node->getOperand(LEGALPOS).getValueType(); \ |
516 | if (ISD::VPID == ISD::VP_SETCC) { \ |
517 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \ |
518 | Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \ |
519 | if (Action != TargetLowering::Legal) \ |
520 | break; \ |
521 | } \ |
522 | /* Defer non-vector results to LegalizeDAG. */ \ |
523 | if (!Node->getValueType(0).isVector() && \ |
524 | Node->getValueType(0) != MVT::Other) { \ |
525 | Action = TargetLowering::Legal; \ |
526 | break; \ |
527 | } \ |
528 | Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \ |
529 | } break; |
530 | #include "llvm/IR/VPIntrinsics.def" |
531 | } |
532 | |
533 | LLVM_DEBUG(dbgs() << "\nLegalizing vector op: " ; Node->dump(&DAG)); |
534 | |
535 | SmallVector<SDValue, 8> ResultVals; |
536 | switch (Action) { |
537 | default: llvm_unreachable("This action is not supported yet!" ); |
538 | case TargetLowering::Promote: |
539 | assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) && |
540 | "This action is not supported yet!" ); |
541 | LLVM_DEBUG(dbgs() << "Promoting\n" ); |
542 | Promote(Node, Results&: ResultVals); |
543 | assert(!ResultVals.empty() && "No results for promotion?" ); |
544 | break; |
545 | case TargetLowering::Legal: |
546 | LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n" ); |
547 | break; |
548 | case TargetLowering::Custom: |
549 | LLVM_DEBUG(dbgs() << "Trying custom legalization\n" ); |
550 | if (LowerOperationWrapper(N: Node, Results&: ResultVals)) |
551 | break; |
552 | LLVM_DEBUG(dbgs() << "Could not custom legalize node\n" ); |
553 | [[fallthrough]]; |
554 | case TargetLowering::Expand: |
555 | LLVM_DEBUG(dbgs() << "Expanding\n" ); |
556 | Expand(Node, Results&: ResultVals); |
557 | break; |
558 | } |
559 | |
560 | if (ResultVals.empty()) |
561 | return TranslateLegalizeResults(Op, Result: Node); |
562 | |
563 | Changed = true; |
564 | return RecursivelyLegalizeResults(Op, Results: ResultVals); |
565 | } |
566 | |
567 | // FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we |
568 | // merge them somehow? |
569 | bool VectorLegalizer::LowerOperationWrapper(SDNode *Node, |
570 | SmallVectorImpl<SDValue> &Results) { |
571 | SDValue Res = TLI.LowerOperation(Op: SDValue(Node, 0), DAG); |
572 | |
573 | if (!Res.getNode()) |
574 | return false; |
575 | |
576 | if (Res == SDValue(Node, 0)) |
577 | return true; |
578 | |
579 | // If the original node has one result, take the return value from |
580 | // LowerOperation as is. It might not be result number 0. |
581 | if (Node->getNumValues() == 1) { |
582 | Results.push_back(Elt: Res); |
583 | return true; |
584 | } |
585 | |
586 | // If the original node has multiple results, then the return node should |
587 | // have the same number of results. |
588 | assert((Node->getNumValues() == Res->getNumValues()) && |
589 | "Lowering returned the wrong number of results!" ); |
590 | |
591 | // Places new result values base on N result number. |
592 | for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I) |
593 | Results.push_back(Elt: Res.getValue(R: I)); |
594 | |
595 | return true; |
596 | } |
597 | |
598 | void VectorLegalizer::PromoteSETCC(SDNode *Node, |
599 | SmallVectorImpl<SDValue> &Results) { |
600 | MVT VecVT = Node->getOperand(Num: 0).getSimpleValueType(); |
601 | MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT); |
602 | |
603 | unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND; |
604 | |
605 | SDLoc DL(Node); |
606 | SmallVector<SDValue, 5> Operands(Node->getNumOperands()); |
607 | |
608 | Operands[0] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 0)); |
609 | Operands[1] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 1)); |
610 | Operands[2] = Node->getOperand(Num: 2); |
611 | |
612 | if (Node->getOpcode() == ISD::VP_SETCC) { |
613 | Operands[3] = Node->getOperand(Num: 3); // mask |
614 | Operands[4] = Node->getOperand(Num: 4); // evl |
615 | } |
616 | |
617 | SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL, VT: Node->getSimpleValueType(ResNo: 0), |
618 | Ops: Operands, Flags: Node->getFlags()); |
619 | |
620 | Results.push_back(Elt: Res); |
621 | } |
622 | |
623 | void VectorLegalizer::PromoteSTRICT(SDNode *Node, |
624 | SmallVectorImpl<SDValue> &Results) { |
625 | MVT VecVT = Node->getOperand(Num: 1).getSimpleValueType(); |
626 | MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT); |
627 | |
628 | assert(VecVT.isFloatingPoint()); |
629 | |
630 | SDLoc DL(Node); |
631 | SmallVector<SDValue, 5> Operands(Node->getNumOperands()); |
632 | SmallVector<SDValue, 2> Chains; |
633 | |
634 | for (unsigned j = 1; j != Node->getNumOperands(); ++j) |
635 | if (Node->getOperand(Num: j).getValueType().isVector() && |
636 | !(ISD::isVPOpcode(Opcode: Node->getOpcode()) && |
637 | ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j)) // Skip mask operand. |
638 | { |
639 | // promote the vector operand. |
640 | SDValue Ext = |
641 | DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL, ResultTys: {NewVecVT, MVT::Other}, |
642 | Ops: {Node->getOperand(Num: 0), Node->getOperand(Num: j)}); |
643 | Operands[j] = Ext.getValue(R: 0); |
644 | Chains.push_back(Elt: Ext.getValue(R: 1)); |
645 | } else |
646 | Operands[j] = Node->getOperand(Num: j); // Skip no vector operand. |
647 | |
648 | SDVTList VTs = DAG.getVTList(VT1: NewVecVT, VT2: Node->getValueType(ResNo: 1)); |
649 | |
650 | Operands[0] = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: Chains); |
651 | |
652 | SDValue Res = |
653 | DAG.getNode(Opcode: Node->getOpcode(), DL, VTList: VTs, Ops: Operands, Flags: Node->getFlags()); |
654 | |
655 | SDValue Round = |
656 | DAG.getNode(Opcode: ISD::STRICT_FP_ROUND, DL, ResultTys: {VecVT, MVT::Other}, |
657 | Ops: {Res.getValue(R: 1), Res.getValue(R: 0), |
658 | DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)}); |
659 | |
660 | Results.push_back(Elt: Round.getValue(R: 0)); |
661 | Results.push_back(Elt: Round.getValue(R: 1)); |
662 | } |
663 | |
664 | void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) { |
665 | // For a few operations there is a specific concept for promotion based on |
666 | // the operand's type. |
667 | switch (Node->getOpcode()) { |
668 | case ISD::SINT_TO_FP: |
669 | case ISD::UINT_TO_FP: |
670 | case ISD::STRICT_SINT_TO_FP: |
671 | case ISD::STRICT_UINT_TO_FP: |
672 | // "Promote" the operation by extending the operand. |
673 | PromoteINT_TO_FP(Node, Results); |
674 | return; |
675 | case ISD::FP_TO_UINT: |
676 | case ISD::FP_TO_SINT: |
677 | case ISD::STRICT_FP_TO_UINT: |
678 | case ISD::STRICT_FP_TO_SINT: |
679 | // Promote the operation by extending the operand. |
680 | PromoteFP_TO_INT(Node, Results); |
681 | return; |
682 | case ISD::VP_SETCC: |
683 | case ISD::SETCC: |
684 | // Promote the operation by extending the operand. |
685 | PromoteSETCC(Node, Results); |
686 | return; |
687 | case ISD::STRICT_FADD: |
688 | case ISD::STRICT_FSUB: |
689 | case ISD::STRICT_FMUL: |
690 | case ISD::STRICT_FDIV: |
691 | case ISD::STRICT_FSQRT: |
692 | case ISD::STRICT_FMA: |
693 | PromoteSTRICT(Node, Results); |
694 | return; |
695 | case ISD::FP_ROUND: |
696 | case ISD::FP_EXTEND: |
697 | // These operations are used to do promotion so they can't be promoted |
698 | // themselves. |
699 | llvm_unreachable("Don't know how to promote this operation!" ); |
700 | } |
701 | |
702 | // There are currently two cases of vector promotion: |
703 | // 1) Bitcasting a vector of integers to a different type to a vector of the |
704 | // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. |
705 | // 2) Extending a vector of floats to a vector of the same number of larger |
706 | // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. |
707 | assert(Node->getNumValues() == 1 && |
708 | "Can't promote a vector with multiple results!" ); |
709 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
710 | MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT); |
711 | SDLoc dl(Node); |
712 | SmallVector<SDValue, 4> Operands(Node->getNumOperands()); |
713 | |
714 | for (unsigned j = 0; j != Node->getNumOperands(); ++j) { |
715 | // Do not promote the mask operand of a VP OP. |
716 | bool SkipPromote = ISD::isVPOpcode(Opcode: Node->getOpcode()) && |
717 | ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j; |
718 | if (Node->getOperand(Num: j).getValueType().isVector() && !SkipPromote) |
719 | if (Node->getOperand(Num: j) |
720 | .getValueType() |
721 | .getVectorElementType() |
722 | .isFloatingPoint() && |
723 | NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()) |
724 | Operands[j] = DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j)); |
725 | else |
726 | Operands[j] = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j)); |
727 | else |
728 | Operands[j] = Node->getOperand(Num: j); |
729 | } |
730 | |
731 | SDValue Res = |
732 | DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: NVT, Ops: Operands, Flags: Node->getFlags()); |
733 | |
734 | if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || |
735 | (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && |
736 | NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) |
737 | Res = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT, N1: Res, |
738 | N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true)); |
739 | else |
740 | Res = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Res); |
741 | |
742 | Results.push_back(Elt: Res); |
743 | } |
744 | |
745 | void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node, |
746 | SmallVectorImpl<SDValue> &Results) { |
747 | // INT_TO_FP operations may require the input operand be promoted even |
748 | // when the type is otherwise legal. |
749 | bool IsStrict = Node->isStrictFPOpcode(); |
750 | MVT VT = Node->getOperand(Num: IsStrict ? 1 : 0).getSimpleValueType(); |
751 | MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT); |
752 | assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && |
753 | "Vectors have different number of elements!" ); |
754 | |
755 | SDLoc dl(Node); |
756 | SmallVector<SDValue, 4> Operands(Node->getNumOperands()); |
757 | |
758 | unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP || |
759 | Node->getOpcode() == ISD::STRICT_UINT_TO_FP) |
760 | ? ISD::ZERO_EXTEND |
761 | : ISD::SIGN_EXTEND; |
762 | for (unsigned j = 0; j != Node->getNumOperands(); ++j) { |
763 | if (Node->getOperand(Num: j).getValueType().isVector()) |
764 | Operands[j] = DAG.getNode(Opcode: Opc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j)); |
765 | else |
766 | Operands[j] = Node->getOperand(Num: j); |
767 | } |
768 | |
769 | if (IsStrict) { |
770 | SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, |
771 | ResultTys: {Node->getValueType(ResNo: 0), MVT::Other}, Ops: Operands); |
772 | Results.push_back(Elt: Res); |
773 | Results.push_back(Elt: Res.getValue(R: 1)); |
774 | return; |
775 | } |
776 | |
777 | SDValue Res = |
778 | DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: Node->getValueType(ResNo: 0), Ops: Operands); |
779 | Results.push_back(Elt: Res); |
780 | } |
781 | |
782 | // For FP_TO_INT we promote the result type to a vector type with wider |
783 | // elements and then truncate the result. This is different from the default |
784 | // PromoteVector which uses bitcast to promote thus assumning that the |
785 | // promoted vector type has the same overall size. |
786 | void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node, |
787 | SmallVectorImpl<SDValue> &Results) { |
788 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
789 | MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT); |
790 | bool IsStrict = Node->isStrictFPOpcode(); |
791 | assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && |
792 | "Vectors have different number of elements!" ); |
793 | |
794 | unsigned NewOpc = Node->getOpcode(); |
795 | // Change FP_TO_UINT to FP_TO_SINT if possible. |
796 | // TODO: Should we only do this if FP_TO_UINT itself isn't legal? |
797 | if (NewOpc == ISD::FP_TO_UINT && |
798 | TLI.isOperationLegalOrCustom(Op: ISD::FP_TO_SINT, VT: NVT)) |
799 | NewOpc = ISD::FP_TO_SINT; |
800 | |
801 | if (NewOpc == ISD::STRICT_FP_TO_UINT && |
802 | TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FP_TO_SINT, VT: NVT)) |
803 | NewOpc = ISD::STRICT_FP_TO_SINT; |
804 | |
805 | SDLoc dl(Node); |
806 | SDValue Promoted, Chain; |
807 | if (IsStrict) { |
808 | Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, ResultTys: {NVT, MVT::Other}, |
809 | Ops: {Node->getOperand(Num: 0), Node->getOperand(Num: 1)}); |
810 | Chain = Promoted.getValue(R: 1); |
811 | } else |
812 | Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: 0)); |
813 | |
814 | // Assert that the converted value fits in the original type. If it doesn't |
815 | // (eg: because the value being converted is too big), then the result of the |
816 | // original operation was undefined anyway, so the assert is still correct. |
817 | if (Node->getOpcode() == ISD::FP_TO_UINT || |
818 | Node->getOpcode() == ISD::STRICT_FP_TO_UINT) |
819 | NewOpc = ISD::AssertZext; |
820 | else |
821 | NewOpc = ISD::AssertSext; |
822 | |
823 | Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, N1: Promoted, |
824 | N2: DAG.getValueType(VT.getScalarType())); |
825 | Promoted = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Promoted); |
826 | Results.push_back(Elt: Promoted); |
827 | if (IsStrict) |
828 | Results.push_back(Elt: Chain); |
829 | } |
830 | |
831 | std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) { |
832 | LoadSDNode *LD = cast<LoadSDNode>(Val: N); |
833 | return TLI.scalarizeVectorLoad(LD, DAG); |
834 | } |
835 | |
836 | SDValue VectorLegalizer::ExpandStore(SDNode *N) { |
837 | StoreSDNode *ST = cast<StoreSDNode>(Val: N); |
838 | SDValue TF = TLI.scalarizeVectorStore(ST, DAG); |
839 | return TF; |
840 | } |
841 | |
842 | void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { |
843 | switch (Node->getOpcode()) { |
844 | case ISD::LOAD: { |
845 | std::pair<SDValue, SDValue> Tmp = ExpandLoad(N: Node); |
846 | Results.push_back(Elt: Tmp.first); |
847 | Results.push_back(Elt: Tmp.second); |
848 | return; |
849 | } |
850 | case ISD::STORE: |
851 | Results.push_back(Elt: ExpandStore(N: Node)); |
852 | return; |
853 | case ISD::MERGE_VALUES: |
854 | for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) |
855 | Results.push_back(Elt: Node->getOperand(Num: i)); |
856 | return; |
857 | case ISD::SIGN_EXTEND_INREG: |
858 | Results.push_back(Elt: ExpandSEXTINREG(Node)); |
859 | return; |
860 | case ISD::ANY_EXTEND_VECTOR_INREG: |
861 | Results.push_back(Elt: ExpandANY_EXTEND_VECTOR_INREG(Node)); |
862 | return; |
863 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
864 | Results.push_back(Elt: ExpandSIGN_EXTEND_VECTOR_INREG(Node)); |
865 | return; |
866 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
867 | Results.push_back(Elt: ExpandZERO_EXTEND_VECTOR_INREG(Node)); |
868 | return; |
869 | case ISD::BSWAP: |
870 | Results.push_back(Elt: ExpandBSWAP(Node)); |
871 | return; |
872 | case ISD::VP_BSWAP: |
873 | Results.push_back(Elt: TLI.expandVPBSWAP(N: Node, DAG)); |
874 | return; |
875 | case ISD::VSELECT: |
876 | Results.push_back(Elt: ExpandVSELECT(Node)); |
877 | return; |
878 | case ISD::VP_SELECT: |
879 | Results.push_back(Elt: ExpandVP_SELECT(Node)); |
880 | return; |
881 | case ISD::VP_SREM: |
882 | case ISD::VP_UREM: |
883 | if (SDValue Expanded = ExpandVP_REM(Node)) { |
884 | Results.push_back(Elt: Expanded); |
885 | return; |
886 | } |
887 | break; |
888 | case ISD::SELECT: |
889 | Results.push_back(Elt: ExpandSELECT(Node)); |
890 | return; |
891 | case ISD::SELECT_CC: { |
892 | if (Node->getValueType(ResNo: 0).isScalableVector()) { |
893 | EVT CondVT = TLI.getSetCCResultType( |
894 | DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0)); |
895 | SDValue SetCC = |
896 | DAG.getNode(Opcode: ISD::SETCC, DL: SDLoc(Node), VT: CondVT, N1: Node->getOperand(Num: 0), |
897 | N2: Node->getOperand(Num: 1), N3: Node->getOperand(Num: 4)); |
898 | Results.push_back(Elt: DAG.getSelect(DL: SDLoc(Node), VT: Node->getValueType(ResNo: 0), Cond: SetCC, |
899 | LHS: Node->getOperand(Num: 2), |
900 | RHS: Node->getOperand(Num: 3))); |
901 | return; |
902 | } |
903 | break; |
904 | } |
905 | case ISD::FP_TO_UINT: |
906 | ExpandFP_TO_UINT(Node, Results); |
907 | return; |
908 | case ISD::UINT_TO_FP: |
909 | ExpandUINT_TO_FLOAT(Node, Results); |
910 | return; |
911 | case ISD::FNEG: |
912 | Results.push_back(Elt: ExpandFNEG(Node)); |
913 | return; |
914 | case ISD::FSUB: |
915 | ExpandFSUB(Node, Results); |
916 | return; |
917 | case ISD::SETCC: |
918 | case ISD::VP_SETCC: |
919 | ExpandSETCC(Node, Results); |
920 | return; |
921 | case ISD::ABS: |
922 | if (SDValue Expanded = TLI.expandABS(N: Node, DAG)) { |
923 | Results.push_back(Elt: Expanded); |
924 | return; |
925 | } |
926 | break; |
927 | case ISD::ABDS: |
928 | case ISD::ABDU: |
929 | if (SDValue Expanded = TLI.expandABD(N: Node, DAG)) { |
930 | Results.push_back(Elt: Expanded); |
931 | return; |
932 | } |
933 | break; |
934 | case ISD::AVGCEILS: |
935 | case ISD::AVGCEILU: |
936 | case ISD::AVGFLOORS: |
937 | case ISD::AVGFLOORU: |
938 | if (SDValue Expanded = TLI.expandAVG(N: Node, DAG)) { |
939 | Results.push_back(Elt: Expanded); |
940 | return; |
941 | } |
942 | break; |
943 | case ISD::BITREVERSE: |
944 | ExpandBITREVERSE(Node, Results); |
945 | return; |
946 | case ISD::VP_BITREVERSE: |
947 | if (SDValue Expanded = TLI.expandVPBITREVERSE(N: Node, DAG)) { |
948 | Results.push_back(Elt: Expanded); |
949 | return; |
950 | } |
951 | break; |
952 | case ISD::CTPOP: |
953 | if (SDValue Expanded = TLI.expandCTPOP(N: Node, DAG)) { |
954 | Results.push_back(Elt: Expanded); |
955 | return; |
956 | } |
957 | break; |
958 | case ISD::VP_CTPOP: |
959 | if (SDValue Expanded = TLI.expandVPCTPOP(N: Node, DAG)) { |
960 | Results.push_back(Elt: Expanded); |
961 | return; |
962 | } |
963 | break; |
964 | case ISD::CTLZ: |
965 | case ISD::CTLZ_ZERO_UNDEF: |
966 | if (SDValue Expanded = TLI.expandCTLZ(N: Node, DAG)) { |
967 | Results.push_back(Elt: Expanded); |
968 | return; |
969 | } |
970 | break; |
971 | case ISD::VP_CTLZ: |
972 | case ISD::VP_CTLZ_ZERO_UNDEF: |
973 | if (SDValue Expanded = TLI.expandVPCTLZ(N: Node, DAG)) { |
974 | Results.push_back(Elt: Expanded); |
975 | return; |
976 | } |
977 | break; |
978 | case ISD::CTTZ: |
979 | case ISD::CTTZ_ZERO_UNDEF: |
980 | if (SDValue Expanded = TLI.expandCTTZ(N: Node, DAG)) { |
981 | Results.push_back(Elt: Expanded); |
982 | return; |
983 | } |
984 | break; |
985 | case ISD::VP_CTTZ: |
986 | case ISD::VP_CTTZ_ZERO_UNDEF: |
987 | if (SDValue Expanded = TLI.expandVPCTTZ(N: Node, DAG)) { |
988 | Results.push_back(Elt: Expanded); |
989 | return; |
990 | } |
991 | break; |
992 | case ISD::FSHL: |
993 | case ISD::VP_FSHL: |
994 | case ISD::FSHR: |
995 | case ISD::VP_FSHR: |
996 | if (SDValue Expanded = TLI.expandFunnelShift(N: Node, DAG)) { |
997 | Results.push_back(Elt: Expanded); |
998 | return; |
999 | } |
1000 | break; |
1001 | case ISD::ROTL: |
1002 | case ISD::ROTR: |
1003 | if (SDValue Expanded = TLI.expandROT(N: Node, AllowVectorOps: false /*AllowVectorOps*/, DAG)) { |
1004 | Results.push_back(Elt: Expanded); |
1005 | return; |
1006 | } |
1007 | break; |
1008 | case ISD::FMINNUM: |
1009 | case ISD::FMAXNUM: |
1010 | if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(N: Node, DAG)) { |
1011 | Results.push_back(Elt: Expanded); |
1012 | return; |
1013 | } |
1014 | break; |
1015 | case ISD::FMINIMUM: |
1016 | case ISD::FMAXIMUM: |
1017 | Results.push_back(Elt: TLI.expandFMINIMUM_FMAXIMUM(N: Node, DAG)); |
1018 | return; |
1019 | case ISD::SMIN: |
1020 | case ISD::SMAX: |
1021 | case ISD::UMIN: |
1022 | case ISD::UMAX: |
1023 | if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) { |
1024 | Results.push_back(Elt: Expanded); |
1025 | return; |
1026 | } |
1027 | break; |
1028 | case ISD::UADDO: |
1029 | case ISD::USUBO: |
1030 | ExpandUADDSUBO(Node, Results); |
1031 | return; |
1032 | case ISD::SADDO: |
1033 | case ISD::SSUBO: |
1034 | ExpandSADDSUBO(Node, Results); |
1035 | return; |
1036 | case ISD::UMULO: |
1037 | case ISD::SMULO: |
1038 | ExpandMULO(Node, Results); |
1039 | return; |
1040 | case ISD::USUBSAT: |
1041 | case ISD::SSUBSAT: |
1042 | case ISD::UADDSAT: |
1043 | case ISD::SADDSAT: |
1044 | if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) { |
1045 | Results.push_back(Elt: Expanded); |
1046 | return; |
1047 | } |
1048 | break; |
1049 | case ISD::USHLSAT: |
1050 | case ISD::SSHLSAT: |
1051 | if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) { |
1052 | Results.push_back(Elt: Expanded); |
1053 | return; |
1054 | } |
1055 | break; |
1056 | case ISD::FP_TO_SINT_SAT: |
1057 | case ISD::FP_TO_UINT_SAT: |
1058 | // Expand the fpsosisat if it is scalable to prevent it from unrolling below. |
1059 | if (Node->getValueType(ResNo: 0).isScalableVector()) { |
1060 | if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(N: Node, DAG)) { |
1061 | Results.push_back(Elt: Expanded); |
1062 | return; |
1063 | } |
1064 | } |
1065 | break; |
1066 | case ISD::SMULFIX: |
1067 | case ISD::UMULFIX: |
1068 | if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) { |
1069 | Results.push_back(Elt: Expanded); |
1070 | return; |
1071 | } |
1072 | break; |
1073 | case ISD::SMULFIXSAT: |
1074 | case ISD::UMULFIXSAT: |
1075 | // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly |
1076 | // why. Maybe it results in worse codegen compared to the unroll for some |
1077 | // targets? This should probably be investigated. And if we still prefer to |
1078 | // unroll an explanation could be helpful. |
1079 | break; |
1080 | case ISD::SDIVFIX: |
1081 | case ISD::UDIVFIX: |
1082 | ExpandFixedPointDiv(Node, Results); |
1083 | return; |
1084 | case ISD::SDIVFIXSAT: |
1085 | case ISD::UDIVFIXSAT: |
1086 | break; |
1087 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
1088 | case ISD::STRICT_##DAGN: |
1089 | #include "llvm/IR/ConstrainedOps.def" |
1090 | ExpandStrictFPOp(Node, Results); |
1091 | return; |
1092 | case ISD::VECREDUCE_ADD: |
1093 | case ISD::VECREDUCE_MUL: |
1094 | case ISD::VECREDUCE_AND: |
1095 | case ISD::VECREDUCE_OR: |
1096 | case ISD::VECREDUCE_XOR: |
1097 | case ISD::VECREDUCE_SMAX: |
1098 | case ISD::VECREDUCE_SMIN: |
1099 | case ISD::VECREDUCE_UMAX: |
1100 | case ISD::VECREDUCE_UMIN: |
1101 | case ISD::VECREDUCE_FADD: |
1102 | case ISD::VECREDUCE_FMUL: |
1103 | case ISD::VECREDUCE_FMAX: |
1104 | case ISD::VECREDUCE_FMIN: |
1105 | case ISD::VECREDUCE_FMAXIMUM: |
1106 | case ISD::VECREDUCE_FMINIMUM: |
1107 | Results.push_back(Elt: TLI.expandVecReduce(Node, DAG)); |
1108 | return; |
1109 | case ISD::VECREDUCE_SEQ_FADD: |
1110 | case ISD::VECREDUCE_SEQ_FMUL: |
1111 | Results.push_back(Elt: TLI.expandVecReduceSeq(Node, DAG)); |
1112 | return; |
1113 | case ISD::SREM: |
1114 | case ISD::UREM: |
1115 | ExpandREM(Node, Results); |
1116 | return; |
1117 | case ISD::VP_MERGE: |
1118 | Results.push_back(Elt: ExpandVP_MERGE(Node)); |
1119 | return; |
1120 | case ISD::FREM: |
1121 | if (tryExpandVecMathCall(Node, Call_F32: RTLIB::REM_F32, Call_F64: RTLIB::REM_F64, |
1122 | Call_F80: RTLIB::REM_F80, Call_F128: RTLIB::REM_F128, |
1123 | Call_PPCF128: RTLIB::REM_PPCF128, Results)) |
1124 | return; |
1125 | |
1126 | break; |
1127 | case ISD::VECTOR_COMPRESS: |
1128 | Results.push_back(Elt: TLI.expandVECTOR_COMPRESS(Node, DAG)); |
1129 | return; |
1130 | } |
1131 | |
1132 | SDValue Unrolled = DAG.UnrollVectorOp(N: Node); |
1133 | if (Node->getNumValues() == 1) { |
1134 | Results.push_back(Elt: Unrolled); |
1135 | } else { |
1136 | assert(Node->getNumValues() == Unrolled->getNumValues() && |
1137 | "VectorLegalizer Expand returned wrong number of results!" ); |
1138 | for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I) |
1139 | Results.push_back(Elt: Unrolled.getValue(R: I)); |
1140 | } |
1141 | } |
1142 | |
1143 | SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { |
1144 | // Lower a select instruction where the condition is a scalar and the |
1145 | // operands are vectors. Lower this select to VSELECT and implement it |
1146 | // using XOR AND OR. The selector bit is broadcasted. |
1147 | EVT VT = Node->getValueType(ResNo: 0); |
1148 | SDLoc DL(Node); |
1149 | |
1150 | SDValue Mask = Node->getOperand(Num: 0); |
1151 | SDValue Op1 = Node->getOperand(Num: 1); |
1152 | SDValue Op2 = Node->getOperand(Num: 2); |
1153 | |
1154 | assert(VT.isVector() && !Mask.getValueType().isVector() |
1155 | && Op1.getValueType() == Op2.getValueType() && "Invalid type" ); |
1156 | |
1157 | // If we can't even use the basic vector operations of |
1158 | // AND,OR,XOR, we will have to scalarize the op. |
1159 | // Notice that the operation may be 'promoted' which means that it is |
1160 | // 'bitcasted' to another type which is handled. |
1161 | // Also, we need to be able to construct a splat vector using either |
1162 | // BUILD_VECTOR or SPLAT_VECTOR. |
1163 | // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to |
1164 | // BUILD_VECTOR? |
1165 | if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand || |
1166 | TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand || |
1167 | TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand || |
1168 | TLI.getOperationAction(Op: VT.isFixedLengthVector() ? ISD::BUILD_VECTOR |
1169 | : ISD::SPLAT_VECTOR, |
1170 | VT) == TargetLowering::Expand) |
1171 | return DAG.UnrollVectorOp(N: Node); |
1172 | |
1173 | // Generate a mask operand. |
1174 | EVT MaskTy = VT.changeVectorElementTypeToInteger(); |
1175 | |
1176 | // What is the size of each element in the vector mask. |
1177 | EVT BitTy = MaskTy.getScalarType(); |
1178 | |
1179 | Mask = DAG.getSelect(DL, VT: BitTy, Cond: Mask, LHS: DAG.getAllOnesConstant(DL, VT: BitTy), |
1180 | RHS: DAG.getConstant(Val: 0, DL, VT: BitTy)); |
1181 | |
1182 | // Broadcast the mask so that the entire vector is all one or all zero. |
1183 | Mask = DAG.getSplat(VT: MaskTy, DL, Op: Mask); |
1184 | |
1185 | // Bitcast the operands to be the same type as the mask. |
1186 | // This is needed when we select between FP types because |
1187 | // the mask is a vector of integers. |
1188 | Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op1); |
1189 | Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op2); |
1190 | |
1191 | SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT: MaskTy); |
1192 | |
1193 | Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op1, N2: Mask); |
1194 | Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op2, N2: NotMask); |
1195 | SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MaskTy, N1: Op1, N2: Op2); |
1196 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val); |
1197 | } |
1198 | |
1199 | SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) { |
1200 | EVT VT = Node->getValueType(ResNo: 0); |
1201 | |
1202 | // Make sure that the SRA and SHL instructions are available. |
1203 | if (TLI.getOperationAction(Op: ISD::SRA, VT) == TargetLowering::Expand || |
1204 | TLI.getOperationAction(Op: ISD::SHL, VT) == TargetLowering::Expand) |
1205 | return DAG.UnrollVectorOp(N: Node); |
1206 | |
1207 | SDLoc DL(Node); |
1208 | EVT OrigTy = cast<VTSDNode>(Val: Node->getOperand(Num: 1))->getVT(); |
1209 | |
1210 | unsigned BW = VT.getScalarSizeInBits(); |
1211 | unsigned OrigBW = OrigTy.getScalarSizeInBits(); |
1212 | SDValue ShiftSz = DAG.getConstant(Val: BW - OrigBW, DL, VT); |
1213 | |
1214 | SDValue Op = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Node->getOperand(Num: 0), N2: ShiftSz); |
1215 | return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Op, N2: ShiftSz); |
1216 | } |
1217 | |
1218 | // Generically expand a vector anyext in register to a shuffle of the relevant |
1219 | // lanes into the appropriate locations, with other lanes left undef. |
1220 | SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) { |
1221 | SDLoc DL(Node); |
1222 | EVT VT = Node->getValueType(ResNo: 0); |
1223 | int NumElements = VT.getVectorNumElements(); |
1224 | SDValue Src = Node->getOperand(Num: 0); |
1225 | EVT SrcVT = Src.getValueType(); |
1226 | int NumSrcElements = SrcVT.getVectorNumElements(); |
1227 | |
1228 | // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector |
1229 | // into a larger vector type. |
1230 | if (SrcVT.bitsLE(VT)) { |
1231 | assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && |
1232 | "ANY_EXTEND_VECTOR_INREG vector size mismatch" ); |
1233 | NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); |
1234 | SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(), |
1235 | NumElements: NumSrcElements); |
1236 | Src = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: SrcVT, N1: DAG.getUNDEF(VT: SrcVT), |
1237 | N2: Src, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
1238 | } |
1239 | |
1240 | // Build a base mask of undef shuffles. |
1241 | SmallVector<int, 16> ShuffleMask; |
1242 | ShuffleMask.resize(N: NumSrcElements, NV: -1); |
1243 | |
1244 | // Place the extended lanes into the correct locations. |
1245 | int ExtLaneScale = NumSrcElements / NumElements; |
1246 | int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; |
1247 | for (int i = 0; i < NumElements; ++i) |
1248 | ShuffleMask[i * ExtLaneScale + EndianOffset] = i; |
1249 | |
1250 | return DAG.getNode( |
1251 | Opcode: ISD::BITCAST, DL, VT, |
1252 | Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Src, N2: DAG.getUNDEF(VT: SrcVT), Mask: ShuffleMask)); |
1253 | } |
1254 | |
1255 | SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) { |
1256 | SDLoc DL(Node); |
1257 | EVT VT = Node->getValueType(ResNo: 0); |
1258 | SDValue Src = Node->getOperand(Num: 0); |
1259 | EVT SrcVT = Src.getValueType(); |
1260 | |
1261 | // First build an any-extend node which can be legalized above when we |
1262 | // recurse through it. |
1263 | SDValue Op = DAG.getNode(Opcode: ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Operand: Src); |
1264 | |
1265 | // Now we need sign extend. Do this by shifting the elements. Even if these |
1266 | // aren't legal operations, they have a better chance of being legalized |
1267 | // without full scalarization than the sign extension does. |
1268 | unsigned EltWidth = VT.getScalarSizeInBits(); |
1269 | unsigned SrcEltWidth = SrcVT.getScalarSizeInBits(); |
1270 | SDValue ShiftAmount = DAG.getConstant(Val: EltWidth - SrcEltWidth, DL, VT); |
1271 | return DAG.getNode(Opcode: ISD::SRA, DL, VT, |
1272 | N1: DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Op, N2: ShiftAmount), |
1273 | N2: ShiftAmount); |
1274 | } |
1275 | |
1276 | // Generically expand a vector zext in register to a shuffle of the relevant |
1277 | // lanes into the appropriate locations, a blend of zero into the high bits, |
1278 | // and a bitcast to the wider element type. |
1279 | SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) { |
1280 | SDLoc DL(Node); |
1281 | EVT VT = Node->getValueType(ResNo: 0); |
1282 | int NumElements = VT.getVectorNumElements(); |
1283 | SDValue Src = Node->getOperand(Num: 0); |
1284 | EVT SrcVT = Src.getValueType(); |
1285 | int NumSrcElements = SrcVT.getVectorNumElements(); |
1286 | |
1287 | // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector |
1288 | // into a larger vector type. |
1289 | if (SrcVT.bitsLE(VT)) { |
1290 | assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && |
1291 | "ZERO_EXTEND_VECTOR_INREG vector size mismatch" ); |
1292 | NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); |
1293 | SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(), |
1294 | NumElements: NumSrcElements); |
1295 | Src = DAG.getNode(Opcode: ISD::INSERT_SUBVECTOR, DL, VT: SrcVT, N1: DAG.getUNDEF(VT: SrcVT), |
1296 | N2: Src, N3: DAG.getVectorIdxConstant(Val: 0, DL)); |
1297 | } |
1298 | |
1299 | // Build up a zero vector to blend into this one. |
1300 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SrcVT); |
1301 | |
1302 | // Shuffle the incoming lanes into the correct position, and pull all other |
1303 | // lanes from the zero vector. |
1304 | auto ShuffleMask = llvm::to_vector<16>(Range: llvm::seq<int>(Begin: 0, End: NumSrcElements)); |
1305 | |
1306 | int ExtLaneScale = NumSrcElements / NumElements; |
1307 | int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; |
1308 | for (int i = 0; i < NumElements; ++i) |
1309 | ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; |
1310 | |
1311 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, |
1312 | Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Zero, N2: Src, Mask: ShuffleMask)); |
1313 | } |
1314 | |
1315 | static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { |
1316 | int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; |
1317 | for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) |
1318 | for (int J = ScalarSizeInBytes - 1; J >= 0; --J) |
1319 | ShuffleMask.push_back(Elt: (I * ScalarSizeInBytes) + J); |
1320 | } |
1321 | |
1322 | SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) { |
1323 | EVT VT = Node->getValueType(ResNo: 0); |
1324 | |
1325 | // Scalable vectors can't use shuffle expansion. |
1326 | if (VT.isScalableVector()) |
1327 | return TLI.expandBSWAP(N: Node, DAG); |
1328 | |
1329 | // Generate a byte wise shuffle mask for the BSWAP. |
1330 | SmallVector<int, 16> ShuffleMask; |
1331 | createBSWAPShuffleMask(VT, ShuffleMask); |
1332 | EVT ByteVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: ShuffleMask.size()); |
1333 | |
1334 | // Only emit a shuffle if the mask is legal. |
1335 | if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) { |
1336 | SDLoc DL(Node); |
1337 | SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0)); |
1338 | Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getUNDEF(VT: ByteVT), Mask: ShuffleMask); |
1339 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op); |
1340 | } |
1341 | |
1342 | // If we have the appropriate vector bit operations, it is better to use them |
1343 | // than unrolling and expanding each component. |
1344 | if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) && |
1345 | TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) && |
1346 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) && |
1347 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)) |
1348 | return TLI.expandBSWAP(N: Node, DAG); |
1349 | |
1350 | // Otherwise unroll. |
1351 | return DAG.UnrollVectorOp(N: Node); |
1352 | } |
1353 | |
1354 | void VectorLegalizer::ExpandBITREVERSE(SDNode *Node, |
1355 | SmallVectorImpl<SDValue> &Results) { |
1356 | EVT VT = Node->getValueType(ResNo: 0); |
1357 | |
1358 | // We can't unroll or use shuffles for scalable vectors. |
1359 | if (VT.isScalableVector()) { |
1360 | Results.push_back(Elt: TLI.expandBITREVERSE(N: Node, DAG)); |
1361 | return; |
1362 | } |
1363 | |
1364 | // If we have the scalar operation, it's probably cheaper to unroll it. |
1365 | if (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: VT.getScalarType())) { |
1366 | SDValue Tmp = DAG.UnrollVectorOp(N: Node); |
1367 | Results.push_back(Elt: Tmp); |
1368 | return; |
1369 | } |
1370 | |
1371 | // If the vector element width is a whole number of bytes, test if its legal |
1372 | // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte |
1373 | // vector. This greatly reduces the number of bit shifts necessary. |
1374 | unsigned ScalarSizeInBits = VT.getScalarSizeInBits(); |
1375 | if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) { |
1376 | SmallVector<int, 16> BSWAPMask; |
1377 | createBSWAPShuffleMask(VT, ShuffleMask&: BSWAPMask); |
1378 | |
1379 | EVT ByteVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: BSWAPMask.size()); |
1380 | if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) && |
1381 | (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: ByteVT) || |
1382 | (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT: ByteVT) && |
1383 | TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT: ByteVT) && |
1384 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: ByteVT) && |
1385 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: ByteVT)))) { |
1386 | SDLoc DL(Node); |
1387 | SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0)); |
1388 | Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getUNDEF(VT: ByteVT), |
1389 | Mask: BSWAPMask); |
1390 | Op = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT: ByteVT, Operand: Op); |
1391 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op); |
1392 | Results.push_back(Elt: Op); |
1393 | return; |
1394 | } |
1395 | } |
1396 | |
1397 | // If we have the appropriate vector bit operations, it is better to use them |
1398 | // than unrolling and expanding each component. |
1399 | if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) && |
1400 | TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) && |
1401 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) && |
1402 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)) { |
1403 | Results.push_back(Elt: TLI.expandBITREVERSE(N: Node, DAG)); |
1404 | return; |
1405 | } |
1406 | |
1407 | // Otherwise unroll. |
1408 | SDValue Tmp = DAG.UnrollVectorOp(N: Node); |
1409 | Results.push_back(Elt: Tmp); |
1410 | } |
1411 | |
1412 | SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) { |
1413 | // Implement VSELECT in terms of XOR, AND, OR |
1414 | // on platforms which do not support blend natively. |
1415 | SDLoc DL(Node); |
1416 | |
1417 | SDValue Mask = Node->getOperand(Num: 0); |
1418 | SDValue Op1 = Node->getOperand(Num: 1); |
1419 | SDValue Op2 = Node->getOperand(Num: 2); |
1420 | |
1421 | EVT VT = Mask.getValueType(); |
1422 | |
1423 | // If we can't even use the basic vector operations of |
1424 | // AND,OR,XOR, we will have to scalarize the op. |
1425 | // Notice that the operation may be 'promoted' which means that it is |
1426 | // 'bitcasted' to another type which is handled. |
1427 | if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand || |
1428 | TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand || |
1429 | TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand) |
1430 | return DAG.UnrollVectorOp(N: Node); |
1431 | |
1432 | // This operation also isn't safe with AND, OR, XOR when the boolean type is |
1433 | // 0/1 and the select operands aren't also booleans, as we need an all-ones |
1434 | // vector constant to mask with. |
1435 | // FIXME: Sign extend 1 to all ones if that's legal on the target. |
1436 | auto BoolContents = TLI.getBooleanContents(Type: Op1.getValueType()); |
1437 | if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent && |
1438 | !(BoolContents == TargetLowering::ZeroOrOneBooleanContent && |
1439 | Op1.getValueType().getVectorElementType() == MVT::i1)) |
1440 | return DAG.UnrollVectorOp(N: Node); |
1441 | |
1442 | // If the mask and the type are different sizes, unroll the vector op. This |
1443 | // can occur when getSetCCResultType returns something that is different in |
1444 | // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. |
1445 | if (VT.getSizeInBits() != Op1.getValueSizeInBits()) |
1446 | return DAG.UnrollVectorOp(N: Node); |
1447 | |
1448 | // Bitcast the operands to be the same type as the mask. |
1449 | // This is needed when we select between FP types because |
1450 | // the mask is a vector of integers. |
1451 | Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op1); |
1452 | Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op2); |
1453 | |
1454 | SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT); |
1455 | |
1456 | Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op1, N2: Mask); |
1457 | Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op2, N2: NotMask); |
1458 | SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Op1, N2: Op2); |
1459 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val); |
1460 | } |
1461 | |
1462 | SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) { |
1463 | // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which |
1464 | // do not support it natively. |
1465 | SDLoc DL(Node); |
1466 | |
1467 | SDValue Mask = Node->getOperand(Num: 0); |
1468 | SDValue Op1 = Node->getOperand(Num: 1); |
1469 | SDValue Op2 = Node->getOperand(Num: 2); |
1470 | SDValue EVL = Node->getOperand(Num: 3); |
1471 | |
1472 | EVT VT = Mask.getValueType(); |
1473 | |
1474 | // If we can't even use the basic vector operations of |
1475 | // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op. |
1476 | if (TLI.getOperationAction(Op: ISD::VP_AND, VT) == TargetLowering::Expand || |
1477 | TLI.getOperationAction(Op: ISD::VP_XOR, VT) == TargetLowering::Expand || |
1478 | TLI.getOperationAction(Op: ISD::VP_OR, VT) == TargetLowering::Expand) |
1479 | return DAG.UnrollVectorOp(N: Node); |
1480 | |
1481 | // This operation also isn't safe when the operands aren't also booleans. |
1482 | if (Op1.getValueType().getVectorElementType() != MVT::i1) |
1483 | return DAG.UnrollVectorOp(N: Node); |
1484 | |
1485 | SDValue Ones = DAG.getAllOnesConstant(DL, VT); |
1486 | SDValue NotMask = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT, N1: Mask, N2: Ones, N3: Ones, N4: EVL); |
1487 | |
1488 | Op1 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op1, N2: Mask, N3: Ones, N4: EVL); |
1489 | Op2 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op2, N2: NotMask, N3: Ones, N4: EVL); |
1490 | return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: Op1, N2: Op2, N3: Ones, N4: EVL); |
1491 | } |
1492 | |
1493 | SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) { |
1494 | // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector |
1495 | // indices less than the EVL/pivot are true. Combine that with the original |
1496 | // mask for a full-length mask. Use a full-length VSELECT to select between |
1497 | // the true and false values. |
1498 | SDLoc DL(Node); |
1499 | |
1500 | SDValue Mask = Node->getOperand(Num: 0); |
1501 | SDValue Op1 = Node->getOperand(Num: 1); |
1502 | SDValue Op2 = Node->getOperand(Num: 2); |
1503 | SDValue EVL = Node->getOperand(Num: 3); |
1504 | |
1505 | EVT MaskVT = Mask.getValueType(); |
1506 | bool IsFixedLen = MaskVT.isFixedLengthVector(); |
1507 | |
1508 | EVT EVLVecVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EVL.getValueType(), |
1509 | EC: MaskVT.getVectorElementCount()); |
1510 | |
1511 | // If we can't construct the EVL mask efficiently, it's better to unroll. |
1512 | if ((IsFixedLen && |
1513 | !TLI.isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: EVLVecVT)) || |
1514 | (!IsFixedLen && |
1515 | (!TLI.isOperationLegalOrCustom(Op: ISD::STEP_VECTOR, VT: EVLVecVT) || |
1516 | !TLI.isOperationLegalOrCustom(Op: ISD::SPLAT_VECTOR, VT: EVLVecVT)))) |
1517 | return DAG.UnrollVectorOp(N: Node); |
1518 | |
1519 | // If using a SETCC would result in a different type than the mask type, |
1520 | // unroll. |
1521 | if (TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), |
1522 | VT: EVLVecVT) != MaskVT) |
1523 | return DAG.UnrollVectorOp(N: Node); |
1524 | |
1525 | SDValue StepVec = DAG.getStepVector(DL, ResVT: EVLVecVT); |
1526 | SDValue SplatEVL = DAG.getSplat(VT: EVLVecVT, DL, Op: EVL); |
1527 | SDValue EVLMask = |
1528 | DAG.getSetCC(DL, VT: MaskVT, LHS: StepVec, RHS: SplatEVL, Cond: ISD::CondCode::SETULT); |
1529 | |
1530 | SDValue FullMask = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskVT, N1: Mask, N2: EVLMask); |
1531 | return DAG.getSelect(DL, VT: Node->getValueType(ResNo: 0), Cond: FullMask, LHS: Op1, RHS: Op2); |
1532 | } |
1533 | |
1534 | SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) { |
1535 | // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB. |
1536 | EVT VT = Node->getValueType(ResNo: 0); |
1537 | |
1538 | unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV; |
1539 | |
1540 | if (!TLI.isOperationLegalOrCustom(Op: DivOpc, VT) || |
1541 | !TLI.isOperationLegalOrCustom(Op: ISD::VP_MUL, VT) || |
1542 | !TLI.isOperationLegalOrCustom(Op: ISD::VP_SUB, VT)) |
1543 | return SDValue(); |
1544 | |
1545 | SDLoc DL(Node); |
1546 | |
1547 | SDValue Dividend = Node->getOperand(Num: 0); |
1548 | SDValue Divisor = Node->getOperand(Num: 1); |
1549 | SDValue Mask = Node->getOperand(Num: 2); |
1550 | SDValue EVL = Node->getOperand(Num: 3); |
1551 | |
1552 | // X % Y -> X-X/Y*Y |
1553 | SDValue Div = DAG.getNode(Opcode: DivOpc, DL, VT, N1: Dividend, N2: Divisor, N3: Mask, N4: EVL); |
1554 | SDValue Mul = DAG.getNode(Opcode: ISD::VP_MUL, DL, VT, N1: Divisor, N2: Div, N3: Mask, N4: EVL); |
1555 | return DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: Dividend, N2: Mul, N3: Mask, N4: EVL); |
1556 | } |
1557 | |
1558 | void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node, |
1559 | SmallVectorImpl<SDValue> &Results) { |
1560 | // Attempt to expand using TargetLowering. |
1561 | SDValue Result, Chain; |
1562 | if (TLI.expandFP_TO_UINT(N: Node, Result, Chain, DAG)) { |
1563 | Results.push_back(Elt: Result); |
1564 | if (Node->isStrictFPOpcode()) |
1565 | Results.push_back(Elt: Chain); |
1566 | return; |
1567 | } |
1568 | |
1569 | // Otherwise go ahead and unroll. |
1570 | if (Node->isStrictFPOpcode()) { |
1571 | UnrollStrictFPOp(Node, Results); |
1572 | return; |
1573 | } |
1574 | |
1575 | Results.push_back(Elt: DAG.UnrollVectorOp(N: Node)); |
1576 | } |
1577 | |
1578 | void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, |
1579 | SmallVectorImpl<SDValue> &Results) { |
1580 | bool IsStrict = Node->isStrictFPOpcode(); |
1581 | unsigned OpNo = IsStrict ? 1 : 0; |
1582 | SDValue Src = Node->getOperand(Num: OpNo); |
1583 | EVT VT = Src.getValueType(); |
1584 | SDLoc DL(Node); |
1585 | |
1586 | // Attempt to expand using TargetLowering. |
1587 | SDValue Result; |
1588 | SDValue Chain; |
1589 | if (TLI.expandUINT_TO_FP(N: Node, Result, Chain, DAG)) { |
1590 | Results.push_back(Elt: Result); |
1591 | if (IsStrict) |
1592 | Results.push_back(Elt: Chain); |
1593 | return; |
1594 | } |
1595 | |
1596 | // Make sure that the SINT_TO_FP and SRL instructions are available. |
1597 | if (((!IsStrict && TLI.getOperationAction(Op: ISD::SINT_TO_FP, VT) == |
1598 | TargetLowering::Expand) || |
1599 | (IsStrict && TLI.getOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT) == |
1600 | TargetLowering::Expand)) || |
1601 | TLI.getOperationAction(Op: ISD::SRL, VT) == TargetLowering::Expand) { |
1602 | if (IsStrict) { |
1603 | UnrollStrictFPOp(Node, Results); |
1604 | return; |
1605 | } |
1606 | |
1607 | Results.push_back(Elt: DAG.UnrollVectorOp(N: Node)); |
1608 | return; |
1609 | } |
1610 | |
1611 | unsigned BW = VT.getScalarSizeInBits(); |
1612 | assert((BW == 64 || BW == 32) && |
1613 | "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide" ); |
1614 | |
1615 | SDValue HalfWord = DAG.getConstant(Val: BW / 2, DL, VT); |
1616 | |
1617 | // Constants to clear the upper part of the word. |
1618 | // Notice that we can also use SHL+SHR, but using a constant is slightly |
1619 | // faster on x86. |
1620 | uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF; |
1621 | SDValue HalfWordMask = DAG.getConstant(Val: HWMask, DL, VT); |
1622 | |
1623 | // Two to the power of half-word-size. |
1624 | SDValue TWOHW = |
1625 | DAG.getConstantFP(Val: 1ULL << (BW / 2), DL, VT: Node->getValueType(ResNo: 0)); |
1626 | |
1627 | // Clear upper part of LO, lower HI |
1628 | SDValue HI = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Src, N2: HalfWord); |
1629 | SDValue LO = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Src, N2: HalfWordMask); |
1630 | |
1631 | if (IsStrict) { |
1632 | // Convert hi and lo to floats |
1633 | // Convert the hi part back to the upper values |
1634 | // TODO: Can any fast-math-flags be set on these nodes? |
1635 | SDValue fHI = DAG.getNode(Opcode: ISD::STRICT_SINT_TO_FP, DL, |
1636 | ResultTys: {Node->getValueType(ResNo: 0), MVT::Other}, |
1637 | Ops: {Node->getOperand(Num: 0), HI}); |
1638 | fHI = DAG.getNode(Opcode: ISD::STRICT_FMUL, DL, ResultTys: {Node->getValueType(ResNo: 0), MVT::Other}, |
1639 | Ops: {fHI.getValue(R: 1), fHI, TWOHW}); |
1640 | SDValue fLO = DAG.getNode(Opcode: ISD::STRICT_SINT_TO_FP, DL, |
1641 | ResultTys: {Node->getValueType(ResNo: 0), MVT::Other}, |
1642 | Ops: {Node->getOperand(Num: 0), LO}); |
1643 | |
1644 | SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: fHI.getValue(R: 1), |
1645 | N2: fLO.getValue(R: 1)); |
1646 | |
1647 | // Add the two halves |
1648 | SDValue Result = |
1649 | DAG.getNode(Opcode: ISD::STRICT_FADD, DL, ResultTys: {Node->getValueType(ResNo: 0), MVT::Other}, |
1650 | Ops: {TF, fHI, fLO}); |
1651 | |
1652 | Results.push_back(Elt: Result); |
1653 | Results.push_back(Elt: Result.getValue(R: 1)); |
1654 | return; |
1655 | } |
1656 | |
1657 | // Convert hi and lo to floats |
1658 | // Convert the hi part back to the upper values |
1659 | // TODO: Can any fast-math-flags be set on these nodes? |
1660 | SDValue fHI = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: Node->getValueType(ResNo: 0), Operand: HI); |
1661 | fHI = DAG.getNode(Opcode: ISD::FMUL, DL, VT: Node->getValueType(ResNo: 0), N1: fHI, N2: TWOHW); |
1662 | SDValue fLO = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: Node->getValueType(ResNo: 0), Operand: LO); |
1663 | |
1664 | // Add the two halves |
1665 | Results.push_back( |
1666 | Elt: DAG.getNode(Opcode: ISD::FADD, DL, VT: Node->getValueType(ResNo: 0), N1: fHI, N2: fLO)); |
1667 | } |
1668 | |
1669 | SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) { |
1670 | if (TLI.isOperationLegalOrCustom(Op: ISD::FSUB, VT: Node->getValueType(ResNo: 0))) { |
1671 | SDLoc DL(Node); |
1672 | SDValue Zero = DAG.getConstantFP(Val: -0.0, DL, VT: Node->getValueType(ResNo: 0)); |
1673 | // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB. |
1674 | return DAG.getNode(Opcode: ISD::FSUB, DL, VT: Node->getValueType(ResNo: 0), N1: Zero, |
1675 | N2: Node->getOperand(Num: 0)); |
1676 | } |
1677 | return DAG.UnrollVectorOp(N: Node); |
1678 | } |
1679 | |
1680 | void VectorLegalizer::ExpandFSUB(SDNode *Node, |
1681 | SmallVectorImpl<SDValue> &Results) { |
1682 | // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal, |
1683 | // we can defer this to operation legalization where it will be lowered as |
1684 | // a+(-b). |
1685 | EVT VT = Node->getValueType(ResNo: 0); |
1686 | if (TLI.isOperationLegalOrCustom(Op: ISD::FNEG, VT) && |
1687 | TLI.isOperationLegalOrCustom(Op: ISD::FADD, VT)) |
1688 | return; // Defer to LegalizeDAG |
1689 | |
1690 | SDValue Tmp = DAG.UnrollVectorOp(N: Node); |
1691 | Results.push_back(Elt: Tmp); |
1692 | } |
1693 | |
1694 | void VectorLegalizer::ExpandSETCC(SDNode *Node, |
1695 | SmallVectorImpl<SDValue> &Results) { |
1696 | bool NeedInvert = false; |
1697 | bool IsVP = Node->getOpcode() == ISD::VP_SETCC; |
1698 | bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC || |
1699 | Node->getOpcode() == ISD::STRICT_FSETCCS; |
1700 | bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS; |
1701 | unsigned Offset = IsStrict ? 1 : 0; |
1702 | |
1703 | SDValue Chain = IsStrict ? Node->getOperand(Num: 0) : SDValue(); |
1704 | SDValue LHS = Node->getOperand(Num: 0 + Offset); |
1705 | SDValue RHS = Node->getOperand(Num: 1 + Offset); |
1706 | SDValue CC = Node->getOperand(Num: 2 + Offset); |
1707 | |
1708 | MVT OpVT = LHS.getSimpleValueType(); |
1709 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get(); |
1710 | |
1711 | if (TLI.getCondCodeAction(CC: CCCode, VT: OpVT) != TargetLowering::Expand) { |
1712 | if (IsStrict) { |
1713 | UnrollStrictFPOp(Node, Results); |
1714 | return; |
1715 | } |
1716 | Results.push_back(Elt: UnrollVSETCC(Node)); |
1717 | return; |
1718 | } |
1719 | |
1720 | SDValue Mask, EVL; |
1721 | if (IsVP) { |
1722 | Mask = Node->getOperand(Num: 3 + Offset); |
1723 | EVL = Node->getOperand(Num: 4 + Offset); |
1724 | } |
1725 | |
1726 | SDLoc dl(Node); |
1727 | bool Legalized = |
1728 | TLI.LegalizeSetCCCondCode(DAG, VT: Node->getValueType(ResNo: 0), LHS, RHS, CC, Mask, |
1729 | EVL, NeedInvert, dl, Chain, IsSignaling); |
1730 | |
1731 | if (Legalized) { |
1732 | // If we expanded the SETCC by swapping LHS and RHS, or by inverting the |
1733 | // condition code, create a new SETCC node. |
1734 | if (CC.getNode()) { |
1735 | if (IsStrict) { |
1736 | LHS = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VTList: Node->getVTList(), |
1737 | Ops: {Chain, LHS, RHS, CC}, Flags: Node->getFlags()); |
1738 | Chain = LHS.getValue(R: 1); |
1739 | } else if (IsVP) { |
1740 | LHS = DAG.getNode(Opcode: ISD::VP_SETCC, DL: dl, VT: Node->getValueType(ResNo: 0), |
1741 | Ops: {LHS, RHS, CC, Mask, EVL}, Flags: Node->getFlags()); |
1742 | } else { |
1743 | LHS = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: Node->getValueType(ResNo: 0), N1: LHS, N2: RHS, N3: CC, |
1744 | Flags: Node->getFlags()); |
1745 | } |
1746 | } |
1747 | |
1748 | // If we expanded the SETCC by inverting the condition code, then wrap |
1749 | // the existing SETCC in a NOT to restore the intended condition. |
1750 | if (NeedInvert) { |
1751 | if (!IsVP) |
1752 | LHS = DAG.getLogicalNOT(DL: dl, Val: LHS, VT: LHS->getValueType(ResNo: 0)); |
1753 | else |
1754 | LHS = DAG.getVPLogicalNOT(DL: dl, Val: LHS, Mask, EVL, VT: LHS->getValueType(ResNo: 0)); |
1755 | } |
1756 | } else { |
1757 | assert(!IsStrict && "Don't know how to expand for strict nodes." ); |
1758 | |
1759 | // Otherwise, SETCC for the given comparison type must be completely |
1760 | // illegal; expand it into a SELECT_CC. |
1761 | EVT VT = Node->getValueType(ResNo: 0); |
1762 | LHS = |
1763 | DAG.getNode(Opcode: ISD::SELECT_CC, DL: dl, VT, N1: LHS, N2: RHS, |
1764 | N3: DAG.getBoolConstant(V: true, DL: dl, VT, OpVT: LHS.getValueType()), |
1765 | N4: DAG.getBoolConstant(V: false, DL: dl, VT, OpVT: LHS.getValueType()), N5: CC); |
1766 | LHS->setFlags(Node->getFlags()); |
1767 | } |
1768 | |
1769 | Results.push_back(Elt: LHS); |
1770 | if (IsStrict) |
1771 | Results.push_back(Elt: Chain); |
1772 | } |
1773 | |
1774 | void VectorLegalizer::ExpandUADDSUBO(SDNode *Node, |
1775 | SmallVectorImpl<SDValue> &Results) { |
1776 | SDValue Result, Overflow; |
1777 | TLI.expandUADDSUBO(Node, Result, Overflow, DAG); |
1778 | Results.push_back(Elt: Result); |
1779 | Results.push_back(Elt: Overflow); |
1780 | } |
1781 | |
1782 | void VectorLegalizer::ExpandSADDSUBO(SDNode *Node, |
1783 | SmallVectorImpl<SDValue> &Results) { |
1784 | SDValue Result, Overflow; |
1785 | TLI.expandSADDSUBO(Node, Result, Overflow, DAG); |
1786 | Results.push_back(Elt: Result); |
1787 | Results.push_back(Elt: Overflow); |
1788 | } |
1789 | |
1790 | void VectorLegalizer::ExpandMULO(SDNode *Node, |
1791 | SmallVectorImpl<SDValue> &Results) { |
1792 | SDValue Result, Overflow; |
1793 | if (!TLI.expandMULO(Node, Result, Overflow, DAG)) |
1794 | std::tie(args&: Result, args&: Overflow) = DAG.UnrollVectorOverflowOp(N: Node); |
1795 | |
1796 | Results.push_back(Elt: Result); |
1797 | Results.push_back(Elt: Overflow); |
1798 | } |
1799 | |
1800 | void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node, |
1801 | SmallVectorImpl<SDValue> &Results) { |
1802 | SDNode *N = Node; |
1803 | if (SDValue Expanded = TLI.expandFixedPointDiv(Opcode: N->getOpcode(), dl: SDLoc(N), |
1804 | LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Scale: N->getConstantOperandVal(Num: 2), DAG)) |
1805 | Results.push_back(Elt: Expanded); |
1806 | } |
1807 | |
1808 | void VectorLegalizer::ExpandStrictFPOp(SDNode *Node, |
1809 | SmallVectorImpl<SDValue> &Results) { |
1810 | if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) { |
1811 | ExpandUINT_TO_FLOAT(Node, Results); |
1812 | return; |
1813 | } |
1814 | if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) { |
1815 | ExpandFP_TO_UINT(Node, Results); |
1816 | return; |
1817 | } |
1818 | |
1819 | if (Node->getOpcode() == ISD::STRICT_FSETCC || |
1820 | Node->getOpcode() == ISD::STRICT_FSETCCS) { |
1821 | ExpandSETCC(Node, Results); |
1822 | return; |
1823 | } |
1824 | |
1825 | UnrollStrictFPOp(Node, Results); |
1826 | } |
1827 | |
1828 | void VectorLegalizer::ExpandREM(SDNode *Node, |
1829 | SmallVectorImpl<SDValue> &Results) { |
1830 | assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) && |
1831 | "Expected REM node" ); |
1832 | |
1833 | SDValue Result; |
1834 | if (!TLI.expandREM(Node, Result, DAG)) |
1835 | Result = DAG.UnrollVectorOp(N: Node); |
1836 | Results.push_back(Elt: Result); |
1837 | } |
1838 | |
1839 | // Try to expand libm nodes into vector math routine calls. Callers provide the |
1840 | // LibFunc equivalent of the passed in Node, which is used to lookup mappings |
1841 | // within TargetLibraryInfo. The only mappings considered are those where the |
1842 | // result and all operands are the same vector type. While predicated nodes are |
1843 | // not supported, we will emit calls to masked routines by passing in an all |
1844 | // true mask. |
1845 | bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC, |
1846 | SmallVectorImpl<SDValue> &Results) { |
1847 | // Chain must be propagated but currently strict fp operations are down |
1848 | // converted to their none strict counterpart. |
1849 | assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!" ); |
1850 | |
1851 | const char *LCName = TLI.getLibcallName(Call: LC); |
1852 | if (!LCName) |
1853 | return false; |
1854 | LLVM_DEBUG(dbgs() << "Looking for vector variant of " << LCName << "\n" ); |
1855 | |
1856 | EVT VT = Node->getValueType(ResNo: 0); |
1857 | ElementCount VL = VT.getVectorElementCount(); |
1858 | |
1859 | // Lookup a vector function equivalent to the specified libcall. Prefer |
1860 | // unmasked variants but we will generate a mask if need be. |
1861 | const TargetLibraryInfo &TLibInfo = DAG.getLibInfo(); |
1862 | const VecDesc *VD = TLibInfo.getVectorMappingInfo(F: LCName, VF: VL, Masked: false); |
1863 | if (!VD) |
1864 | VD = TLibInfo.getVectorMappingInfo(F: LCName, VF: VL, /*Masked=*/true); |
1865 | if (!VD) |
1866 | return false; |
1867 | |
1868 | LLVMContext *Ctx = DAG.getContext(); |
1869 | Type *Ty = VT.getTypeForEVT(Context&: *Ctx); |
1870 | Type *ScalarTy = Ty->getScalarType(); |
1871 | |
1872 | // Construct a scalar function type based on Node's operands. |
1873 | SmallVector<Type *, 8> ArgTys; |
1874 | for (unsigned i = 0; i < Node->getNumOperands(); ++i) { |
1875 | assert(Node->getOperand(i).getValueType() == VT && |
1876 | "Expected matching vector types!" ); |
1877 | ArgTys.push_back(Elt: ScalarTy); |
1878 | } |
1879 | FunctionType *ScalarFTy = FunctionType::get(Result: ScalarTy, Params: ArgTys, isVarArg: false); |
1880 | |
1881 | // Generate call information for the vector function. |
1882 | const std::string MangledName = VD->getVectorFunctionABIVariantString(); |
1883 | auto OptVFInfo = VFABI::tryDemangleForVFABI(MangledName, FTy: ScalarFTy); |
1884 | if (!OptVFInfo) |
1885 | return false; |
1886 | |
1887 | LLVM_DEBUG(dbgs() << "Found vector variant " << VD->getVectorFnName() |
1888 | << "\n" ); |
1889 | |
1890 | // Sanity check just in case OptVFInfo has unexpected parameters. |
1891 | if (OptVFInfo->Shape.Parameters.size() != |
1892 | Node->getNumOperands() + VD->isMasked()) |
1893 | return false; |
1894 | |
1895 | // Collect vector call operands. |
1896 | |
1897 | SDLoc DL(Node); |
1898 | TargetLowering::ArgListTy Args; |
1899 | TargetLowering::ArgListEntry Entry; |
1900 | Entry.IsSExt = false; |
1901 | Entry.IsZExt = false; |
1902 | |
1903 | unsigned OpNum = 0; |
1904 | for (auto &VFParam : OptVFInfo->Shape.Parameters) { |
1905 | if (VFParam.ParamKind == VFParamKind::GlobalPredicate) { |
1906 | EVT MaskVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *Ctx, VT); |
1907 | Entry.Node = DAG.getBoolConstant(V: true, DL, VT: MaskVT, OpVT: VT); |
1908 | Entry.Ty = MaskVT.getTypeForEVT(Context&: *Ctx); |
1909 | Args.push_back(x: Entry); |
1910 | continue; |
1911 | } |
1912 | |
1913 | // Only vector operands are supported. |
1914 | if (VFParam.ParamKind != VFParamKind::Vector) |
1915 | return false; |
1916 | |
1917 | Entry.Node = Node->getOperand(Num: OpNum++); |
1918 | Entry.Ty = Ty; |
1919 | Args.push_back(x: Entry); |
1920 | } |
1921 | |
1922 | // Emit a call to the vector function. |
1923 | SDValue Callee = DAG.getExternalSymbol(Sym: VD->getVectorFnName().data(), |
1924 | VT: TLI.getPointerTy(DL: DAG.getDataLayout())); |
1925 | TargetLowering::CallLoweringInfo CLI(DAG); |
1926 | CLI.setDebugLoc(DL) |
1927 | .setChain(DAG.getEntryNode()) |
1928 | .setLibCallee(CC: CallingConv::C, ResultType: Ty, Target: Callee, ArgsList: std::move(Args)); |
1929 | |
1930 | std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); |
1931 | Results.push_back(Elt: CallResult.first); |
1932 | return true; |
1933 | } |
1934 | |
1935 | /// Try to expand the node to a vector libcall based on the result type. |
1936 | bool VectorLegalizer::tryExpandVecMathCall( |
1937 | SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, |
1938 | RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, |
1939 | RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results) { |
1940 | RTLIB::Libcall LC = RTLIB::getFPLibCall( |
1941 | VT: Node->getValueType(ResNo: 0).getVectorElementType(), Call_F32, Call_F64, |
1942 | Call_F80, Call_F128, Call_PPCF128); |
1943 | |
1944 | if (LC == RTLIB::UNKNOWN_LIBCALL) |
1945 | return false; |
1946 | |
1947 | return tryExpandVecMathCall(Node, LC, Results); |
1948 | } |
1949 | |
1950 | void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, |
1951 | SmallVectorImpl<SDValue> &Results) { |
1952 | EVT VT = Node->getValueType(ResNo: 0); |
1953 | EVT EltVT = VT.getVectorElementType(); |
1954 | unsigned NumElems = VT.getVectorNumElements(); |
1955 | unsigned NumOpers = Node->getNumOperands(); |
1956 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
1957 | |
1958 | EVT TmpEltVT = EltVT; |
1959 | if (Node->getOpcode() == ISD::STRICT_FSETCC || |
1960 | Node->getOpcode() == ISD::STRICT_FSETCCS) |
1961 | TmpEltVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(), |
1962 | Context&: *DAG.getContext(), VT: TmpEltVT); |
1963 | |
1964 | EVT ValueVTs[] = {TmpEltVT, MVT::Other}; |
1965 | SDValue Chain = Node->getOperand(Num: 0); |
1966 | SDLoc dl(Node); |
1967 | |
1968 | SmallVector<SDValue, 32> OpValues; |
1969 | SmallVector<SDValue, 32> OpChains; |
1970 | for (unsigned i = 0; i < NumElems; ++i) { |
1971 | SmallVector<SDValue, 4> Opers; |
1972 | SDValue Idx = DAG.getVectorIdxConstant(Val: i, DL: dl); |
1973 | |
1974 | // The Chain is the first operand. |
1975 | Opers.push_back(Elt: Chain); |
1976 | |
1977 | // Now process the remaining operands. |
1978 | for (unsigned j = 1; j < NumOpers; ++j) { |
1979 | SDValue Oper = Node->getOperand(Num: j); |
1980 | EVT OperVT = Oper.getValueType(); |
1981 | |
1982 | if (OperVT.isVector()) |
1983 | Oper = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, |
1984 | VT: OperVT.getVectorElementType(), N1: Oper, N2: Idx); |
1985 | |
1986 | Opers.push_back(Elt: Oper); |
1987 | } |
1988 | |
1989 | SDValue ScalarOp = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, ResultTys: ValueVTs, Ops: Opers); |
1990 | SDValue ScalarResult = ScalarOp.getValue(R: 0); |
1991 | SDValue ScalarChain = ScalarOp.getValue(R: 1); |
1992 | |
1993 | if (Node->getOpcode() == ISD::STRICT_FSETCC || |
1994 | Node->getOpcode() == ISD::STRICT_FSETCCS) |
1995 | ScalarResult = DAG.getSelect(DL: dl, VT: EltVT, Cond: ScalarResult, |
1996 | LHS: DAG.getAllOnesConstant(DL: dl, VT: EltVT), |
1997 | RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT)); |
1998 | |
1999 | OpValues.push_back(Elt: ScalarResult); |
2000 | OpChains.push_back(Elt: ScalarChain); |
2001 | } |
2002 | |
2003 | SDValue Result = DAG.getBuildVector(VT, DL: dl, Ops: OpValues); |
2004 | SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: OpChains); |
2005 | |
2006 | Results.push_back(Elt: Result); |
2007 | Results.push_back(Elt: NewChain); |
2008 | } |
2009 | |
2010 | SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { |
2011 | EVT VT = Node->getValueType(ResNo: 0); |
2012 | unsigned NumElems = VT.getVectorNumElements(); |
2013 | EVT EltVT = VT.getVectorElementType(); |
2014 | SDValue LHS = Node->getOperand(Num: 0); |
2015 | SDValue RHS = Node->getOperand(Num: 1); |
2016 | SDValue CC = Node->getOperand(Num: 2); |
2017 | EVT TmpEltVT = LHS.getValueType().getVectorElementType(); |
2018 | SDLoc dl(Node); |
2019 | SmallVector<SDValue, 8> Ops(NumElems); |
2020 | for (unsigned i = 0; i < NumElems; ++i) { |
2021 | SDValue LHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: LHS, |
2022 | N2: DAG.getVectorIdxConstant(Val: i, DL: dl)); |
2023 | SDValue RHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: RHS, |
2024 | N2: DAG.getVectorIdxConstant(Val: i, DL: dl)); |
2025 | Ops[i] = DAG.getNode(Opcode: ISD::SETCC, DL: dl, |
2026 | VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), |
2027 | Context&: *DAG.getContext(), VT: TmpEltVT), |
2028 | N1: LHSElem, N2: RHSElem, N3: CC); |
2029 | Ops[i] = DAG.getSelect(DL: dl, VT: EltVT, Cond: Ops[i], LHS: DAG.getAllOnesConstant(DL: dl, VT: EltVT), |
2030 | RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT)); |
2031 | } |
2032 | return DAG.getBuildVector(VT, DL: dl, Ops); |
2033 | } |
2034 | |
2035 | bool SelectionDAG::LegalizeVectors() { |
2036 | return VectorLegalizer(*this).Run(); |
2037 | } |
2038 | |