1 | //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the SelectionDAG::LegalizeVectors method. |
10 | // |
11 | // The vector legalizer looks for vector operations which might need to be |
12 | // scalarized and legalizes them. This is a separate step from Legalize because |
13 | // scalarizing can introduce illegal types. For example, suppose we have an |
14 | // ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition |
15 | // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the |
16 | // operation, which introduces nodes with the illegal type i64 which must be |
17 | // expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; |
18 | // the operation must be unrolled, which introduces nodes with the illegal |
19 | // type i8 which must be promoted. |
20 | // |
21 | // This does not legalize vector manipulations like ISD::BUILD_VECTOR, |
22 | // or operations that happen to take a vector which are custom-lowered; |
23 | // the legalization for such operations never produces nodes |
24 | // with illegal types, so it's okay to put off legalizing them until |
25 | // SelectionDAG::Legalize runs. |
26 | // |
27 | //===----------------------------------------------------------------------===// |
28 | |
29 | #include "llvm/ADT/DenseMap.h" |
30 | #include "llvm/ADT/SmallVector.h" |
31 | #include "llvm/Analysis/TargetLibraryInfo.h" |
32 | #include "llvm/Analysis/VectorUtils.h" |
33 | #include "llvm/CodeGen/ISDOpcodes.h" |
34 | #include "llvm/CodeGen/SelectionDAG.h" |
35 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
36 | #include "llvm/CodeGen/TargetLowering.h" |
37 | #include "llvm/CodeGen/ValueTypes.h" |
38 | #include "llvm/CodeGenTypes/MachineValueType.h" |
39 | #include "llvm/IR/DataLayout.h" |
40 | #include "llvm/Support/Casting.h" |
41 | #include "llvm/Support/Compiler.h" |
42 | #include "llvm/Support/Debug.h" |
43 | #include "llvm/Support/ErrorHandling.h" |
44 | #include <cassert> |
45 | #include <cstdint> |
46 | #include <iterator> |
47 | #include <utility> |
48 | |
49 | using namespace llvm; |
50 | |
51 | #define DEBUG_TYPE "legalizevectorops" |
52 | |
53 | namespace { |
54 | |
55 | class VectorLegalizer { |
56 | SelectionDAG& DAG; |
57 | const TargetLowering &TLI; |
58 | bool Changed = false; // Keep track of whether anything changed |
59 | |
60 | /// For nodes that are of legal width, and that have more than one use, this |
61 | /// map indicates what regularized operand to use. This allows us to avoid |
62 | /// legalizing the same thing more than once. |
63 | SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; |
64 | |
65 | /// Adds a node to the translation cache. |
66 | void AddLegalizedOperand(SDValue From, SDValue To) { |
67 | LegalizedNodes.insert(KV: std::make_pair(x&: From, y&: To)); |
68 | // If someone requests legalization of the new node, return itself. |
69 | if (From != To) |
70 | LegalizedNodes.insert(KV: std::make_pair(x&: To, y&: To)); |
71 | } |
72 | |
73 | /// Legalizes the given node. |
74 | SDValue LegalizeOp(SDValue Op); |
75 | |
76 | /// Assuming the node is legal, "legalize" the results. |
77 | SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result); |
78 | |
79 | /// Make sure Results are legal and update the translation cache. |
80 | SDValue RecursivelyLegalizeResults(SDValue Op, |
81 | MutableArrayRef<SDValue> Results); |
82 | |
83 | /// Wrapper to interface LowerOperation with a vector of Results. |
84 | /// Returns false if the target wants to use default expansion. Otherwise |
85 | /// returns true. If it returns true and the Results are empty, then the |
86 | /// target wants to keep the input node as is. |
87 | bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results); |
88 | |
89 | /// Implements unrolling a VSETCC. |
90 | SDValue UnrollVSETCC(SDNode *Node); |
91 | |
92 | /// Implement expand-based legalization of vector operations. |
93 | /// |
94 | /// This is just a high-level routine to dispatch to specific code paths for |
95 | /// operations to legalize them. |
96 | void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
97 | |
98 | /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if |
99 | /// FP_TO_SINT isn't legal. |
100 | void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
101 | |
102 | /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if |
103 | /// SINT_TO_FLOAT and SHR on vectors aren't legal. |
104 | void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
105 | |
106 | /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. |
107 | SDValue ExpandSEXTINREG(SDNode *Node); |
108 | |
109 | /// Implement expansion for ANY_EXTEND_VECTOR_INREG. |
110 | /// |
111 | /// Shuffles the low lanes of the operand into place and bitcasts to the proper |
112 | /// type. The contents of the bits in the extended part of each element are |
113 | /// undef. |
114 | SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node); |
115 | |
116 | /// Implement expansion for SIGN_EXTEND_VECTOR_INREG. |
117 | /// |
118 | /// Shuffles the low lanes of the operand into place, bitcasts to the proper |
119 | /// type, then shifts left and arithmetic shifts right to introduce a sign |
120 | /// extension. |
121 | SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node); |
122 | |
123 | /// Implement expansion for ZERO_EXTEND_VECTOR_INREG. |
124 | /// |
125 | /// Shuffles the low lanes of the operand into place and blends zeros into |
126 | /// the remaining lanes, finally bitcasting to the proper type. |
127 | SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node); |
128 | |
129 | /// Expand bswap of vectors into a shuffle if legal. |
130 | SDValue ExpandBSWAP(SDNode *Node); |
131 | |
132 | /// Implement vselect in terms of XOR, AND, OR when blend is not |
133 | /// supported by the target. |
134 | SDValue ExpandVSELECT(SDNode *Node); |
135 | SDValue ExpandVP_SELECT(SDNode *Node); |
136 | SDValue ExpandVP_MERGE(SDNode *Node); |
137 | SDValue ExpandVP_REM(SDNode *Node); |
138 | SDValue ExpandVP_FNEG(SDNode *Node); |
139 | SDValue ExpandVP_FABS(SDNode *Node); |
140 | SDValue ExpandVP_FCOPYSIGN(SDNode *Node); |
141 | SDValue ExpandSELECT(SDNode *Node); |
142 | std::pair<SDValue, SDValue> ExpandLoad(SDNode *N); |
143 | SDValue ExpandStore(SDNode *N); |
144 | SDValue ExpandFNEG(SDNode *Node); |
145 | SDValue ExpandFABS(SDNode *Node); |
146 | SDValue ExpandFCOPYSIGN(SDNode *Node); |
147 | void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
148 | void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
149 | SDValue ExpandBITREVERSE(SDNode *Node); |
150 | void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
151 | void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
152 | void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
153 | void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
154 | void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
155 | void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
156 | |
157 | bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC, |
158 | SmallVectorImpl<SDValue> &Results); |
159 | bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall Call_F32, |
160 | RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, |
161 | RTLIB::Libcall Call_F128, |
162 | RTLIB::Libcall Call_PPCF128, |
163 | SmallVectorImpl<SDValue> &Results); |
164 | |
165 | void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
166 | |
167 | /// Implements vector promotion. |
168 | /// |
169 | /// This is essentially just bitcasting the operands to a different type and |
170 | /// bitcasting the result back to the original type. |
171 | void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
172 | |
173 | /// Implements [SU]INT_TO_FP vector promotion. |
174 | /// |
175 | /// This is a [zs]ext of the input operand to a larger integer type. |
176 | void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
177 | |
178 | /// Implements FP_TO_[SU]INT vector promotion of the result type. |
179 | /// |
180 | /// It is promoted to a larger integer type. The result is then |
181 | /// truncated back to the original type. |
182 | void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
183 | |
184 | /// Implements vector setcc operation promotion. |
185 | /// |
186 | /// All vector operands are promoted to a vector type with larger element |
187 | /// type. |
188 | void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
189 | |
190 | void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results); |
191 | |
192 | /// Calculate the reduction using a type of higher precision and round the |
193 | /// result to match the original type. Setting NonArithmetic signifies that |
194 | /// rounding the result does not affect its value. |
195 | void PromoteFloatVECREDUCE(SDNode *Node, SmallVectorImpl<SDValue> &Results, |
196 | bool NonArithmetic); |
197 | |
198 | public: |
199 | VectorLegalizer(SelectionDAG& dag) : |
200 | DAG(dag), TLI(dag.getTargetLoweringInfo()) {} |
201 | |
202 | /// Begin legalizing the vector operations in the DAG. |
203 | bool Run(); |
204 | }; |
205 | |
206 | } // end anonymous namespace |
207 | |
208 | bool VectorLegalizer::Run() { |
209 | // Before we start legalizing vector nodes, check if there are any vectors. |
210 | bool HasVectors = false; |
211 | for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), |
212 | E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I) { |
213 | // Check if the values of the nodes contain vectors. We don't need to check |
214 | // the operands because we are going to check their values at some point. |
215 | HasVectors = llvm::any_of(Range: I->values(), P: [](EVT T) { return T.isVector(); }); |
216 | |
217 | // If we found a vector node we can start the legalization. |
218 | if (HasVectors) |
219 | break; |
220 | } |
221 | |
222 | // If this basic block has no vectors then there is nothing to legalize. |
223 | if (!HasVectors) |
224 | return false; |
225 | |
226 | // The legalize process is inherently a bottom-up recursive process (users |
227 | // legalize their uses before themselves). Given infinite stack space, we |
228 | // could just start legalizing on the root and traverse the whole graph. In |
229 | // practice however, this causes us to run out of stack space on large basic |
230 | // blocks. To avoid this problem, compute an ordering of the nodes where each |
231 | // node is only legalized after all of its operands are legalized. |
232 | DAG.AssignTopologicalOrder(); |
233 | for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), |
234 | E = std::prev(x: DAG.allnodes_end()); I != std::next(x: E); ++I) |
235 | LegalizeOp(Op: SDValue(&*I, 0)); |
236 | |
237 | // Finally, it's possible the root changed. Get the new root. |
238 | SDValue OldRoot = DAG.getRoot(); |
239 | assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?" ); |
240 | DAG.setRoot(LegalizedNodes[OldRoot]); |
241 | |
242 | LegalizedNodes.clear(); |
243 | |
244 | // Remove dead nodes now. |
245 | DAG.RemoveDeadNodes(); |
246 | |
247 | return Changed; |
248 | } |
249 | |
250 | SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) { |
251 | assert(Op->getNumValues() == Result->getNumValues() && |
252 | "Unexpected number of results" ); |
253 | // Generic legalization: just pass the operand through. |
254 | for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i) |
255 | AddLegalizedOperand(From: Op.getValue(R: i), To: SDValue(Result, i)); |
256 | return SDValue(Result, Op.getResNo()); |
257 | } |
258 | |
259 | SDValue |
260 | VectorLegalizer::RecursivelyLegalizeResults(SDValue Op, |
261 | MutableArrayRef<SDValue> Results) { |
262 | assert(Results.size() == Op->getNumValues() && |
263 | "Unexpected number of results" ); |
264 | // Make sure that the generated code is itself legal. |
265 | for (unsigned i = 0, e = Results.size(); i != e; ++i) { |
266 | Results[i] = LegalizeOp(Op: Results[i]); |
267 | AddLegalizedOperand(From: Op.getValue(R: i), To: Results[i]); |
268 | } |
269 | |
270 | return Results[Op.getResNo()]; |
271 | } |
272 | |
273 | SDValue VectorLegalizer::LegalizeOp(SDValue Op) { |
274 | // Note that LegalizeOp may be reentered even from single-use nodes, which |
275 | // means that we always must cache transformed nodes. |
276 | DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Val: Op); |
277 | if (I != LegalizedNodes.end()) return I->second; |
278 | |
279 | // Legalize the operands |
280 | SmallVector<SDValue, 8> Ops; |
281 | for (const SDValue &Oper : Op->op_values()) |
282 | Ops.push_back(Elt: LegalizeOp(Op: Oper)); |
283 | |
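// Update the node with its legalized operands. UpdateNodeOperands may return
// a different (CSE'd) node, so use the returned pointer from here on.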
284 | SDNode *Node = DAG.UpdateNodeOperands(N: Op.getNode(), Ops); |
285 | |
286 | bool HasVectorValueOrOp = |
287 | llvm::any_of(Range: Node->values(), P: [](EVT T) { return T.isVector(); }) || |
288 | llvm::any_of(Range: Node->op_values(), |
289 | P: [](SDValue O) { return O.getValueType().isVector(); }); |
290 | if (!HasVectorValueOrOp) |
291 | return TranslateLegalizeResults(Op, Result: Node); |
292 | |
293 | TargetLowering::LegalizeAction Action = TargetLowering::Legal; |
294 | EVT ValVT; |
295 | switch (Op.getOpcode()) { |
296 | default: |
297 | return TranslateLegalizeResults(Op, Result: Node); |
298 | case ISD::LOAD: { |
299 | LoadSDNode *LD = cast<LoadSDNode>(Val: Node); |
300 | ISD::LoadExtType ExtType = LD->getExtensionType(); |
301 | EVT LoadedVT = LD->getMemoryVT(); |
302 | if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD) |
303 | Action = TLI.getLoadExtAction(ExtType, ValVT: LD->getValueType(ResNo: 0), MemVT: LoadedVT); |
304 | break; |
305 | } |
306 | case ISD::STORE: { |
307 | StoreSDNode *ST = cast<StoreSDNode>(Val: Node); |
308 | EVT StVT = ST->getMemoryVT(); |
309 | MVT ValVT = ST->getValue().getSimpleValueType(); |
310 | if (StVT.isVector() && ST->isTruncatingStore()) |
311 | Action = TLI.getTruncStoreAction(ValVT, MemVT: StVT); |
312 | break; |
313 | } |
314 | case ISD::MERGE_VALUES: |
315 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0)); |
316 | // This operation lies about being legal: when it claims to be legal, |
317 | // it should actually be expanded. |
318 | if (Action == TargetLowering::Legal) |
319 | Action = TargetLowering::Expand; |
320 | break; |
321 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
322 | case ISD::STRICT_##DAGN: |
323 | #include "llvm/IR/ConstrainedOps.def" |
324 | ValVT = Node->getValueType(ResNo: 0); |
325 | if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP || |
326 | Op.getOpcode() == ISD::STRICT_UINT_TO_FP) |
327 | ValVT = Node->getOperand(Num: 1).getValueType(); |
328 | if (Op.getOpcode() == ISD::STRICT_FSETCC || |
329 | Op.getOpcode() == ISD::STRICT_FSETCCS) { |
330 | MVT OpVT = Node->getOperand(Num: 1).getSimpleValueType(); |
331 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 3))->get(); |
332 | Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT); |
333 | if (Action == TargetLowering::Legal) |
334 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT); |
335 | } else { |
336 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: ValVT); |
337 | } |
338 | // If we're asked to expand a strict vector floating-point operation, |
339 | // by default we're going to simply unroll it. That is usually the |
340 | // best approach, except in the case where the resulting strict (scalar) |
341 | // operations would themselves use the fallback mutation to non-strict. |
342 | // In that specific case, just do the fallback on the vector op. |
343 | if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() && |
344 | TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: ValVT) == |
345 | TargetLowering::Legal) { |
346 | EVT EltVT = ValVT.getVectorElementType(); |
347 | if (TLI.getOperationAction(Op: Node->getOpcode(), VT: EltVT) |
348 | == TargetLowering::Expand && |
349 | TLI.getStrictFPOperationAction(Op: Node->getOpcode(), VT: EltVT) |
350 | == TargetLowering::Legal) |
351 | Action = TargetLowering::Legal; |
352 | } |
353 | break; |
354 | case ISD::ADD: |
355 | case ISD::SUB: |
356 | case ISD::MUL: |
357 | case ISD::MULHS: |
358 | case ISD::MULHU: |
359 | case ISD::SDIV: |
360 | case ISD::UDIV: |
361 | case ISD::SREM: |
362 | case ISD::UREM: |
363 | case ISD::SDIVREM: |
364 | case ISD::UDIVREM: |
365 | case ISD::FADD: |
366 | case ISD::FSUB: |
367 | case ISD::FMUL: |
368 | case ISD::FDIV: |
369 | case ISD::FREM: |
370 | case ISD::AND: |
371 | case ISD::OR: |
372 | case ISD::XOR: |
373 | case ISD::SHL: |
374 | case ISD::SRA: |
375 | case ISD::SRL: |
376 | case ISD::FSHL: |
377 | case ISD::FSHR: |
378 | case ISD::ROTL: |
379 | case ISD::ROTR: |
380 | case ISD::ABS: |
381 | case ISD::ABDS: |
382 | case ISD::ABDU: |
383 | case ISD::AVGCEILS: |
384 | case ISD::AVGCEILU: |
385 | case ISD::AVGFLOORS: |
386 | case ISD::AVGFLOORU: |
387 | case ISD::BSWAP: |
388 | case ISD::BITREVERSE: |
389 | case ISD::CTLZ: |
390 | case ISD::CTTZ: |
391 | case ISD::CTLZ_ZERO_UNDEF: |
392 | case ISD::CTTZ_ZERO_UNDEF: |
393 | case ISD::CTPOP: |
394 | case ISD::SELECT: |
395 | case ISD::VSELECT: |
396 | case ISD::SELECT_CC: |
397 | case ISD::ZERO_EXTEND: |
398 | case ISD::ANY_EXTEND: |
399 | case ISD::TRUNCATE: |
400 | case ISD::SIGN_EXTEND: |
401 | case ISD::FP_TO_SINT: |
402 | case ISD::FP_TO_UINT: |
403 | case ISD::FNEG: |
404 | case ISD::FABS: |
405 | case ISD::FMINNUM: |
406 | case ISD::FMAXNUM: |
407 | case ISD::FMINNUM_IEEE: |
408 | case ISD::FMAXNUM_IEEE: |
409 | case ISD::FMINIMUM: |
410 | case ISD::FMAXIMUM: |
411 | case ISD::FMINIMUMNUM: |
412 | case ISD::FMAXIMUMNUM: |
413 | case ISD::FCOPYSIGN: |
414 | case ISD::FSQRT: |
415 | case ISD::FSIN: |
416 | case ISD::FCOS: |
417 | case ISD::FTAN: |
418 | case ISD::FASIN: |
419 | case ISD::FACOS: |
420 | case ISD::FATAN: |
421 | case ISD::FATAN2: |
422 | case ISD::FSINH: |
423 | case ISD::FCOSH: |
424 | case ISD::FTANH: |
425 | case ISD::FLDEXP: |
426 | case ISD::FPOWI: |
427 | case ISD::FPOW: |
428 | case ISD::FLOG: |
429 | case ISD::FLOG2: |
430 | case ISD::FLOG10: |
431 | case ISD::FEXP: |
432 | case ISD::FEXP2: |
433 | case ISD::FEXP10: |
434 | case ISD::FCEIL: |
435 | case ISD::FTRUNC: |
436 | case ISD::FRINT: |
437 | case ISD::FNEARBYINT: |
438 | case ISD::FROUND: |
439 | case ISD::FROUNDEVEN: |
440 | case ISD::FFLOOR: |
441 | case ISD::FP_ROUND: |
442 | case ISD::FP_EXTEND: |
443 | case ISD::FPTRUNC_ROUND: |
444 | case ISD::FMA: |
445 | case ISD::SIGN_EXTEND_INREG: |
446 | case ISD::ANY_EXTEND_VECTOR_INREG: |
447 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
448 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
449 | case ISD::SMIN: |
450 | case ISD::SMAX: |
451 | case ISD::UMIN: |
452 | case ISD::UMAX: |
453 | case ISD::SMUL_LOHI: |
454 | case ISD::UMUL_LOHI: |
455 | case ISD::SADDO: |
456 | case ISD::UADDO: |
457 | case ISD::SSUBO: |
458 | case ISD::USUBO: |
459 | case ISD::SMULO: |
460 | case ISD::UMULO: |
461 | case ISD::FCANONICALIZE: |
462 | case ISD::FFREXP: |
463 | case ISD::FMODF: |
464 | case ISD::FSINCOS: |
465 | case ISD::FSINCOSPI: |
466 | case ISD::SADDSAT: |
467 | case ISD::UADDSAT: |
468 | case ISD::SSUBSAT: |
469 | case ISD::USUBSAT: |
470 | case ISD::SSHLSAT: |
471 | case ISD::USHLSAT: |
472 | case ISD::FP_TO_SINT_SAT: |
473 | case ISD::FP_TO_UINT_SAT: |
474 | case ISD::MGATHER: |
475 | case ISD::VECTOR_COMPRESS: |
476 | case ISD::SCMP: |
477 | case ISD::UCMP: |
478 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: Node->getValueType(ResNo: 0)); |
479 | break; |
480 | case ISD::SMULFIX: |
481 | case ISD::SMULFIXSAT: |
482 | case ISD::UMULFIX: |
483 | case ISD::UMULFIXSAT: |
484 | case ISD::SDIVFIX: |
485 | case ISD::SDIVFIXSAT: |
486 | case ISD::UDIVFIX: |
487 | case ISD::UDIVFIXSAT: { |
488 | unsigned Scale = Node->getConstantOperandVal(Num: 2); |
489 | Action = TLI.getFixedPointOperationAction(Op: Node->getOpcode(), |
490 | VT: Node->getValueType(ResNo: 0), Scale); |
491 | break; |
492 | } |
493 | case ISD::LROUND: |
494 | case ISD::LLROUND: |
495 | case ISD::LRINT: |
496 | case ISD::LLRINT: |
497 | case ISD::SINT_TO_FP: |
498 | case ISD::UINT_TO_FP: |
499 | case ISD::VECREDUCE_ADD: |
500 | case ISD::VECREDUCE_MUL: |
501 | case ISD::VECREDUCE_AND: |
502 | case ISD::VECREDUCE_OR: |
503 | case ISD::VECREDUCE_XOR: |
504 | case ISD::VECREDUCE_SMAX: |
505 | case ISD::VECREDUCE_SMIN: |
506 | case ISD::VECREDUCE_UMAX: |
507 | case ISD::VECREDUCE_UMIN: |
508 | case ISD::VECREDUCE_FADD: |
509 | case ISD::VECREDUCE_FMAX: |
510 | case ISD::VECREDUCE_FMAXIMUM: |
511 | case ISD::VECREDUCE_FMIN: |
512 | case ISD::VECREDUCE_FMINIMUM: |
513 | case ISD::VECREDUCE_FMUL: |
514 | case ISD::VECTOR_FIND_LAST_ACTIVE: |
515 | Action = TLI.getOperationAction(Op: Node->getOpcode(), |
516 | VT: Node->getOperand(Num: 0).getValueType()); |
517 | break; |
518 | case ISD::VECREDUCE_SEQ_FADD: |
519 | case ISD::VECREDUCE_SEQ_FMUL: |
520 | Action = TLI.getOperationAction(Op: Node->getOpcode(), |
521 | VT: Node->getOperand(Num: 1).getValueType()); |
522 | break; |
523 | case ISD::SETCC: { |
524 | MVT OpVT = Node->getOperand(Num: 0).getSimpleValueType(); |
525 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Val: Node->getOperand(Num: 2))->get(); |
526 | Action = TLI.getCondCodeAction(CC: CCCode, VT: OpVT); |
527 | if (Action == TargetLowering::Legal) |
528 | Action = TLI.getOperationAction(Op: Node->getOpcode(), VT: OpVT); |
529 | break; |
530 | } |
531 | case ISD::PARTIAL_REDUCE_UMLA: |
532 | case ISD::PARTIAL_REDUCE_SMLA: |
533 | case ISD::PARTIAL_REDUCE_SUMLA: |
534 | Action = |
535 | TLI.getPartialReduceMLAAction(Opc: Op.getOpcode(), AccVT: Node->getValueType(ResNo: 0), |
536 | InputVT: Node->getOperand(Num: 1).getValueType()); |
537 | break; |
538 | |
539 | #define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \ |
540 | case ISD::VPID: { \ |
541 | EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \ |
542 | : Node->getOperand(LEGALPOS).getValueType(); \ |
543 | if (ISD::VPID == ISD::VP_SETCC) { \ |
544 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \ |
545 | Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \ |
546 | if (Action != TargetLowering::Legal) \ |
547 | break; \ |
548 | } \ |
549 | /* Defer non-vector results to LegalizeDAG. */ \ |
550 | if (!Node->getValueType(0).isVector() && \ |
551 | Node->getValueType(0) != MVT::Other) { \ |
552 | Action = TargetLowering::Legal; \ |
553 | break; \ |
554 | } \ |
555 | Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \ |
556 | } break; |
557 | #include "llvm/IR/VPIntrinsics.def" |
558 | } |
559 | |
560 | LLVM_DEBUG(dbgs() << "\nLegalizing vector op: " ; Node->dump(&DAG)); |
561 | |
562 | SmallVector<SDValue, 8> ResultVals; |
563 | switch (Action) { |
564 | default: llvm_unreachable("This action is not supported yet!" ); |
565 | case TargetLowering::Promote: |
566 | assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) && |
567 | "This action is not supported yet!" ); |
568 | LLVM_DEBUG(dbgs() << "Promoting\n" ); |
569 | Promote(Node, Results&: ResultVals); |
570 | assert(!ResultVals.empty() && "No results for promotion?" ); |
571 | break; |
572 | case TargetLowering::Legal: |
573 | LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n" ); |
574 | break; |
575 | case TargetLowering::Custom: |
576 | LLVM_DEBUG(dbgs() << "Trying custom legalization\n" ); |
577 | if (LowerOperationWrapper(N: Node, Results&: ResultVals)) |
578 | break; |
579 | LLVM_DEBUG(dbgs() << "Could not custom legalize node\n" ); |
580 | [[fallthrough]]; |
581 | case TargetLowering::Expand: |
582 | LLVM_DEBUG(dbgs() << "Expanding\n" ); |
583 | Expand(Node, Results&: ResultVals); |
584 | break; |
585 | } |
586 | |
587 | if (ResultVals.empty()) |
588 | return TranslateLegalizeResults(Op, Result: Node); |
589 | |
590 | Changed = true; |
591 | return RecursivelyLegalizeResults(Op, Results: ResultVals); |
592 | } |
593 | |
594 | // FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we |
595 | // merge them somehow? |
596 | bool VectorLegalizer::LowerOperationWrapper(SDNode *Node, |
597 | SmallVectorImpl<SDValue> &Results) { |
598 | SDValue Res = TLI.LowerOperation(Op: SDValue(Node, 0), DAG); |
599 | |
600 | if (!Res.getNode()) |
601 | return false; |
602 | |
603 | if (Res == SDValue(Node, 0)) |
604 | return true; |
605 | |
606 | // If the original node has one result, take the return value from |
607 | // LowerOperation as is. It might not be result number 0. |
608 | if (Node->getNumValues() == 1) { |
609 | Results.push_back(Elt: Res); |
610 | return true; |
611 | } |
612 | |
613 | // If the original node has multiple results, then the return node should |
614 | // have the same number of results. |
615 | assert((Node->getNumValues() == Res->getNumValues()) && |
616 | "Lowering returned the wrong number of results!" ); |
617 | |
618 | // Place new result values based on the result number of N. |
619 | for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I) |
620 | Results.push_back(Elt: Res.getValue(R: I)); |
621 | |
622 | return true; |
623 | } |
624 | |
625 | void VectorLegalizer::PromoteSETCC(SDNode *Node, |
626 | SmallVectorImpl<SDValue> &Results) { |
627 | MVT VecVT = Node->getOperand(Num: 0).getSimpleValueType(); |
628 | MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT); |
629 | |
630 | unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND; |
631 | |
632 | SDLoc DL(Node); |
633 | SmallVector<SDValue, 5> Operands(Node->getNumOperands()); |
634 | |
635 | Operands[0] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 0)); |
636 | Operands[1] = DAG.getNode(Opcode: ExtOp, DL, VT: NewVecVT, Operand: Node->getOperand(Num: 1)); |
637 | Operands[2] = Node->getOperand(Num: 2); |
638 | |
639 | if (Node->getOpcode() == ISD::VP_SETCC) { |
640 | Operands[3] = Node->getOperand(Num: 3); // mask |
641 | Operands[4] = Node->getOperand(Num: 4); // evl |
642 | } |
643 | |
644 | SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL, VT: Node->getSimpleValueType(ResNo: 0), |
645 | Ops: Operands, Flags: Node->getFlags()); |
646 | |
647 | Results.push_back(Elt: Res); |
648 | } |
649 | |
650 | void VectorLegalizer::PromoteSTRICT(SDNode *Node, |
651 | SmallVectorImpl<SDValue> &Results) { |
652 | MVT VecVT = Node->getOperand(Num: 1).getSimpleValueType(); |
653 | MVT NewVecVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: VecVT); |
654 | |
655 | assert(VecVT.isFloatingPoint()); |
656 | |
657 | SDLoc DL(Node); |
658 | SmallVector<SDValue, 5> Operands(Node->getNumOperands()); |
659 | SmallVector<SDValue, 2> Chains; |
660 | |
661 | for (unsigned j = 1; j != Node->getNumOperands(); ++j) |
662 | if (Node->getOperand(Num: j).getValueType().isVector() && |
663 | !(ISD::isVPOpcode(Opcode: Node->getOpcode()) && |
664 | ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j)) // Skip mask operand. |
665 | { |
666 | // promote the vector operand. |
667 | SDValue Ext = |
668 | DAG.getNode(Opcode: ISD::STRICT_FP_EXTEND, DL, ResultTys: {NewVecVT, MVT::Other}, |
669 | Ops: {Node->getOperand(Num: 0), Node->getOperand(Num: j)}); |
670 | Operands[j] = Ext.getValue(R: 0); |
671 | Chains.push_back(Elt: Ext.getValue(R: 1)); |
672 | } else |
673 | Operands[j] = Node->getOperand(Num: j); // Skip non-vector operands. |
674 | |
675 | SDVTList VTs = DAG.getVTList(VT1: NewVecVT, VT2: Node->getValueType(ResNo: 1)); |
676 | |
677 | Operands[0] = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: Chains); |
678 | |
679 | SDValue Res = |
680 | DAG.getNode(Opcode: Node->getOpcode(), DL, VTList: VTs, Ops: Operands, Flags: Node->getFlags()); |
681 | |
682 | SDValue Round = |
683 | DAG.getNode(Opcode: ISD::STRICT_FP_ROUND, DL, ResultTys: {VecVT, MVT::Other}, |
684 | Ops: {Res.getValue(R: 1), Res.getValue(R: 0), |
685 | DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true)}); |
686 | |
687 | Results.push_back(Elt: Round.getValue(R: 0)); |
688 | Results.push_back(Elt: Round.getValue(R: 1)); |
689 | } |
690 | |
691 | void VectorLegalizer::PromoteFloatVECREDUCE(SDNode *Node, |
692 | SmallVectorImpl<SDValue> &Results, |
693 | bool NonArithmetic) { |
694 | MVT OpVT = Node->getOperand(Num: 0).getSimpleValueType(); |
695 | assert(OpVT.isFloatingPoint() && "Expected floating point reduction!" ); |
696 | MVT NewOpVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT: OpVT); |
697 | |
698 | SDLoc DL(Node); |
699 | SDValue NewOp = DAG.getNode(Opcode: ISD::FP_EXTEND, DL, VT: NewOpVT, Operand: Node->getOperand(Num: 0)); |
700 | SDValue Rdx = |
701 | DAG.getNode(Opcode: Node->getOpcode(), DL, VT: NewOpVT.getVectorElementType(), Operand: NewOp, |
702 | Flags: Node->getFlags()); |
703 | SDValue Res = |
704 | DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: Node->getValueType(ResNo: 0), N1: Rdx, |
705 | N2: DAG.getIntPtrConstant(Val: NonArithmetic, DL, /*isTarget=*/true)); |
706 | Results.push_back(Elt: Res); |
707 | } |
708 | |
709 | void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) { |
710 | // For a few operations there is a specific concept for promotion based on |
711 | // the operand's type. |
712 | switch (Node->getOpcode()) { |
713 | case ISD::SINT_TO_FP: |
714 | case ISD::UINT_TO_FP: |
715 | case ISD::STRICT_SINT_TO_FP: |
716 | case ISD::STRICT_UINT_TO_FP: |
717 | // "Promote" the operation by extending the operand. |
718 | PromoteINT_TO_FP(Node, Results); |
719 | return; |
720 | case ISD::FP_TO_UINT: |
721 | case ISD::FP_TO_SINT: |
722 | case ISD::STRICT_FP_TO_UINT: |
723 | case ISD::STRICT_FP_TO_SINT: |
724 | // Promote the operation by extending the operand. |
725 | PromoteFP_TO_INT(Node, Results); |
726 | return; |
727 | case ISD::VP_SETCC: |
728 | case ISD::SETCC: |
729 | // Promote the operation by extending the operand. |
730 | PromoteSETCC(Node, Results); |
731 | return; |
732 | case ISD::STRICT_FADD: |
733 | case ISD::STRICT_FSUB: |
734 | case ISD::STRICT_FMUL: |
735 | case ISD::STRICT_FDIV: |
736 | case ISD::STRICT_FSQRT: |
737 | case ISD::STRICT_FMA: |
738 | PromoteSTRICT(Node, Results); |
739 | return; |
740 | case ISD::VECREDUCE_FADD: |
741 | PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/false); |
742 | return; |
743 | case ISD::VECREDUCE_FMAX: |
744 | case ISD::VECREDUCE_FMAXIMUM: |
745 | case ISD::VECREDUCE_FMIN: |
746 | case ISD::VECREDUCE_FMINIMUM: |
747 | PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/true); |
748 | return; |
749 | case ISD::FP_ROUND: |
750 | case ISD::FP_EXTEND: |
751 | // These operations are used to do promotion so they can't be promoted |
752 | // themselves. |
753 | llvm_unreachable("Don't know how to promote this operation!" ); |
754 | case ISD::VP_FABS: |
755 | case ISD::VP_FCOPYSIGN: |
756 | case ISD::VP_FNEG: |
757 | // Promoting fabs, fneg, and fcopysign changes their semantics. |
758 | llvm_unreachable("These operations should not be promoted" ); |
759 | } |
760 | |
761 | // There are currently two cases of vector promotion: |
762 | // 1) Bitcasting a vector of integers to a vector type with the same overall |
763 | // bit width. For example, x86 promotes ISD::AND v2i32 to v1i64. |
764 | // 2) Extending a vector of floats to a vector of the same number of larger |
765 | // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. |
766 | assert(Node->getNumValues() == 1 && |
767 | "Can't promote a vector with multiple results!" ); |
768 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
769 | MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT); |
770 | SDLoc dl(Node); |
771 | SmallVector<SDValue, 4> Operands(Node->getNumOperands()); |
772 | |
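// Promote each vector operand: when both the operand's element type and the
// promoted element type are floating point, widen with (VP_)FP_EXTEND;
// otherwise bitcast. Scalar operands and VP mask operands pass through
// unchanged.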
773 | for (unsigned j = 0; j != Node->getNumOperands(); ++j) { |
774 | // Do not promote the mask operand of a VP OP. |
775 | bool SkipPromote = ISD::isVPOpcode(Opcode: Node->getOpcode()) && |
776 | ISD::getVPMaskIdx(Opcode: Node->getOpcode()) == j; |
777 | if (Node->getOperand(Num: j).getValueType().isVector() && !SkipPromote) |
778 | if (Node->getOperand(Num: j) |
779 | .getValueType() |
780 | .getVectorElementType() |
781 | .isFloatingPoint() && |
782 | NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()) |
783 | if (ISD::isVPOpcode(Opcode: Node->getOpcode())) { |
784 | unsigned EVLIdx = |
785 | *ISD::getVPExplicitVectorLengthIdx(Opcode: Node->getOpcode()); |
786 | unsigned MaskIdx = *ISD::getVPMaskIdx(Opcode: Node->getOpcode()); |
787 | Operands[j] = |
788 | DAG.getNode(Opcode: ISD::VP_FP_EXTEND, DL: dl, VT: NVT, N1: Node->getOperand(Num: j), |
789 | N2: Node->getOperand(Num: MaskIdx), N3: Node->getOperand(Num: EVLIdx)); |
790 | } else { |
791 | Operands[j] = |
792 | DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j)); |
793 | } |
794 | else |
795 | Operands[j] = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j)); |
796 | else |
797 | Operands[j] = Node->getOperand(Num: j); |
798 | } |
799 | |
800 | SDValue Res = |
801 | DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: NVT, Ops: Operands, Flags: Node->getFlags()); |
802 | |
803 | if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || |
804 | (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && |
805 | NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) |
806 | if (ISD::isVPOpcode(Opcode: Node->getOpcode())) { |
807 | unsigned EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Opcode: Node->getOpcode()); |
808 | unsigned MaskIdx = *ISD::getVPMaskIdx(Opcode: Node->getOpcode()); |
809 | Res = DAG.getNode(Opcode: ISD::VP_FP_ROUND, DL: dl, VT, N1: Res, |
810 | N2: Node->getOperand(Num: MaskIdx), N3: Node->getOperand(Num: EVLIdx)); |
811 | } else { |
812 | Res = DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT, N1: Res, |
813 | N2: DAG.getIntPtrConstant(Val: 0, DL: dl, /*isTarget=*/true)); |
814 | } |
815 | else |
816 | Res = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT, Operand: Res); |
817 | |
818 | Results.push_back(Elt: Res); |
819 | } |
820 | |
821 | void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node, |
822 | SmallVectorImpl<SDValue> &Results) { |
823 | // INT_TO_FP operations may require the input operand be promoted even |
824 | // when the type is otherwise legal. |
825 | bool IsStrict = Node->isStrictFPOpcode(); |
826 | MVT VT = Node->getOperand(Num: IsStrict ? 1 : 0).getSimpleValueType(); |
827 | MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT); |
828 | assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && |
829 | "Vectors have different number of elements!" ); |
830 | |
831 | SDLoc dl(Node); |
832 | SmallVector<SDValue, 4> Operands(Node->getNumOperands()); |
833 | |
834 | unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP || |
835 | Node->getOpcode() == ISD::STRICT_UINT_TO_FP) |
836 | ? ISD::ZERO_EXTEND |
837 | : ISD::SIGN_EXTEND; |
838 | for (unsigned j = 0; j != Node->getNumOperands(); ++j) { |
839 | if (Node->getOperand(Num: j).getValueType().isVector()) |
840 | Operands[j] = DAG.getNode(Opcode: Opc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: j)); |
841 | else |
842 | Operands[j] = Node->getOperand(Num: j); |
843 | } |
844 | |
845 | if (IsStrict) { |
846 | SDValue Res = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, |
847 | ResultTys: {Node->getValueType(ResNo: 0), MVT::Other}, Ops: Operands); |
848 | Results.push_back(Elt: Res); |
849 | Results.push_back(Elt: Res.getValue(R: 1)); |
850 | return; |
851 | } |
852 | |
853 | SDValue Res = |
854 | DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VT: Node->getValueType(ResNo: 0), Ops: Operands); |
855 | Results.push_back(Elt: Res); |
856 | } |
857 | |
858 | // For FP_TO_INT we promote the result type to a vector type with wider |
859 | // elements and then truncate the result. This is different from the default |
860 | // PromoteVector which uses bitcast to promote, thus assuming that the |
861 | // promoted vector type has the same overall size. |
862 | void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node, |
863 | SmallVectorImpl<SDValue> &Results) { |
864 | MVT VT = Node->getSimpleValueType(ResNo: 0); |
865 | MVT NVT = TLI.getTypeToPromoteTo(Op: Node->getOpcode(), VT); |
866 | bool IsStrict = Node->isStrictFPOpcode(); |
867 | assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && |
868 | "Vectors have different number of elements!" ); |
869 | |
870 | unsigned NewOpc = Node->getOpcode(); |
871 | // Change FP_TO_UINT to FP_TO_SINT if possible. |
872 | // TODO: Should we only do this if FP_TO_UINT itself isn't legal? |
873 | if (NewOpc == ISD::FP_TO_UINT && |
874 | TLI.isOperationLegalOrCustom(Op: ISD::FP_TO_SINT, VT: NVT)) |
875 | NewOpc = ISD::FP_TO_SINT; |
876 | |
877 | if (NewOpc == ISD::STRICT_FP_TO_UINT && |
878 | TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FP_TO_SINT, VT: NVT)) |
879 | NewOpc = ISD::STRICT_FP_TO_SINT; |
880 | |
881 | SDLoc dl(Node); |
882 | SDValue Promoted, Chain; |
883 | if (IsStrict) { |
884 | Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, ResultTys: {NVT, MVT::Other}, |
885 | Ops: {Node->getOperand(Num: 0), Node->getOperand(Num: 1)}); |
886 | Chain = Promoted.getValue(R: 1); |
887 | } else |
888 | Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, Operand: Node->getOperand(Num: 0)); |
889 | |
890 | // Assert that the converted value fits in the original type. If it doesn't |
891 | // (e.g. because the value being converted is too big), then the result of the |
892 | // original operation was undefined anyway, so the assert is still correct. |
893 | if (Node->getOpcode() == ISD::FP_TO_UINT || |
894 | Node->getOpcode() == ISD::STRICT_FP_TO_UINT) |
895 | NewOpc = ISD::AssertZext; |
896 | else |
897 | NewOpc = ISD::AssertSext; |
898 | |
899 | Promoted = DAG.getNode(Opcode: NewOpc, DL: dl, VT: NVT, N1: Promoted, |
900 | N2: DAG.getValueType(VT.getScalarType())); |
901 | Promoted = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT, Operand: Promoted); |
902 | Results.push_back(Elt: Promoted); |
903 | if (IsStrict) |
904 | Results.push_back(Elt: Chain); |
905 | } |
906 | |
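// Expand a vector extending load by scalarizing it. Returns the loaded value
// and the new chain.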
907 | std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) { |
908 | LoadSDNode *LD = cast<LoadSDNode>(Val: N); |
909 | return TLI.scalarizeVectorLoad(LD, DAG); |
910 | } |
911 | |
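// Expand a vector truncating store by scalarizing it. Returns the new chain.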
912 | SDValue VectorLegalizer::ExpandStore(SDNode *N) { |
913 | StoreSDNode *ST = cast<StoreSDNode>(Val: N); |
914 | SDValue TF = TLI.scalarizeVectorStore(ST, DAG); |
915 | return TF; |
916 | } |
917 | |
918 | void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { |
919 | switch (Node->getOpcode()) { |
920 | case ISD::LOAD: { |
921 | std::pair<SDValue, SDValue> Tmp = ExpandLoad(N: Node); |
922 | Results.push_back(Elt: Tmp.first); |
923 | Results.push_back(Elt: Tmp.second); |
924 | return; |
925 | } |
926 | case ISD::STORE: |
927 | Results.push_back(Elt: ExpandStore(N: Node)); |
928 | return; |
929 | case ISD::MERGE_VALUES: |
930 | for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) |
931 | Results.push_back(Elt: Node->getOperand(Num: i)); |
932 | return; |
933 | case ISD::SIGN_EXTEND_INREG: |
934 | if (SDValue Expanded = ExpandSEXTINREG(Node)) { |
935 | Results.push_back(Elt: Expanded); |
936 | return; |
937 | } |
938 | break; |
939 | case ISD::ANY_EXTEND_VECTOR_INREG: |
940 | Results.push_back(Elt: ExpandANY_EXTEND_VECTOR_INREG(Node)); |
941 | return; |
942 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
943 | Results.push_back(Elt: ExpandSIGN_EXTEND_VECTOR_INREG(Node)); |
944 | return; |
945 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
946 | Results.push_back(Elt: ExpandZERO_EXTEND_VECTOR_INREG(Node)); |
947 | return; |
948 | case ISD::BSWAP: |
949 | if (SDValue Expanded = ExpandBSWAP(Node)) { |
950 | Results.push_back(Elt: Expanded); |
951 | return; |
952 | } |
953 | break; |
954 | case ISD::VP_BSWAP: |
955 | Results.push_back(Elt: TLI.expandVPBSWAP(N: Node, DAG)); |
956 | return; |
957 | case ISD::VSELECT: |
958 | if (SDValue Expanded = ExpandVSELECT(Node)) { |
959 | Results.push_back(Elt: Expanded); |
960 | return; |
961 | } |
962 | break; |
963 | case ISD::VP_SELECT: |
964 | if (SDValue Expanded = ExpandVP_SELECT(Node)) { |
965 | Results.push_back(Elt: Expanded); |
966 | return; |
967 | } |
968 | break; |
969 | case ISD::VP_SREM: |
970 | case ISD::VP_UREM: |
971 | if (SDValue Expanded = ExpandVP_REM(Node)) { |
972 | Results.push_back(Elt: Expanded); |
973 | return; |
974 | } |
975 | break; |
976 | case ISD::VP_FNEG: |
977 | if (SDValue Expanded = ExpandVP_FNEG(Node)) { |
978 | Results.push_back(Elt: Expanded); |
979 | return; |
980 | } |
981 | break; |
982 | case ISD::VP_FABS: |
983 | if (SDValue Expanded = ExpandVP_FABS(Node)) { |
984 | Results.push_back(Elt: Expanded); |
985 | return; |
986 | } |
987 | break; |
988 | case ISD::VP_FCOPYSIGN: |
989 | if (SDValue Expanded = ExpandVP_FCOPYSIGN(Node)) { |
990 | Results.push_back(Elt: Expanded); |
991 | return; |
992 | } |
993 | break; |
994 | case ISD::SELECT: |
995 | if (SDValue Expanded = ExpandSELECT(Node)) { |
996 | Results.push_back(Elt: Expanded); |
997 | return; |
998 | } |
999 | break; |
1000 | case ISD::SELECT_CC: { |
1001 | if (Node->getValueType(ResNo: 0).isScalableVector()) { |
1002 | EVT CondVT = TLI.getSetCCResultType( |
1003 | DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: Node->getValueType(ResNo: 0)); |
1004 | SDValue SetCC = |
1005 | DAG.getNode(Opcode: ISD::SETCC, DL: SDLoc(Node), VT: CondVT, N1: Node->getOperand(Num: 0), |
1006 | N2: Node->getOperand(Num: 1), N3: Node->getOperand(Num: 4)); |
1007 | Results.push_back(Elt: DAG.getSelect(DL: SDLoc(Node), VT: Node->getValueType(ResNo: 0), Cond: SetCC, |
1008 | LHS: Node->getOperand(Num: 2), |
1009 | RHS: Node->getOperand(Num: 3))); |
1010 | return; |
1011 | } |
1012 | break; |
1013 | } |
1014 | case ISD::FP_TO_UINT: |
1015 | ExpandFP_TO_UINT(Node, Results); |
1016 | return; |
1017 | case ISD::UINT_TO_FP: |
1018 | ExpandUINT_TO_FLOAT(Node, Results); |
1019 | return; |
1020 | case ISD::FNEG: |
1021 | if (SDValue Expanded = ExpandFNEG(Node)) { |
1022 | Results.push_back(Elt: Expanded); |
1023 | return; |
1024 | } |
1025 | break; |
1026 | case ISD::FABS: |
1027 | if (SDValue Expanded = ExpandFABS(Node)) { |
1028 | Results.push_back(Elt: Expanded); |
1029 | return; |
1030 | } |
1031 | break; |
1032 | case ISD::FCOPYSIGN: |
1033 | if (SDValue Expanded = ExpandFCOPYSIGN(Node)) { |
1034 | Results.push_back(Elt: Expanded); |
1035 | return; |
1036 | } |
1037 | break; |
1038 | case ISD::FSUB: |
1039 | ExpandFSUB(Node, Results); |
1040 | return; |
1041 | case ISD::SETCC: |
1042 | case ISD::VP_SETCC: |
1043 | ExpandSETCC(Node, Results); |
1044 | return; |
1045 | case ISD::ABS: |
1046 | if (SDValue Expanded = TLI.expandABS(N: Node, DAG)) { |
1047 | Results.push_back(Elt: Expanded); |
1048 | return; |
1049 | } |
1050 | break; |
1051 | case ISD::ABDS: |
1052 | case ISD::ABDU: |
1053 | if (SDValue Expanded = TLI.expandABD(N: Node, DAG)) { |
1054 | Results.push_back(Elt: Expanded); |
1055 | return; |
1056 | } |
1057 | break; |
1058 | case ISD::AVGCEILS: |
1059 | case ISD::AVGCEILU: |
1060 | case ISD::AVGFLOORS: |
1061 | case ISD::AVGFLOORU: |
1062 | if (SDValue Expanded = TLI.expandAVG(N: Node, DAG)) { |
1063 | Results.push_back(Elt: Expanded); |
1064 | return; |
1065 | } |
1066 | break; |
1067 | case ISD::BITREVERSE: |
1068 | if (SDValue Expanded = ExpandBITREVERSE(Node)) { |
1069 | Results.push_back(Elt: Expanded); |
1070 | return; |
1071 | } |
1072 | break; |
1073 | case ISD::VP_BITREVERSE: |
1074 | if (SDValue Expanded = TLI.expandVPBITREVERSE(N: Node, DAG)) { |
1075 | Results.push_back(Elt: Expanded); |
1076 | return; |
1077 | } |
1078 | break; |
1079 | case ISD::CTPOP: |
1080 | if (SDValue Expanded = TLI.expandCTPOP(N: Node, DAG)) { |
1081 | Results.push_back(Elt: Expanded); |
1082 | return; |
1083 | } |
1084 | break; |
1085 | case ISD::VP_CTPOP: |
1086 | if (SDValue Expanded = TLI.expandVPCTPOP(N: Node, DAG)) { |
1087 | Results.push_back(Elt: Expanded); |
1088 | return; |
1089 | } |
1090 | break; |
1091 | case ISD::CTLZ: |
1092 | case ISD::CTLZ_ZERO_UNDEF: |
1093 | if (SDValue Expanded = TLI.expandCTLZ(N: Node, DAG)) { |
1094 | Results.push_back(Elt: Expanded); |
1095 | return; |
1096 | } |
1097 | break; |
1098 | case ISD::VP_CTLZ: |
1099 | case ISD::VP_CTLZ_ZERO_UNDEF: |
1100 | if (SDValue Expanded = TLI.expandVPCTLZ(N: Node, DAG)) { |
1101 | Results.push_back(Elt: Expanded); |
1102 | return; |
1103 | } |
1104 | break; |
1105 | case ISD::CTTZ: |
1106 | case ISD::CTTZ_ZERO_UNDEF: |
1107 | if (SDValue Expanded = TLI.expandCTTZ(N: Node, DAG)) { |
1108 | Results.push_back(Elt: Expanded); |
1109 | return; |
1110 | } |
1111 | break; |
1112 | case ISD::VP_CTTZ: |
1113 | case ISD::VP_CTTZ_ZERO_UNDEF: |
1114 | if (SDValue Expanded = TLI.expandVPCTTZ(N: Node, DAG)) { |
1115 | Results.push_back(Elt: Expanded); |
1116 | return; |
1117 | } |
1118 | break; |
1119 | case ISD::FSHL: |
1120 | case ISD::VP_FSHL: |
1121 | case ISD::FSHR: |
1122 | case ISD::VP_FSHR: |
1123 | if (SDValue Expanded = TLI.expandFunnelShift(N: Node, DAG)) { |
1124 | Results.push_back(Elt: Expanded); |
1125 | return; |
1126 | } |
1127 | break; |
1128 | case ISD::ROTL: |
1129 | case ISD::ROTR: |
1130 | if (SDValue Expanded = TLI.expandROT(N: Node, AllowVectorOps: false /*AllowVectorOps*/, DAG)) { |
1131 | Results.push_back(Elt: Expanded); |
1132 | return; |
1133 | } |
1134 | break; |
1135 | case ISD::FMINNUM: |
1136 | case ISD::FMAXNUM: |
1137 | if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(N: Node, DAG)) { |
1138 | Results.push_back(Elt: Expanded); |
1139 | return; |
1140 | } |
1141 | break; |
1142 | case ISD::FMINIMUM: |
1143 | case ISD::FMAXIMUM: |
1144 | Results.push_back(Elt: TLI.expandFMINIMUM_FMAXIMUM(N: Node, DAG)); |
1145 | return; |
1146 | case ISD::FMINIMUMNUM: |
1147 | case ISD::FMAXIMUMNUM: |
1148 | Results.push_back(Elt: TLI.expandFMINIMUMNUM_FMAXIMUMNUM(N: Node, DAG)); |
1149 | return; |
1150 | case ISD::SMIN: |
1151 | case ISD::SMAX: |
1152 | case ISD::UMIN: |
1153 | case ISD::UMAX: |
1154 | if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) { |
1155 | Results.push_back(Elt: Expanded); |
1156 | return; |
1157 | } |
1158 | break; |
1159 | case ISD::UADDO: |
1160 | case ISD::USUBO: |
1161 | ExpandUADDSUBO(Node, Results); |
1162 | return; |
1163 | case ISD::SADDO: |
1164 | case ISD::SSUBO: |
1165 | ExpandSADDSUBO(Node, Results); |
1166 | return; |
1167 | case ISD::UMULO: |
1168 | case ISD::SMULO: |
1169 | ExpandMULO(Node, Results); |
1170 | return; |
1171 | case ISD::USUBSAT: |
1172 | case ISD::SSUBSAT: |
1173 | case ISD::UADDSAT: |
1174 | case ISD::SADDSAT: |
1175 | if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) { |
1176 | Results.push_back(Elt: Expanded); |
1177 | return; |
1178 | } |
1179 | break; |
1180 | case ISD::USHLSAT: |
1181 | case ISD::SSHLSAT: |
1182 | if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) { |
1183 | Results.push_back(Elt: Expanded); |
1184 | return; |
1185 | } |
1186 | break; |
1187 | case ISD::FP_TO_SINT_SAT: |
1188 | case ISD::FP_TO_UINT_SAT: |
1189 | // Expand FP_TO_[SU]INT_SAT if it is scalable to prevent it from unrolling below. |
1190 | if (Node->getValueType(ResNo: 0).isScalableVector()) { |
1191 | if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(N: Node, DAG)) { |
1192 | Results.push_back(Elt: Expanded); |
1193 | return; |
1194 | } |
1195 | } |
1196 | break; |
1197 | case ISD::SMULFIX: |
1198 | case ISD::UMULFIX: |
1199 | if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) { |
1200 | Results.push_back(Elt: Expanded); |
1201 | return; |
1202 | } |
1203 | break; |
1204 | case ISD::SMULFIXSAT: |
1205 | case ISD::UMULFIXSAT: |
1206 | // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly |
1207 | // why. Maybe it results in worse codegen compared to the unroll for some |
1208 | // targets? This should probably be investigated. And if we still prefer to |
1209 | // unroll, an explanation could be helpful. |
1210 | break; |
1211 | case ISD::SDIVFIX: |
1212 | case ISD::UDIVFIX: |
1213 | ExpandFixedPointDiv(Node, Results); |
1214 | return; |
1215 | case ISD::SDIVFIXSAT: |
1216 | case ISD::UDIVFIXSAT: |
1217 | break; |
1218 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
1219 | case ISD::STRICT_##DAGN: |
1220 | #include "llvm/IR/ConstrainedOps.def" |
1221 | ExpandStrictFPOp(Node, Results); |
1222 | return; |
1223 | case ISD::VECREDUCE_ADD: |
1224 | case ISD::VECREDUCE_MUL: |
1225 | case ISD::VECREDUCE_AND: |
1226 | case ISD::VECREDUCE_OR: |
1227 | case ISD::VECREDUCE_XOR: |
1228 | case ISD::VECREDUCE_SMAX: |
1229 | case ISD::VECREDUCE_SMIN: |
1230 | case ISD::VECREDUCE_UMAX: |
1231 | case ISD::VECREDUCE_UMIN: |
1232 | case ISD::VECREDUCE_FADD: |
1233 | case ISD::VECREDUCE_FMUL: |
1234 | case ISD::VECREDUCE_FMAX: |
1235 | case ISD::VECREDUCE_FMIN: |
1236 | case ISD::VECREDUCE_FMAXIMUM: |
1237 | case ISD::VECREDUCE_FMINIMUM: |
1238 | Results.push_back(Elt: TLI.expandVecReduce(Node, DAG)); |
1239 | return; |
1240 | case ISD::PARTIAL_REDUCE_UMLA: |
1241 | case ISD::PARTIAL_REDUCE_SMLA: |
1242 | case ISD::PARTIAL_REDUCE_SUMLA: |
1243 | Results.push_back(Elt: TLI.expandPartialReduceMLA(Node, DAG)); |
1244 | return; |
1245 | case ISD::VECREDUCE_SEQ_FADD: |
1246 | case ISD::VECREDUCE_SEQ_FMUL: |
1247 | Results.push_back(Elt: TLI.expandVecReduceSeq(Node, DAG)); |
1248 | return; |
1249 | case ISD::SREM: |
1250 | case ISD::UREM: |
1251 | ExpandREM(Node, Results); |
1252 | return; |
1253 | case ISD::VP_MERGE: |
1254 | if (SDValue Expanded = ExpandVP_MERGE(Node)) { |
1255 | Results.push_back(Elt: Expanded); |
1256 | return; |
1257 | } |
1258 | break; |
1259 | case ISD::FREM: |
1260 | if (tryExpandVecMathCall(Node, Call_F32: RTLIB::REM_F32, Call_F64: RTLIB::REM_F64, |
1261 | Call_F80: RTLIB::REM_F80, Call_F128: RTLIB::REM_F128, |
1262 | Call_PPCF128: RTLIB::REM_PPCF128, Results)) |
1263 | return; |
1264 | |
1265 | break; |
1266 | case ISD::FSINCOS: |
1267 | case ISD::FSINCOSPI: { |
1268 | EVT VT = Node->getValueType(ResNo: 0).getVectorElementType(); |
1269 | RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS |
1270 | ? RTLIB::getSINCOS(RetVT: VT) |
1271 | : RTLIB::getSINCOSPI(RetVT: VT); |
1272 | if (DAG.expandMultipleResultFPLibCall(LC, Node, Results)) |
1273 | return; |
1274 | break; |
1275 | } |
1276 | case ISD::FMODF: { |
1277 | RTLIB::Libcall LC = |
1278 | RTLIB::getMODF(RetVT: Node->getValueType(ResNo: 0).getVectorElementType()); |
1279 | if (DAG.expandMultipleResultFPLibCall(LC, Node, Results, |
1280 | /*CallRetResNo=*/0)) |
1281 | return; |
1282 | break; |
1283 | } |
1284 | case ISD::VECTOR_COMPRESS: |
1285 | Results.push_back(Elt: TLI.expandVECTOR_COMPRESS(Node, DAG)); |
1286 | return; |
1287 | case ISD::VECTOR_FIND_LAST_ACTIVE: |
1288 | Results.push_back(Elt: TLI.expandVectorFindLastActive(N: Node, DAG)); |
1289 | return; |
1290 | case ISD::SCMP: |
1291 | case ISD::UCMP: |
1292 | Results.push_back(Elt: TLI.expandCMP(Node, DAG)); |
1293 | return; |
1294 | |
1295 | case ISD::FADD: |
1296 | case ISD::FMUL: |
1297 | case ISD::FMA: |
1298 | case ISD::FDIV: |
1299 | case ISD::FCEIL: |
1300 | case ISD::FFLOOR: |
1301 | case ISD::FNEARBYINT: |
1302 | case ISD::FRINT: |
1303 | case ISD::FROUND: |
1304 | case ISD::FROUNDEVEN: |
1305 | case ISD::FTRUNC: |
1306 | case ISD::FSQRT: |
1307 | if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) { |
1308 | Results.push_back(Elt: Expanded); |
1309 | return; |
1310 | } |
1311 | break; |
1312 | } |
1313 | |
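// None of the expansions above applied, so fall back to unrolling the
// operation into scalar operations on each element.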
1314 | SDValue Unrolled = DAG.UnrollVectorOp(N: Node); |
1315 | if (Node->getNumValues() == 1) { |
1316 | Results.push_back(Elt: Unrolled); |
1317 | } else { |
1318 | assert(Node->getNumValues() == Unrolled->getNumValues() && |
1319 | "VectorLegalizer Expand returned wrong number of results!" ); |
1320 | for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I) |
1321 | Results.push_back(Elt: Unrolled.getValue(R: I)); |
1322 | } |
1323 | } |
1324 | |
1325 | SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { |
1326 | // Lower a select instruction where the condition is a scalar and the |
1327 | // operands are vectors. Lower this select to VSELECT and implement it |
1328 | // using XOR, AND, and OR. The selector bit is broadcast. |
1329 | EVT VT = Node->getValueType(ResNo: 0); |
1330 | SDLoc DL(Node); |
1331 | |
1332 | SDValue Mask = Node->getOperand(Num: 0); |
1333 | SDValue Op1 = Node->getOperand(Num: 1); |
1334 | SDValue Op2 = Node->getOperand(Num: 2); |
1335 | |
1336 | assert(VT.isVector() && !Mask.getValueType().isVector() |
1337 | && Op1.getValueType() == Op2.getValueType() && "Invalid type" ); |
1338 | |
1339 | // If we can't even use the basic vector operations of |
1340 | // AND,OR,XOR, we will have to scalarize the op. |
1341 | // Notice that the operation may be 'promoted', which means that it is |
1342 | // 'bitcast' to another type which is handled. |
1343 | // Also, we need to be able to construct a splat vector using either |
1344 | // BUILD_VECTOR or SPLAT_VECTOR. |
1345 | // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to |
1346 | // BUILD_VECTOR? |
1347 | if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand || |
1348 | TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand || |
1349 | TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand || |
1350 | TLI.getOperationAction(Op: VT.isFixedLengthVector() ? ISD::BUILD_VECTOR |
1351 | : ISD::SPLAT_VECTOR, |
1352 | VT) == TargetLowering::Expand) |
1353 | return SDValue(); |
1354 | |
1355 | // Generate a mask operand. |
1356 | EVT MaskTy = VT.changeVectorElementTypeToInteger(); |
1357 | |
1358 | // What is the size of each element in the vector mask. |
1359 | EVT BitTy = MaskTy.getScalarType(); |
1360 | |
1361 | Mask = DAG.getSelect(DL, VT: BitTy, Cond: Mask, LHS: DAG.getAllOnesConstant(DL, VT: BitTy), |
1362 | RHS: DAG.getConstant(Val: 0, DL, VT: BitTy)); |
1363 | |
1364 | // Broadcast the mask so that the entire vector is all one or all zero. |
1365 | Mask = DAG.getSplat(VT: MaskTy, DL, Op: Mask); |
1366 | |
1367 | // Bitcast the operands to be the same type as the mask. |
1368 | // This is needed when we select between FP types because |
1369 | // the mask is a vector of integers. |
1370 | Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op1); |
1371 | Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: MaskTy, Operand: Op2); |
1372 | |
1373 | SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT: MaskTy); |
1374 | |
1375 | Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op1, N2: Mask); |
1376 | Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskTy, N1: Op2, N2: NotMask); |
1377 | SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT: MaskTy, N1: Op1, N2: Op2); |
1378 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val); |
1379 | } |
1380 | |
1381 | SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) { |
1382 | EVT VT = Node->getValueType(ResNo: 0); |
1383 | |
1384 | // Make sure that the SRA and SHL instructions are available. |
1385 | if (TLI.getOperationAction(Op: ISD::SRA, VT) == TargetLowering::Expand || |
1386 | TLI.getOperationAction(Op: ISD::SHL, VT) == TargetLowering::Expand) |
1387 | return SDValue(); |
1388 | |
1389 | SDLoc DL(Node); |
1390 | EVT OrigTy = cast<VTSDNode>(Val: Node->getOperand(Num: 1))->getVT(); |
1391 | |
1392 | unsigned BW = VT.getScalarSizeInBits(); |
1393 | unsigned OrigBW = OrigTy.getScalarSizeInBits(); |
1394 | SDValue ShiftSz = DAG.getConstant(Val: BW - OrigBW, DL, VT); |
1395 | |
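// Shift left so the narrow value's sign bit becomes the element's MSB, then
// arithmetic shift right by the same amount to replicate it.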
1396 | SDValue Op = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Node->getOperand(Num: 0), N2: ShiftSz); |
1397 | return DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: Op, N2: ShiftSz); |
1398 | } |
1399 | |
1400 | // Generically expand a vector anyext in register to a shuffle of the relevant |
1401 | // lanes into the appropriate locations, with other lanes left undef. |
1402 | SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) { |
1403 | SDLoc DL(Node); |
1404 | EVT VT = Node->getValueType(ResNo: 0); |
1405 | int NumElements = VT.getVectorNumElements(); |
1406 | SDValue Src = Node->getOperand(Num: 0); |
1407 | EVT SrcVT = Src.getValueType(); |
1408 | int NumSrcElements = SrcVT.getVectorNumElements(); |
1409 | |
1410 | // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector |
1411 | // into a larger vector type. |
1412 | if (SrcVT.bitsLE(VT)) { |
1413 | assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && |
1414 | "ANY_EXTEND_VECTOR_INREG vector size mismatch" ); |
1415 | NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); |
1416 | SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(), |
1417 | NumElements: NumSrcElements); |
1418 | Src = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: SrcVT), SubVec: Src, Idx: 0); |
1419 | } |
1420 | |
1421 | // Build a base mask of undef shuffles. |
1422 | SmallVector<int, 16> ShuffleMask; |
1423 | ShuffleMask.resize(N: NumSrcElements, NV: -1); |
1424 | |
1425 | // Place the extended lanes into the correct locations. |
1426 | int ExtLaneScale = NumSrcElements / NumElements; |
1427 | int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; |
1428 | for (int i = 0; i < NumElements; ++i) |
1429 | ShuffleMask[i * ExtLaneScale + EndianOffset] = i; |
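// For example, little-endian v4i32 any_extend_vector_inreg from v8i16 uses
// the mask <0,-1,1,-1,2,-1,3,-1>; after the bitcast each i32 lane holds its
// source element in the low 16 bits with the upper bits undef.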
1430 | |
1431 | return DAG.getNode( |
1432 | Opcode: ISD::BITCAST, DL, VT, |
1433 | Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Src, N2: DAG.getUNDEF(VT: SrcVT), Mask: ShuffleMask)); |
1434 | } |
1435 | |
1436 | SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) { |
1437 | SDLoc DL(Node); |
1438 | EVT VT = Node->getValueType(ResNo: 0); |
1439 | SDValue Src = Node->getOperand(Num: 0); |
1440 | EVT SrcVT = Src.getValueType(); |
1441 | |
1442 | // First build an any-extend node which can be legalized above when we |
1443 | // recurse through it. |
1444 | SDValue Op = DAG.getNode(Opcode: ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Operand: Src); |
1445 | |
// Now we need to sign extend. Do this by shifting the elements. Even if these
1447 | // aren't legal operations, they have a better chance of being legalized |
1448 | // without full scalarization than the sign extension does. |
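// For example, v4i32 from v4i16 is any-extended first and then shifted:
//   (sra (shl X, splat(16)), splat(16)).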
1449 | unsigned EltWidth = VT.getScalarSizeInBits(); |
1450 | unsigned SrcEltWidth = SrcVT.getScalarSizeInBits(); |
1451 | SDValue ShiftAmount = DAG.getConstant(Val: EltWidth - SrcEltWidth, DL, VT); |
1452 | return DAG.getNode(Opcode: ISD::SRA, DL, VT, |
1453 | N1: DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Op, N2: ShiftAmount), |
1454 | N2: ShiftAmount); |
1455 | } |
1456 | |
1457 | // Generically expand a vector zext in register to a shuffle of the relevant |
1458 | // lanes into the appropriate locations, a blend of zero into the high bits, |
1459 | // and a bitcast to the wider element type. |
1460 | SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) { |
1461 | SDLoc DL(Node); |
1462 | EVT VT = Node->getValueType(ResNo: 0); |
1463 | int NumElements = VT.getVectorNumElements(); |
1464 | SDValue Src = Node->getOperand(Num: 0); |
1465 | EVT SrcVT = Src.getValueType(); |
1466 | int NumSrcElements = SrcVT.getVectorNumElements(); |
1467 | |
1468 | // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector |
1469 | // into a larger vector type. |
1470 | if (SrcVT.bitsLE(VT)) { |
1471 | assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && |
1472 | "ZERO_EXTEND_VECTOR_INREG vector size mismatch" ); |
1473 | NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); |
1474 | SrcVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: SrcVT.getScalarType(), |
1475 | NumElements: NumSrcElements); |
1476 | Src = DAG.getInsertSubvector(DL, Vec: DAG.getUNDEF(VT: SrcVT), SubVec: Src, Idx: 0); |
1477 | } |
1478 | |
1479 | // Build up a zero vector to blend into this one. |
1480 | SDValue Zero = DAG.getConstant(Val: 0, DL, VT: SrcVT); |
1481 | |
1482 | // Shuffle the incoming lanes into the correct position, and pull all other |
1483 | // lanes from the zero vector. |
1484 | auto ShuffleMask = llvm::to_vector<16>(Range: llvm::seq<int>(Begin: 0, End: NumSrcElements)); |
1485 | |
1486 | int ExtLaneScale = NumSrcElements / NumElements; |
1487 | int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; |
1488 | for (int i = 0; i < NumElements; ++i) |
1489 | ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; |
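// For example, little-endian v4i32 zero_extend_vector_inreg from v8i16 uses
// the mask <8,1,9,3,10,5,11,7>: the source lanes land in the low halves and
// the remaining halves are taken from the zero vector.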
1490 | |
1491 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, |
1492 | Operand: DAG.getVectorShuffle(VT: SrcVT, dl: DL, N1: Zero, N2: Src, Mask: ShuffleMask)); |
1493 | } |
1494 | |
1495 | static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { |
1496 | int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; |
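// E.g. for a vector of i32 this produces <3,2,1,0, 7,6,5,4, 11,10,9,8, ...>,
// reversing the bytes within each element.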
1497 | for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) |
1498 | for (int J = ScalarSizeInBytes - 1; J >= 0; --J) |
1499 | ShuffleMask.push_back(Elt: (I * ScalarSizeInBytes) + J); |
1500 | } |
1501 | |
1502 | SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) { |
1503 | EVT VT = Node->getValueType(ResNo: 0); |
1504 | |
1505 | // Scalable vectors can't use shuffle expansion. |
1506 | if (VT.isScalableVector()) |
1507 | return TLI.expandBSWAP(N: Node, DAG); |
1508 | |
// Generate a byte-wise shuffle mask for the BSWAP.
1510 | SmallVector<int, 16> ShuffleMask; |
1511 | createBSWAPShuffleMask(VT, ShuffleMask); |
1512 | EVT ByteVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: ShuffleMask.size()); |
1513 | |
1514 | // Only emit a shuffle if the mask is legal. |
1515 | if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) { |
1516 | SDLoc DL(Node); |
1517 | SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0)); |
1518 | Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getUNDEF(VT: ByteVT), Mask: ShuffleMask); |
1519 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op); |
1520 | } |
1521 | |
1522 | // If we have the appropriate vector bit operations, it is better to use them |
1523 | // than unrolling and expanding each component. |
1524 | if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) && |
1525 | TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) && |
1526 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) && |
1527 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)) |
1528 | return TLI.expandBSWAP(N: Node, DAG); |
1529 | |
1530 | // Otherwise let the caller unroll. |
1531 | return SDValue(); |
1532 | } |
1533 | |
1534 | SDValue VectorLegalizer::ExpandBITREVERSE(SDNode *Node) { |
1535 | EVT VT = Node->getValueType(ResNo: 0); |
1536 | |
1537 | // We can't unroll or use shuffles for scalable vectors. |
1538 | if (VT.isScalableVector()) |
1539 | return TLI.expandBITREVERSE(N: Node, DAG); |
1540 | |
1541 | // If we have the scalar operation, it's probably cheaper to unroll it. |
1542 | if (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: VT.getScalarType())) |
1543 | return SDValue(); |
1544 | |
// If the vector element width is a whole number of bytes, test if it's legal
// to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
// vector. This greatly reduces the number of bit shifts necessary.
1548 | unsigned ScalarSizeInBits = VT.getScalarSizeInBits(); |
1549 | if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) { |
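// For example, a v4i32 BITREVERSE becomes a <3,2,1,0,7,6,5,4,...> byte
// shuffle of the bitcast v16i8 value followed by a v16i8 BITREVERSE:
// reversing the bytes and then the bits within each byte reverses each
// 32-bit element.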
1550 | SmallVector<int, 16> BSWAPMask; |
1551 | createBSWAPShuffleMask(VT, ShuffleMask&: BSWAPMask); |
1552 | |
1553 | EVT ByteVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i8, NumElements: BSWAPMask.size()); |
1554 | if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) && |
1555 | (TLI.isOperationLegalOrCustom(Op: ISD::BITREVERSE, VT: ByteVT) || |
1556 | (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT: ByteVT) && |
1557 | TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT: ByteVT) && |
1558 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT: ByteVT) && |
1559 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT: ByteVT)))) { |
1560 | SDLoc DL(Node); |
1561 | SDValue Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ByteVT, Operand: Node->getOperand(Num: 0)); |
1562 | Op = DAG.getVectorShuffle(VT: ByteVT, dl: DL, N1: Op, N2: DAG.getUNDEF(VT: ByteVT), |
1563 | Mask: BSWAPMask); |
1564 | Op = DAG.getNode(Opcode: ISD::BITREVERSE, DL, VT: ByteVT, Operand: Op); |
1565 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op); |
1566 | return Op; |
1567 | } |
1568 | } |
1569 | |
1570 | // If we have the appropriate vector bit operations, it is better to use them |
1571 | // than unrolling and expanding each component. |
1572 | if (TLI.isOperationLegalOrCustom(Op: ISD::SHL, VT) && |
1573 | TLI.isOperationLegalOrCustom(Op: ISD::SRL, VT) && |
1574 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::AND, VT) && |
1575 | TLI.isOperationLegalOrCustomOrPromote(Op: ISD::OR, VT)) |
1576 | return TLI.expandBITREVERSE(N: Node, DAG); |
1577 | |
1578 | // Otherwise unroll. |
1579 | return SDValue(); |
1580 | } |
1581 | |
1582 | SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) { |
1583 | // Implement VSELECT in terms of XOR, AND, OR |
1584 | // on platforms which do not support blend natively. |
1585 | SDLoc DL(Node); |
1586 | |
1587 | SDValue Mask = Node->getOperand(Num: 0); |
1588 | SDValue Op1 = Node->getOperand(Num: 1); |
1589 | SDValue Op2 = Node->getOperand(Num: 2); |
1590 | |
1591 | EVT VT = Mask.getValueType(); |
1592 | |
1593 | // If we can't even use the basic vector operations of |
1594 | // AND,OR,XOR, we will have to scalarize the op. |
1595 | // Notice that the operation may be 'promoted' which means that it is |
1596 | // 'bitcasted' to another type which is handled. |
1597 | if (TLI.getOperationAction(Op: ISD::AND, VT) == TargetLowering::Expand || |
1598 | TLI.getOperationAction(Op: ISD::XOR, VT) == TargetLowering::Expand || |
1599 | TLI.getOperationAction(Op: ISD::OR, VT) == TargetLowering::Expand) |
1600 | return SDValue(); |
1601 | |
1602 | // This operation also isn't safe with AND, OR, XOR when the boolean type is |
1603 | // 0/1 and the select operands aren't also booleans, as we need an all-ones |
1604 | // vector constant to mask with. |
1605 | // FIXME: Sign extend 1 to all ones if that's legal on the target. |
1606 | auto BoolContents = TLI.getBooleanContents(Type: Op1.getValueType()); |
1607 | if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent && |
1608 | !(BoolContents == TargetLowering::ZeroOrOneBooleanContent && |
1609 | Op1.getValueType().getVectorElementType() == MVT::i1)) |
1610 | return SDValue(); |
1611 | |
1612 | // If the mask and the type are different sizes, unroll the vector op. This |
1613 | // can occur when getSetCCResultType returns something that is different in |
1614 | // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. |
1615 | if (VT.getSizeInBits() != Op1.getValueSizeInBits()) |
1616 | return SDValue(); |
1617 | |
1618 | // Bitcast the operands to be the same type as the mask. |
1619 | // This is needed when we select between FP types because |
1620 | // the mask is a vector of integers. |
1621 | Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op1); |
1622 | Op2 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op2); |
1623 | |
1624 | SDValue NotMask = DAG.getNOT(DL, Val: Mask, VT); |
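// Blend as (Op1 & Mask) | (Op2 & ~Mask); each mask lane is all ones or all
// zeros (or the operands are i1), so the bitwise form matches the select
// semantics.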
1625 | |
1626 | Op1 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op1, N2: Mask); |
1627 | Op2 = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Op2, N2: NotMask); |
1628 | SDValue Val = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: Op1, N2: Op2); |
1629 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: Node->getValueType(ResNo: 0), Operand: Val); |
1630 | } |
1631 | |
1632 | SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) { |
1633 | // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which |
1634 | // do not support it natively. |
1635 | SDLoc DL(Node); |
1636 | |
1637 | SDValue Mask = Node->getOperand(Num: 0); |
1638 | SDValue Op1 = Node->getOperand(Num: 1); |
1639 | SDValue Op2 = Node->getOperand(Num: 2); |
1640 | SDValue EVL = Node->getOperand(Num: 3); |
1641 | |
1642 | EVT VT = Mask.getValueType(); |
1643 | |
1644 | // If we can't even use the basic vector operations of |
1645 | // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op. |
1646 | if (TLI.getOperationAction(Op: ISD::VP_AND, VT) == TargetLowering::Expand || |
1647 | TLI.getOperationAction(Op: ISD::VP_XOR, VT) == TargetLowering::Expand || |
1648 | TLI.getOperationAction(Op: ISD::VP_OR, VT) == TargetLowering::Expand) |
1649 | return SDValue(); |
1650 | |
// This expansion also isn't safe when the select operands aren't booleans.
1652 | if (Op1.getValueType().getVectorElementType() != MVT::i1) |
1653 | return SDValue(); |
1654 | |
1655 | SDValue Ones = DAG.getAllOnesConstant(DL, VT); |
1656 | SDValue NotMask = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT, N1: Mask, N2: Ones, N3: Ones, N4: EVL); |
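// With i1 elements the select is simply (Op1 & Mask) | (Op2 & ~Mask); the VP
// nodes are predicated on an all-true mask and the original EVL.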
1657 | |
1658 | Op1 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op1, N2: Mask, N3: Ones, N4: EVL); |
1659 | Op2 = DAG.getNode(Opcode: ISD::VP_AND, DL, VT, N1: Op2, N2: NotMask, N3: Ones, N4: EVL); |
1660 | return DAG.getNode(Opcode: ISD::VP_OR, DL, VT, N1: Op1, N2: Op2, N3: Ones, N4: EVL); |
1661 | } |
1662 | |
1663 | SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) { |
1664 | // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector |
1665 | // indices less than the EVL/pivot are true. Combine that with the original |
1666 | // mask for a full-length mask. Use a full-length VSELECT to select between |
1667 | // the true and false values. |
1668 | SDLoc DL(Node); |
1669 | |
1670 | SDValue Mask = Node->getOperand(Num: 0); |
1671 | SDValue Op1 = Node->getOperand(Num: 1); |
1672 | SDValue Op2 = Node->getOperand(Num: 2); |
1673 | SDValue EVL = Node->getOperand(Num: 3); |
1674 | |
1675 | EVT MaskVT = Mask.getValueType(); |
1676 | bool IsFixedLen = MaskVT.isFixedLengthVector(); |
1677 | |
1678 | EVT EVLVecVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: EVL.getValueType(), |
1679 | EC: MaskVT.getVectorElementCount()); |
1680 | |
1681 | // If we can't construct the EVL mask efficiently, it's better to unroll. |
1682 | if ((IsFixedLen && |
1683 | !TLI.isOperationLegalOrCustom(Op: ISD::BUILD_VECTOR, VT: EVLVecVT)) || |
1684 | (!IsFixedLen && |
1685 | (!TLI.isOperationLegalOrCustom(Op: ISD::STEP_VECTOR, VT: EVLVecVT) || |
1686 | !TLI.isOperationLegalOrCustom(Op: ISD::SPLAT_VECTOR, VT: EVLVecVT)))) |
1687 | return SDValue(); |
1688 | |
1689 | // If using a SETCC would result in a different type than the mask type, |
1690 | // unroll. |
1691 | if (TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), |
1692 | VT: EVLVecVT) != MaskVT) |
1693 | return SDValue(); |
1694 | |
1695 | SDValue StepVec = DAG.getStepVector(DL, ResVT: EVLVecVT); |
1696 | SDValue SplatEVL = DAG.getSplat(VT: EVLVecVT, DL, Op: EVL); |
1697 | SDValue EVLMask = |
1698 | DAG.getSetCC(DL, VT: MaskVT, LHS: StepVec, RHS: SplatEVL, Cond: ISD::CondCode::SETULT); |
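// EVLMask[i] == (i < EVL), so FullMask selects Op1 only for lanes that are
// both below the pivot and set in the original mask; all other lanes take
// Op2, matching VP_MERGE semantics.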
1699 | |
1700 | SDValue FullMask = DAG.getNode(Opcode: ISD::AND, DL, VT: MaskVT, N1: Mask, N2: EVLMask); |
1701 | return DAG.getSelect(DL, VT: Node->getValueType(ResNo: 0), Cond: FullMask, LHS: Op1, RHS: Op2); |
1702 | } |
1703 | |
1704 | SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) { |
1705 | // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB. |
1706 | EVT VT = Node->getValueType(ResNo: 0); |
1707 | |
1708 | unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV; |
1709 | |
1710 | if (!TLI.isOperationLegalOrCustom(Op: DivOpc, VT) || |
1711 | !TLI.isOperationLegalOrCustom(Op: ISD::VP_MUL, VT) || |
1712 | !TLI.isOperationLegalOrCustom(Op: ISD::VP_SUB, VT)) |
1713 | return SDValue(); |
1714 | |
1715 | SDLoc DL(Node); |
1716 | |
1717 | SDValue Dividend = Node->getOperand(Num: 0); |
1718 | SDValue Divisor = Node->getOperand(Num: 1); |
1719 | SDValue Mask = Node->getOperand(Num: 2); |
1720 | SDValue EVL = Node->getOperand(Num: 3); |
1721 | |
// X % Y -> X - (X / Y) * Y
1723 | SDValue Div = DAG.getNode(Opcode: DivOpc, DL, VT, N1: Dividend, N2: Divisor, N3: Mask, N4: EVL); |
1724 | SDValue Mul = DAG.getNode(Opcode: ISD::VP_MUL, DL, VT, N1: Divisor, N2: Div, N3: Mask, N4: EVL); |
1725 | return DAG.getNode(Opcode: ISD::VP_SUB, DL, VT, N1: Dividend, N2: Mul, N3: Mask, N4: EVL); |
1726 | } |
1727 | |
1728 | SDValue VectorLegalizer::ExpandVP_FNEG(SDNode *Node) { |
1729 | EVT VT = Node->getValueType(ResNo: 0); |
1730 | EVT IntVT = VT.changeVectorElementTypeToInteger(); |
1731 | |
1732 | if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_XOR, VT: IntVT)) |
1733 | return SDValue(); |
1734 | |
1735 | SDValue Mask = Node->getOperand(Num: 1); |
1736 | SDValue EVL = Node->getOperand(Num: 2); |
1737 | |
1738 | SDLoc DL(Node); |
1739 | SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0)); |
1740 | SDValue SignMask = DAG.getConstant( |
1741 | Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT); |
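// XOR with the sign-bit mask (e.g. 0x80000000 for f32 lanes) flips the sign
// of each lane.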
1742 | SDValue Xor = DAG.getNode(Opcode: ISD::VP_XOR, DL, VT: IntVT, N1: Cast, N2: SignMask, N3: Mask, N4: EVL); |
1743 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Xor); |
1744 | } |
1745 | |
1746 | SDValue VectorLegalizer::ExpandVP_FABS(SDNode *Node) { |
1747 | EVT VT = Node->getValueType(ResNo: 0); |
1748 | EVT IntVT = VT.changeVectorElementTypeToInteger(); |
1749 | |
1750 | if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_AND, VT: IntVT)) |
1751 | return SDValue(); |
1752 | |
1753 | SDValue Mask = Node->getOperand(Num: 1); |
1754 | SDValue EVL = Node->getOperand(Num: 2); |
1755 | |
1756 | SDLoc DL(Node); |
1757 | SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0)); |
1758 | SDValue ClearSignMask = DAG.getConstant( |
1759 | Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT); |
1760 | SDValue ClearSign = |
1761 | DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Cast, N2: ClearSignMask, N3: Mask, N4: EVL); |
1762 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: ClearSign); |
1763 | } |
1764 | |
1765 | SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) { |
1766 | EVT VT = Node->getValueType(ResNo: 0); |
1767 | |
1768 | if (VT != Node->getOperand(Num: 1).getValueType()) |
1769 | return SDValue(); |
1770 | |
1771 | EVT IntVT = VT.changeVectorElementTypeToInteger(); |
1772 | if (!TLI.isOperationLegalOrCustom(Op: ISD::VP_AND, VT: IntVT) || |
1773 | !TLI.isOperationLegalOrCustom(Op: ISD::VP_XOR, VT: IntVT)) |
1774 | return SDValue(); |
1775 | |
1776 | SDValue Mask = Node->getOperand(Num: 2); |
1777 | SDValue EVL = Node->getOperand(Num: 3); |
1778 | |
1779 | SDLoc DL(Node); |
1780 | SDValue Mag = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0)); |
1781 | SDValue Sign = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 1)); |
1782 | |
1783 | SDValue SignMask = DAG.getConstant( |
1784 | Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT); |
1785 | SDValue SignBit = |
1786 | DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Sign, N2: SignMask, N3: Mask, N4: EVL); |
1787 | |
1788 | SDValue ClearSignMask = DAG.getConstant( |
1789 | Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT); |
1790 | SDValue ClearedSign = |
1791 | DAG.getNode(Opcode: ISD::VP_AND, DL, VT: IntVT, N1: Mag, N2: ClearSignMask, N3: Mask, N4: EVL); |
1792 | |
1793 | SDValue CopiedSign = DAG.getNode(Opcode: ISD::VP_OR, DL, VT: IntVT, N1: ClearedSign, N2: SignBit, |
1794 | N3: Mask, N4: EVL, Flags: SDNodeFlags::Disjoint); |
1795 | |
1796 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: CopiedSign); |
1797 | } |
1798 | |
1799 | void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node, |
1800 | SmallVectorImpl<SDValue> &Results) { |
1801 | // Attempt to expand using TargetLowering. |
1802 | SDValue Result, Chain; |
1803 | if (TLI.expandFP_TO_UINT(N: Node, Result, Chain, DAG)) { |
1804 | Results.push_back(Elt: Result); |
1805 | if (Node->isStrictFPOpcode()) |
1806 | Results.push_back(Elt: Chain); |
1807 | return; |
1808 | } |
1809 | |
1810 | // Otherwise go ahead and unroll. |
1811 | if (Node->isStrictFPOpcode()) { |
1812 | UnrollStrictFPOp(Node, Results); |
1813 | return; |
1814 | } |
1815 | |
1816 | Results.push_back(Elt: DAG.UnrollVectorOp(N: Node)); |
1817 | } |
1818 | |
1819 | void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, |
1820 | SmallVectorImpl<SDValue> &Results) { |
1821 | bool IsStrict = Node->isStrictFPOpcode(); |
1822 | unsigned OpNo = IsStrict ? 1 : 0; |
1823 | SDValue Src = Node->getOperand(Num: OpNo); |
1824 | EVT SrcVT = Src.getValueType(); |
1825 | EVT DstVT = Node->getValueType(ResNo: 0); |
1826 | SDLoc DL(Node); |
1827 | |
1828 | // Attempt to expand using TargetLowering. |
1829 | SDValue Result; |
1830 | SDValue Chain; |
1831 | if (TLI.expandUINT_TO_FP(N: Node, Result, Chain, DAG)) { |
1832 | Results.push_back(Elt: Result); |
1833 | if (IsStrict) |
1834 | Results.push_back(Elt: Chain); |
1835 | return; |
1836 | } |
1837 | |
1838 | // Make sure that the SINT_TO_FP and SRL instructions are available. |
1839 | if (((!IsStrict && TLI.getOperationAction(Op: ISD::SINT_TO_FP, VT: SrcVT) == |
1840 | TargetLowering::Expand) || |
1841 | (IsStrict && TLI.getOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT: SrcVT) == |
1842 | TargetLowering::Expand)) || |
1843 | TLI.getOperationAction(Op: ISD::SRL, VT: SrcVT) == TargetLowering::Expand) { |
1844 | if (IsStrict) { |
1845 | UnrollStrictFPOp(Node, Results); |
1846 | return; |
1847 | } |
1848 | |
1849 | Results.push_back(Elt: DAG.UnrollVectorOp(N: Node)); |
1850 | return; |
1851 | } |
1852 | |
1853 | unsigned BW = SrcVT.getScalarSizeInBits(); |
1854 | assert((BW == 64 || BW == 32) && |
1855 | "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide" ); |
1856 | |
// If FMUL (or STRICT_FMUL) is not supported by the target (e.g. for f16),
// perform the UINT_TO_FP in a wider float type and round the result to the
// smaller destination type.
1859 | if ((!IsStrict && !TLI.isOperationLegalOrCustom(Op: ISD::FMUL, VT: DstVT)) || |
1860 | (IsStrict && !TLI.isOperationLegalOrCustom(Op: ISD::STRICT_FMUL, VT: DstVT))) { |
1861 | EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64; |
1862 | SDValue UIToFP; |
1863 | SDValue Result; |
1864 | SDValue TargetZero = DAG.getIntPtrConstant(Val: 0, DL, /*isTarget=*/true); |
1865 | EVT FloatVecVT = SrcVT.changeVectorElementType(EltVT: FPVT); |
1866 | if (IsStrict) { |
1867 | UIToFP = DAG.getNode(Opcode: ISD::STRICT_UINT_TO_FP, DL, ResultTys: {FloatVecVT, MVT::Other}, |
1868 | Ops: {Node->getOperand(Num: 0), Src}); |
1869 | Result = DAG.getNode(Opcode: ISD::STRICT_FP_ROUND, DL, ResultTys: {DstVT, MVT::Other}, |
1870 | Ops: {Node->getOperand(Num: 0), UIToFP, TargetZero}); |
1871 | Results.push_back(Elt: Result); |
1872 | Results.push_back(Elt: Result.getValue(R: 1)); |
1873 | } else { |
1874 | UIToFP = DAG.getNode(Opcode: ISD::UINT_TO_FP, DL, VT: FloatVecVT, Operand: Src); |
1875 | Result = DAG.getNode(Opcode: ISD::FP_ROUND, DL, VT: DstVT, N1: UIToFP, N2: TargetZero); |
1876 | Results.push_back(Elt: Result); |
1877 | } |
1878 | |
1879 | return; |
1880 | } |
1881 | |
1882 | SDValue HalfWord = DAG.getConstant(Val: BW / 2, DL, VT: SrcVT); |
1883 | |
1884 | // Constants to clear the upper part of the word. |
1885 | // Notice that we can also use SHL+SHR, but using a constant is slightly |
1886 | // faster on x86. |
1887 | uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF; |
1888 | SDValue HalfWordMask = DAG.getConstant(Val: HWMask, DL, VT: SrcVT); |
1889 | |
1890 | // Two to the power of half-word-size. |
1891 | SDValue TWOHW = DAG.getConstantFP(Val: 1ULL << (BW / 2), DL, VT: DstVT); |
1892 | |
// Split Src into halves: HI is the upper half shifted down, LO is the lower
// half with the upper bits cleared.
1894 | SDValue HI = DAG.getNode(Opcode: ISD::SRL, DL, VT: SrcVT, N1: Src, N2: HalfWord); |
1895 | SDValue LO = DAG.getNode(Opcode: ISD::AND, DL, VT: SrcVT, N1: Src, N2: HalfWordMask); |
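// The result is then sitofp(HI) * 2^(BW/2) + sitofp(LO); both halves are
// non-negative when interpreted as signed BW-bit values, so SINT_TO_FP can
// be used safely.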
1896 | |
1897 | if (IsStrict) { |
// Convert HI and LO to floats, then scale fHI by 2^(BW/2) to restore the
// weight of the upper half.
1900 | // TODO: Can any fast-math-flags be set on these nodes? |
1901 | SDValue fHI = DAG.getNode(Opcode: ISD::STRICT_SINT_TO_FP, DL, ResultTys: {DstVT, MVT::Other}, |
1902 | Ops: {Node->getOperand(Num: 0), HI}); |
1903 | fHI = DAG.getNode(Opcode: ISD::STRICT_FMUL, DL, ResultTys: {DstVT, MVT::Other}, |
1904 | Ops: {fHI.getValue(R: 1), fHI, TWOHW}); |
1905 | SDValue fLO = DAG.getNode(Opcode: ISD::STRICT_SINT_TO_FP, DL, ResultTys: {DstVT, MVT::Other}, |
1906 | Ops: {Node->getOperand(Num: 0), LO}); |
1907 | |
1908 | SDValue TF = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, N1: fHI.getValue(R: 1), |
1909 | N2: fLO.getValue(R: 1)); |
1910 | |
1911 | // Add the two halves |
1912 | SDValue Result = |
1913 | DAG.getNode(Opcode: ISD::STRICT_FADD, DL, ResultTys: {DstVT, MVT::Other}, Ops: {TF, fHI, fLO}); |
1914 | |
1915 | Results.push_back(Elt: Result); |
1916 | Results.push_back(Elt: Result.getValue(R: 1)); |
1917 | return; |
1918 | } |
1919 | |
// Convert HI and LO to floats, then scale fHI by 2^(BW/2) to restore the
// weight of the upper half.
1922 | // TODO: Can any fast-math-flags be set on these nodes? |
1923 | SDValue fHI = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: DstVT, Operand: HI); |
1924 | fHI = DAG.getNode(Opcode: ISD::FMUL, DL, VT: DstVT, N1: fHI, N2: TWOHW); |
1925 | SDValue fLO = DAG.getNode(Opcode: ISD::SINT_TO_FP, DL, VT: DstVT, Operand: LO); |
1926 | |
1927 | // Add the two halves |
1928 | Results.push_back(Elt: DAG.getNode(Opcode: ISD::FADD, DL, VT: DstVT, N1: fHI, N2: fLO)); |
1929 | } |
1930 | |
1931 | SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) { |
1932 | EVT VT = Node->getValueType(ResNo: 0); |
1933 | EVT IntVT = VT.changeVectorElementTypeToInteger(); |
1934 | |
1935 | if (!TLI.isOperationLegalOrCustom(Op: ISD::XOR, VT: IntVT)) |
1936 | return SDValue(); |
1937 | |
1938 | // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64. |
1939 | if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) && |
1940 | !VT.isScalableVector()) |
1941 | return SDValue(); |
1942 | |
1943 | SDLoc DL(Node); |
1944 | SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0)); |
1945 | SDValue SignMask = DAG.getConstant( |
1946 | Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT); |
1947 | SDValue Xor = DAG.getNode(Opcode: ISD::XOR, DL, VT: IntVT, N1: Cast, N2: SignMask); |
1948 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Xor); |
1949 | } |
1950 | |
1951 | SDValue VectorLegalizer::ExpandFABS(SDNode *Node) { |
1952 | EVT VT = Node->getValueType(ResNo: 0); |
1953 | EVT IntVT = VT.changeVectorElementTypeToInteger(); |
1954 | |
1955 | if (!TLI.isOperationLegalOrCustom(Op: ISD::AND, VT: IntVT)) |
1956 | return SDValue(); |
1957 | |
1958 | // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64. |
1959 | if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) && |
1960 | !VT.isScalableVector()) |
1961 | return SDValue(); |
1962 | |
1963 | SDLoc DL(Node); |
1964 | SDValue Cast = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0)); |
1965 | SDValue ClearSignMask = DAG.getConstant( |
1966 | Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT); |
1967 | SDValue ClearedSign = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Cast, N2: ClearSignMask); |
1968 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: ClearedSign); |
1969 | } |
1970 | |
1971 | SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) { |
1972 | EVT VT = Node->getValueType(ResNo: 0); |
1973 | EVT IntVT = VT.changeVectorElementTypeToInteger(); |
1974 | |
1975 | if (VT != Node->getOperand(Num: 1).getValueType() || |
1976 | !TLI.isOperationLegalOrCustom(Op: ISD::AND, VT: IntVT) || |
1977 | !TLI.isOperationLegalOrCustom(Op: ISD::OR, VT: IntVT)) |
1978 | return SDValue(); |
1979 | |
1980 | // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64. |
1981 | if (!TLI.isOperationLegalOrCustomOrPromote(Op: ISD::FSUB, VT) && |
1982 | !VT.isScalableVector()) |
1983 | return SDValue(); |
1984 | |
1985 | SDLoc DL(Node); |
1986 | SDValue Mag = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 0)); |
1987 | SDValue Sign = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Node->getOperand(Num: 1)); |
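// copysign(Mag, Sign) = (Mag & ~SignMask) | (Sign & SignMask); the two masks
// do not overlap, which is why the OR below is marked disjoint.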
1988 | |
1989 | SDValue SignMask = DAG.getConstant( |
1990 | Val: APInt::getSignMask(BitWidth: IntVT.getScalarSizeInBits()), DL, VT: IntVT); |
1991 | SDValue SignBit = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Sign, N2: SignMask); |
1992 | |
1993 | SDValue ClearSignMask = DAG.getConstant( |
1994 | Val: APInt::getSignedMaxValue(numBits: IntVT.getScalarSizeInBits()), DL, VT: IntVT); |
1995 | SDValue ClearedSign = DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: Mag, N2: ClearSignMask); |
1996 | |
1997 | SDValue CopiedSign = DAG.getNode(Opcode: ISD::OR, DL, VT: IntVT, N1: ClearedSign, N2: SignBit, |
1998 | Flags: SDNodeFlags::Disjoint); |
1999 | |
2000 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: CopiedSign); |
2001 | } |
2002 | |
2003 | void VectorLegalizer::ExpandFSUB(SDNode *Node, |
2004 | SmallVectorImpl<SDValue> &Results) { |
2005 | // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal, |
2006 | // we can defer this to operation legalization where it will be lowered as |
2007 | // a+(-b). |
2008 | EVT VT = Node->getValueType(ResNo: 0); |
2009 | if (TLI.isOperationLegalOrCustom(Op: ISD::FNEG, VT) && |
2010 | TLI.isOperationLegalOrCustom(Op: ISD::FADD, VT)) |
2011 | return; // Defer to LegalizeDAG |
2012 | |
2013 | if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) { |
2014 | Results.push_back(Elt: Expanded); |
2015 | return; |
2016 | } |
2017 | |
2018 | SDValue Tmp = DAG.UnrollVectorOp(N: Node); |
2019 | Results.push_back(Elt: Tmp); |
2020 | } |
2021 | |
2022 | void VectorLegalizer::ExpandSETCC(SDNode *Node, |
2023 | SmallVectorImpl<SDValue> &Results) { |
2024 | bool NeedInvert = false; |
2025 | bool IsVP = Node->getOpcode() == ISD::VP_SETCC; |
2026 | bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC || |
2027 | Node->getOpcode() == ISD::STRICT_FSETCCS; |
2028 | bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS; |
2029 | unsigned Offset = IsStrict ? 1 : 0; |
2030 | |
2031 | SDValue Chain = IsStrict ? Node->getOperand(Num: 0) : SDValue(); |
2032 | SDValue LHS = Node->getOperand(Num: 0 + Offset); |
2033 | SDValue RHS = Node->getOperand(Num: 1 + Offset); |
2034 | SDValue CC = Node->getOperand(Num: 2 + Offset); |
2035 | |
2036 | MVT OpVT = LHS.getSimpleValueType(); |
2037 | ISD::CondCode CCCode = cast<CondCodeSDNode>(Val&: CC)->get(); |
2038 | |
2039 | if (TLI.getCondCodeAction(CC: CCCode, VT: OpVT) != TargetLowering::Expand) { |
2040 | if (IsStrict) { |
2041 | UnrollStrictFPOp(Node, Results); |
2042 | return; |
2043 | } |
2044 | Results.push_back(Elt: UnrollVSETCC(Node)); |
2045 | return; |
2046 | } |
2047 | |
2048 | SDValue Mask, EVL; |
2049 | if (IsVP) { |
2050 | Mask = Node->getOperand(Num: 3 + Offset); |
2051 | EVL = Node->getOperand(Num: 4 + Offset); |
2052 | } |
2053 | |
2054 | SDLoc dl(Node); |
2055 | bool Legalized = |
2056 | TLI.LegalizeSetCCCondCode(DAG, VT: Node->getValueType(ResNo: 0), LHS, RHS, CC, Mask, |
2057 | EVL, NeedInvert, dl, Chain, IsSignaling); |
2058 | |
2059 | if (Legalized) { |
2060 | // If we expanded the SETCC by swapping LHS and RHS, or by inverting the |
2061 | // condition code, create a new SETCC node. |
2062 | if (CC.getNode()) { |
2063 | if (IsStrict) { |
2064 | LHS = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, VTList: Node->getVTList(), |
2065 | Ops: {Chain, LHS, RHS, CC}, Flags: Node->getFlags()); |
2066 | Chain = LHS.getValue(R: 1); |
2067 | } else if (IsVP) { |
2068 | LHS = DAG.getNode(Opcode: ISD::VP_SETCC, DL: dl, VT: Node->getValueType(ResNo: 0), |
2069 | Ops: {LHS, RHS, CC, Mask, EVL}, Flags: Node->getFlags()); |
2070 | } else { |
2071 | LHS = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: Node->getValueType(ResNo: 0), N1: LHS, N2: RHS, N3: CC, |
2072 | Flags: Node->getFlags()); |
2073 | } |
2074 | } |
2075 | |
2076 | // If we expanded the SETCC by inverting the condition code, then wrap |
2077 | // the existing SETCC in a NOT to restore the intended condition. |
2078 | if (NeedInvert) { |
2079 | if (!IsVP) |
2080 | LHS = DAG.getLogicalNOT(DL: dl, Val: LHS, VT: LHS->getValueType(ResNo: 0)); |
2081 | else |
2082 | LHS = DAG.getVPLogicalNOT(DL: dl, Val: LHS, Mask, EVL, VT: LHS->getValueType(ResNo: 0)); |
2083 | } |
2084 | } else { |
2085 | assert(!IsStrict && "Don't know how to expand for strict nodes." ); |
2086 | |
2087 | // Otherwise, SETCC for the given comparison type must be completely |
2088 | // illegal; expand it into a SELECT_CC. |
2089 | EVT VT = Node->getValueType(ResNo: 0); |
2090 | LHS = |
2091 | DAG.getNode(Opcode: ISD::SELECT_CC, DL: dl, VT, N1: LHS, N2: RHS, |
2092 | N3: DAG.getBoolConstant(V: true, DL: dl, VT, OpVT: LHS.getValueType()), |
2093 | N4: DAG.getBoolConstant(V: false, DL: dl, VT, OpVT: LHS.getValueType()), N5: CC); |
2094 | LHS->setFlags(Node->getFlags()); |
2095 | } |
2096 | |
2097 | Results.push_back(Elt: LHS); |
2098 | if (IsStrict) |
2099 | Results.push_back(Elt: Chain); |
2100 | } |
2101 | |
2102 | void VectorLegalizer::ExpandUADDSUBO(SDNode *Node, |
2103 | SmallVectorImpl<SDValue> &Results) { |
2104 | SDValue Result, Overflow; |
2105 | TLI.expandUADDSUBO(Node, Result, Overflow, DAG); |
2106 | Results.push_back(Elt: Result); |
2107 | Results.push_back(Elt: Overflow); |
2108 | } |
2109 | |
2110 | void VectorLegalizer::ExpandSADDSUBO(SDNode *Node, |
2111 | SmallVectorImpl<SDValue> &Results) { |
2112 | SDValue Result, Overflow; |
2113 | TLI.expandSADDSUBO(Node, Result, Overflow, DAG); |
2114 | Results.push_back(Elt: Result); |
2115 | Results.push_back(Elt: Overflow); |
2116 | } |
2117 | |
2118 | void VectorLegalizer::ExpandMULO(SDNode *Node, |
2119 | SmallVectorImpl<SDValue> &Results) { |
2120 | SDValue Result, Overflow; |
2121 | if (!TLI.expandMULO(Node, Result, Overflow, DAG)) |
2122 | std::tie(args&: Result, args&: Overflow) = DAG.UnrollVectorOverflowOp(N: Node); |
2123 | |
2124 | Results.push_back(Elt: Result); |
2125 | Results.push_back(Elt: Overflow); |
2126 | } |
2127 | |
2128 | void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node, |
2129 | SmallVectorImpl<SDValue> &Results) { |
2130 | SDNode *N = Node; |
2131 | if (SDValue Expanded = TLI.expandFixedPointDiv(Opcode: N->getOpcode(), dl: SDLoc(N), |
2132 | LHS: N->getOperand(Num: 0), RHS: N->getOperand(Num: 1), Scale: N->getConstantOperandVal(Num: 2), DAG)) |
2133 | Results.push_back(Elt: Expanded); |
2134 | } |
2135 | |
2136 | void VectorLegalizer::ExpandStrictFPOp(SDNode *Node, |
2137 | SmallVectorImpl<SDValue> &Results) { |
2138 | if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) { |
2139 | ExpandUINT_TO_FLOAT(Node, Results); |
2140 | return; |
2141 | } |
2142 | if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) { |
2143 | ExpandFP_TO_UINT(Node, Results); |
2144 | return; |
2145 | } |
2146 | |
2147 | if (Node->getOpcode() == ISD::STRICT_FSETCC || |
2148 | Node->getOpcode() == ISD::STRICT_FSETCCS) { |
2149 | ExpandSETCC(Node, Results); |
2150 | return; |
2151 | } |
2152 | |
2153 | UnrollStrictFPOp(Node, Results); |
2154 | } |
2155 | |
2156 | void VectorLegalizer::ExpandREM(SDNode *Node, |
2157 | SmallVectorImpl<SDValue> &Results) { |
2158 | assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) && |
2159 | "Expected REM node" ); |
2160 | |
2161 | SDValue Result; |
2162 | if (!TLI.expandREM(Node, Result, DAG)) |
2163 | Result = DAG.UnrollVectorOp(N: Node); |
2164 | Results.push_back(Elt: Result); |
2165 | } |
2166 | |
// Try to expand libm nodes into vector math routine calls. Callers provide the
// LibFunc equivalent of the passed-in Node, which is used to look up mappings
// within TargetLibraryInfo. The only mappings considered are those where the
// result and all operands are the same vector type. While predicated nodes are
// not supported, we will emit calls to masked routines by passing in an
// all-true mask.
2173 | bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC, |
2174 | SmallVectorImpl<SDValue> &Results) { |
// The chain must be propagated, but strict FP operations are currently
// down-converted to their non-strict counterparts before they reach this
// point.
2177 | assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!" ); |
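// For example, with a vector library such as SLEEF or SVML configured in
// TargetLibraryInfo, a v4f32 FSIN may be mapped to a 4-lane vector sinf
// variant; the exact symbol name and masking requirements come from the
// matching VecDesc entry (illustrative, not a guaranteed mapping).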
2178 | |
2179 | const char *LCName = TLI.getLibcallName(Call: LC); |
2180 | if (!LCName) |
2181 | return false; |
2182 | LLVM_DEBUG(dbgs() << "Looking for vector variant of " << LCName << "\n" ); |
2183 | |
2184 | EVT VT = Node->getValueType(ResNo: 0); |
2185 | ElementCount VL = VT.getVectorElementCount(); |
2186 | |
2187 | // Lookup a vector function equivalent to the specified libcall. Prefer |
2188 | // unmasked variants but we will generate a mask if need be. |
2189 | const TargetLibraryInfo &TLibInfo = DAG.getLibInfo(); |
2190 | const VecDesc *VD = TLibInfo.getVectorMappingInfo(F: LCName, VF: VL, Masked: false); |
2191 | if (!VD) |
2192 | VD = TLibInfo.getVectorMappingInfo(F: LCName, VF: VL, /*Masked=*/true); |
2193 | if (!VD) |
2194 | return false; |
2195 | |
2196 | LLVMContext *Ctx = DAG.getContext(); |
2197 | Type *Ty = VT.getTypeForEVT(Context&: *Ctx); |
2198 | Type *ScalarTy = Ty->getScalarType(); |
2199 | |
2200 | // Construct a scalar function type based on Node's operands. |
2201 | SmallVector<Type *, 8> ArgTys; |
2202 | for (unsigned i = 0; i < Node->getNumOperands(); ++i) { |
2203 | assert(Node->getOperand(i).getValueType() == VT && |
2204 | "Expected matching vector types!" ); |
2205 | ArgTys.push_back(Elt: ScalarTy); |
2206 | } |
2207 | FunctionType *ScalarFTy = FunctionType::get(Result: ScalarTy, Params: ArgTys, isVarArg: false); |
2208 | |
2209 | // Generate call information for the vector function. |
2210 | const std::string MangledName = VD->getVectorFunctionABIVariantString(); |
2211 | auto OptVFInfo = VFABI::tryDemangleForVFABI(MangledName, FTy: ScalarFTy); |
2212 | if (!OptVFInfo) |
2213 | return false; |
2214 | |
2215 | LLVM_DEBUG(dbgs() << "Found vector variant " << VD->getVectorFnName() |
2216 | << "\n" ); |
2217 | |
2218 | // Sanity check just in case OptVFInfo has unexpected parameters. |
2219 | if (OptVFInfo->Shape.Parameters.size() != |
2220 | Node->getNumOperands() + VD->isMasked()) |
2221 | return false; |
2222 | |
2223 | // Collect vector call operands. |
2224 | |
2225 | SDLoc DL(Node); |
2226 | TargetLowering::ArgListTy Args; |
2227 | TargetLowering::ArgListEntry Entry; |
2228 | Entry.IsSExt = false; |
2229 | Entry.IsZExt = false; |
2230 | |
2231 | unsigned OpNum = 0; |
2232 | for (auto &VFParam : OptVFInfo->Shape.Parameters) { |
2233 | if (VFParam.ParamKind == VFParamKind::GlobalPredicate) { |
2234 | EVT MaskVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(), Context&: *Ctx, VT); |
2235 | Entry.Node = DAG.getBoolConstant(V: true, DL, VT: MaskVT, OpVT: VT); |
2236 | Entry.Ty = MaskVT.getTypeForEVT(Context&: *Ctx); |
2237 | Args.push_back(x: Entry); |
2238 | continue; |
2239 | } |
2240 | |
2241 | // Only vector operands are supported. |
2242 | if (VFParam.ParamKind != VFParamKind::Vector) |
2243 | return false; |
2244 | |
2245 | Entry.Node = Node->getOperand(Num: OpNum++); |
2246 | Entry.Ty = Ty; |
2247 | Args.push_back(x: Entry); |
2248 | } |
2249 | |
2250 | // Emit a call to the vector function. |
2251 | SDValue Callee = DAG.getExternalSymbol(Sym: VD->getVectorFnName().data(), |
2252 | VT: TLI.getPointerTy(DL: DAG.getDataLayout())); |
2253 | TargetLowering::CallLoweringInfo CLI(DAG); |
2254 | CLI.setDebugLoc(DL) |
2255 | .setChain(DAG.getEntryNode()) |
2256 | .setLibCallee(CC: CallingConv::C, ResultType: Ty, Target: Callee, ArgsList: std::move(Args)); |
2257 | |
2258 | std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); |
2259 | Results.push_back(Elt: CallResult.first); |
2260 | return true; |
2261 | } |
2262 | |
2263 | /// Try to expand the node to a vector libcall based on the result type. |
2264 | bool VectorLegalizer::tryExpandVecMathCall( |
2265 | SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, |
2266 | RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, |
2267 | RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results) { |
2268 | RTLIB::Libcall LC = RTLIB::getFPLibCall( |
2269 | VT: Node->getValueType(ResNo: 0).getVectorElementType(), Call_F32, Call_F64, |
2270 | Call_F80, Call_F128, Call_PPCF128); |
2271 | |
2272 | if (LC == RTLIB::UNKNOWN_LIBCALL) |
2273 | return false; |
2274 | |
2275 | return tryExpandVecMathCall(Node, LC, Results); |
2276 | } |
2277 | |
2278 | void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, |
2279 | SmallVectorImpl<SDValue> &Results) { |
2280 | EVT VT = Node->getValueType(ResNo: 0); |
2281 | EVT EltVT = VT.getVectorElementType(); |
2282 | unsigned NumElems = VT.getVectorNumElements(); |
2283 | unsigned NumOpers = Node->getNumOperands(); |
2284 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
2285 | |
2286 | EVT TmpEltVT = EltVT; |
2287 | if (Node->getOpcode() == ISD::STRICT_FSETCC || |
2288 | Node->getOpcode() == ISD::STRICT_FSETCCS) |
2289 | TmpEltVT = TLI.getSetCCResultType(DL: DAG.getDataLayout(), |
2290 | Context&: *DAG.getContext(), VT: TmpEltVT); |
2291 | |
2292 | EVT ValueVTs[] = {TmpEltVT, MVT::Other}; |
2293 | SDValue Chain = Node->getOperand(Num: 0); |
2294 | SDLoc dl(Node); |
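// Each scalar operation below consumes the original incoming chain; the
// per-element result chains are merged with a TokenFactor once the loop
// finishes.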
2295 | |
2296 | SmallVector<SDValue, 32> OpValues; |
2297 | SmallVector<SDValue, 32> OpChains; |
2298 | for (unsigned i = 0; i < NumElems; ++i) { |
2299 | SmallVector<SDValue, 4> Opers; |
2300 | SDValue Idx = DAG.getVectorIdxConstant(Val: i, DL: dl); |
2301 | |
2302 | // The Chain is the first operand. |
2303 | Opers.push_back(Elt: Chain); |
2304 | |
2305 | // Now process the remaining operands. |
2306 | for (unsigned j = 1; j < NumOpers; ++j) { |
2307 | SDValue Oper = Node->getOperand(Num: j); |
2308 | EVT OperVT = Oper.getValueType(); |
2309 | |
2310 | if (OperVT.isVector()) |
2311 | Oper = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, |
2312 | VT: OperVT.getVectorElementType(), N1: Oper, N2: Idx); |
2313 | |
2314 | Opers.push_back(Elt: Oper); |
2315 | } |
2316 | |
2317 | SDValue ScalarOp = DAG.getNode(Opcode: Node->getOpcode(), DL: dl, ResultTys: ValueVTs, Ops: Opers); |
2318 | SDValue ScalarResult = ScalarOp.getValue(R: 0); |
2319 | SDValue ScalarChain = ScalarOp.getValue(R: 1); |
2320 | |
2321 | if (Node->getOpcode() == ISD::STRICT_FSETCC || |
2322 | Node->getOpcode() == ISD::STRICT_FSETCCS) |
2323 | ScalarResult = DAG.getSelect(DL: dl, VT: EltVT, Cond: ScalarResult, |
2324 | LHS: DAG.getAllOnesConstant(DL: dl, VT: EltVT), |
2325 | RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT)); |
2326 | |
2327 | OpValues.push_back(Elt: ScalarResult); |
2328 | OpChains.push_back(Elt: ScalarChain); |
2329 | } |
2330 | |
2331 | SDValue Result = DAG.getBuildVector(VT, DL: dl, Ops: OpValues); |
2332 | SDValue NewChain = DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: OpChains); |
2333 | |
2334 | Results.push_back(Elt: Result); |
2335 | Results.push_back(Elt: NewChain); |
2336 | } |
2337 | |
2338 | SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { |
2339 | EVT VT = Node->getValueType(ResNo: 0); |
2340 | unsigned NumElems = VT.getVectorNumElements(); |
2341 | EVT EltVT = VT.getVectorElementType(); |
2342 | SDValue LHS = Node->getOperand(Num: 0); |
2343 | SDValue RHS = Node->getOperand(Num: 1); |
2344 | SDValue CC = Node->getOperand(Num: 2); |
2345 | EVT TmpEltVT = LHS.getValueType().getVectorElementType(); |
2346 | SDLoc dl(Node); |
2347 | SmallVector<SDValue, 8> Ops(NumElems); |
2348 | for (unsigned i = 0; i < NumElems; ++i) { |
2349 | SDValue LHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: LHS, |
2350 | N2: DAG.getVectorIdxConstant(Val: i, DL: dl)); |
2351 | SDValue RHSElem = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: TmpEltVT, N1: RHS, |
2352 | N2: DAG.getVectorIdxConstant(Val: i, DL: dl)); |
2353 | // FIXME: We should use i1 setcc + boolext here, but it causes regressions. |
2354 | Ops[i] = DAG.getNode(Opcode: ISD::SETCC, DL: dl, |
2355 | VT: TLI.getSetCCResultType(DL: DAG.getDataLayout(), |
2356 | Context&: *DAG.getContext(), VT: TmpEltVT), |
2357 | N1: LHSElem, N2: RHSElem, N3: CC); |
2358 | Ops[i] = DAG.getSelect(DL: dl, VT: EltVT, Cond: Ops[i], |
2359 | LHS: DAG.getBoolConstant(V: true, DL: dl, VT: EltVT, OpVT: VT), |
2360 | RHS: DAG.getConstant(Val: 0, DL: dl, VT: EltVT)); |
2361 | } |
2362 | return DAG.getBuildVector(VT, DL: dl, Ops); |
2363 | } |
2364 | |
2365 | bool SelectionDAG::LegalizeVectors() { |
2366 | return VectorLegalizer(*this).Run(); |
2367 | } |
2368 | |