//===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements IR expansion for vector predication intrinsics, allowing
// targets to enable vector predication until just before codegen.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ExpandVectorPredication.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <optional>

using namespace llvm;

using VPLegalization = TargetTransformInfo::VPLegalization;
using VPTransform = TargetTransformInfo::VPLegalization::VPTransform;

// Keep this in sync with TargetTransformInfo::VPLegalization.
#define VPINTERNAL_VPLEGAL_CASES                                               \
  VPINTERNAL_CASE(Legal)                                                       \
  VPINTERNAL_CASE(Discard)                                                     \
  VPINTERNAL_CASE(Convert)

#define VPINTERNAL_CASE(X) "|" #X

// Override options.
static cl::opt<std::string> EVLTransformOverride(
    "expandvp-override-evl-transform", cl::init(""), cl::Hidden,
    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
             ". If non-empty, ignore "
             "TargetTransformInfo and "
             "always use this transformation for the %evl parameter (Used in "
             "testing)."));

static cl::opt<std::string> MaskTransformOverride(
    "expandvp-override-mask-transform", cl::init(""), cl::Hidden,
    cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
             ". If non-empty, ignore "
             "TargetTransformInfo and "
             "always use this transformation for the %mask parameter (Used in "
             "testing)."));

#undef VPINTERNAL_CASE
#define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)

static VPTransform parseOverrideOption(const std::string &TextOpt) {
  return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES;
}

#undef VPINTERNAL_VPLEGAL_CASES

// Whether any override options are set.
static bool anyExpandVPOverridesSet() {
  return !EVLTransformOverride.empty() || !MaskTransformOverride.empty();
}

#define DEBUG_TYPE "expandvp"

STATISTIC(NumFoldedVL, "Number of folded vector length params");
STATISTIC(NumLoweredVPOps, "Number of lowered vector predication operations");

///// Helpers {

/// \returns Whether the vector mask \p MaskVal has all lane bits set.
static bool isAllTrueMask(Value *MaskVal) {
  if (Value *SplattedVal = getSplatValue(MaskVal))
    if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
      return ConstValue->isAllOnesValue();

  return false;
}

/// \returns A non-excepting divisor constant for this type.
static Constant *getSafeDivisor(Type *DivTy) {
  assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
  return ConstantInt::get(DivTy, 1u, /*IsSigned=*/false);
}

/// Transfer operation properties from \p VPI to \p NewVal.
static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
  auto *NewInst = dyn_cast<Instruction>(&NewVal);
  if (!NewInst || !isa<FPMathOperator>(NewVal))
    return;

  auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
  if (!OldFMOp)
    return;

  NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
}

/// Transfer all properties from \p OldOp to \p NewOp and replace all uses.
/// \p OldOp gets erased.
static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
  transferDecorations(NewOp, OldOp);
  OldOp.replaceAllUsesWith(&NewOp);
  OldOp.eraseFromParent();
}

static bool maySpeculateLanes(VPIntrinsic &VPI) {
  // The result of VP reductions depends on the mask and evl.
  if (isa<VPReductionIntrinsic>(VPI))
    return false;
  // Fall back to whether the intrinsic is speculatable.
  if (auto IntrID = VPI.getFunctionalIntrinsicID())
    return Intrinsic::getFnAttributes(VPI.getContext(), *IntrID)
        .hasAttribute(Attribute::AttrKind::Speculatable);
  if (auto Opc = VPI.getFunctionalOpcode())
    return isSafeToSpeculativelyExecuteWithOpcode(*Opc, &VPI);
  return false;
}

//// } Helpers

namespace {

// Expansion pass state at function scope.
struct CachingVPExpander {
  const TargetTransformInfo &TTI;

  /// \returns A bitmask that is true where the lane position is less-than \p
  /// EVLParam.
  ///
  /// \p Builder
  ///    Used for instruction creation.
  /// \p EVLParam
  ///    The explicit vector length parameter to test against the lane
  ///    positions.
  /// \p ElemCount
  ///    Static (potentially scalable) number of vector elements.
  Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
                          ElementCount ElemCount);

  /// If needed, folds the EVL in the mask operand and discards the EVL
  /// parameter. Returns a pair of the value of the intrinsic after the change
  /// (if any) and whether the mask was actually folded.
  std::pair<Value *, bool> foldEVLIntoMask(VPIntrinsic &VPI);

  /// "Remove" the %evl parameter of \p PI by setting it to the static vector
  /// length of the operation. Returns true if the %evl (if any) was
  /// effectively changed.
  bool discardEVLParameter(VPIntrinsic &PI);

  /// Lower this VP binary operator to an unpredicated binary operator.
  Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                           VPIntrinsic &PI);

  /// Lower this VP int call to an unpredicated int call.
  Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &PI);

  /// Lower this VP fp call to an unpredicated fp call.
  Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI,
                                   unsigned UnpredicatedIntrinsicID);

  /// Lower this VP reduction to a call to an unpredicated reduction intrinsic.
  Value *expandPredicationInReduction(IRBuilder<> &Builder,
                                      VPReductionIntrinsic &PI);

  /// Lower this VP cast operation to an unpredicated cast instruction.
  Value *expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
                                          VPIntrinsic &VPI);

  /// Lower this VP memory operation to a non-VP intrinsic.
  Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                            VPIntrinsic &VPI);

  /// Lower this VP comparison to an unpredicated comparison.
  Value *expandPredicationInComparison(IRBuilder<> &Builder,
                                       VPCmpIntrinsic &PI);

  /// Query TTI and expand the vector predication in \p PI accordingly.
  Value *expandPredication(VPIntrinsic &PI);

  /// Determine how and whether the VPIntrinsic \p VPI shall be expanded. This
  /// overrides TTI with the cl::opts listed at the top of this file.
  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
  bool UsingTTIOverrides;

public:
  CachingVPExpander(const TargetTransformInfo &TTI)
      : TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}

  /// Expand llvm.vp.* intrinsics as requested by \p TTI.
  /// Returns the details of the expansion.
  VPExpansionDetails expandVectorPredication(VPIntrinsic &VPI);
};

//// CachingVPExpander {

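// A sketch of the %evl-to-mask conversion performed below (concrete types and
// value names are illustrative only):
//   fixed <4 x i32> case:
//     %step = <i32 0, i32 1, i32 2, i32 3>           ; stepvector
//     %mask = icmp ult <4 x i32> %step, splat(%evl)
//   scalable case:
//     %mask = call <vscale x 4 x i1>
//             @llvm.get_active_lane_mask.nxv4i1.i32(i32 0, i32 %evl)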
Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
                                           Value *EVLParam,
                                           ElementCount ElemCount) {
  // TODO add caching
  // Scalable vector %evl conversion.
  if (ElemCount.isScalable()) {
    Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
    // `get_active_lane_mask` performs an implicit less-than comparison.
    Value *ConstZero = Builder.getInt32(0);
    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
                                   {BoolVecTy, EVLParam->getType()},
                                   {ConstZero, EVLParam});
  }

  // Fixed vector %evl conversion.
  Type *LaneTy = EVLParam->getType();
  unsigned NumElems = ElemCount.getFixedValue();
  Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
  Value *IdxVec = Builder.CreateStepVector(VectorType::get(LaneTy, ElemCount));
  return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat);
}

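// A sketch of the expansion below for a masked vp.udiv (names illustrative):
// masked-off lanes receive the safe divisor 1 before the plain instruction is
// emitted, so no spurious division by zero can occur.
//   %safe = select <4 x i1> %mask, <4 x i32> %b, <4 x i32> splat(i32 1)
//   %div  = udiv <4 x i32> %a, %safe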
Value *
CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                                     VPIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
  assert(Instruction::isBinaryOp(OC));

  Value *Op0 = VPI.getOperand(0);
  Value *Op1 = VPI.getOperand(1);
  Value *Mask = VPI.getMaskParam();

  // Blend in safe operands.
  if (Mask && !isAllTrueMask(Mask)) {
    switch (OC) {
    default:
      // Can safely ignore the predicate.
      break;

    // Division operators need a safe divisor on masked-off lanes (1).
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::URem:
    case Instruction::SRem:
      // 2nd operand must not be zero.
      Value *SafeDivisor = getSafeDivisor(VPI.getType());
      Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor);
    }
  }

  Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName());

  replaceOperation(*NewBinOp, VPI);
  return NewBinOp;
}

Value *CachingVPExpander::expandPredicationToIntCall(IRBuilder<> &Builder,
                                                     VPIntrinsic &VPI) {
  std::optional<unsigned> FID = VPI.getFunctionalIntrinsicID();
  if (!FID)
    return nullptr;
  SmallVector<Value *, 2> Argument;
  for (unsigned i = 0; i < VPI.getNumOperands() - 3; i++) {
    Argument.push_back(VPI.getOperand(i));
  }
  Value *NewOp = Builder.CreateIntrinsic(FID.value(), {VPI.getType()}, Argument,
                                         /*FMFSource=*/nullptr, VPI.getName());
  replaceOperation(*NewOp, VPI);
  return NewOp;
}

Value *CachingVPExpander::expandPredicationToFPCall(
    IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  switch (UnpredicatedIntrinsicID) {
  case Intrinsic::fabs:
  case Intrinsic::sqrt:
  case Intrinsic::maxnum:
  case Intrinsic::minnum: {
    SmallVector<Value *, 2> Argument;
    for (unsigned i = 0; i < VPI.getNumOperands() - 3; i++) {
      Argument.push_back(VPI.getOperand(i));
    }
    Value *NewOp = Builder.CreateIntrinsic(
        UnpredicatedIntrinsicID, {VPI.getType()}, Argument,
        /*FMFSource=*/nullptr, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::experimental_constrained_fma:
  case Intrinsic::experimental_constrained_fmuladd: {
    Value *Op0 = VPI.getOperand(0);
    Value *Op1 = VPI.getOperand(1);
    Value *Op2 = VPI.getOperand(2);
    Function *Fn = Intrinsic::getOrInsertDeclaration(
        VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
    Value *NewOp;
    if (Intrinsic::isConstrainedFPIntrinsic(UnpredicatedIntrinsicID))
      NewOp =
          Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
    else
      NewOp = Builder.CreateCall(Fn, {Op0, Op1, Op2}, VPI.getName());
    replaceOperation(*NewOp, VPI);
    return NewOp;
  }
  }

  return nullptr;
}

static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
                                         Type *EltTy) {
  Intrinsic::ID RdxID = *VPI.getFunctionalIntrinsicID();
  FastMathFlags FMF;
  if (isa<FPMathOperator>(VPI))
    FMF = VPI.getFastMathFlags();
  return getReductionIdentity(RdxID, EltTy, FMF);
}

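// A sketch of the reduction expansion below for vp.reduce.add on <4 x i32>
// (names illustrative): masked-off lanes are first replaced by the neutral
// element of the reduction (0 for add), then a regular reduction intrinsic is
// emitted and combined with the start value.
//   %masked = select <4 x i1> %mask, <4 x i32> %v, <4 x i32> zeroinitializer
//   %rdx    = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %masked)
//   %res    = add i32 %rdx, %start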
Value *
CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
                                                VPReductionIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  Value *Mask = VPI.getMaskParam();
  Value *RedOp = VPI.getOperand(VPI.getVectorParamPos());

  // Insert neutral element in masked-out positions.
  if (Mask && !isAllTrueMask(Mask)) {
    auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType());
    auto *NeutralVector = Builder.CreateVectorSplat(
        cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt);
    RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector);
  }

  Value *Reduction;
  Value *Start = VPI.getOperand(VPI.getStartParamPos());

  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Impossible reduction kind");
  case Intrinsic::vp_reduce_add:
  case Intrinsic::vp_reduce_mul:
  case Intrinsic::vp_reduce_and:
  case Intrinsic::vp_reduce_or:
  case Intrinsic::vp_reduce_xor: {
    Intrinsic::ID RedID = *VPI.getFunctionalIntrinsicID();
    unsigned Opc = getArithmeticReductionInstruction(RedID);
    assert(Instruction::isBinaryOp(Opc));
    Reduction = Builder.CreateUnaryIntrinsic(RedID, RedOp);
    Reduction =
        Builder.CreateBinOp((Instruction::BinaryOps)Opc, Reduction, Start);
    break;
  }
  case Intrinsic::vp_reduce_smax:
  case Intrinsic::vp_reduce_smin:
  case Intrinsic::vp_reduce_umax:
  case Intrinsic::vp_reduce_umin:
  case Intrinsic::vp_reduce_fmax:
  case Intrinsic::vp_reduce_fmin:
  case Intrinsic::vp_reduce_fmaximum:
  case Intrinsic::vp_reduce_fminimum: {
    Intrinsic::ID RedID = *VPI.getFunctionalIntrinsicID();
    Intrinsic::ID ScalarID = getMinMaxReductionIntrinsicOp(RedID);
    Reduction = Builder.CreateUnaryIntrinsic(RedID, RedOp);
    transferDecorations(*Reduction, VPI);
    Reduction = Builder.CreateBinaryIntrinsic(ScalarID, Reduction, Start);
    break;
  }
  case Intrinsic::vp_reduce_fadd:
    Reduction = Builder.CreateFAddReduce(Start, RedOp);
    break;
  case Intrinsic::vp_reduce_fmul:
    Reduction = Builder.CreateFMulReduce(Start, RedOp);
    break;
  }

  replaceOperation(*Reduction, VPI);
  return Reduction;
}

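// Casts have no side effects and are speculatable, so a VP cast simply becomes
// the corresponding cast instruction, e.g. (a sketch):
//   %res = sext <4 x i16> %x to <4 x i32>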
Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
                                                           VPIntrinsic &VPI) {
  Intrinsic::ID VPID = VPI.getIntrinsicID();
  unsigned CastOpcode = VPIntrinsic::getFunctionalOpcodeForVP(VPID).value();
  assert(Instruction::isCast(CastOpcode));
  Value *CastOp =
      Builder.CreateCast(Instruction::CastOps(CastOpcode), VPI.getOperand(0),
                         VPI.getType(), VPI.getName());

  replaceOperation(*CastOp, VPI);
  return CastOp;
}

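// A sketch of the memory expansion below for vp.load (names illustrative;
// exact type mangling and pass-through operand elided): with an all-true mask
// the intrinsic becomes a plain load, otherwise a masked load.
//   all-true mask: %v = load <4 x i32>, ptr %p, align 16
//   other mask:    %v = call <4 x i32> @llvm.masked.load.v4i32.p0(
//                        ptr %p, i32 16, <4 x i1> %mask, <4 x i32> poison)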
Value *
CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                                      VPIntrinsic &VPI) {
  assert(VPI.canIgnoreVectorLengthParam());

  const auto &DL = VPI.getDataLayout();

  Value *MaskParam = VPI.getMaskParam();
  Value *PtrParam = VPI.getMemoryPointerParam();
  Value *DataParam = VPI.getMemoryDataParam();
  bool IsUnmasked = isAllTrueMask(MaskParam);

  MaybeAlign AlignOpt = VPI.getPointerAlignment();

  Value *NewMemoryInst = nullptr;
  switch (VPI.getIntrinsicID()) {
  default:
    llvm_unreachable("Not a VP memory intrinsic");
  case Intrinsic::vp_store:
    if (IsUnmasked) {
      StoreInst *NewStore =
          Builder.CreateStore(DataParam, PtrParam, /*IsVolatile=*/false);
      if (AlignOpt.has_value())
        NewStore->setAlignment(*AlignOpt);
      NewMemoryInst = NewStore;
    } else
      NewMemoryInst = Builder.CreateMaskedStore(
          DataParam, PtrParam, AlignOpt.valueOrOne(), MaskParam);

    break;
  case Intrinsic::vp_load:
    if (IsUnmasked) {
      LoadInst *NewLoad =
          Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile=*/false);
      if (AlignOpt.has_value())
        NewLoad->setAlignment(*AlignOpt);
      NewMemoryInst = NewLoad;
    } else
      NewMemoryInst = Builder.CreateMaskedLoad(
          VPI.getType(), PtrParam, AlignOpt.valueOrOne(), MaskParam);

    break;
  case Intrinsic::vp_scatter: {
    auto *ElementType =
        cast<VectorType>(DataParam->getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedScatter(
        DataParam, PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam);
    break;
  }
  case Intrinsic::vp_gather: {
    auto *ElementType = cast<VectorType>(VPI.getType())->getElementType();
    NewMemoryInst = Builder.CreateMaskedGather(
        VPI.getType(), PtrParam,
        AlignOpt.value_or(DL.getPrefTypeAlign(ElementType)), MaskParam,
        /*PassThru=*/nullptr, VPI.getName());
    break;
  }
  }

  assert(NewMemoryInst);
  replaceOperation(*NewMemoryInst, VPI);
  return NewMemoryInst;
}

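// Comparisons are speculatable on masked-off lanes, so vp.icmp / vp.fcmp drop
// straight to a plain compare, e.g. (a sketch):
//   %res = icmp ult <4 x i32> %a, %b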
Value *CachingVPExpander::expandPredicationInComparison(IRBuilder<> &Builder,
                                                        VPCmpIntrinsic &VPI) {
  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
         "Implicitly dropping %evl in non-speculatable operator!");

  assert(*VPI.getFunctionalOpcode() == Instruction::ICmp ||
         *VPI.getFunctionalOpcode() == Instruction::FCmp);

  Value *Op0 = VPI.getOperand(0);
  Value *Op1 = VPI.getOperand(1);
  auto Pred = VPI.getPredicate();

  auto *NewCmp = Builder.CreateCmp(Pred, Op0, Op1);

  replaceOperation(*NewCmp, VPI);
  return NewCmp;
}

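// Discarding %evl pins it to the static vector length of the operation, e.g.
// (a sketch): %evl becomes the constant 4 for a <4 x ...> operation, and
// "mul nuw i32 vscale, 4" for a <vscale x 4 x ...> operation.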
bool CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");

  if (VPI.canIgnoreVectorLengthParam())
    return false;

  Value *EVLParam = VPI.getVectorLengthParam();
  if (!EVLParam)
    return false;

  ElementCount StaticElemCount = VPI.getStaticVectorLength();
  Value *MaxEVL = nullptr;
  Type *Int32Ty = Type::getInt32Ty(VPI.getContext());
  if (StaticElemCount.isScalable()) {
    // TODO add caching
    IRBuilder<> Builder(VPI.getParent(), VPI.getIterator());
    Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue());
    Value *VScale = Builder.CreateVScale(Int32Ty, "vscale");
    MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
                               /*NUW=*/true, /*NSW=*/false);
  } else {
    MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(),
                              /*IsSigned=*/false);
  }
  VPI.setVectorLengthParam(MaxEVL);
  return true;
}

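// A sketch of the %evl folding performed below for a <4 x i32> vp.add (names
// illustrative): the %evl is turned into a lane mask, ANDed into the old mask,
// and then discarded by pinning it to the static vector length.
//   %evlmask = icmp ult <4 x i32> <i32 0, i32 1, i32 2, i32 3>, splat(%evl)
//   %newmask = and <4 x i1> %evlmask, %oldmask
//   %res = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b,
//                                            <4 x i1> %newmask, i32 4)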
std::pair<Value *, bool> CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // Ineffective %evl parameter and so nothing to do here.
  if (VPI.canIgnoreVectorLengthParam())
    return {&VPI, false};

  // Only VP intrinsics can have an %evl parameter.
  Value *OldMaskParam = VPI.getMaskParam();
  Value *OldEVLParam = VPI.getVectorLengthParam();
  assert(OldMaskParam && "no mask param to fold the vl param into");
  assert(OldEVLParam && "no EVL param to fold away");

  LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n');
  LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n');

  // Convert the %evl predication into vector mask predication.
  ElementCount ElemCount = VPI.getStaticVectorLength();
  Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
  Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
  VPI.setMaskParam(NewMaskParam);

  // Drop the %evl parameter.
  discardEVLParameter(VPI);
  assert(VPI.canIgnoreVectorLengthParam() &&
         "transformation did not render the evl param ineffective!");

  // Reassess the modified instruction.
  return {&VPI, true};
}

Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
  LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');

  IRBuilder<> Builder(&VPI);

  // Try lowering to an LLVM instruction first.
  auto OC = VPI.getFunctionalOpcode();

  if (OC && Instruction::isBinaryOp(*OC))
    return expandPredicationInBinaryOperator(Builder, VPI);

  if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
    return expandPredicationInReduction(Builder, *VPRI);

  if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
    return expandPredicationInComparison(Builder, *VPCmp);

  if (VPCastIntrinsic::isVPCast(VPI.getIntrinsicID())) {
    return expandPredicationToCastIntrinsic(Builder, VPI);
  }

  switch (VPI.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::vp_fneg: {
    Value *NewNegOp = Builder.CreateFNeg(VPI.getOperand(0), VPI.getName());
    replaceOperation(*NewNegOp, VPI);
    return NewNegOp;
  }
  case Intrinsic::vp_abs:
  case Intrinsic::vp_smax:
  case Intrinsic::vp_smin:
  case Intrinsic::vp_umax:
  case Intrinsic::vp_umin:
  case Intrinsic::vp_bswap:
  case Intrinsic::vp_bitreverse:
  case Intrinsic::vp_ctpop:
  case Intrinsic::vp_ctlz:
  case Intrinsic::vp_cttz:
  case Intrinsic::vp_sadd_sat:
  case Intrinsic::vp_uadd_sat:
  case Intrinsic::vp_ssub_sat:
  case Intrinsic::vp_usub_sat:
  case Intrinsic::vp_fshl:
  case Intrinsic::vp_fshr:
    return expandPredicationToIntCall(Builder, VPI);
  case Intrinsic::vp_fabs:
  case Intrinsic::vp_sqrt:
  case Intrinsic::vp_maxnum:
  case Intrinsic::vp_minnum:
  case Intrinsic::vp_maximum:
  case Intrinsic::vp_minimum:
  case Intrinsic::vp_fma:
  case Intrinsic::vp_fmuladd:
    return expandPredicationToFPCall(Builder, VPI,
                                     VPI.getFunctionalIntrinsicID().value());
  case Intrinsic::vp_load:
  case Intrinsic::vp_store:
  case Intrinsic::vp_gather:
  case Intrinsic::vp_scatter:
    return expandPredicationInMemoryIntrinsic(Builder, VPI);
  }

  if (auto CID = VPI.getConstrainedIntrinsicID())
    if (Value *Call = expandPredicationToFPCall(Builder, VPI, *CID))
      return Call;

  return &VPI;
}

//// } CachingVPExpander

void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
  // Operations with speculatable lanes do not strictly need predication.
  if (maySpeculateLanes(VPI)) {
    // Converting a speculatable VP intrinsic means dropping %mask and %evl.
    // No need to expand %evl into the %mask only to ignore that code.
    if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
      LegalizeStrat.EVLParamStrategy = VPLegalization::Discard;
    return;
  }

  // We have to preserve the predicating effect of %evl for this
  // non-speculatable VP intrinsic.
  // 1) Never discard %evl.
  // 2) If this VP intrinsic will be expanded to non-VP code, make sure that
  //    %evl gets folded into %mask.
  if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) ||
      (LegalizeStrat.OpStrategy == VPLegalization::Convert)) {
    LegalizeStrat.EVLParamStrategy = VPLegalization::Convert;
  }
}

VPLegalization
CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
  auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
  if (LLVM_LIKELY(!UsingTTIOverrides)) {
    // No overrides - we are in production.
    return VPStrat;
  }

  // Overrides set - we are in testing, the following does not need to be
  // efficient.
  VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride);
  VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
  return VPStrat;
}

VPExpansionDetails
CachingVPExpander::expandVectorPredication(VPIntrinsic &VPI) {
  auto Strategy = getVPLegalizationStrategy(VPI);
  sanitizeStrategy(VPI, Strategy);

  VPExpansionDetails Changed = VPExpansionDetails::IntrinsicUnchanged;

  // Transform the EVL parameter.
  switch (Strategy.EVLParamStrategy) {
  case VPLegalization::Legal:
    break;
  case VPLegalization::Discard:
    if (discardEVLParameter(VPI))
      Changed = VPExpansionDetails::IntrinsicUpdated;
    break;
  case VPLegalization::Convert:
    if (auto [NewVPI, Folded] = foldEVLIntoMask(VPI); Folded) {
      (void)NewVPI;
      Changed = VPExpansionDetails::IntrinsicUpdated;
      ++NumFoldedVL;
    }
    break;
  }

  // Replace with a non-predicated operation.
  switch (Strategy.OpStrategy) {
  case VPLegalization::Legal:
    break;
  case VPLegalization::Discard:
    llvm_unreachable("Invalid strategy for operators.");
  case VPLegalization::Convert:
    if (Value *V = expandPredication(VPI); V != &VPI) {
      ++NumLoweredVPOps;
      Changed = VPExpansionDetails::IntrinsicReplaced;
    }
    break;
  }

  return Changed;
}
} // namespace

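// A sketch of how a caller might drive this entry point (illustrative only;
// the TTI reference and the instruction visitation loop come from the calling
// pass):
//   for (Instruction &I : llvm::make_early_inc_range(instructions(F)))
//     if (auto *VPI = dyn_cast<VPIntrinsic>(&I))
//       Details = expandVectorPredicationIntrinsic(*VPI, TTI);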
VPExpansionDetails
llvm::expandVectorPredicationIntrinsic(VPIntrinsic &VPI,
                                       const TargetTransformInfo &TTI) {
  return CachingVPExpander(TTI).expandVectorPredication(VPI);
}