| 1 | //===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | // | 
|---|
| 9 | // This file implements IR expansion for vector predication intrinsics, allowing | 
|---|
| 10 | // targets to enable vector predication until just before codegen. | 
|---|
| 11 | // | 
|---|
| 12 | //===----------------------------------------------------------------------===// | 
|---|
| 13 |  | 
|---|
| 14 | #include "llvm/CodeGen/ExpandVectorPredication.h" | 
|---|
| 15 | #include "llvm/ADT/Statistic.h" | 
|---|
| 16 | #include "llvm/Analysis/TargetTransformInfo.h" | 
|---|
| 17 | #include "llvm/Analysis/ValueTracking.h" | 
|---|
| 18 | #include "llvm/Analysis/VectorUtils.h" | 
|---|
| 19 | #include "llvm/IR/Constants.h" | 
|---|
| 20 | #include "llvm/IR/Function.h" | 
|---|
| 21 | #include "llvm/IR/IRBuilder.h" | 
|---|
| 22 | #include "llvm/IR/Instructions.h" | 
|---|
| 23 | #include "llvm/IR/IntrinsicInst.h" | 
|---|
| 24 | #include "llvm/IR/Intrinsics.h" | 
|---|
| 25 | #include "llvm/Support/CommandLine.h" | 
|---|
| 26 | #include "llvm/Support/Compiler.h" | 
|---|
| 27 | #include "llvm/Support/Debug.h" | 
|---|
| 28 | #include "llvm/Transforms/Utils/LoopUtils.h" | 
|---|
| 29 | #include <optional> | 
|---|
| 30 |  | 
|---|
| 31 | using namespace llvm; | 
|---|
| 32 |  | 
|---|
| 33 | using VPLegalization = TargetTransformInfo::VPLegalization; | 
|---|
| 34 | using VPTransform = TargetTransformInfo::VPLegalization::VPTransform; | 
|---|
| 35 |  | 
|---|
| 36 | // Keep this in sync with TargetTransformInfo::VPLegalization. | 
|---|
| 37 | #define VPINTERNAL_VPLEGAL_CASES                                               \ | 
|---|
| 38 | VPINTERNAL_CASE(Legal)                                                       \ | 
|---|
| 39 | VPINTERNAL_CASE(Discard)                                                     \ | 
|---|
| 40 | VPINTERNAL_CASE(Convert) | 
|---|
| 41 |  | 
|---|
| 42 | #define VPINTERNAL_CASE(X) "|" #X | 
|---|
| 43 |  | 
|---|
| 44 | // Override options. | 
|---|
| 45 | static cl::opt<std::string> EVLTransformOverride( | 
|---|
| 46 | "expandvp-override-evl-transform", cl::init(Val: ""), cl::Hidden, | 
|---|
| 47 | cl::desc( "Options: <empty>"VPINTERNAL_VPLEGAL_CASES | 
|---|
| 48 | ". If non-empty, ignore " | 
|---|
| 49 | "TargetTransformInfo and " | 
|---|
| 50 | "always use this transformation for the %evl parameter (Used in " | 
|---|
| 51 | "testing).")); | 
|---|
| 52 |  | 
|---|
| 53 | static cl::opt<std::string> MaskTransformOverride( | 
|---|
| 54 | "expandvp-override-mask-transform", cl::init(Val: ""), cl::Hidden, | 
|---|
| 55 | cl::desc( "Options: <empty>"VPINTERNAL_VPLEGAL_CASES | 
|---|
| 56 | ". If non-empty, Ignore " | 
|---|
| 57 | "TargetTransformInfo and " | 
|---|
| 58 | "always use this transformation for the %mask parameter (Used in " | 
|---|
| 59 | "testing).")); | 
|---|
| 60 |  | 
|---|
| 61 | #undef VPINTERNAL_CASE | 
|---|
| 62 | #define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X) | 
|---|
| 63 |  | 
|---|
| 64 | static VPTransform parseOverrideOption(const std::string &TextOpt) { | 
|---|
| 65 | return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES; | 
|---|
| 66 | } | 
|---|
| 67 |  | 
|---|
| 68 | #undef VPINTERNAL_VPLEGAL_CASES | 
|---|
| 69 |  | 
|---|
| 70 | // Whether any override options are set. | 
|---|
| 71 | static bool anyExpandVPOverridesSet() { | 
|---|
| 72 | return !EVLTransformOverride.empty() || !MaskTransformOverride.empty(); | 
|---|
| 73 | } | 
|---|
| 74 |  | 
|---|
| 75 | #define DEBUG_TYPE "expandvp" | 
|---|
| 76 |  | 
|---|
| 77 | STATISTIC(NumFoldedVL, "Number of folded vector length params"); | 
|---|
| 78 | STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations"); | 
|---|
| 79 |  | 
|---|
| 80 | ///// Helpers { | 
|---|
| 81 |  | 
|---|
| 82 | /// \returns Whether the vector mask \p MaskVal has all lane bits set. | 
|---|
| 83 | static bool isAllTrueMask(Value *MaskVal) { | 
|---|
| 84 | if (Value *SplattedVal = getSplatValue(V: MaskVal)) | 
|---|
| 85 | if (auto *ConstValue = dyn_cast<Constant>(Val: SplattedVal)) | 
|---|
| 86 | return ConstValue->isAllOnesValue(); | 
|---|
| 87 |  | 
|---|
| 88 | return false; | 
|---|
| 89 | } | 
|---|
| 90 |  | 
|---|
| 91 | /// \returns A non-excepting divisor constant for this type. | 
|---|
| 92 | static Constant *getSafeDivisor(Type *DivTy) { | 
|---|
| 93 | assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type"); | 
|---|
| 94 | return ConstantInt::get(Ty: DivTy, V: 1u, IsSigned: false); | 
|---|
| 95 | } | 
|---|
| 96 |  | 
|---|
| 97 | /// Transfer operation properties from \p OldVPI to \p NewVal. | 
|---|
| 98 | static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) { | 
|---|
| 99 | auto *NewInst = dyn_cast<Instruction>(Val: &NewVal); | 
|---|
| 100 | if (!NewInst || !isa<FPMathOperator>(Val: NewVal)) | 
|---|
| 101 | return; | 
|---|
| 102 |  | 
|---|
| 103 | auto *OldFMOp = dyn_cast<FPMathOperator>(Val: &VPI); | 
|---|
| 104 | if (!OldFMOp) | 
|---|
| 105 | return; | 
|---|
| 106 |  | 
|---|
| 107 | NewInst->setFastMathFlags(OldFMOp->getFastMathFlags()); | 
|---|
| 108 | } | 
|---|
| 109 |  | 
|---|
| 110 | /// Transfer all properties from \p OldOp to \p NewOp and replace all uses. | 
|---|
| 111 | /// OldVP gets erased. | 
|---|
| 112 | static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) { | 
|---|
| 113 | transferDecorations(NewVal&: NewOp, VPI&: OldOp); | 
|---|
| 114 | OldOp.replaceAllUsesWith(V: &NewOp); | 
|---|
| 115 | OldOp.eraseFromParent(); | 
|---|
| 116 | } | 
|---|
| 117 |  | 
|---|
| 118 | static bool maySpeculateLanes(VPIntrinsic &VPI) { | 
|---|
| 119 | // The result of VP reductions depends on the mask and evl. | 
|---|
| 120 | if (isa<VPReductionIntrinsic>(Val: VPI)) | 
|---|
| 121 | return false; | 
|---|
| 122 | // Fallback to whether the intrinsic is speculatable. | 
|---|
| 123 | if (auto IntrID = VPI.getFunctionalIntrinsicID()) | 
|---|
| 124 | return Intrinsic::getFnAttributes(C&: VPI.getContext(), id: *IntrID) | 
|---|
| 125 | .hasAttribute(Kind: Attribute::AttrKind::Speculatable); | 
|---|
| 126 | if (auto Opc = VPI.getFunctionalOpcode()) | 
|---|
| 127 | return isSafeToSpeculativelyExecuteWithOpcode(Opcode: *Opc, Inst: &VPI); | 
|---|
| 128 | return false; | 
|---|
| 129 | } | 
|---|
| 130 |  | 
|---|
| 131 | //// } Helpers | 
|---|
| 132 |  | 
|---|
| 133 | namespace { | 
|---|
| 134 |  | 
|---|
| 135 | // Expansion pass state at function scope. | 
|---|
| 136 | struct CachingVPExpander { | 
|---|
| 137 | const TargetTransformInfo &TTI; | 
|---|
| 138 |  | 
|---|
| 139 | /// \returns A bitmask that is true where the lane position is less-than \p | 
|---|
| 140 | /// EVLParam | 
|---|
| 141 | /// | 
|---|
| 142 | /// \p Builder | 
|---|
| 143 | ///    Used for instruction creation. | 
|---|
| 144 | /// \p VLParam | 
|---|
| 145 | ///    The explicit vector length parameter to test against the lane | 
|---|
| 146 | ///    positions. | 
|---|
| 147 | /// \p ElemCount | 
|---|
| 148 | ///    Static (potentially scalable) number of vector elements. | 
|---|
| 149 | Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam, | 
|---|
| 150 | ElementCount ElemCount); | 
|---|
| 151 |  | 
|---|
| 152 | /// If needed, folds the EVL in the mask operand and discards the EVL | 
|---|
| 153 | /// parameter. Returns a pair of the value of the intrinsic after the change | 
|---|
| 154 | /// (if any) and whether the mask was actually folded. | 
|---|
| 155 | std::pair<Value *, bool> foldEVLIntoMask(VPIntrinsic &VPI); | 
|---|
| 156 |  | 
|---|
| 157 | /// "Remove" the %evl parameter of \p PI by setting it to the static vector | 
|---|
| 158 | /// length of the operation. Returns true if the %evl (if any) was effectively | 
|---|
| 159 | /// changed. | 
|---|
| 160 | bool discardEVLParameter(VPIntrinsic &PI); | 
|---|
| 161 |  | 
|---|
| 162 | /// Lower this VP binary operator to a unpredicated binary operator. | 
|---|
| 163 | Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder, | 
|---|
| 164 | VPIntrinsic &PI); | 
|---|
| 165 |  | 
|---|
| 166 | /// Lower this VP int call to a unpredicated int call. | 
|---|
| 167 | Value *expandPredicationToIntCall(IRBuilder<> &Builder, VPIntrinsic &PI); | 
|---|
| 168 |  | 
|---|
| 169 | /// Lower this VP fp call to a unpredicated fp call. | 
|---|
| 170 | Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI, | 
|---|
| 171 | unsigned UnpredicatedIntrinsicID); | 
|---|
| 172 |  | 
|---|
| 173 | /// Lower this VP reduction to a call to an unpredicated reduction intrinsic. | 
|---|
| 174 | Value *expandPredicationInReduction(IRBuilder<> &Builder, | 
|---|
| 175 | VPReductionIntrinsic &PI); | 
|---|
| 176 |  | 
|---|
| 177 | /// Lower this VP cast operation to a non-VP intrinsic. | 
|---|
| 178 | Value *expandPredicationToCastIntrinsic(IRBuilder<> &Builder, | 
|---|
| 179 | VPIntrinsic &VPI); | 
|---|
| 180 |  | 
|---|
| 181 | /// Lower this VP memory operation to a non-VP intrinsic. | 
|---|
| 182 | Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder, | 
|---|
| 183 | VPIntrinsic &VPI); | 
|---|
| 184 |  | 
|---|
| 185 | /// Lower this VP comparison to a call to an unpredicated comparison. | 
|---|
| 186 | Value *expandPredicationInComparison(IRBuilder<> &Builder, | 
|---|
| 187 | VPCmpIntrinsic &PI); | 
|---|
| 188 |  | 
|---|
| 189 | /// Query TTI and expand the vector predication in \p P accordingly. | 
|---|
| 190 | Value *expandPredication(VPIntrinsic &PI); | 
|---|
| 191 |  | 
|---|
| 192 | /// Determine how and whether the VPIntrinsic \p VPI shall be expanded. This | 
|---|
| 193 | /// overrides TTI with the cl::opts listed at the top of this file. | 
|---|
| 194 | VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const; | 
|---|
| 195 | bool UsingTTIOverrides; | 
|---|
| 196 |  | 
|---|
| 197 | public: | 
|---|
| 198 | CachingVPExpander(const TargetTransformInfo &TTI) | 
|---|
| 199 | : TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {} | 
|---|
| 200 |  | 
|---|
| 201 | /// Expand llvm.vp.* intrinsics as requested by \p TTI. | 
|---|
| 202 | /// Returns the details of the expansion. | 
|---|
| 203 | VPExpansionDetails expandVectorPredication(VPIntrinsic &VPI); | 
|---|
| 204 | }; | 
|---|
| 205 |  | 
|---|
| 206 | //// CachingVPExpander { | 
|---|
| 207 |  | 
|---|
| 208 | Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder, | 
|---|
| 209 | Value *EVLParam, | 
|---|
| 210 | ElementCount ElemCount) { | 
|---|
| 211 | // TODO add caching | 
|---|
| 212 | // Scalable vector %evl conversion. | 
|---|
| 213 | if (ElemCount.isScalable()) { | 
|---|
| 214 | Type *BoolVecTy = VectorType::get(ElementType: Builder.getInt1Ty(), EC: ElemCount); | 
|---|
| 215 | // `get_active_lane_mask` performs an implicit less-than comparison. | 
|---|
| 216 | Value *ConstZero = Builder.getInt32(C: 0); | 
|---|
| 217 | return Builder.CreateIntrinsic(ID: Intrinsic::get_active_lane_mask, | 
|---|
| 218 | Types: {BoolVecTy, EVLParam->getType()}, | 
|---|
| 219 | Args: {ConstZero, EVLParam}); | 
|---|
| 220 | } | 
|---|
| 221 |  | 
|---|
| 222 | // Fixed vector %evl conversion. | 
|---|
| 223 | Type *LaneTy = EVLParam->getType(); | 
|---|
| 224 | unsigned NumElems = ElemCount.getFixedValue(); | 
|---|
| 225 | Value *VLSplat = Builder.CreateVectorSplat(NumElts: NumElems, V: EVLParam); | 
|---|
| 226 | Value *IdxVec = Builder.CreateStepVector(DstType: VectorType::get(ElementType: LaneTy, EC: ElemCount)); | 
|---|
| 227 | return Builder.CreateICmp(P: CmpInst::ICMP_ULT, LHS: IdxVec, RHS: VLSplat); | 
|---|
| 228 | } | 
|---|
| 229 |  | 
|---|
| 230 | Value * | 
|---|
| 231 | CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder, | 
|---|
| 232 | VPIntrinsic &VPI) { | 
|---|
| 233 | assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && | 
|---|
| 234 | "Implicitly dropping %evl in non-speculatable operator!"); | 
|---|
| 235 |  | 
|---|
| 236 | auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode()); | 
|---|
| 237 | assert(Instruction::isBinaryOp(OC)); | 
|---|
| 238 |  | 
|---|
| 239 | Value *Op0 = VPI.getOperand(i_nocapture: 0); | 
|---|
| 240 | Value *Op1 = VPI.getOperand(i_nocapture: 1); | 
|---|
| 241 | Value *Mask = VPI.getMaskParam(); | 
|---|
| 242 |  | 
|---|
| 243 | // Blend in safe operands. | 
|---|
| 244 | if (Mask && !isAllTrueMask(MaskVal: Mask)) { | 
|---|
| 245 | switch (OC) { | 
|---|
| 246 | default: | 
|---|
| 247 | // Can safely ignore the predicate. | 
|---|
| 248 | break; | 
|---|
| 249 |  | 
|---|
| 250 | // Division operators need a safe divisor on masked-off lanes (1). | 
|---|
| 251 | case Instruction::UDiv: | 
|---|
| 252 | case Instruction::SDiv: | 
|---|
| 253 | case Instruction::URem: | 
|---|
| 254 | case Instruction::SRem: | 
|---|
| 255 | // 2nd operand must not be zero. | 
|---|
| 256 | Value *SafeDivisor = getSafeDivisor(DivTy: VPI.getType()); | 
|---|
| 257 | Op1 = Builder.CreateSelect(C: Mask, True: Op1, False: SafeDivisor); | 
|---|
| 258 | } | 
|---|
| 259 | } | 
|---|
| 260 |  | 
|---|
| 261 | Value *NewBinOp = Builder.CreateBinOp(Opc: OC, LHS: Op0, RHS: Op1, Name: VPI.getName()); | 
|---|
| 262 |  | 
|---|
| 263 | replaceOperation(NewOp&: *NewBinOp, OldOp&: VPI); | 
|---|
| 264 | return NewBinOp; | 
|---|
| 265 | } | 
|---|
| 266 |  | 
|---|
| 267 | Value *CachingVPExpander::expandPredicationToIntCall(IRBuilder<> &Builder, | 
|---|
| 268 | VPIntrinsic &VPI) { | 
|---|
| 269 | std::optional<unsigned> FID = VPI.getFunctionalIntrinsicID(); | 
|---|
| 270 | if (!FID) | 
|---|
| 271 | return nullptr; | 
|---|
| 272 | SmallVector<Value *, 2> Argument; | 
|---|
| 273 | for (unsigned i = 0; i < VPI.getNumOperands() - 3; i++) { | 
|---|
| 274 | Argument.push_back(Elt: VPI.getOperand(i_nocapture: i)); | 
|---|
| 275 | } | 
|---|
| 276 | Value *NewOp = Builder.CreateIntrinsic(ID: FID.value(), Types: {VPI.getType()}, Args: Argument, | 
|---|
| 277 | /*FMFSource=*/nullptr, Name: VPI.getName()); | 
|---|
| 278 | replaceOperation(NewOp&: *NewOp, OldOp&: VPI); | 
|---|
| 279 | return NewOp; | 
|---|
| 280 | } | 
|---|
| 281 |  | 
|---|
| 282 | Value *CachingVPExpander::expandPredicationToFPCall( | 
|---|
| 283 | IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) { | 
|---|
| 284 | assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && | 
|---|
| 285 | "Implicitly dropping %evl in non-speculatable operator!"); | 
|---|
| 286 |  | 
|---|
| 287 | switch (UnpredicatedIntrinsicID) { | 
|---|
| 288 | case Intrinsic::fabs: | 
|---|
| 289 | case Intrinsic::sqrt: | 
|---|
| 290 | case Intrinsic::maxnum: | 
|---|
| 291 | case Intrinsic::minnum: { | 
|---|
| 292 | SmallVector<Value *, 2> Argument; | 
|---|
| 293 | for (unsigned i = 0; i < VPI.getNumOperands() - 3; i++) { | 
|---|
| 294 | Argument.push_back(Elt: VPI.getOperand(i_nocapture: i)); | 
|---|
| 295 | } | 
|---|
| 296 | Value *NewOp = Builder.CreateIntrinsic( | 
|---|
| 297 | ID: UnpredicatedIntrinsicID, Types: {VPI.getType()}, Args: Argument, | 
|---|
| 298 | /*FMFSource=*/nullptr, Name: VPI.getName()); | 
|---|
| 299 | replaceOperation(NewOp&: *NewOp, OldOp&: VPI); | 
|---|
| 300 | return NewOp; | 
|---|
| 301 | } | 
|---|
| 302 | case Intrinsic::fma: | 
|---|
| 303 | case Intrinsic::fmuladd: | 
|---|
| 304 | case Intrinsic::experimental_constrained_fma: | 
|---|
| 305 | case Intrinsic::experimental_constrained_fmuladd: { | 
|---|
| 306 | Value *Op0 = VPI.getOperand(i_nocapture: 0); | 
|---|
| 307 | Value *Op1 = VPI.getOperand(i_nocapture: 1); | 
|---|
| 308 | Value *Op2 = VPI.getOperand(i_nocapture: 2); | 
|---|
| 309 | Function *Fn = Intrinsic::getOrInsertDeclaration( | 
|---|
| 310 | M: VPI.getModule(), id: UnpredicatedIntrinsicID, Tys: {VPI.getType()}); | 
|---|
| 311 | Value *NewOp; | 
|---|
| 312 | if (Intrinsic::isConstrainedFPIntrinsic(QID: UnpredicatedIntrinsicID)) | 
|---|
| 313 | NewOp = | 
|---|
| 314 | Builder.CreateConstrainedFPCall(Callee: Fn, Args: {Op0, Op1, Op2}, Name: VPI.getName()); | 
|---|
| 315 | else | 
|---|
| 316 | NewOp = Builder.CreateCall(Callee: Fn, Args: {Op0, Op1, Op2}, Name: VPI.getName()); | 
|---|
| 317 | replaceOperation(NewOp&: *NewOp, OldOp&: VPI); | 
|---|
| 318 | return NewOp; | 
|---|
| 319 | } | 
|---|
| 320 | } | 
|---|
| 321 |  | 
|---|
| 322 | return nullptr; | 
|---|
| 323 | } | 
|---|
| 324 |  | 
|---|
| 325 | static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI, | 
|---|
| 326 | Type *EltTy) { | 
|---|
| 327 | Intrinsic::ID RdxID = *VPI.getFunctionalIntrinsicID(); | 
|---|
| 328 | FastMathFlags FMF; | 
|---|
| 329 | if (isa<FPMathOperator>(Val: VPI)) | 
|---|
| 330 | FMF = VPI.getFastMathFlags(); | 
|---|
| 331 | return getReductionIdentity(RdxID, Ty: EltTy, FMF); | 
|---|
| 332 | } | 
|---|
| 333 |  | 
|---|
| 334 | Value * | 
|---|
| 335 | CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder, | 
|---|
| 336 | VPReductionIntrinsic &VPI) { | 
|---|
| 337 | assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && | 
|---|
| 338 | "Implicitly dropping %evl in non-speculatable operator!"); | 
|---|
| 339 |  | 
|---|
| 340 | Value *Mask = VPI.getMaskParam(); | 
|---|
| 341 | Value *RedOp = VPI.getOperand(i_nocapture: VPI.getVectorParamPos()); | 
|---|
| 342 |  | 
|---|
| 343 | // Insert neutral element in masked-out positions | 
|---|
| 344 | if (Mask && !isAllTrueMask(MaskVal: Mask)) { | 
|---|
| 345 | auto *NeutralElt = getNeutralReductionElement(VPI, EltTy: VPI.getType()); | 
|---|
| 346 | auto *NeutralVector = Builder.CreateVectorSplat( | 
|---|
| 347 | EC: cast<VectorType>(Val: RedOp->getType())->getElementCount(), V: NeutralElt); | 
|---|
| 348 | RedOp = Builder.CreateSelect(C: Mask, True: RedOp, False: NeutralVector); | 
|---|
| 349 | } | 
|---|
| 350 |  | 
|---|
| 351 | Value *Reduction; | 
|---|
| 352 | Value *Start = VPI.getOperand(i_nocapture: VPI.getStartParamPos()); | 
|---|
| 353 |  | 
|---|
| 354 | switch (VPI.getIntrinsicID()) { | 
|---|
| 355 | default: | 
|---|
| 356 | llvm_unreachable( "Impossible reduction kind"); | 
|---|
| 357 | case Intrinsic::vp_reduce_add: | 
|---|
| 358 | case Intrinsic::vp_reduce_mul: | 
|---|
| 359 | case Intrinsic::vp_reduce_and: | 
|---|
| 360 | case Intrinsic::vp_reduce_or: | 
|---|
| 361 | case Intrinsic::vp_reduce_xor: { | 
|---|
| 362 | Intrinsic::ID RedID = *VPI.getFunctionalIntrinsicID(); | 
|---|
| 363 | unsigned Opc = getArithmeticReductionInstruction(RdxID: RedID); | 
|---|
| 364 | assert(Instruction::isBinaryOp(Opc)); | 
|---|
| 365 | Reduction = Builder.CreateUnaryIntrinsic(ID: RedID, V: RedOp); | 
|---|
| 366 | Reduction = | 
|---|
| 367 | Builder.CreateBinOp(Opc: (Instruction::BinaryOps)Opc, LHS: Reduction, RHS: Start); | 
|---|
| 368 | break; | 
|---|
| 369 | } | 
|---|
| 370 | case Intrinsic::vp_reduce_smax: | 
|---|
| 371 | case Intrinsic::vp_reduce_smin: | 
|---|
| 372 | case Intrinsic::vp_reduce_umax: | 
|---|
| 373 | case Intrinsic::vp_reduce_umin: | 
|---|
| 374 | case Intrinsic::vp_reduce_fmax: | 
|---|
| 375 | case Intrinsic::vp_reduce_fmin: | 
|---|
| 376 | case Intrinsic::vp_reduce_fmaximum: | 
|---|
| 377 | case Intrinsic::vp_reduce_fminimum: { | 
|---|
| 378 | Intrinsic::ID RedID = *VPI.getFunctionalIntrinsicID(); | 
|---|
| 379 | Intrinsic::ID ScalarID = getMinMaxReductionIntrinsicOp(RdxID: RedID); | 
|---|
| 380 | Reduction = Builder.CreateUnaryIntrinsic(ID: RedID, V: RedOp); | 
|---|
| 381 | transferDecorations(NewVal&: *Reduction, VPI); | 
|---|
| 382 | Reduction = Builder.CreateBinaryIntrinsic(ID: ScalarID, LHS: Reduction, RHS: Start); | 
|---|
| 383 | break; | 
|---|
| 384 | } | 
|---|
| 385 | case Intrinsic::vp_reduce_fadd: | 
|---|
| 386 | Reduction = Builder.CreateFAddReduce(Acc: Start, Src: RedOp); | 
|---|
| 387 | break; | 
|---|
| 388 | case Intrinsic::vp_reduce_fmul: | 
|---|
| 389 | Reduction = Builder.CreateFMulReduce(Acc: Start, Src: RedOp); | 
|---|
| 390 | break; | 
|---|
| 391 | } | 
|---|
| 392 |  | 
|---|
| 393 | replaceOperation(NewOp&: *Reduction, OldOp&: VPI); | 
|---|
| 394 | return Reduction; | 
|---|
| 395 | } | 
|---|
| 396 |  | 
|---|
| 397 | Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder, | 
|---|
| 398 | VPIntrinsic &VPI) { | 
|---|
| 399 | Intrinsic::ID VPID = VPI.getIntrinsicID(); | 
|---|
| 400 | unsigned CastOpcode = VPIntrinsic::getFunctionalOpcodeForVP(ID: VPID).value(); | 
|---|
| 401 | assert(Instruction::isCast(CastOpcode)); | 
|---|
| 402 | Value *CastOp = | 
|---|
| 403 | Builder.CreateCast(Op: Instruction::CastOps(CastOpcode), V: VPI.getOperand(i_nocapture: 0), | 
|---|
| 404 | DestTy: VPI.getType(), Name: VPI.getName()); | 
|---|
| 405 |  | 
|---|
| 406 | replaceOperation(NewOp&: *CastOp, OldOp&: VPI); | 
|---|
| 407 | return CastOp; | 
|---|
| 408 | } | 
|---|
| 409 |  | 
|---|
| 410 | Value * | 
|---|
| 411 | CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder, | 
|---|
| 412 | VPIntrinsic &VPI) { | 
|---|
| 413 | assert(VPI.canIgnoreVectorLengthParam()); | 
|---|
| 414 |  | 
|---|
| 415 | const auto &DL = VPI.getDataLayout(); | 
|---|
| 416 |  | 
|---|
| 417 | Value *MaskParam = VPI.getMaskParam(); | 
|---|
| 418 | Value *PtrParam = VPI.getMemoryPointerParam(); | 
|---|
| 419 | Value *DataParam = VPI.getMemoryDataParam(); | 
|---|
| 420 | bool IsUnmasked = isAllTrueMask(MaskVal: MaskParam); | 
|---|
| 421 |  | 
|---|
| 422 | MaybeAlign AlignOpt = VPI.getPointerAlignment(); | 
|---|
| 423 |  | 
|---|
| 424 | Value *NewMemoryInst = nullptr; | 
|---|
| 425 | switch (VPI.getIntrinsicID()) { | 
|---|
| 426 | default: | 
|---|
| 427 | llvm_unreachable( "Not a VP memory intrinsic"); | 
|---|
| 428 | case Intrinsic::vp_store: | 
|---|
| 429 | if (IsUnmasked) { | 
|---|
| 430 | StoreInst *NewStore = | 
|---|
| 431 | Builder.CreateStore(Val: DataParam, Ptr: PtrParam, /*IsVolatile*/ isVolatile: false); | 
|---|
| 432 | if (AlignOpt.has_value()) | 
|---|
| 433 | NewStore->setAlignment(*AlignOpt); | 
|---|
| 434 | NewMemoryInst = NewStore; | 
|---|
| 435 | } else | 
|---|
| 436 | NewMemoryInst = Builder.CreateMaskedStore( | 
|---|
| 437 | Val: DataParam, Ptr: PtrParam, Alignment: AlignOpt.valueOrOne(), Mask: MaskParam); | 
|---|
| 438 |  | 
|---|
| 439 | break; | 
|---|
| 440 | case Intrinsic::vp_load: | 
|---|
| 441 | if (IsUnmasked) { | 
|---|
| 442 | LoadInst *NewLoad = | 
|---|
| 443 | Builder.CreateLoad(Ty: VPI.getType(), Ptr: PtrParam, /*IsVolatile*/ isVolatile: false); | 
|---|
| 444 | if (AlignOpt.has_value()) | 
|---|
| 445 | NewLoad->setAlignment(*AlignOpt); | 
|---|
| 446 | NewMemoryInst = NewLoad; | 
|---|
| 447 | } else | 
|---|
| 448 | NewMemoryInst = Builder.CreateMaskedLoad( | 
|---|
| 449 | Ty: VPI.getType(), Ptr: PtrParam, Alignment: AlignOpt.valueOrOne(), Mask: MaskParam); | 
|---|
| 450 |  | 
|---|
| 451 | break; | 
|---|
| 452 | case Intrinsic::vp_scatter: { | 
|---|
| 453 | auto *ElementType = | 
|---|
| 454 | cast<VectorType>(Val: DataParam->getType())->getElementType(); | 
|---|
| 455 | NewMemoryInst = Builder.CreateMaskedScatter( | 
|---|
| 456 | Val: DataParam, Ptrs: PtrParam, | 
|---|
| 457 | Alignment: AlignOpt.value_or(u: DL.getPrefTypeAlign(Ty: ElementType)), Mask: MaskParam); | 
|---|
| 458 | break; | 
|---|
| 459 | } | 
|---|
| 460 | case Intrinsic::vp_gather: { | 
|---|
| 461 | auto *ElementType = cast<VectorType>(Val: VPI.getType())->getElementType(); | 
|---|
| 462 | NewMemoryInst = Builder.CreateMaskedGather( | 
|---|
| 463 | Ty: VPI.getType(), Ptrs: PtrParam, | 
|---|
| 464 | Alignment: AlignOpt.value_or(u: DL.getPrefTypeAlign(Ty: ElementType)), Mask: MaskParam, PassThru: nullptr, | 
|---|
| 465 | Name: VPI.getName()); | 
|---|
| 466 | break; | 
|---|
| 467 | } | 
|---|
| 468 | } | 
|---|
| 469 |  | 
|---|
| 470 | assert(NewMemoryInst); | 
|---|
| 471 | replaceOperation(NewOp&: *NewMemoryInst, OldOp&: VPI); | 
|---|
| 472 | return NewMemoryInst; | 
|---|
| 473 | } | 
|---|
| 474 |  | 
|---|
| 475 | Value *CachingVPExpander::expandPredicationInComparison(IRBuilder<> &Builder, | 
|---|
| 476 | VPCmpIntrinsic &VPI) { | 
|---|
| 477 | assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && | 
|---|
| 478 | "Implicitly dropping %evl in non-speculatable operator!"); | 
|---|
| 479 |  | 
|---|
| 480 | assert(*VPI.getFunctionalOpcode() == Instruction::ICmp || | 
|---|
| 481 | *VPI.getFunctionalOpcode() == Instruction::FCmp); | 
|---|
| 482 |  | 
|---|
| 483 | Value *Op0 = VPI.getOperand(i_nocapture: 0); | 
|---|
| 484 | Value *Op1 = VPI.getOperand(i_nocapture: 1); | 
|---|
| 485 | auto Pred = VPI.getPredicate(); | 
|---|
| 486 |  | 
|---|
| 487 | auto *NewCmp = Builder.CreateCmp(Pred, LHS: Op0, RHS: Op1); | 
|---|
| 488 |  | 
|---|
| 489 | replaceOperation(NewOp&: *NewCmp, OldOp&: VPI); | 
|---|
| 490 | return NewCmp; | 
|---|
| 491 | } | 
|---|
| 492 |  | 
|---|
| 493 | bool CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) { | 
|---|
| 494 | LLVM_DEBUG(dbgs() << "Discard EVL parameter in "<< VPI << "\n"); | 
|---|
| 495 |  | 
|---|
| 496 | if (VPI.canIgnoreVectorLengthParam()) | 
|---|
| 497 | return false; | 
|---|
| 498 |  | 
|---|
| 499 | Value *EVLParam = VPI.getVectorLengthParam(); | 
|---|
| 500 | if (!EVLParam) | 
|---|
| 501 | return false; | 
|---|
| 502 |  | 
|---|
| 503 | ElementCount StaticElemCount = VPI.getStaticVectorLength(); | 
|---|
| 504 | Value *MaxEVL = nullptr; | 
|---|
| 505 | Type *Int32Ty = Type::getInt32Ty(C&: VPI.getContext()); | 
|---|
| 506 | if (StaticElemCount.isScalable()) { | 
|---|
| 507 | // TODO add caching | 
|---|
| 508 | IRBuilder<> Builder(VPI.getParent(), VPI.getIterator()); | 
|---|
| 509 | Value *FactorConst = Builder.getInt32(C: StaticElemCount.getKnownMinValue()); | 
|---|
| 510 | Value *VScale = Builder.CreateVScale(Ty: Int32Ty, Name: "vscale"); | 
|---|
| 511 | MaxEVL = Builder.CreateMul(LHS: VScale, RHS: FactorConst, Name: "scalable_size", | 
|---|
| 512 | /*NUW*/ HasNUW: true, /*NSW*/ HasNSW: false); | 
|---|
| 513 | } else { | 
|---|
| 514 | MaxEVL = ConstantInt::get(Ty: Int32Ty, V: StaticElemCount.getFixedValue(), IsSigned: false); | 
|---|
| 515 | } | 
|---|
| 516 | VPI.setVectorLengthParam(MaxEVL); | 
|---|
| 517 | return true; | 
|---|
| 518 | } | 
|---|
| 519 |  | 
|---|
| 520 | std::pair<Value *, bool> CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) { | 
|---|
| 521 | LLVM_DEBUG(dbgs() << "Folding vlen for "<< VPI << '\n'); | 
|---|
| 522 |  | 
|---|
| 523 | IRBuilder<> Builder(&VPI); | 
|---|
| 524 |  | 
|---|
| 525 | // Ineffective %evl parameter and so nothing to do here. | 
|---|
| 526 | if (VPI.canIgnoreVectorLengthParam()) | 
|---|
| 527 | return {&VPI, false}; | 
|---|
| 528 |  | 
|---|
| 529 | // Only VP intrinsics can have an %evl parameter. | 
|---|
| 530 | Value *OldMaskParam = VPI.getMaskParam(); | 
|---|
| 531 | Value *OldEVLParam = VPI.getVectorLengthParam(); | 
|---|
| 532 | assert(OldMaskParam && "no mask param to fold the vl param into"); | 
|---|
| 533 | assert(OldEVLParam && "no EVL param to fold away"); | 
|---|
| 534 |  | 
|---|
| 535 | LLVM_DEBUG(dbgs() << "OLD evl: "<< *OldEVLParam << '\n'); | 
|---|
| 536 | LLVM_DEBUG(dbgs() << "OLD mask: "<< *OldMaskParam << '\n'); | 
|---|
| 537 |  | 
|---|
| 538 | // Convert the %evl predication into vector mask predication. | 
|---|
| 539 | ElementCount ElemCount = VPI.getStaticVectorLength(); | 
|---|
| 540 | Value *VLMask = convertEVLToMask(Builder, EVLParam: OldEVLParam, ElemCount); | 
|---|
| 541 | Value *NewMaskParam = Builder.CreateAnd(LHS: VLMask, RHS: OldMaskParam); | 
|---|
| 542 | VPI.setMaskParam(NewMaskParam); | 
|---|
| 543 |  | 
|---|
| 544 | // Drop the %evl parameter. | 
|---|
| 545 | discardEVLParameter(VPI); | 
|---|
| 546 | assert(VPI.canIgnoreVectorLengthParam() && | 
|---|
| 547 | "transformation did not render the evl param ineffective!"); | 
|---|
| 548 |  | 
|---|
| 549 | // Reassess the modified instruction. | 
|---|
| 550 | return {&VPI, true}; | 
|---|
| 551 | } | 
|---|
| 552 |  | 
|---|
| 553 | Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) { | 
|---|
| 554 | LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: "<< VPI << '\n'); | 
|---|
| 555 |  | 
|---|
| 556 | IRBuilder<> Builder(&VPI); | 
|---|
| 557 |  | 
|---|
| 558 | // Try lowering to a LLVM instruction first. | 
|---|
| 559 | auto OC = VPI.getFunctionalOpcode(); | 
|---|
| 560 |  | 
|---|
| 561 | if (OC && Instruction::isBinaryOp(Opcode: *OC)) | 
|---|
| 562 | return expandPredicationInBinaryOperator(Builder, VPI); | 
|---|
| 563 |  | 
|---|
| 564 | if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(Val: &VPI)) | 
|---|
| 565 | return expandPredicationInReduction(Builder, VPI&: *VPRI); | 
|---|
| 566 |  | 
|---|
| 567 | if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(Val: &VPI)) | 
|---|
| 568 | return expandPredicationInComparison(Builder, VPI&: *VPCmp); | 
|---|
| 569 |  | 
|---|
| 570 | if (VPCastIntrinsic::isVPCast(ID: VPI.getIntrinsicID())) { | 
|---|
| 571 | return expandPredicationToCastIntrinsic(Builder, VPI); | 
|---|
| 572 | } | 
|---|
| 573 |  | 
|---|
| 574 | switch (VPI.getIntrinsicID()) { | 
|---|
| 575 | default: | 
|---|
| 576 | break; | 
|---|
| 577 | case Intrinsic::vp_fneg: { | 
|---|
| 578 | Value *NewNegOp = Builder.CreateFNeg(V: VPI.getOperand(i_nocapture: 0), Name: VPI.getName()); | 
|---|
| 579 | replaceOperation(NewOp&: *NewNegOp, OldOp&: VPI); | 
|---|
| 580 | return NewNegOp; | 
|---|
| 581 | } | 
|---|
| 582 | case Intrinsic::vp_abs: | 
|---|
| 583 | case Intrinsic::vp_smax: | 
|---|
| 584 | case Intrinsic::vp_smin: | 
|---|
| 585 | case Intrinsic::vp_umax: | 
|---|
| 586 | case Intrinsic::vp_umin: | 
|---|
| 587 | case Intrinsic::vp_bswap: | 
|---|
| 588 | case Intrinsic::vp_bitreverse: | 
|---|
| 589 | case Intrinsic::vp_ctpop: | 
|---|
| 590 | case Intrinsic::vp_ctlz: | 
|---|
| 591 | case Intrinsic::vp_cttz: | 
|---|
| 592 | case Intrinsic::vp_sadd_sat: | 
|---|
| 593 | case Intrinsic::vp_uadd_sat: | 
|---|
| 594 | case Intrinsic::vp_ssub_sat: | 
|---|
| 595 | case Intrinsic::vp_usub_sat: | 
|---|
| 596 | case Intrinsic::vp_fshl: | 
|---|
| 597 | case Intrinsic::vp_fshr: | 
|---|
| 598 | return expandPredicationToIntCall(Builder, VPI); | 
|---|
| 599 | case Intrinsic::vp_fabs: | 
|---|
| 600 | case Intrinsic::vp_sqrt: | 
|---|
| 601 | case Intrinsic::vp_maxnum: | 
|---|
| 602 | case Intrinsic::vp_minnum: | 
|---|
| 603 | case Intrinsic::vp_maximum: | 
|---|
| 604 | case Intrinsic::vp_minimum: | 
|---|
| 605 | case Intrinsic::vp_fma: | 
|---|
| 606 | case Intrinsic::vp_fmuladd: | 
|---|
| 607 | return expandPredicationToFPCall(Builder, VPI, | 
|---|
| 608 | UnpredicatedIntrinsicID: VPI.getFunctionalIntrinsicID().value()); | 
|---|
| 609 | case Intrinsic::vp_load: | 
|---|
| 610 | case Intrinsic::vp_store: | 
|---|
| 611 | case Intrinsic::vp_gather: | 
|---|
| 612 | case Intrinsic::vp_scatter: | 
|---|
| 613 | return expandPredicationInMemoryIntrinsic(Builder, VPI); | 
|---|
| 614 | } | 
|---|
| 615 |  | 
|---|
| 616 | if (auto CID = VPI.getConstrainedIntrinsicID()) | 
|---|
| 617 | if (Value *Call = expandPredicationToFPCall(Builder, VPI, UnpredicatedIntrinsicID: *CID)) | 
|---|
| 618 | return Call; | 
|---|
| 619 |  | 
|---|
| 620 | return &VPI; | 
|---|
| 621 | } | 
|---|
| 622 |  | 
|---|
| 623 | //// } CachingVPExpander | 
|---|
| 624 |  | 
|---|
| 625 | void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) { | 
|---|
| 626 | // Operations with speculatable lanes do not strictly need predication. | 
|---|
| 627 | if (maySpeculateLanes(VPI)) { | 
|---|
| 628 | // Converting a speculatable VP intrinsic means dropping %mask and %evl. | 
|---|
| 629 | // No need to expand %evl into the %mask only to ignore that code. | 
|---|
| 630 | if (LegalizeStrat.OpStrategy == VPLegalization::Convert) | 
|---|
| 631 | LegalizeStrat.EVLParamStrategy = VPLegalization::Discard; | 
|---|
| 632 | return; | 
|---|
| 633 | } | 
|---|
| 634 |  | 
|---|
| 635 | // We have to preserve the predicating effect of %evl for this | 
|---|
| 636 | // non-speculatable VP intrinsic. | 
|---|
| 637 | // 1) Never discard %evl. | 
|---|
| 638 | // 2) If this VP intrinsic will be expanded to non-VP code, make sure that | 
|---|
| 639 | //    %evl gets folded into %mask. | 
|---|
| 640 | if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) || | 
|---|
| 641 | (LegalizeStrat.OpStrategy == VPLegalization::Convert)) { | 
|---|
| 642 | LegalizeStrat.EVLParamStrategy = VPLegalization::Convert; | 
|---|
| 643 | } | 
|---|
| 644 | } | 
|---|
| 645 |  | 
|---|
| 646 | VPLegalization | 
|---|
| 647 | CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const { | 
|---|
| 648 | auto VPStrat = TTI.getVPLegalizationStrategy(PI: VPI); | 
|---|
| 649 | if (LLVM_LIKELY(!UsingTTIOverrides)) { | 
|---|
| 650 | // No overrides - we are in production. | 
|---|
| 651 | return VPStrat; | 
|---|
| 652 | } | 
|---|
| 653 |  | 
|---|
| 654 | // Overrides set - we are in testing, the following does not need to be | 
|---|
| 655 | // efficient. | 
|---|
| 656 | VPStrat.EVLParamStrategy = parseOverrideOption(TextOpt: EVLTransformOverride); | 
|---|
| 657 | VPStrat.OpStrategy = parseOverrideOption(TextOpt: MaskTransformOverride); | 
|---|
| 658 | return VPStrat; | 
|---|
| 659 | } | 
|---|
| 660 |  | 
|---|
| 661 | VPExpansionDetails | 
|---|
| 662 | CachingVPExpander::expandVectorPredication(VPIntrinsic &VPI) { | 
|---|
| 663 | auto Strategy = getVPLegalizationStrategy(VPI); | 
|---|
| 664 | sanitizeStrategy(VPI, LegalizeStrat&: Strategy); | 
|---|
| 665 |  | 
|---|
| 666 | VPExpansionDetails Changed = VPExpansionDetails::IntrinsicUnchanged; | 
|---|
| 667 |  | 
|---|
| 668 | // Transform the EVL parameter. | 
|---|
| 669 | switch (Strategy.EVLParamStrategy) { | 
|---|
| 670 | case VPLegalization::Legal: | 
|---|
| 671 | break; | 
|---|
| 672 | case VPLegalization::Discard: | 
|---|
| 673 | if (discardEVLParameter(VPI)) | 
|---|
| 674 | Changed = VPExpansionDetails::IntrinsicUpdated; | 
|---|
| 675 | break; | 
|---|
| 676 | case VPLegalization::Convert: | 
|---|
| 677 | if (auto [NewVPI, Folded] = foldEVLIntoMask(VPI); Folded) { | 
|---|
| 678 | (void)NewVPI; | 
|---|
| 679 | Changed = VPExpansionDetails::IntrinsicUpdated; | 
|---|
| 680 | ++NumFoldedVL; | 
|---|
| 681 | } | 
|---|
| 682 | break; | 
|---|
| 683 | } | 
|---|
| 684 |  | 
|---|
| 685 | // Replace with a non-predicated operation. | 
|---|
| 686 | switch (Strategy.OpStrategy) { | 
|---|
| 687 | case VPLegalization::Legal: | 
|---|
| 688 | break; | 
|---|
| 689 | case VPLegalization::Discard: | 
|---|
| 690 | llvm_unreachable( "Invalid strategy for operators."); | 
|---|
| 691 | case VPLegalization::Convert: | 
|---|
| 692 | if (Value *V = expandPredication(VPI); V != &VPI) { | 
|---|
| 693 | ++NumLoweredVPOps; | 
|---|
| 694 | Changed = VPExpansionDetails::IntrinsicReplaced; | 
|---|
| 695 | } | 
|---|
| 696 | break; | 
|---|
| 697 | } | 
|---|
| 698 |  | 
|---|
| 699 | return Changed; | 
|---|
| 700 | } | 
|---|
| 701 | } // namespace | 
|---|
| 702 |  | 
|---|
| 703 | VPExpansionDetails | 
|---|
| 704 | llvm::expandVectorPredicationIntrinsic(VPIntrinsic &VPI, | 
|---|
| 705 | const TargetTransformInfo &TTI) { | 
|---|
| 706 | return CachingVPExpander(TTI).expandVectorPredication(VPI); | 
|---|
| 707 | } | 
|---|
| 708 |  | 
|---|