1//===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "VPlanUtils.h"
10#include "LoopVectorizationPlanner.h"
11#include "VPlanAnalysis.h"
12#include "VPlanCFG.h"
13#include "VPlanDominatorTree.h"
14#include "VPlanPatternMatch.h"
15#include "llvm/ADT/TypeSwitch.h"
16#include "llvm/Analysis/MemoryLocation.h"
17#include "llvm/Analysis/ScalarEvolutionExpressions.h"
18#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
19#include "llvm/IR/Dominators.h"
20#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
21
22using namespace llvm;
23using namespace llvm::VPlanPatternMatch;
24using namespace llvm::SCEVPatternMatch;
25
26bool vputils::onlyFirstLaneUsed(const VPValue *Def) {
27 return all_of(Range: Def->users(),
28 P: [Def](const VPUser *U) { return U->usesFirstLaneOnly(Op: Def); });
29}
30
31bool vputils::onlyFirstPartUsed(const VPValue *Def) {
32 return all_of(Range: Def->users(),
33 P: [Def](const VPUser *U) { return U->usesFirstPartOnly(Op: Def); });
34}
35
36bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
37 return all_of(Range: Def->users(),
38 P: [Def](const VPUser *U) { return U->usesScalars(Op: Def); });
39}
40
41VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
42 if (auto *E = dyn_cast<SCEVConstant>(Val: Expr))
43 return Plan.getOrAddLiveIn(V: E->getValue());
44 // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
45 // value. Otherwise the value may be defined in a loop and using it directly
46 // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
47 // form.
48 auto *U = dyn_cast<SCEVUnknown>(Val: Expr);
49 if (U && !isa<Instruction>(Val: U->getValue()))
50 return Plan.getOrAddLiveIn(V: U->getValue());
51 auto *Expanded = new VPExpandSCEVRecipe(Expr);
52 VPBasicBlock *EntryVPBB = Plan.getEntry();
53 auto Iter = EntryVPBB->getFirstNonPhi();
54 while (Iter != EntryVPBB->end() && isa<VPIRInstruction>(Val: *Iter))
55 ++Iter;
56 EntryVPBB->insert(Recipe: Expanded, InsertPt: Iter);
57 return Expanded;
58}
59
60bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
61 if (isa<VPActiveLaneMaskPHIRecipe>(Val: V))
62 return true;
63
64 VPValue *A, *B;
65
66 auto m_CanonicalScalarIVSteps = m_ScalarIVSteps(
67 Op0: m_CombineOr(Ps: m_CanonicalIV(),
68 Ps: m_DerivedIV(Op0: m_ZeroInt(), Op1: m_CanonicalIV(), Op2: m_One())),
69 Op1: m_One(), Op2: m_Specific(VPV: &Plan.getVF()));
70
71 // A wide canonical IV is either an explicit VPWidenCanonicalIVRecipe or a
72 // canonical VPWidenIntOrFpInductionRecipe.
73 auto m_WideCanonicalIV =
74 m_CombineOr(Ps: m_Isa<VPWidenCanonicalIVRecipe>(), Ps: m_CanonicalWidenIV());
75
76 if (match(V, P: m_ActiveLaneMask(Op0: m_VPValue(V&: A), Op1: m_VPValue(V&: B), Op2: m_One())))
77 return B == Plan.getTripCount() &&
78 (match(V: A, P: m_CanonicalScalarIVSteps) || match(V: A, P: m_WideCanonicalIV));
79
80 // For scalar plans, the header mask uses the scalar steps.
81 if (match(V, P: m_ICmp(Op0: m_CanonicalScalarIVSteps,
82 Op1: m_Specific(VPV: Plan.getBackedgeTakenCount())))) {
83 assert(Plan.hasScalarVFOnly() &&
84 "Non-scalar VF using scalar IV steps for header mask?");
85 return true;
86 }
87
88 auto MaskMatch = m_ICmp(Op0: m_VPValue(V&: A), Op1: m_VPValue(V&: B));
89 if (match(V, P: m_CombineOr(Ps: MaskMatch, Ps: m_Reverse(Op0: MaskMatch))))
90 return match(V: A, P: m_WideCanonicalIV) && B == Plan.getBackedgeTakenCount();
91
92 return match(
93 V, P: m_c_BinaryAnd(Op0: m_VPValue(),
94 Op1: m_VPInstruction<VPInstruction::IncomingAliasMask>()));
95}
96
97/// Returns true if \p R propagates poison from any operand to its result.
98static bool propagatesPoisonFromRecipeOp(const VPRecipeBase *R) {
99 return TypeSwitch<const VPRecipeBase *, bool>(R)
100 .Case<VPWidenGEPRecipe, VPWidenCastRecipe>(
101 caseFn: [](const VPRecipeBase *) { return true; })
102 .Case(caseFn: [](const VPReplicateRecipe *Rep) {
103 // GEP and casts propagate poison from all operands.
104 unsigned Opcode = Rep->getOpcode();
105 return Opcode == Instruction::GetElementPtr ||
106 Instruction::isCast(Opcode);
107 })
108 .Default(defaultFn: [](const VPRecipeBase *) { return false; });
109}
110
111/// Returns true if \p V being poison is guaranteed to trigger UB because it
112/// propagates to the address of a memory recipe.
113static bool poisonGuaranteesUB(const VPValue *V) {
114 SmallPtrSet<const VPValue *, 8> Visited;
115 SmallVector<const VPValue *, 16> Worklist;
116
117 Worklist.push_back(Elt: V);
118
119 while (!Worklist.empty()) {
120 const VPValue *Current = Worklist.pop_back_val();
121 if (!Visited.insert(Ptr: Current).second)
122 continue;
123
124 for (VPUser *U : Current->users()) {
125 // Check if Current is used as an address operand for load/store.
126 if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(Val: cast<VPRecipeBase>(Val: U))) {
127 if (MemR->getAddr() == Current)
128 return true;
129 continue;
130 }
131 if (auto *Rep = dyn_cast<VPReplicateRecipe>(Val: U)) {
132 unsigned Opcode = Rep->getOpcode();
133 if ((Opcode == Instruction::Load && Rep->getOperand(N: 0) == Current) ||
134 (Opcode == Instruction::Store && Rep->getOperand(N: 1) == Current))
135 return true;
136 }
137
138 // Check if poison propagates through this recipe to any of its users.
139 auto *R = cast<VPRecipeBase>(Val: U);
140 for (const VPValue *Op : R->operands()) {
141 if (Op == Current && propagatesPoisonFromRecipeOp(R)) {
142 Worklist.push_back(Elt: R->getVPSingleValue());
143 break;
144 }
145 }
146 }
147 }
148
149 return false;
150}
151
152GEPNoWrapFlags vputils::getGEPFlagsForPtr(VPValue *Ptr) {
153 // Like IR stripPointerCasts, look through GEPs with all-zero indices and
154 // casts to find a root GEP VPInstruction.
155 while (auto *PtrVPI = dyn_cast<VPInstruction>(Val: Ptr)) {
156 unsigned Opcode = PtrVPI->getOpcode();
157 if (Opcode == Instruction::GetElementPtr) {
158 if (!all_of(Range: drop_begin(RangeOrContainer: PtrVPI->operands()), P: match_fn(P: m_ZeroInt())))
159 return PtrVPI->getGEPNoWrapFlags();
160 Ptr = PtrVPI->getOperand(N: 0);
161 continue;
162 }
163 if (Opcode != Instruction::BitCast && Opcode != Instruction::AddrSpaceCast)
164 break;
165 Ptr = PtrVPI->getOperand(N: 0);
166 }
167 return GEPNoWrapFlags::none();
168}
169
170const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
171 PredicatedScalarEvolution &PSE,
172 const Loop *L) {
173 ScalarEvolution &SE = *PSE.getSE();
174 if (isa<VPIRValue, VPSymbolicValue>(Val: V)) {
175 Value *LiveIn = V->getUnderlyingValue();
176 if (LiveIn && SE.isSCEVable(Ty: LiveIn->getType()))
177 return SE.getSCEV(V: LiveIn);
178 return SE.getCouldNotCompute();
179 }
180
181 if (auto *RV = dyn_cast<VPRegionValue>(Val: V)) {
182 assert(RV == RV->getDefiningRegion()->getCanonicalIV() &&
183 "RegionValue must be canonical IV");
184 if (!L)
185 return SE.getCouldNotCompute();
186 return SE.getAddRecExpr(Start: SE.getZero(Ty: RV->getType()), Step: SE.getOne(Ty: RV->getType()),
187 L, Flags: SCEV::FlagAnyWrap);
188 }
189
190 // Helper to create SCEVs for binary and unary operations.
191 auto CreateSCEV = [&](ArrayRef<VPValue *> Ops,
192 function_ref<const SCEV *(ArrayRef<SCEVUse>)> CreateFn)
193 -> const SCEV * {
194 SmallVector<SCEVUse, 2> SCEVOps;
195 for (VPValue *Op : Ops) {
196 const SCEV *S = getSCEVExprForVPValue(V: Op, PSE, L);
197 if (isa<SCEVCouldNotCompute>(Val: S))
198 return SE.getCouldNotCompute();
199 SCEVOps.push_back(Elt: S);
200 }
201 return PSE.getPredicatedSCEV(Expr: CreateFn(SCEVOps));
202 };
203
204 VPValue *LHSVal, *RHSVal;
205 if (match(V, P: m_Add(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
206 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
207 return SE.getAddExpr(LHS: Ops[0], RHS: Ops[1], Flags: SCEV::FlagAnyWrap, Depth: 0);
208 });
209 if (match(V, P: m_Sub(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
210 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
211 return SE.getMinusSCEV(LHS: Ops[0], RHS: Ops[1], Flags: SCEV::FlagAnyWrap, Depth: 0);
212 });
213 if (match(V, P: m_Not(Op0: m_VPValue(V&: LHSVal)))) {
214 // not X = xor X, -1 = -1 - X
215 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
216 return SE.getMinusSCEV(LHS: SE.getMinusOne(Ty: Ops[0]->getType()), RHS: Ops[0]);
217 });
218 }
219 if (match(V, P: m_Mul(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
220 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
221 return SE.getMulExpr(LHS: Ops[0], RHS: Ops[1], Flags: SCEV::FlagAnyWrap, Depth: 0);
222 });
223 // Handle shl by constant: x << c is equivalent to x * (1 << c). A shift
224 // amount >= the bit width produces poison; do not rewrite it, as
225 // getPowerOfTwo requires the power to be in range.
226 uint64_t ShiftAmt;
227 if (match(V, P: m_Shl(Op0: m_VPValue(V&: LHSVal), Op1: m_ConstantInt(C&: ShiftAmt))) &&
228 ShiftAmt < LHSVal->getScalarType()->getScalarSizeInBits())
229 return CreateSCEV(LHSVal, [&](ArrayRef<SCEVUse> Ops) {
230 return SE.getMulExpr(LHS: Ops[0],
231 RHS: SE.getPowerOfTwo(Ty: Ops[0]->getType(), Power: ShiftAmt));
232 });
233 if (match(V, P: m_LShr(Op0: m_VPValue(V&: LHSVal), Op1: m_ConstantInt(C&: ShiftAmt)))) {
234 Type *Ty = V->getScalarType();
235 if (ShiftAmt < SE.getTypeSizeInBits(Ty))
236 return CreateSCEV(LHSVal, [&](ArrayRef<SCEVUse> Ops) {
237 return SE.getUDivExpr(LHS: Ops[0], RHS: SE.getPowerOfTwo(Ty, Power: ShiftAmt));
238 });
239 }
240 if (match(V, P: m_UDiv(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
241 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
242 return SE.getUDivExpr(LHS: Ops[0], RHS: Ops[1]);
243 });
244 if (match(V, P: m_URem(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
245 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
246 return SE.getURemExpr(LHS: Ops[0], RHS: Ops[1]);
247 });
248 // A SRem with non-negative operands is equivalent to an URem.
249 if (match(V, P: m_SRem(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal)))) {
250 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
251 if (!SE.isKnownNonNegative(S: Ops[0]) || !SE.isKnownNonNegative(S: Ops[1]))
252 return SE.getCouldNotCompute();
253 return SE.getURemExpr(LHS: Ops[0], RHS: Ops[1]);
254 });
255 }
256 // Handle AND with constant mask: x & (2^n - 1) can be represented as x % 2^n.
257 const APInt *Mask;
258 if (match(V, P: m_c_BinaryAnd(Op0: m_VPValue(V&: LHSVal), Op1: m_APInt(C&: Mask))) &&
259 (*Mask + 1).isPowerOf2())
260 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
261 return SE.getURemExpr(LHS: Ops[0], RHS: SE.getConstant(Val: *Mask + 1));
262 });
263 if (match(V, P: m_Trunc(Op0: m_VPValue(V&: LHSVal)))) {
264 Type *DestTy = V->getScalarType();
265 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
266 return SE.getTruncateExpr(Op: Ops[0], Ty: DestTy);
267 });
268 }
269 if (match(V, P: m_ZExt(Op0: m_VPValue(V&: LHSVal)))) {
270 Type *DestTy = V->getScalarType();
271 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
272 return SE.getZeroExtendExpr(Op: Ops[0], Ty: DestTy);
273 });
274 }
275 if (match(V, P: m_SExt(Op0: m_VPValue(V&: LHSVal)))) {
276 Type *DestTy = V->getScalarType();
277
278 // Mirror SCEV's createSCEV handling for sext(sub nsw): push sign extension
279 // onto the operands before computing the subtraction.
280 VPValue *SubLHS, *SubRHS;
281 auto *SubR = dyn_cast<VPRecipeWithIRFlags>(Val: LHSVal);
282 if (match(V: LHSVal, P: m_Sub(Op0: m_VPValue(V&: SubLHS), Op1: m_VPValue(V&: SubRHS))) && SubR &&
283 SubR->hasNoSignedWrap() && poisonGuaranteesUB(V: LHSVal)) {
284 const SCEV *V1 = getSCEVExprForVPValue(V: SubLHS, PSE, L);
285 const SCEV *V2 = getSCEVExprForVPValue(V: SubRHS, PSE, L);
286 if (!isa<SCEVCouldNotCompute>(Val: V1) && !isa<SCEVCouldNotCompute>(Val: V2))
287 return SE.getMinusSCEV(LHS: SE.getSignExtendExpr(Op: V1, Ty: DestTy),
288 RHS: SE.getSignExtendExpr(Op: V2, Ty: DestTy), Flags: SCEV::FlagNSW);
289 }
290
291 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
292 return SE.getSignExtendExpr(Op: Ops[0], Ty: DestTy);
293 });
294 }
295 if (match(V,
296 P: m_Intrinsic<Intrinsic::umax>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
297 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
298 return SE.getUMaxExpr(LHS: Ops[0], RHS: Ops[1]);
299 });
300 if (match(V,
301 P: m_Intrinsic<Intrinsic::smax>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
302 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
303 return SE.getSMaxExpr(LHS: Ops[0], RHS: Ops[1]);
304 });
305 if (match(V,
306 P: m_Intrinsic<Intrinsic::umin>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
307 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
308 return SE.getUMinExpr(LHS: Ops[0], RHS: Ops[1]);
309 });
310 if (match(V,
311 P: m_Intrinsic<Intrinsic::smin>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
312 return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
313 return SE.getSMinExpr(LHS: Ops[0], RHS: Ops[1]);
314 });
315 if (match(V, P: m_Intrinsic<Intrinsic::abs>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue())))
316 return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
317 // is_int_min_poison is local to this intrinsic: poison on INT_MIN is
318 // not proof that the input is never INT_MIN, nor that poison reaches
319 // UB. Do not translate it to SCEV's global IsNSW flag.
320 return SE.getAbsExpr(Op: Ops[0], /*IsNSW=*/false);
321 });
322
323 ArrayRef<VPValue *> Ops;
324 Type *SourceElementType;
325 if (match(V, P: m_GetElementPtr(SourceElementType, Operands&: Ops))) {
326 return CreateSCEV(Ops, [&](ArrayRef<SCEVUse> Ops) {
327 return SE.getGEPExpr(BaseExpr: Ops.front(), IndexExprs: Ops.drop_front(), SrcElementTy: SourceElementType);
328 });
329 }
330
331 // TODO: Support constructing SCEVs for more recipes as needed.
332 const VPRecipeBase *DefR = V->getDefiningRecipe();
333 const SCEV *Expr =
334 TypeSwitch<const VPRecipeBase *, const SCEV *>(DefR)
335 .Case(caseFn: [](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
336 .Case(caseFn: [&SE, &PSE, L](const VPWidenIntOrFpInductionRecipe *R) {
337 const SCEV *Step = getSCEVExprForVPValue(V: R->getStepValue(), PSE, L);
338 if (!L || isa<SCEVCouldNotCompute>(Val: Step))
339 return SE.getCouldNotCompute();
340 const SCEV *Start =
341 getSCEVExprForVPValue(V: R->getStartValue(), PSE, L);
342 const SCEV *AddRec =
343 SE.getAddRecExpr(Start, Step, L, Flags: SCEV::FlagAnyWrap);
344 if (R->getTruncInst())
345 return SE.getTruncateExpr(Op: AddRec, Ty: R->getScalarType());
346 return AddRec;
347 })
348 .Case(caseFn: [&SE, &PSE, L](const VPWidenPointerInductionRecipe *R) {
349 const SCEV *Start =
350 getSCEVExprForVPValue(V: R->getStartValue(), PSE, L);
351 if (!L || isa<SCEVCouldNotCompute>(Val: Start))
352 return SE.getCouldNotCompute();
353 const SCEV *Step = getSCEVExprForVPValue(V: R->getStepValue(), PSE, L);
354 if (isa<SCEVCouldNotCompute>(Val: Step))
355 return SE.getCouldNotCompute();
356 return SE.getAddRecExpr(Start, Step, L, Flags: SCEV::FlagAnyWrap);
357 })
358 .Case(caseFn: [&SE, &PSE, L](const VPDerivedIVRecipe *R) {
359 const SCEV *Start = getSCEVExprForVPValue(V: R->getOperand(N: 0), PSE, L);
360 const SCEV *IV = getSCEVExprForVPValue(V: R->getOperand(N: 1), PSE, L);
361 const SCEV *Scale = getSCEVExprForVPValue(V: R->getOperand(N: 2), PSE, L);
362 if (any_of(Range: ArrayRef({Start, IV, Scale}),
363 P: IsaPred<SCEVCouldNotCompute>))
364 return SE.getCouldNotCompute();
365
366 return SE.getAddExpr(
367 LHS: SE.getTruncateOrSignExtend(V: Start, Ty: IV->getType()),
368 RHS: SE.getMulExpr(
369 LHS: IV, RHS: SE.getTruncateOrSignExtend(V: Scale, Ty: IV->getType())));
370 })
371 .Case(caseFn: [&SE, &PSE, L](const VPScalarIVStepsRecipe *R) {
372 const SCEV *IV = getSCEVExprForVPValue(V: R->getOperand(N: 0), PSE, L);
373 const SCEV *Step = getSCEVExprForVPValue(V: R->getOperand(N: 1), PSE, L);
374 if (isa<SCEVCouldNotCompute>(Val: IV) || !isa<SCEVConstant>(Val: Step))
375 return SE.getCouldNotCompute();
376 return SE.getTruncateOrSignExtend(V: IV, Ty: Step->getType());
377 })
378 .Default(
379 defaultFn: [&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });
380
381 return PSE.getPredicatedSCEV(Expr);
382}
383
384bool vputils::isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE,
385 const Loop *L) {
386 // If address is an SCEVAddExpr, we require that all operands must be either
387 // be invariant or a (possibly sign-extend) affine AddRec.
388 if (auto *PtrAdd = dyn_cast<SCEVAddExpr>(Val: Addr)) {
389 return all_of(Range: PtrAdd->operands(), P: [&SE, L](const SCEV *Op) {
390 return SE.isLoopInvariant(S: Op, L) ||
391 match(S: Op, P: m_scev_SExt(Op0: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV()))) ||
392 match(S: Op, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV()));
393 });
394 }
395
396 // Otherwise, check if address is loop invariant or an affine add recurrence.
397 return SE.isLoopInvariant(S: Addr, L) ||
398 match(S: Addr, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV()));
399}
400
401/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are
402/// uniform, the result will also be uniform.
403static bool preservesUniformity(unsigned Opcode) {
404 if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
405 return true;
406 switch (Opcode) {
407 case Instruction::Freeze:
408 case Instruction::GetElementPtr:
409 case Instruction::ICmp:
410 case Instruction::FCmp:
411 case Instruction::Select:
412 case VPInstruction::Not:
413 case VPInstruction::Broadcast:
414 case VPInstruction::MaskedCond:
415 case VPInstruction::PtrAdd:
416 return true;
417 default:
418 return false;
419 }
420}
421
422bool vputils::isSingleScalar(const VPValue *VPV) {
423 // Live-in, symbolic and region-values represent single-scalar values.
424 if (isa<VPIRValue, VPSymbolicValue, VPRegionValue>(Val: VPV))
425 return true;
426
427 if (auto *Rep = dyn_cast<VPReplicateRecipe>(Val: VPV)) {
428 const VPRegionBlock *RegionOfR = Rep->getRegion();
429 // Don't consider recipes in replicate regions as uniform yet; their first
430 // lane cannot be accessed when executing the replicate region for other
431 // lanes.
432 if (RegionOfR && RegionOfR->isReplicator())
433 return false;
434 return Rep->isSingleScalar() || (preservesUniformity(Opcode: Rep->getOpcode()) &&
435 all_of(Range: Rep->operands(), P: isSingleScalar));
436 }
437 if (isa<VPWidenGEPRecipe, VPBlendRecipe>(Val: VPV))
438 return all_of(Range: VPV->getDefiningRecipe()->operands(), P: isSingleScalar);
439 if (auto *WidenR = dyn_cast<VPWidenRecipe>(Val: VPV)) {
440 return preservesUniformity(Opcode: WidenR->getOpcode()) &&
441 all_of(Range: WidenR->operands(), P: isSingleScalar);
442 }
443 if (auto *VPI = dyn_cast<VPInstruction>(Val: VPV))
444 return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
445 (preservesUniformity(Opcode: VPI->getOpcode()) &&
446 all_of(Range: VPI->operands(), P: isSingleScalar));
447 if (auto *RR = dyn_cast<VPReductionRecipe>(Val: VPV))
448 return !RR->isPartialReduction();
449 if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe, VPDerivedIVRecipe>(
450 Val: VPV))
451 return true;
452 if (auto *Expr = dyn_cast<VPExpressionRecipe>(Val: VPV))
453 return Expr->isVectorToScalar();
454
455 // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
456 return isa<VPExpandSCEVRecipe>(Val: VPV);
457}
458
459bool vputils::isUniformAcrossVFsAndUFs(const VPValue *V) {
460 // Live-ins and region values are uniform.
461 if (isa<VPIRValue, VPSymbolicValue, VPRegionValue>(Val: V))
462 return true;
463
464 const VPRecipeBase *R = V->getDefiningRecipe();
465 const VPBasicBlock *VPBB = R ? R->getParent() : nullptr;
466 const VPlan *Plan = VPBB ? VPBB->getPlan() : nullptr;
467 if (VPBB) {
468 if ((VPBB == Plan->getVectorPreheader() || VPBB == Plan->getEntry())) {
469 if (match(V: V->getDefiningRecipe(),
470 P: m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
471 return false;
472 return all_of(Range: R->operands(), P: isUniformAcrossVFsAndUFs);
473 }
474 }
475
476 return TypeSwitch<const VPRecipeBase *, bool>(R)
477 .Case(caseFn: [](const VPDerivedIVRecipe *R) { return true; })
478 .Case(caseFn: [](const VPReplicateRecipe *R) {
479 // Be conservative about side-effects, except for the
480 // known-side-effecting assumes and stores, which we know will be
481 // uniform.
482 return R->isSingleScalar() &&
483 (!R->mayHaveSideEffects() ||
484 isa<AssumeInst, StoreInst>(Val: R->getUnderlyingInstr())) &&
485 all_of(Range: R->operands(), P: isUniformAcrossVFsAndUFs);
486 })
487 .Case(caseFn: [](const VPWidenRecipe *R) {
488 return preservesUniformity(Opcode: R->getOpcode()) &&
489 all_of(Range: R->operands(), P: isUniformAcrossVFsAndUFs);
490 })
491 .Case(caseFn: [](const VPPhi *) {
492 // Bail out on VPPhi, as we can end up in infinite cycles.
493 return false;
494 })
495 .Case(caseFn: [](const VPInstruction *VPI) {
496 return (VPI->isSingleScalar() || VPI->isVectorToScalar() ||
497 preservesUniformity(Opcode: VPI->getOpcode())) &&
498 all_of(Range: VPI->operands(), P: isUniformAcrossVFsAndUFs);
499 })
500 .Case(caseFn: [](const VPWidenCastRecipe *R) {
501 // A cast is uniform according to its operand.
502 return isUniformAcrossVFsAndUFs(V: R->getOperand(N: 0));
503 })
504 .Default(defaultFn: [](const VPRecipeBase *) { // A value is considered non-uniform
505 // unless proven otherwise.
506 return false;
507 });
508}
509
510VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
511 auto DepthFirst = vp_depth_first_shallow(G: Plan.getEntry());
512 auto I = find_if(Range&: DepthFirst, P: [&VPDT](VPBlockBase *VPB) {
513 return VPBlockUtils::isHeader(VPB, VPDT);
514 });
515 return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(Val: *I);
516}
517
518unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
519 if (!R)
520 return 1;
521 if (auto *RR = dyn_cast<VPReductionPHIRecipe>(Val: R))
522 return RR->getVFScaleFactor();
523 if (auto *RR = dyn_cast<VPReductionRecipe>(Val: R))
524 return RR->getVFScaleFactor();
525 if (auto *ER = dyn_cast<VPExpressionRecipe>(Val: R))
526 return ER->getVFScaleFactor();
527 assert(
528 (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
529 VPInstruction::ReductionStartVector) &&
530 "getting scaling factor of reduction-start-vector not implemented yet");
531 return 1;
532}
533
534bool vputils::cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking) {
535 // Assumes don't alias anything or throw; as long as they're guaranteed to
536 // execute, they're safe to hoist. They should however not be sunk, as it
537 // would destroy information.
538 if (match(V: &R, P: m_Intrinsic<Intrinsic::assume>()))
539 return Sinking;
540 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
541 return true;
542 // Allocas cannot be hoisted.
543 auto *RepR = dyn_cast<VPReplicateRecipe>(Val: &R);
544 return RepR && RepR->getOpcode() == Instruction::Alloca;
545}
546
547VPSingleDefRecipe *vputils::findHeaderMask(VPlan &Plan) {
548 if (VPValue *AliasMask = findIncomingAliasMask(Plan)) {
549 assert(match(AliasMask->getSingleUser(),
550 m_c_BinaryAnd(m_VPValue(), m_Specific(AliasMask))) &&
551 "AliasMask must only be used with the original header mask");
552 return cast<VPSingleDefRecipe>(Val: AliasMask->getSingleUser());
553 }
554
555 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
556 SmallVector<VPValue *> WideCanonicalIVs;
557 auto *WideCanonicalIV =
558 findUserOf<VPWidenCanonicalIVRecipe>(V: LoopRegion->getCanonicalIV());
559 assert(count_if(LoopRegion->getCanonicalIV()->users(),
560 IsaPred<VPWidenCanonicalIVRecipe>) <= 1 &&
561 "Must have at most one VPWideCanonicalIVRecipe");
562 if (WideCanonicalIV)
563 WideCanonicalIVs.push_back(Elt: WideCanonicalIV);
564
565 // Also include VPWidenIntOrFpInductionRecipes that represent a widened
566 // version of the canonical induction.
567 VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
568 for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
569 VPWidenIntOrFpInductionRecipe *WidenIV;
570 if (match(V: &Phi, P: m_CanonicalWidenIV(V&: WidenIV)))
571 WideCanonicalIVs.push_back(Elt: WidenIV);
572 }
573
574 // Walk users of wide canonical IVs and find the single compare of the form
575 // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
576 VPSingleDefRecipe *HeaderMask = nullptr;
577 for (auto *Wide : WideCanonicalIVs) {
578 for (VPUser *U : Wide->users()) {
579 auto *VPI = dyn_cast<VPInstruction>(Val: U);
580 if (!VPI || !vputils::isHeaderMask(V: VPI, Plan))
581 continue;
582
583 assert(VPI->getOperand(0) == Wide &&
584 "WidenCanonicalIV must be the first operand of the compare");
585 assert(!HeaderMask && "Multiple header masks found?");
586 HeaderMask = VPI;
587 }
588 }
589
590 for (VPRecipeBase &R : LoopRegion->getEntryBasicBlock()->phis()) {
591 auto *Def = cast<VPSingleDefRecipe>(Val: &R);
592 if (vputils::isHeaderMask(V: Def, Plan)) {
593 assert(!HeaderMask && "Multiple header masks found?");
594 HeaderMask = Def;
595 }
596 }
597
598 return HeaderMask;
599}
600
601SmallVector<VPBasicBlock *>
602VPBlockUtils::blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB,
603 VPBasicBlock *LastBB) {
604 assert(FirstBB->getParent() == LastBB->getParent() &&
605 "FirstBB and LastBB from different regions");
606#ifndef NDEBUG
607 bool InSingleSuccChain = false;
608 for (VPBlockBase *Succ = FirstBB; Succ; Succ = Succ->getSingleSuccessor())
609 InSingleSuccChain |= (Succ == LastBB);
610 assert(InSingleSuccChain &&
611 "LastBB unreachable from FirstBB in single-successor chain");
612#endif
613 auto Blocks = to_vector(
614 Range: VPBlockUtils::blocksOnly<VPBasicBlock>(Range: vp_depth_first_deep(G: FirstBB)));
615 auto *LastIt = find(Range&: Blocks, Val: LastBB);
616 assert(LastIt != Blocks.end() &&
617 "LastBB unreachable from FirstBB in depth-first traversal");
618 Blocks.erase(CS: std::next(x: LastIt), CE: Blocks.end());
619 return Blocks;
620}
621
622VPValue *vputils::findIncomingAliasMask(const VPlan &Plan) {
623 for (VPRecipeBase &R : *Plan.getVectorPreheader())
624 if (match(V: &R, P: m_VPInstruction<VPInstruction::IncomingAliasMask>()))
625 return cast<VPInstruction>(Val: &R);
626 return nullptr;
627}
628
629bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
630 const VPDominatorTree &VPDT) {
631 auto *VPBB = dyn_cast<VPBasicBlock>(Val: VPB);
632 if (!VPBB)
633 return false;
634
635 // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
636 // VPBB as its entry, i.e., free of predecessors.
637 if (auto *R = VPBB->getParent())
638 return !R->isReplicator() && !VPBB->hasPredecessors();
639
640 // A header dominates its second predecessor (the latch), with the other
641 // predecessor being the preheader
642 return VPB->getPredecessors().size() == 2 &&
643 VPDT.dominates(A: VPB, B: VPB->getPredecessors()[1]);
644}
645
646bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
647 const VPDominatorTree &VPDT) {
648 // A latch has a header as its last successor, with its other successors
649 // leaving the loop. A preheader OTOH has a header as its first (and only)
650 // successor.
651 return VPB->getNumSuccessors() >= 2 &&
652 VPBlockUtils::isHeader(VPB: VPB->getSuccessors().back(), VPDT);
653}
654
655std::pair<VPBasicBlock *, VPBasicBlock *>
656VPBlockUtils::getPlainCFGHeaderAndLatch(const VPlan &Plan) {
657 auto *Header = cast<VPBasicBlock>(
658 Val: Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
659 auto *Latch = cast<VPBasicBlock>(Val: Header->getPredecessors()[1]);
660 return {Header, Latch};
661}
662
663VPBasicBlock *VPBlockUtils::getPlainCFGMiddleBlock(const VPlan &Plan) {
664 return cast<VPBasicBlock>(Val: Plan.getScalarPreheader()->getPredecessors()[0]);
665}
666
667std::optional<MemoryLocation>
668vputils::getMemoryLocation(const VPRecipeBase &R) {
669 auto *M = dyn_cast<VPIRMetadata>(Val: &R);
670 if (!M)
671 return std::nullopt;
672 MemoryLocation Loc;
673 // Populate noalias metadata from VPIRMetadata.
674 if (MDNode *NoAliasMD = M->getMetadata(Kind: LLVMContext::MD_noalias))
675 Loc.AATags.NoAlias = NoAliasMD;
676 if (MDNode *AliasScopeMD = M->getMetadata(Kind: LLVMContext::MD_alias_scope))
677 Loc.AATags.Scope = AliasScopeMD;
678 return Loc;
679}
680
681VPInstruction *vputils::findCanonicalIVIncrement(VPlan &Plan) {
682 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
683 VPRegionValue *CanIV = LoopRegion->getCanonicalIV();
684 assert(CanIV && "Expected loop region to have a canonical IV");
685
686 VPSymbolicValue &VFxUF = Plan.getVFxUF();
687
688 // Check if \p Step matches the expected increment step, accounting for
689 // materialization of VFxUF and UF.
690 auto IsIncrementStep = [&](VPValue *Step) -> bool {
691 if (!VFxUF.isMaterialized())
692 return Step == &VFxUF;
693
694 VPSymbolicValue &UF = Plan.getUF();
695 if (!UF.isMaterialized())
696 return Step == &UF ||
697 match(V: Step, P: m_c_Mul(Op0: m_Specific(VPV: &Plan.getUF()),
698 Op1: m_VPInstruction<VPInstruction::VScale>()));
699
700 // Alias masking: step is number of active lanes of a dependence mask.
701 if (match(V: Step, P: m_ZExtOrTruncOrSelf(
702 Op0: m_VPInstruction<VPInstruction::NumActiveLanes>())))
703 return true;
704
705 unsigned ConcreteUF = Plan.getConcreteUF();
706 // Fixed VF: step is just the concrete UF.
707 if (match(V: Step, P: m_SpecificInt(V: ConcreteUF)))
708 return true;
709
710 // Scalable VF: step involves VScale.
711 if (ConcreteUF == 1)
712 return match(V: Step, P: m_VPInstruction<VPInstruction::VScale>());
713 if (match(V: Step, P: m_c_Mul(Op0: m_SpecificInt(V: ConcreteUF),
714 Op1: m_VPInstruction<VPInstruction::VScale>())))
715 return true;
716 // mul(VScale, ConcreteUF) may have been simplified to
717 // shl(VScale, log2(ConcreteUF)) when ConcreteUF is a power of 2.
718 return isPowerOf2_32(Value: ConcreteUF) &&
719 match(V: Step, P: m_Shl(Op0: m_VPInstruction<VPInstruction::VScale>(),
720 Op1: m_SpecificInt(V: Log2_32(Value: ConcreteUF))));
721 };
722
723 VPInstruction *Increment = nullptr;
724 for (VPUser *U : CanIV->users()) {
725 VPValue *Step;
726 if (isa<VPInstruction>(Val: U) &&
727 match(U, P: m_c_Add(Op0: m_Specific(VPV: CanIV), Op1: m_VPValue(V&: Step))) &&
728 IsIncrementStep(Step)) {
729 assert(!Increment && "There must be a unique increment");
730 Increment = cast<VPInstruction>(Val: U);
731 }
732 }
733
734 assert((!VFxUF.isMaterialized() || Increment) &&
735 "After materializing VFxUF, an increment must exist");
736 assert((!Increment ||
737 LoopRegion->hasCanonicalIVNUW() == Increment->hasNoUnsignedWrap()) &&
738 "NUW flag in region and increment must match");
739 return Increment;
740}
741
742/// Find the ComputeReductionResult recipe for \p PhiR, looking through selects
743/// inserted for predicated reductions or tail folding.
744VPInstruction *vputils::findComputeReductionResult(VPReductionPHIRecipe *PhiR) {
745 VPValue *BackedgeVal = PhiR->getBackedgeValue();
746 if (auto *Res =
747 findUserOf<VPInstruction::ComputeReductionResult>(V: BackedgeVal))
748 return Res;
749
750 // Look through selects inserted for tail folding or predicated reductions.
751 VPRecipeBase *SelR =
752 findUserOf(V: BackedgeVal, P: m_Select(Op0: m_VPValue(), Op1: m_VPValue(), Op2: m_VPValue()));
753 if (!SelR)
754 return nullptr;
755 return findUserOf<VPInstruction::ComputeReductionResult>(
756 V: cast<VPSingleDefRecipe>(Val: SelR));
757}
758
759bool vputils::isUsedByLoadStoreAddress(const VPValue *V) {
760 SmallPtrSet<const VPValue *, 4> Seen;
761 SmallVector<const VPValue *> WorkList = {V};
762
763 while (!WorkList.empty()) {
764 const VPValue *Cur = WorkList.pop_back_val();
765 if (!Seen.insert(Ptr: Cur).second)
766 continue;
767
768 auto *Blend = dyn_cast<VPBlendRecipe>(Val: Cur);
769 // Skip blends that use V only through a compare by checking if any incoming
770 // value was already visited.
771 if (Blend && none_of(Range: seq<unsigned>(Begin: 0, End: Blend->getNumIncomingValues()),
772 P: [&](unsigned I) {
773 return Seen.contains(Ptr: Blend->getIncomingValue(Idx: I));
774 }))
775 continue;
776
777 for (VPUser *U : Cur->users()) {
778 if (auto *InterleaveR = dyn_cast<VPInterleaveBase>(Val: U))
779 if (InterleaveR->getAddr() == Cur)
780 return true;
781 // Cur is used as the pointer of a (possibly masked) load (operand 0) or
782 // store (operand 1).
783 if (match(U, P: m_CombineOr(Ps: m_Unary<Instruction::Load>(Op0: m_Specific(VPV: Cur)),
784 Ps: m_Binary<Instruction::Store>(Op0: m_VPValue(),
785 Op1: m_Specific(VPV: Cur)))))
786 return true;
787 if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(Val: cast<VPRecipeBase>(Val: U))) {
788 if (MemR->getAddr() == Cur && MemR->isConsecutive())
789 return true;
790 }
791 }
792
793 // The legacy cost model only supports scalarization loads/stores with phi
794 // addresses, if the phi is directly used as load/store address. Don't
795 // traverse further for Blends.
796 if (Blend)
797 continue;
798
799 // Only traverse further through users that also define a value (and can
800 // thus have their own users walked). Skip when Cur is only used as mask ,
801 // as well as loads: a loaded value does not depend on the load's operand.
802 for (VPUser *U : Cur->users()) {
803 auto *VPI = dyn_cast<VPInstruction>(Val: U);
804 if (VPI && VPI->getMask() == Cur &&
805 none_of(Range: VPI->operandsWithoutMask(),
806 P: [Cur](VPValue *Op) { return Op == Cur; }))
807 continue;
808 if (match(U, P: m_VPInstruction<Instruction::Load>()))
809 continue;
810 if (auto *SDR = dyn_cast<VPSingleDefRecipe>(Val: U))
811 WorkList.push_back(Elt: SDR);
812 }
813 }
814 return false;
815}
816
817/// Try to find a loop-invariant IR value for \p S in the plan's entry block
818/// that can be reused. Returns the corresponding live-in VPValue, or nullptr
819/// if no reusable IR value is found.
820VPValue *VPSCEVExpander::tryToReuseIRValue(const SCEV *S) {
821 if (isa<SCEVConstant, SCEVUnknown>(Val: S))
822 return nullptr;
823 VPlan &Plan = Builder.getPlan();
824 BasicBlock *PH = cast<VPIRBasicBlock>(Val: Plan.getEntry())->getIRBasicBlock();
825 for (Value *V : SE.getSCEVValues(S)) {
826 // Only reuse instructions in the plan's entry block, or, when a
827 // DominatorTree is available, any instruction that dominates it.
828 // Instructions in sibling branches may not dominate the entry block.
829 auto *I = dyn_cast<Instruction>(Val: V);
830 if (!I)
831 return Plan.getOrAddLiveIn(V);
832 if (!SE.DT.dominates(A: I->getParent(), B: PH))
833 continue;
834 SmallVector<Instruction *> DropPoisonGeneratingInsts;
835 if (!SE.canReuseInstruction(S, I, DropPoisonGeneratingInsts))
836 continue;
837 for (Instruction *DropI : DropPoisonGeneratingInsts)
838 SCEVExpander::dropPoisonGeneratingAnnotationsAndReinfer(SE, I: DropI);
839 return Plan.getOrAddLiveIn(V);
840 }
841 return nullptr;
842}
843
844VPValue *VPSCEVExpander::tryToExpand(const SCEV *S) {
845 if (VPValue *V = tryToReuseIRValue(S))
846 return V;
847
848 switch (S->getSCEVType()) {
849 case scConstant:
850 return Builder.getPlan().getOrAddLiveIn(V: cast<SCEVConstant>(Val: S)->getValue());
851 case scUnknown:
852 return Builder.getPlan().getOrAddLiveIn(V: cast<SCEVUnknown>(Val: S)->getValue());
853 case scVScale:
854 return Builder.createNaryOp(Opcode: VPInstruction::VScale, Operands: {}, ResultTy: S->getType());
855 case scAddExpr:
856 case scMulExpr: {
857 if (S->getType()->isPointerTy())
858 return nullptr;
859 auto *NAry = cast<SCEVNAryExpr>(Val: S);
860 unsigned Opcode =
861 S->getSCEVType() == scAddExpr ? Instruction::Add : Instruction::Mul;
862 // Iterate in reverse so that constants are emitted last.
863 SmallVector<VPValue *, 2> Ops;
864 for (const SCEVUse &Op : reverse(C: NAry->operands())) {
865 VPValue *OpV = tryToExpand(S: Op);
866 if (!OpV)
867 return nullptr;
868 Ops.push_back(Elt: OpV);
869 }
870 VPIRFlags::WrapFlagsTy WrapFlags(NAry->hasNoUnsignedWrap(),
871 NAry->hasNoSignedWrap());
872 VPValue *Result = Ops.front();
873 for (VPValue *Op : drop_begin(RangeOrContainer&: Ops))
874 Result = Builder.createOverflowingOp(Opcode, Operands: {Result, Op}, WrapFlags, DL);
875 return Result;
876 }
877 case scUDivExpr: {
878 auto *UDiv = cast<SCEVUDivExpr>(Val: S);
879 VPValue *LHS = tryToExpand(S: UDiv->getLHS());
880 if (!LHS)
881 return nullptr;
882 VPValue *RHS = tryToExpand(S: UDiv->getRHS());
883 if (!RHS)
884 return nullptr;
885 return Builder.createNaryOp(Opcode: Instruction::UDiv, Operands: {LHS, RHS},
886 Flags: VPIRFlags::getDefaultFlags(Opcode: Instruction::UDiv),
887 DL);
888 }
889 default:
890 return nullptr;
891 }
892}
893