1//===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "VPlanUtils.h"
10#include "VPlanAnalysis.h"
11#include "VPlanCFG.h"
12#include "VPlanDominatorTree.h"
13#include "VPlanPatternMatch.h"
14#include "llvm/ADT/TypeSwitch.h"
15#include "llvm/Analysis/MemoryLocation.h"
16#include "llvm/Analysis/ScalarEvolutionExpressions.h"
17#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
18
19using namespace llvm;
20using namespace llvm::VPlanPatternMatch;
21using namespace llvm::SCEVPatternMatch;
22
23bool vputils::onlyFirstLaneUsed(const VPValue *Def) {
24 return all_of(Range: Def->users(),
25 P: [Def](const VPUser *U) { return U->usesFirstLaneOnly(Op: Def); });
26}
27
28bool vputils::onlyFirstPartUsed(const VPValue *Def) {
29 return all_of(Range: Def->users(),
30 P: [Def](const VPUser *U) { return U->usesFirstPartOnly(Op: Def); });
31}
32
33bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
34 return all_of(Range: Def->users(),
35 P: [Def](const VPUser *U) { return U->usesScalars(Op: Def); });
36}
37
38VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
39 if (auto *E = dyn_cast<SCEVConstant>(Val: Expr))
40 return Plan.getOrAddLiveIn(V: E->getValue());
41 // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
42 // value. Otherwise the value may be defined in a loop and using it directly
43 // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
44 // form.
45 auto *U = dyn_cast<SCEVUnknown>(Val: Expr);
46 if (U && !isa<Instruction>(Val: U->getValue()))
47 return Plan.getOrAddLiveIn(V: U->getValue());
48 auto *Expanded = new VPExpandSCEVRecipe(Expr);
49 Plan.getEntry()->appendRecipe(Recipe: Expanded);
50 return Expanded;
51}
52
/// Returns true if \p V is a header mask of \p Plan, i.e. a mask that is true
/// exactly for the lanes corresponding to in-bounds loop iterations.
bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
  // An active-lane-mask phi is a header mask by construction.
  if (isa<VPActiveLaneMaskPHIRecipe>(V))
    return true;

  // A "wide canonical IV" is either an explicit VPWidenCanonicalIVRecipe or a
  // widened int/FP induction that is canonical (start 0, step 1).
  auto IsWideCanonicalIV = [](VPValue *A) {
    return isa<VPWidenCanonicalIVRecipe>(A) ||
           (isa<VPWidenIntOrFpInductionRecipe>(A) &&
            cast<VPWidenIntOrFpInductionRecipe>(A)->isCanonical());
  };

  VPValue *A, *B;

  // Matches scalar-steps of the canonical IV with unit stride scaled by VF.
  auto m_CanonicalScalarIVSteps =
      m_ScalarIVSteps(m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
                      m_One(), m_Specific(&Plan.getVF()));

  // Form 1: active.lane.mask(IV, trip-count) with unit increment.
  if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
    return B == Plan.getTripCount() &&
           (match(A, m_CanonicalScalarIVSteps) || IsWideCanonicalIV(A));

  // For scalar plans, the header mask uses the scalar steps.
  if (match(V, m_ICmp(m_CanonicalScalarIVSteps,
                      m_Specific(Plan.getBackedgeTakenCount())))) {
    assert(Plan.hasScalarVFOnly() &&
           "Non-scalar VF using scalar IV steps for header mask?");
    return true;
  }

  // Form 2: icmp(wide-canonical-IV, backedge-taken-count).
  return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
         B == Plan.getBackedgeTakenCount();
}
84
85/// Returns true if \p R propagates poison from any operand to its result.
86static bool propagatesPoisonFromRecipeOp(const VPRecipeBase *R) {
87 return TypeSwitch<const VPRecipeBase *, bool>(R)
88 .Case<VPWidenGEPRecipe, VPWidenCastRecipe>(
89 caseFn: [](const VPRecipeBase *) { return true; })
90 .Case(caseFn: [](const VPReplicateRecipe *Rep) {
91 // GEP and casts propagate poison from all operands.
92 unsigned Opcode = Rep->getOpcode();
93 return Opcode == Instruction::GetElementPtr ||
94 Instruction::isCast(Opcode);
95 })
96 .Default(defaultFn: [](const VPRecipeBase *) { return false; });
97}
98
/// Returns true if \p V being poison is guaranteed to trigger UB because it
/// propagates to the address of a memory recipe.
static bool poisonGuaranteesUB(const VPValue *V) {
  SmallPtrSet<const VPValue *, 8> Visited;
  SmallVector<const VPValue *, 16> Worklist;

  Worklist.push_back(V);

  // Forward-walk the uses of V (through poison-propagating recipes), looking
  // for a use as the address of a load or store.
  while (!Worklist.empty()) {
    const VPValue *Current = Worklist.pop_back_val();
    if (!Visited.insert(Current).second)
      continue;

    for (VPUser *U : Current->users()) {
      // Check if Current is used as an address operand for load/store.
      if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U)) {
        if (MemR->getAddr() == Current)
          return true;
        continue;
      }
      // Replicated loads take the address as operand 0; replicated stores as
      // operand 1 (operand 0 is the stored value).
      if (auto *Rep = dyn_cast<VPReplicateRecipe>(U)) {
        unsigned Opcode = Rep->getOpcode();
        if ((Opcode == Instruction::Load && Rep->getOperand(0) == Current) ||
            (Opcode == Instruction::Store && Rep->getOperand(1) == Current))
          return true;
      }

      // Check if poison propagates through this recipe to any of its users.
      auto *R = cast<VPRecipeBase>(U);
      for (const VPValue *Op : R->operands()) {
        if (Op == Current && propagatesPoisonFromRecipeOp(R)) {
          Worklist.push_back(R->getVPSingleValue());
          break;
        }
      }
    }
  }

  // No path from V to a memory address was found.
  return false;
}
139
/// Try to construct a SCEV expression for \p V, recursing into its operands.
/// \p L is the IR loop used to build add-recurrences for induction recipes;
/// those cases bail out when it is null. Returns SCEVCouldNotCompute when \p V
/// (or any needed operand) cannot be modeled.
const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
                                           PredicatedScalarEvolution &PSE,
                                           const Loop *L) {
  ScalarEvolution &SE = *PSE.getSE();
  // Live-ins / symbolic values: defer to SCEV on the underlying IR value.
  if (isa<VPIRValue, VPSymbolicValue>(V)) {
    Value *LiveIn = V->getUnderlyingValue();
    if (LiveIn && SE.isSCEVable(LiveIn->getType()))
      return SE.getSCEV(LiveIn);
    return SE.getCouldNotCompute();
  }

  // Helper to create SCEVs for binary and unary operations. Translates all
  // operands first and bails out if any of them cannot be translated.
  auto CreateSCEV = [&](ArrayRef<VPValue *> Ops,
                        function_ref<const SCEV *(ArrayRef<SCEVUse>)> CreateFn)
      -> const SCEV * {
    SmallVector<SCEVUse, 2> SCEVOps;
    for (VPValue *Op : Ops) {
      const SCEV *S = getSCEVExprForVPValue(Op, PSE, L);
      if (isa<SCEVCouldNotCompute>(S))
        return SE.getCouldNotCompute();
      SCEVOps.push_back(S);
    }
    return CreateFn(SCEVOps);
  };

  VPValue *LHSVal, *RHSVal;
  if (match(V, m_Add(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getAddExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Sub(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getMinusSCEV(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Not(m_VPValue(LHSVal)))) {
    // not X = xor X, -1 = -1 - X
    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getMinusSCEV(SE.getMinusOne(Ops[0]->getType()), Ops[0]);
    });
  }
  if (match(V, m_Mul(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getMulExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V,
            m_Binary<Instruction::UDiv>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getUDivExpr(Ops[0], Ops[1]);
    });
  // Handle AND with constant mask: x & (2^n - 1) can be represented as x % 2^n.
  const APInt *Mask;
  if (match(V, m_c_BinaryAnd(m_VPValue(LHSVal), m_APInt(Mask))) &&
      (*Mask + 1).isPowerOf2())
    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getURemExpr(Ops[0], SE.getConstant(*Mask + 1));
    });
  if (match(V, m_Trunc(m_VPValue(LHSVal)))) {
    // The destination type must be inferred from the recipe, as the SCEV of
    // the operand has the source type.
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getTruncateExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_ZExt(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getZeroExtendExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_SExt(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);

    // Mirror SCEV's createSCEV handling for sext(sub nsw): push sign extension
    // onto the operands before computing the subtraction. Only valid when the
    // sub being poison is guaranteed to trigger UB (see poisonGuaranteesUB).
    VPValue *SubLHS, *SubRHS;
    auto *SubR = dyn_cast<VPRecipeWithIRFlags>(LHSVal);
    if (match(LHSVal, m_Sub(m_VPValue(SubLHS), m_VPValue(SubRHS))) && SubR &&
        SubR->hasNoSignedWrap() && poisonGuaranteesUB(LHSVal)) {
      const SCEV *V1 = getSCEVExprForVPValue(SubLHS, PSE, L);
      const SCEV *V2 = getSCEVExprForVPValue(SubRHS, PSE, L);
      if (!isa<SCEVCouldNotCompute>(V1) && !isa<SCEVCouldNotCompute>(V2))
        return SE.getMinusSCEV(SE.getSignExtendExpr(V1, DestTy),
                               SE.getSignExtendExpr(V2, DestTy), SCEV::FlagNSW);
    }

    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getSignExtendExpr(Ops[0], DestTy);
    });
  }
  // Min/max intrinsics map directly onto SCEV's min/max expressions.
  if (match(V,
            m_Intrinsic<Intrinsic::umax>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getUMaxExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::smax>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getSMaxExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::umin>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getUMinExpr(Ops[0], Ops[1]);
    });
  if (match(V,
            m_Intrinsic<Intrinsic::smin>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getSMinExpr(Ops[0], Ops[1]);
    });

  ArrayRef<VPValue *> Ops;
  Type *SourceElementType;
  if (match(V, m_GetElementPtr(SourceElementType, Ops))) {
    const SCEV *GEPExpr = CreateSCEV(Ops, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getGEPExpr(Ops.front(), Ops.drop_front(), SourceElementType);
    });
    return PSE.getPredicatedSCEV(GEPExpr);
  }

  // TODO: Support constructing SCEVs for more recipes as needed.
  const VPRecipeBase *DefR = V->getDefiningRecipe();
  const SCEV *Expr =
      TypeSwitch<const VPRecipeBase *, const SCEV *>(DefR)
          .Case([](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
          .Case([&SE, &PSE, L](const VPCanonicalIVPHIRecipe *R) {
            // Canonical IV: {start,+,1}<L>.
            if (!L)
              return SE.getCouldNotCompute();
            const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L,
                                    SCEV::FlagAnyWrap);
          })
          .Case([&SE, &PSE, L](const VPWidenIntOrFpInductionRecipe *R) {
            // Widened induction: {start,+,step}<L>, possibly truncated.
            const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
            if (!L || isa<SCEVCouldNotCompute>(Step))
              return SE.getCouldNotCompute();
            const SCEV *Start =
                getSCEVExprForVPValue(R->getStartValue(), PSE, L);
            const SCEV *AddRec =
                SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
            if (R->getTruncInst())
              return SE.getTruncateExpr(AddRec, R->getScalarType());
            return AddRec;
          })
          .Case([&SE, &PSE, L](const VPWidenPointerInductionRecipe *R) {
            const SCEV *Start =
                getSCEVExprForVPValue(R->getStartValue(), PSE, L);
            if (!L || isa<SCEVCouldNotCompute>(Start))
              return SE.getCouldNotCompute();
            const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
            if (isa<SCEVCouldNotCompute>(Step))
              return SE.getCouldNotCompute();
            return SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
          })
          .Case([&SE, &PSE, L](const VPDerivedIVRecipe *R) {
            // Derived IV computes start + IV * scale.
            const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
            const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), PSE, L);
            if (any_of(ArrayRef({Start, IV, Scale}),
                       IsaPred<SCEVCouldNotCompute>))
              return SE.getCouldNotCompute();

            return SE.getAddExpr(
                SE.getTruncateOrSignExtend(Start, IV->getType()),
                SE.getMulExpr(
                    IV, SE.getTruncateOrSignExtend(Scale, IV->getType())));
          })
          .Case([&SE, &PSE, L](const VPScalarIVStepsRecipe *R) {
            const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
            // Only constant steps are supported here; the IV is converted to
            // the step's type.
            if (isa<SCEVCouldNotCompute>(IV) || !isa<SCEVConstant>(Step))
              return SE.getCouldNotCompute();
            return SE.getTruncateOrSignExtend(IV, Step->getType());
          })
          .Default(
              [&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });

  return PSE.getPredicatedSCEV(Expr);
}
320
321bool vputils::isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE,
322 const Loop *L) {
323 // If address is an SCEVAddExpr, we require that all operands must be either
324 // be invariant or a (possibly sign-extend) affine AddRec.
325 if (auto *PtrAdd = dyn_cast<SCEVAddExpr>(Val: Addr)) {
326 return all_of(Range: PtrAdd->operands(), P: [&SE, L](const SCEV *Op) {
327 return SE.isLoopInvariant(S: Op, L) ||
328 match(S: Op, P: m_scev_SExt(Op0: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV()))) ||
329 match(S: Op, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV()));
330 });
331 }
332
333 // Otherwise, check if address is loop invariant or an affine add recurrence.
334 return SE.isLoopInvariant(S: Addr, L) ||
335 match(S: Addr, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV()));
336}
337
338/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are
339/// uniform, the result will also be uniform.
340static bool preservesUniformity(unsigned Opcode) {
341 if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
342 return true;
343 switch (Opcode) {
344 case Instruction::Freeze:
345 case Instruction::GetElementPtr:
346 case Instruction::ICmp:
347 case Instruction::FCmp:
348 case Instruction::Select:
349 case VPInstruction::Not:
350 case VPInstruction::Broadcast:
351 case VPInstruction::MaskedCond:
352 case VPInstruction::PtrAdd:
353 return true;
354 default:
355 return false;
356 }
357}
358
/// Returns true if \p VPV produces a single scalar value, i.e. the same value
/// for all lanes of a vector iteration.
bool vputils::isSingleScalar(const VPValue *VPV) {
  // A live-in must be uniform across the scope of VPlan.
  if (isa<VPIRValue, VPSymbolicValue>(VPV))
    return true;

  if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
    const VPRegionBlock *RegionOfR = Rep->getRegion();
    // Don't consider recipes in replicate regions as uniform yet; their first
    // lane cannot be accessed when executing the replicate region for other
    // lanes.
    if (RegionOfR && RegionOfR->isReplicator())
      return false;
    // Either explicitly single-scalar, or a uniformity-preserving op over
    // single-scalar operands.
    return Rep->isSingleScalar() || (preservesUniformity(Rep->getOpcode()) &&
                                     all_of(Rep->operands(), isSingleScalar));
  }
  // These recipes are single-scalar iff all their operands are.
  if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe>(VPV))
    return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar);
  if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
    return preservesUniformity(WidenR->getOpcode()) &&
           all_of(WidenR->operands(), isSingleScalar);
  }
  if (auto *VPI = dyn_cast<VPInstruction>(VPV))
    return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
           (preservesUniformity(VPI->getOpcode()) &&
            all_of(VPI->operands(), isSingleScalar));
  // Full reductions produce a single scalar; partial reductions do not.
  if (auto *RR = dyn_cast<VPReductionRecipe>(VPV))
    return !RR->isPartialReduction();
  if (isa<VPCanonicalIVPHIRecipe, VPVectorPointerRecipe,
          VPVectorEndPointerRecipe>(VPV))
    return true;
  if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
    return Expr->isSingleScalar();

  // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
  return isa<VPExpandSCEVRecipe>(VPV);
}
395
/// Returns true if \p V's value is identical regardless of the chosen VF and
/// UF, i.e. it neither depends on the vectorization factor nor on the unroll
/// part.
bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
  // Live-ins are uniform.
  if (isa<VPIRValue, VPSymbolicValue>(V))
    return true;

  VPRecipeBase *R = V->getDefiningRecipe();
  VPBasicBlock *VPBB = R ? R->getParent() : nullptr;
  VPlan *Plan = VPBB ? VPBB->getPlan() : nullptr;
  // Recipes in the preheader/entry run once, so they are uniform as long as
  // their operands are -- except for per-part IV increments.
  if (VPBB &&
      (VPBB == Plan->getVectorPreheader() || VPBB == Plan->getEntry())) {
    if (match(V->getDefiningRecipe(),
              m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
      return false;
    return all_of(R->operands(), isUniformAcrossVFsAndUFs);
  }

  // NOTE(review): VPBB is dereferenced here without a null check; this relies
  // on every non-live-in value having a defining recipe with a parent block.
  if (VPRegionBlock *EnclosingRegion = VPBB->getEnclosingLoopRegion()) {
    // Canonical IV is uniform.
    if (V == EnclosingRegion->getCanonicalIV())
      return true;
  }

  return TypeSwitch<const VPRecipeBase *, bool>(R)
      .Case([](const VPDerivedIVRecipe *R) { return true; })
      .Case([](const VPReplicateRecipe *R) {
        // Be conservative about side-effects, except for the
        // known-side-effecting assumes and stores, which we know will be
        // uniform.
        return R->isSingleScalar() &&
               (!R->mayHaveSideEffects() ||
                isa<AssumeInst, StoreInst>(R->getUnderlyingInstr())) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case([](const VPWidenRecipe *R) {
        // Uniformity-preserving ops over uniform operands stay uniform.
        return preservesUniformity(R->getOpcode()) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case([](const VPInstruction *VPI) {
        return (VPI->isScalarCast() &&
                isUniformAcrossVFsAndUFs(VPI->getOperand(0))) ||
               (preservesUniformity(VPI->getOpcode()) &&
                all_of(VPI->operands(), isUniformAcrossVFsAndUFs));
      })
      .Case([](const VPWidenCastRecipe *R) {
        // A cast is uniform according to its operand.
        return isUniformAcrossVFsAndUFs(R->getOperand(0));
      })
      .Default([](const VPRecipeBase *) { // A value is considered non-uniform
                                          // unless proven otherwise.
        return false;
      });
}
448
449VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
450 auto DepthFirst = vp_depth_first_shallow(G: Plan.getEntry());
451 auto I = find_if(Range&: DepthFirst, P: [&VPDT](VPBlockBase *VPB) {
452 return VPBlockUtils::isHeader(VPB, VPDT);
453 });
454 return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(Val: *I);
455}
456
457unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
458 if (!R)
459 return 1;
460 if (auto *RR = dyn_cast<VPReductionPHIRecipe>(Val: R))
461 return RR->getVFScaleFactor();
462 if (auto *RR = dyn_cast<VPReductionRecipe>(Val: R))
463 return RR->getVFScaleFactor();
464 if (auto *ER = dyn_cast<VPExpressionRecipe>(Val: R))
465 return ER->getVFScaleFactor();
466 assert(
467 (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
468 VPInstruction::ReductionStartVector) &&
469 "getting scaling factor of reduction-start-vector not implemented yet");
470 return 1;
471}
472
/// Collect into \p Recipes the recipes feeding the uncountable exit condition
/// of \p Plan (and into \p GEPs the address computations among them). Returns
/// the condition value on success, std::nullopt if the plan's exit shape is
/// not supported.
std::optional<VPValue *>
vputils::getRecipesForUncountableExit(VPlan &Plan,
                                      SmallVectorImpl<VPRecipeBase *> &Recipes,
                                      SmallVectorImpl<VPRecipeBase *> &GEPs) {
  // Given a VPlan like the following (just including the recipes contributing
  // to loop control exiting here, not the actual work), we're looking to match
  // the recipes contributing to the uncountable exit condition comparison
  // (here, vp<%4>) back to either live-ins or the address nodes for the load
  // used as part of the uncountable exit comparison so that we can copy them
  // to a preheader and rotate the address in the loop to the next vector
  // iteration.
  //
  // Currently, the address of the load is restricted to a GEP with 2 operands
  // and a live-in base address. This constraint may be relaxed later.
  //
  // VPlan ' for UF>=1' {
  // Live-in vp<%0> = VF
  // Live-in ir<64> = original trip-count
  //
  // entry:
  // Successor(s): preheader, vector.ph
  //
  // vector.ph:
  // Successor(s): vector loop
  //
  // <x1> vector loop: {
  //   vector.body:
  //     EMIT vp<%2> = CANONICAL-INDUCTION ir<0>
  //     vp<%3> = SCALAR-STEPS vp<%2>, ir<1>, vp<%0>
  //     CLONE ir<%ee.addr> = getelementptr ir<0>, vp<%3>
  //     WIDEN ir<%ee.load> = load ir<%ee.addr>
  //     WIDEN vp<%4> = icmp eq ir<%ee.load>, ir<0>
  //     EMIT vp<%5> = any-of vp<%4>
  //     EMIT vp<%6> = add vp<%2>, vp<%0>
  //     EMIT vp<%7> = icmp eq vp<%6>, ir<64>
  //     EMIT branch-on-two-conds vp<%5>, vp<%7>
  //   No successors
  // }
  // Successor(s): early.exit, middle.block
  //
  // middle.block:
  // Successor(s): preheader
  //
  // preheader:
  // No successors
  // }

  // Find the uncountable loop exit condition.
  auto *Region = Plan.getVectorLoopRegion();
  VPValue *UncountableCondition = nullptr;
  if (!match(Region->getExitingBasicBlock()->getTerminator(),
             m_BranchOnTwoConds(m_AnyOf(m_VPValue(UncountableCondition)),
                                m_VPValue())))
    return std::nullopt;

  // Walk backwards from the condition, collecting supported recipes.
  SmallVector<VPValue *, 4> Worklist;
  Worklist.push_back(UncountableCondition);
  while (!Worklist.empty()) {
    VPValue *V = Worklist.pop_back_val();

    // Any value defined outside the loop does not need to be copied.
    if (V->isDefinedOutsideLoopRegions())
      continue;

    // FIXME: Remove the single user restriction; it's here because we're
    //        starting with the simplest set of loops we can, and multiple
    //        users means needing to add PHI nodes in the transform.
    if (V->getNumUsers() > 1)
      return std::nullopt;

    VPValue *Op1, *Op2;
    // Walk back through recipes until we find at least one load from memory.
    if (match(V, m_ICmp(m_VPValue(Op1), m_VPValue(Op2)))) {
      Worklist.push_back(Op1);
      Worklist.push_back(Op2);
      Recipes.push_back(V->getDefiningRecipe());
    } else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(V)) {
      // Reject masked loads for the time being; they make the exit condition
      // more complex.
      if (Load->isMasked())
        return std::nullopt;

      // Only GEPs with a live-in base and a single index are supported.
      VPValue *GEP = Load->getAddr();
      if (!match(GEP, m_GetElementPtr(m_LiveIn(), m_VPValue())))
        return std::nullopt;

      Recipes.push_back(Load);
      Recipes.push_back(GEP->getDefiningRecipe());
      GEPs.push_back(GEP->getDefiningRecipe());
    } else
      return std::nullopt;
  }

  return UncountableCondition;
}
568
/// Find and return the header mask recipe of \p Plan -- the single compare of
/// a wide canonical IV against the backedge-taken count -- or nullptr if the
/// plan has none.
VPSingleDefRecipe *vputils::findHeaderMask(VPlan &Plan) {
  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
  SmallVector<VPValue *> WideCanonicalIVs;
  // There is at most one VPWidenCanonicalIVRecipe using the canonical IV.
  auto *FoundWidenCanonicalIVUser = find_if(
      LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>);
  assert(count_if(LoopRegion->getCanonicalIV()->users(),
                  IsaPred<VPWidenCanonicalIVRecipe>) <= 1 &&
         "Must have at most one VPWideCanonicalIVRecipe");
  if (FoundWidenCanonicalIVUser !=
      LoopRegion->getCanonicalIV()->users().end()) {
    auto *WideCanonicalIV =
        cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
    WideCanonicalIVs.push_back(WideCanonicalIV);
  }

  // Also include VPWidenIntOrFpInductionRecipes that represent a widened
  // version of the canonical induction.
  VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
    auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (WidenOriginalIV && WidenOriginalIV->isCanonical())
      WideCanonicalIVs.push_back(WidenOriginalIV);
  }

  // Walk users of wide canonical IVs and find the single compare of the form
  // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
  VPSingleDefRecipe *HeaderMask = nullptr;
  for (auto *Wide : WideCanonicalIVs) {
    for (VPUser *U : Wide->users()) {
      auto *VPI = dyn_cast<VPInstruction>(U);
      if (!VPI || !vputils::isHeaderMask(VPI, Plan))
        continue;

      assert(VPI->getOperand(0) == Wide &&
             "WidenCanonicalIV must be the first operand of the compare");
      assert(!HeaderMask && "Multiple header masks found?");
      HeaderMask = VPI;
    }
  }
  return HeaderMask;
}
610
611bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
612 const VPDominatorTree &VPDT) {
613 auto *VPBB = dyn_cast<VPBasicBlock>(Val: VPB);
614 if (!VPBB)
615 return false;
616
617 // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
618 // VPBB as its entry, i.e., free of predecessors.
619 if (auto *R = VPBB->getParent())
620 return !R->isReplicator() && !VPBB->hasPredecessors();
621
622 // A header dominates its second predecessor (the latch), with the other
623 // predecessor being the preheader
624 return VPB->getPredecessors().size() == 2 &&
625 VPDT.dominates(A: VPB, B: VPB->getPredecessors()[1]);
626}
627
628bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
629 const VPDominatorTree &VPDT) {
630 // A latch has a header as its second successor, with its other successor
631 // leaving the loop. A preheader OTOH has a header as its first (and only)
632 // successor.
633 return VPB->getNumSuccessors() == 2 &&
634 VPBlockUtils::isHeader(VPB: VPB->getSuccessors()[1], VPDT);
635}
636
637std::optional<MemoryLocation>
638vputils::getMemoryLocation(const VPRecipeBase &R) {
639 auto *M = dyn_cast<VPIRMetadata>(Val: &R);
640 if (!M)
641 return std::nullopt;
642 MemoryLocation Loc;
643 // Populate noalias metadata from VPIRMetadata.
644 if (MDNode *NoAliasMD = M->getMetadata(Kind: LLVMContext::MD_noalias))
645 Loc.AATags.NoAlias = NoAliasMD;
646 if (MDNode *AliasScopeMD = M->getMetadata(Kind: LLVMContext::MD_alias_scope))
647 Loc.AATags.Scope = AliasScopeMD;
648 return Loc;
649}
650
/// Find the ComputeReductionResult recipe for \p PhiR, looking through selects
/// inserted for predicated reductions or tail folding. Returns nullptr if no
/// such recipe exists.
VPInstruction *vputils::findComputeReductionResult(VPReductionPHIRecipe *PhiR) {
  VPValue *BackedgeVal = PhiR->getBackedgeValue();
  // Common case: the backedge value feeds the compute directly.
  if (auto *Res = vputils::findUserOf<VPInstruction::ComputeReductionResult>(
          BackedgeVal))
    return Res;

  // Look through selects inserted for tail folding or predicated reductions.
  VPRecipeBase *SelR = vputils::findUserOf(
      BackedgeVal, m_Select(m_VPValue(), m_VPValue(), m_VPValue()));
  if (!SelR)
    return nullptr;
  return vputils::findUserOf<VPInstruction::ComputeReductionResult>(
      cast<VPSingleDefRecipe>(SelR));
}
667