1//===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "VPlanUtils.h"
10#include "VPlanAnalysis.h"
11#include "VPlanCFG.h"
12#include "VPlanDominatorTree.h"
13#include "VPlanPatternMatch.h"
14#include "llvm/ADT/TypeSwitch.h"
15#include "llvm/Analysis/MemoryLocation.h"
16#include "llvm/Analysis/ScalarEvolutionExpressions.h"
17#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
18
19using namespace llvm;
20using namespace llvm::VPlanPatternMatch;
21using namespace llvm::SCEVPatternMatch;
22
23bool vputils::onlyFirstLaneUsed(const VPValue *Def) {
24 return all_of(Range: Def->users(),
25 P: [Def](const VPUser *U) { return U->usesFirstLaneOnly(Op: Def); });
26}
27
28bool vputils::onlyFirstPartUsed(const VPValue *Def) {
29 return all_of(Range: Def->users(),
30 P: [Def](const VPUser *U) { return U->usesFirstPartOnly(Op: Def); });
31}
32
33bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
34 return all_of(Range: Def->users(),
35 P: [Def](const VPUser *U) { return U->usesScalars(Op: Def); });
36}
37
38VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr) {
39 if (auto *E = dyn_cast<SCEVConstant>(Val: Expr))
40 return Plan.getOrAddLiveIn(V: E->getValue());
41 // Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
42 // value. Otherwise the value may be defined in a loop and using it directly
43 // will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
44 // form.
45 auto *U = dyn_cast<SCEVUnknown>(Val: Expr);
46 if (U && !isa<Instruction>(Val: U->getValue()))
47 return Plan.getOrAddLiveIn(V: U->getValue());
48 auto *Expanded = new VPExpandSCEVRecipe(Expr);
49 Plan.getEntry()->appendRecipe(Recipe: Expanded);
50 return Expanded;
51}
52
/// Returns true if \p V is a header mask of \p Plan, i.e. a mask that enables
/// exactly the lanes corresponding to iterations still inside the trip count.
/// Recognizes the active-lane-mask phi, active-lane-mask computations, and
/// compares of (wide or scalar-steps) canonical IVs against the trip/BTC.
bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
  // An active-lane-mask phi is a header mask by construction.
  if (isa<VPActiveLaneMaskPHIRecipe>(V))
    return true;

  // Helper: is \p A a widened form of the canonical induction?
  auto IsWideCanonicalIV = [](VPValue *A) {
    return isa<VPWidenCanonicalIVRecipe>(A) ||
           (isa<VPWidenIntOrFpInductionRecipe>(A) &&
            cast<VPWidenIntOrFpInductionRecipe>(A)->isCanonical());
  };

  VPValue *A, *B;

  // Matches scalar steps of the canonical IV — either directly or via a
  // trivial derived IV (0 + CanonicalIV * 1) — with unit step and VF scale.
  auto m_CanonicalScalarIVSteps = m_ScalarIVSteps(
      m_CombineOr(m_CanonicalIV(),
                  m_DerivedIV(m_ZeroInt(), m_CanonicalIV(), m_One())),
      m_One(), m_Specific(&Plan.getVF()));

  // Active-lane-mask form: the second operand must be the trip count and the
  // first must be (scalar steps of) the canonical IV.
  if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
    return B == Plan.getTripCount() &&
           (match(A, m_CanonicalScalarIVSteps) || IsWideCanonicalIV(A));

  // For scalar plans, the header mask uses the scalar steps.
  if (match(V, m_ICmp(m_CanonicalScalarIVSteps,
                      m_Specific(Plan.getBackedgeTakenCount())))) {
    assert(Plan.hasScalarVFOnly() &&
           "Non-scalar VF using scalar IV steps for header mask?");
    return true;
  }

  // Otherwise require a compare of a wide canonical IV against the
  // backedge-taken count.
  return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
         B == Plan.getBackedgeTakenCount();
}
85
86/// Returns true if \p R propagates poison from any operand to its result.
87static bool propagatesPoisonFromRecipeOp(const VPRecipeBase *R) {
88 return TypeSwitch<const VPRecipeBase *, bool>(R)
89 .Case<VPWidenGEPRecipe, VPWidenCastRecipe>(
90 caseFn: [](const VPRecipeBase *) { return true; })
91 .Case(caseFn: [](const VPReplicateRecipe *Rep) {
92 // GEP and casts propagate poison from all operands.
93 unsigned Opcode = Rep->getOpcode();
94 return Opcode == Instruction::GetElementPtr ||
95 Instruction::isCast(Opcode);
96 })
97 .Default(defaultFn: [](const VPRecipeBase *) { return false; });
98}
99
100/// Returns true if \p V being poison is guaranteed to trigger UB because it
101/// propagates to the address of a memory recipe.
102static bool poisonGuaranteesUB(const VPValue *V) {
103 SmallPtrSet<const VPValue *, 8> Visited;
104 SmallVector<const VPValue *, 16> Worklist;
105
106 Worklist.push_back(Elt: V);
107
108 while (!Worklist.empty()) {
109 const VPValue *Current = Worklist.pop_back_val();
110 if (!Visited.insert(Ptr: Current).second)
111 continue;
112
113 for (VPUser *U : Current->users()) {
114 // Check if Current is used as an address operand for load/store.
115 if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(Val: U)) {
116 if (MemR->getAddr() == Current)
117 return true;
118 continue;
119 }
120 if (auto *Rep = dyn_cast<VPReplicateRecipe>(Val: U)) {
121 unsigned Opcode = Rep->getOpcode();
122 if ((Opcode == Instruction::Load && Rep->getOperand(N: 0) == Current) ||
123 (Opcode == Instruction::Store && Rep->getOperand(N: 1) == Current))
124 return true;
125 }
126
127 // Check if poison propagates through this recipe to any of its users.
128 auto *R = cast<VPRecipeBase>(Val: U);
129 for (const VPValue *Op : R->operands()) {
130 if (Op == Current && propagatesPoisonFromRecipeOp(R)) {
131 Worklist.push_back(Elt: R->getVPSingleValue());
132 break;
133 }
134 }
135 }
136 }
137
138 return false;
139}
140
/// Try to build a SCEV expression describing \p V, returning
/// SCEVCouldNotCompute when no expression can be constructed. Live-ins map to
/// the SCEV of their underlying IR value; defined values are translated
/// structurally, recursing into operands. \p L is the loop used for add
/// recurrences of induction recipes (may be null, in which case inductions
/// cannot be translated). The result is post-processed by \p PSE to apply
/// predicated-SCEV rewriting.
const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
                                           PredicatedScalarEvolution &PSE,
                                           const Loop *L) {
  ScalarEvolution &SE = *PSE.getSE();
  // Live-ins / symbolic values: use the underlying IR value's SCEV, if it
  // exists and its type is SCEVable.
  if (isa<VPIRValue, VPSymbolicValue>(V)) {
    Value *LiveIn = V->getUnderlyingValue();
    if (LiveIn && SE.isSCEVable(LiveIn->getType()))
      return SE.getSCEV(LiveIn);
    return SE.getCouldNotCompute();
  }

  // Helper to create SCEVs for binary and unary operations: translates all
  // operands first and bails out with CouldNotCompute if any operand fails.
  auto CreateSCEV = [&](ArrayRef<VPValue *> Ops,
                        function_ref<const SCEV *(ArrayRef<SCEVUse>)> CreateFn)
      -> const SCEV * {
    SmallVector<SCEVUse, 2> SCEVOps;
    for (VPValue *Op : Ops) {
      const SCEV *S = getSCEVExprForVPValue(Op, PSE, L);
      if (isa<SCEVCouldNotCompute>(S))
        return SE.getCouldNotCompute();
      SCEVOps.push_back(S);
    }
    return CreateFn(SCEVOps);
  };

  VPValue *LHSVal, *RHSVal;
  if (match(V, m_Add(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getAddExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Sub(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getMinusSCEV(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Not(m_VPValue(LHSVal)))) {
    // not X = xor X, -1 = -1 - X
    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getMinusSCEV(SE.getMinusOne(Ops[0]->getType()), Ops[0]);
    });
  }
  if (match(V, m_Mul(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getMulExpr(Ops[0], Ops[1], SCEV::FlagAnyWrap, 0);
    });
  if (match(V, m_Binary<Instruction::UDiv>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getUDivExpr(Ops[0], Ops[1]);
    });
  // Handle AND with constant mask: x & (2^n - 1) can be represented as x % 2^n.
  const APInt *Mask;
  if (match(V, m_c_BinaryAnd(m_VPValue(LHSVal), m_APInt(Mask))) &&
      (*Mask + 1).isPowerOf2())
    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getURemExpr(Ops[0], SE.getConstant(*Mask + 1));
    });
  // Casts: infer the destination type from the recipe's scalar type.
  if (match(V, m_Trunc(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getTruncateExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_ZExt(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);
    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getZeroExtendExpr(Ops[0], DestTy);
    });
  }
  if (match(V, m_SExt(m_VPValue(LHSVal)))) {
    const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
    Type *DestTy = VPTypeAnalysis(*Plan).inferScalarType(V);

    // Mirror SCEV's createSCEV handling for sext(sub nsw): push sign extension
    // onto the operands before computing the subtraction. This is only valid
    // when poison from the sub is guaranteed to trigger UB (otherwise the
    // nsw flag cannot be relied upon).
    VPValue *SubLHS, *SubRHS;
    auto *SubR = dyn_cast<VPRecipeWithIRFlags>(LHSVal);
    if (match(LHSVal, m_Sub(m_VPValue(SubLHS), m_VPValue(SubRHS))) && SubR &&
        SubR->hasNoSignedWrap() && poisonGuaranteesUB(LHSVal)) {
      const SCEV *V1 = getSCEVExprForVPValue(SubLHS, PSE, L);
      const SCEV *V2 = getSCEVExprForVPValue(SubRHS, PSE, L);
      if (!isa<SCEVCouldNotCompute>(V1) && !isa<SCEVCouldNotCompute>(V2))
        return SE.getMinusSCEV(SE.getSignExtendExpr(V1, DestTy),
                               SE.getSignExtendExpr(V2, DestTy), SCEV::FlagNSW);
    }

    return CreateSCEV({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getSignExtendExpr(Ops[0], DestTy);
    });
  }
  // Min/max intrinsics map directly onto the corresponding SCEV expressions.
  if (match(V, m_Intrinsic<Intrinsic::umax>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getUMaxExpr(Ops[0], Ops[1]);
    });
  if (match(V, m_Intrinsic<Intrinsic::smax>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getSMaxExpr(Ops[0], Ops[1]);
    });
  if (match(V, m_Intrinsic<Intrinsic::umin>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getUMinExpr(Ops[0], Ops[1]);
    });
  if (match(V, m_Intrinsic<Intrinsic::smin>(m_VPValue(LHSVal), m_VPValue(RHSVal))))
    return CreateSCEV({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getSMinExpr(Ops[0], Ops[1]);
    });

  // GEPs: first operand is the base, the rest are indices.
  ArrayRef<VPValue *> Ops;
  Type *SourceElementType;
  if (match(V, m_GetElementPtr(SourceElementType, Ops))) {
    const SCEV *GEPExpr = CreateSCEV(Ops, [&](ArrayRef<SCEVUse> Ops) {
      return SE.getGEPExpr(Ops.front(), Ops.drop_front(), SourceElementType);
    });
    return PSE.getPredicatedSCEV(GEPExpr);
  }

  // TODO: Support constructing SCEVs for more recipes as needed.
  const VPRecipeBase *DefR = V->getDefiningRecipe();
  const SCEV *Expr =
      TypeSwitch<const VPRecipeBase *, const SCEV *>(DefR)
          // An expand recipe already carries its SCEV.
          .Case([](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
          // The canonical IV is {start,+,1} in loop L.
          .Case([&SE, &PSE, L](const VPCanonicalIVPHIRecipe *R) {
            if (!L)
              return SE.getCouldNotCompute();
            const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L,
                                    SCEV::FlagAnyWrap);
          })
          // Widened int/fp induction: {start,+,step}, possibly truncated.
          .Case([&SE, &PSE, L](const VPWidenIntOrFpInductionRecipe *R) {
            const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
            if (!L || isa<SCEVCouldNotCompute>(Step))
              return SE.getCouldNotCompute();
            const SCEV *Start =
                getSCEVExprForVPValue(R->getStartValue(), PSE, L);
            const SCEV *AddRec =
                SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
            if (R->getTruncInst())
              return SE.getTruncateExpr(AddRec, R->getScalarType());
            return AddRec;
          })
          // Widened pointer induction: {start,+,step}.
          .Case([&SE, &PSE, L](const VPWidenPointerInductionRecipe *R) {
            const SCEV *Start =
                getSCEVExprForVPValue(R->getStartValue(), PSE, L);
            if (!L || isa<SCEVCouldNotCompute>(Start))
              return SE.getCouldNotCompute();
            const SCEV *Step = getSCEVExprForVPValue(R->getStepValue(), PSE, L);
            if (isa<SCEVCouldNotCompute>(Step))
              return SE.getCouldNotCompute();
            return SE.getAddRecExpr(Start, Step, L, SCEV::FlagAnyWrap);
          })
          // Derived IV: Start + IV * Scale (Start/Scale adjusted to IV type).
          .Case([&SE, &PSE, L](const VPDerivedIVRecipe *R) {
            const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
            const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), PSE, L);
            if (any_of(ArrayRef({Start, IV, Scale}),
                       IsaPred<SCEVCouldNotCompute>))
              return SE.getCouldNotCompute();

            return SE.getAddExpr(
                SE.getTruncateOrSignExtend(Start, IV->getType()),
                SE.getMulExpr(
                    IV, SE.getTruncateOrSignExtend(Scale, IV->getType())));
          })
          // Scalar IV steps: the IV adjusted to the (constant) step's type.
          .Case([&SE, &PSE, L](const VPScalarIVStepsRecipe *R) {
            const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), PSE, L);
            const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), PSE, L);
            if (isa<SCEVCouldNotCompute>(IV) || !isa<SCEVConstant>(Step))
              return SE.getCouldNotCompute();
            return SE.getTruncateOrSignExtend(IV, Step->getType());
          })
          .Default(
              [&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });

  return PSE.getPredicatedSCEV(Expr);
}
321
322bool vputils::isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE,
323 const Loop *L) {
324 // If address is an SCEVAddExpr, we require that all operands must be either
325 // be invariant or a (possibly sign-extend) affine AddRec.
326 if (auto *PtrAdd = dyn_cast<SCEVAddExpr>(Val: Addr)) {
327 return all_of(Range: PtrAdd->operands(), P: [&SE, L](const SCEV *Op) {
328 return SE.isLoopInvariant(S: Op, L) ||
329 match(S: Op, P: m_scev_SExt(Op0: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV()))) ||
330 match(S: Op, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV()));
331 });
332 }
333
334 // Otherwise, check if address is loop invariant or an affine add recurrence.
335 return SE.isLoopInvariant(S: Addr, L) ||
336 match(S: Addr, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV()));
337}
338
339/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are
340/// uniform, the result will also be uniform.
341static bool preservesUniformity(unsigned Opcode) {
342 if (Instruction::isBinaryOp(Opcode) || Instruction::isCast(Opcode))
343 return true;
344 switch (Opcode) {
345 case Instruction::Freeze:
346 case Instruction::GetElementPtr:
347 case Instruction::ICmp:
348 case Instruction::FCmp:
349 case Instruction::Select:
350 case VPInstruction::Not:
351 case VPInstruction::Broadcast:
352 case VPInstruction::MaskedCond:
353 case VPInstruction::PtrAdd:
354 return true;
355 default:
356 return false;
357 }
358}
359
/// Returns true if \p VPV produces a single scalar value (uniform across
/// lanes), determined by the defining recipe's kind and, for
/// uniformity-preserving recipes, recursively by its operands.
bool vputils::isSingleScalar(const VPValue *VPV) {
  // A live-in must be uniform across the scope of VPlan.
  if (isa<VPIRValue, VPSymbolicValue>(VPV))
    return true;

  if (auto *Rep = dyn_cast<VPReplicateRecipe>(VPV)) {
    const VPRegionBlock *RegionOfR = Rep->getRegion();
    // Don't consider recipes in replicate regions as uniform yet; their first
    // lane cannot be accessed when executing the replicate region for other
    // lanes.
    if (RegionOfR && RegionOfR->isReplicator())
      return false;
    return Rep->isSingleScalar() || (preservesUniformity(Rep->getOpcode()) &&
                                     all_of(Rep->operands(), isSingleScalar));
  }
  // These recipes are uniform exactly when all their operands are.
  if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe>(VPV))
    return all_of(VPV->getDefiningRecipe()->operands(), isSingleScalar);
  if (auto *WidenR = dyn_cast<VPWidenRecipe>(VPV)) {
    return preservesUniformity(WidenR->getOpcode()) &&
           all_of(WidenR->operands(), isSingleScalar);
  }
  if (auto *VPI = dyn_cast<VPInstruction>(VPV))
    return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
           (preservesUniformity(VPI->getOpcode()) &&
            all_of(VPI->operands(), isSingleScalar));
  // Full reductions produce a single scalar; partial reductions keep a vector
  // of partial sums.
  if (auto *RR = dyn_cast<VPReductionRecipe>(VPV))
    return !RR->isPartialReduction();
  // These recipes produce a single scalar per part by construction.
  if (isa<VPCanonicalIVPHIRecipe, VPVectorPointerRecipe,
          VPVectorEndPointerRecipe>(VPV))
    return true;
  if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
    return Expr->isSingleScalar();

  // VPExpandSCEVRecipes must be placed in the entry and are always uniform.
  return isa<VPExpandSCEVRecipe>(VPV);
}
396
/// Returns true if \p V is uniform across all VFs and UFs, i.e. it produces
/// the same value regardless of the chosen vectorization/unroll factor.
bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
  // Live-ins are uniform.
  if (isa<VPIRValue, VPSymbolicValue>(V))
    return true;

  // Non-live-in values are expected to have a defining recipe here; the null
  // checks below are defensive.
  VPRecipeBase *R = V->getDefiningRecipe();
  VPBasicBlock *VPBB = R ? R->getParent() : nullptr;
  VPlan *Plan = VPBB ? VPBB->getPlan() : nullptr;
  if (VPBB &&
      (VPBB == Plan->getVectorPreheader() || VPBB == Plan->getEntry())) {
    // CanonicalIVIncrementForPart depends on the part, so it is not uniform
    // across UFs even though it is defined outside the loop.
    if (match(V->getDefiningRecipe(),
              m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
      return false;
    return all_of(R->operands(), isUniformAcrossVFsAndUFs);
  }

  if (VPRegionBlock *EnclosingRegion = VPBB->getEnclosingLoopRegion()) {
    // Canonical IV is uniform.
    if (V == EnclosingRegion->getCanonicalIV())
      return true;
  }

  return TypeSwitch<const VPRecipeBase *, bool>(R)
      .Case([](const VPDerivedIVRecipe *R) { return true; })
      .Case([](const VPReplicateRecipe *R) {
        // Be conservative about side-effects, except for the
        // known-side-effecting assumes and stores, which we know will be
        // uniform.
        return R->isSingleScalar() &&
               (!R->mayHaveSideEffects() ||
                isa<AssumeInst, StoreInst>(R->getUnderlyingInstr())) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case([](const VPWidenRecipe *R) {
        return preservesUniformity(R->getOpcode()) &&
               all_of(R->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case([](const VPInstruction *VPI) {
        return preservesUniformity(VPI->getOpcode()) &&
               all_of(VPI->operands(), isUniformAcrossVFsAndUFs);
      })
      .Case([](const VPWidenCastRecipe *R) {
        // A cast is uniform according to its operand.
        return isUniformAcrossVFsAndUFs(R->getOperand(0));
      })
      .Default([](const VPRecipeBase *) { // A value is considered non-uniform
                                          // unless proven otherwise.
        return false;
      });
}
447
448VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
449 auto DepthFirst = vp_depth_first_shallow(G: Plan.getEntry());
450 auto I = find_if(Range&: DepthFirst, P: [&VPDT](VPBlockBase *VPB) {
451 return VPBlockUtils::isHeader(VPB, VPDT);
452 });
453 return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(Val: *I);
454}
455
456unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
457 if (!R)
458 return 1;
459 if (auto *RR = dyn_cast<VPReductionPHIRecipe>(Val: R))
460 return RR->getVFScaleFactor();
461 if (auto *RR = dyn_cast<VPReductionRecipe>(Val: R))
462 return RR->getVFScaleFactor();
463 if (auto *ER = dyn_cast<VPExpressionRecipe>(Val: R))
464 return ER->getVFScaleFactor();
465 assert(
466 (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
467 VPInstruction::ReductionStartVector) &&
468 "getting scaling factor of reduction-start-vector not implemented yet");
469 return 1;
470}
471
472std::optional<VPValue *>
473vputils::getRecipesForUncountableExit(SmallVectorImpl<VPInstruction *> &Recipes,
474 SmallVectorImpl<VPInstruction *> &GEPs,
475 VPBasicBlock *LatchVPBB) {
476 // Given a plain CFG VPlan loop with countable latch exiting block
477 // \p LatchVPBB, we're looking to match the recipes contributing to the
478 // uncountable exit condition comparison (here, vp<%4>) back to either
479 // live-ins or the address nodes for the load used as part of the uncountable
480 // exit comparison so that we can either move them within the loop, or copy
481 // them to the preheader depending on the chosen method for dealing with
482 // stores in uncountable exit loops.
483 //
484 // Currently, the address of the load is restricted to a GEP with 2 operands
485 // and a live-in base address. This constraint may be relaxed later.
486 //
487 // VPlan ' for UF>=1' {
488 // Live-in vp<%0> = VF * UF
489 // Live-in vp<%1> = vector-trip-count
490 // Live-in ir<20> = original trip-count
491 //
492 // ir-bb<entry>:
493 // Successor(s): scalar.ph, vector.ph
494 //
495 // vector.ph:
496 // Successor(s): for.body
497 //
498 // for.body:
499 // EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
500 // EMIT-SCALAR ir<%iv> = phi [ ir<0>, vector.ph ], [ ir<%iv.next>, for.inc ]
501 // EMIT ir<%uncountable.addr> = getelementptr inbounds nuw ir<%pred>,ir<%iv>
502 // EMIT ir<%uncountable.val> = load ir<%uncountable.addr>
503 // EMIT ir<%uncountable.cond> = icmp sgt ir<%uncountable.val>, ir<500>
504 // EMIT vp<%3> = masked-cond ir<%uncountable.cond>
505 // Successor(s): for.inc
506 //
507 // for.inc:
508 // EMIT ir<%iv.next> = add nuw nsw ir<%iv>, ir<1>
509 // EMIT ir<%countable.cond> = icmp eq ir<%iv.next>, ir<20>
510 // EMIT vp<%index.next> = add nuw vp<%2>, vp<%0>
511 // EMIT vp<%4> = any-of ir<%3>
512 // EMIT vp<%5> = icmp eq vp<%index.next>, vp<%1>
513 // EMIT vp<%6> = or vp<%4>, vp<%5>
514 // EMIT branch-on-cond vp<%6>
515 // Successor(s): middle.block, for.body
516 //
517 // middle.block:
518 // Successor(s): ir-bb<exit>, scalar.ph
519 //
520 // ir-bb<exit>:
521 // No successors
522 //
523 // scalar.ph:
524 // }
525
526 // Find the uncountable loop exit condition.
527 VPValue *UncountableCondition = nullptr;
528 if (!match(V: LatchVPBB->getTerminator(),
529 P: m_BranchOnCond(Op0: m_c_BinaryOr(
530 Op0: m_AnyOf(Op0: m_VPValue(V&: UncountableCondition)), Op1: m_VPValue()))))
531 return std::nullopt;
532
533 SmallVector<VPValue *, 4> Worklist;
534 Worklist.push_back(Elt: UncountableCondition);
535 while (!Worklist.empty()) {
536 VPValue *V = Worklist.pop_back_val();
537
538 // Any value defined outside the loop does not need to be copied.
539 if (V->isDefinedOutsideLoopRegions())
540 continue;
541
542 // FIXME: Remove the single user restriction; it's here because we're
543 // starting with the simplest set of loops we can, and multiple
544 // users means needing to add PHI nodes in the transform.
545 if (V->getNumUsers() > 1)
546 return std::nullopt;
547
548 VPValue *Op1, *Op2;
549 // Walk back through recipes until we find at least one load from memory.
550 if (match(V, P: m_ICmp(Op0: m_VPValue(V&: Op1), Op1: m_VPValue(V&: Op2)))) {
551 Worklist.push_back(Elt: Op1);
552 Worklist.push_back(Elt: Op2);
553 Recipes.push_back(Elt: cast<VPInstruction>(Val: V->getDefiningRecipe()));
554 } else if (match(V, P: m_VPInstruction<Instruction::Load>(Ops: m_VPValue(V&: Op1)))) {
555 VPRecipeBase *GepR = Op1->getDefiningRecipe();
556 // Only matching base + single offset term for now.
557 if (GepR->getNumOperands() != 2)
558 return std::nullopt;
559 // Matching a GEP with a loop-invariant base ptr.
560 if (!match(V: GepR, P: m_VPInstruction<Instruction::GetElementPtr>(
561 Ops: m_LiveIn(), Ops: m_VPValue())))
562 return std::nullopt;
563 Recipes.push_back(Elt: cast<VPInstruction>(Val: V->getDefiningRecipe()));
564 Recipes.push_back(Elt: cast<VPInstruction>(Val: GepR));
565 GEPs.push_back(Elt: cast<VPInstruction>(Val: GepR));
566 } else if (match(V, P: m_VPInstruction<VPInstruction::MaskedCond>(
567 Ops: m_VPValue(V&: Op1)))) {
568 Worklist.push_back(Elt: Op1);
569 Recipes.push_back(Elt: cast<VPInstruction>(Val: V->getDefiningRecipe()));
570 } else
571 return std::nullopt;
572 }
573
574 // If we couldn't match anything, don't return the condition. It may be
575 // defined outside the loop.
576 if (Recipes.empty() || GEPs.empty())
577 return std::nullopt;
578
579 return UncountableCondition;
580}
581
582VPSingleDefRecipe *vputils::findHeaderMask(VPlan &Plan) {
583 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
584 SmallVector<VPValue *> WideCanonicalIVs;
585 auto *FoundWidenCanonicalIVUser = find_if(
586 Range: LoopRegion->getCanonicalIV()->users(), P: IsaPred<VPWidenCanonicalIVRecipe>);
587 assert(count_if(LoopRegion->getCanonicalIV()->users(),
588 IsaPred<VPWidenCanonicalIVRecipe>) <= 1 &&
589 "Must have at most one VPWideCanonicalIVRecipe");
590 if (FoundWidenCanonicalIVUser !=
591 LoopRegion->getCanonicalIV()->users().end()) {
592 auto *WideCanonicalIV =
593 cast<VPWidenCanonicalIVRecipe>(Val: *FoundWidenCanonicalIVUser);
594 WideCanonicalIVs.push_back(Elt: WideCanonicalIV);
595 }
596
597 // Also include VPWidenIntOrFpInductionRecipes that represent a widened
598 // version of the canonical induction.
599 VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
600 for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
601 auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(Val: &Phi);
602 if (WidenOriginalIV && WidenOriginalIV->isCanonical())
603 WideCanonicalIVs.push_back(Elt: WidenOriginalIV);
604 }
605
606 // Walk users of wide canonical IVs and find the single compare of the form
607 // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
608 VPSingleDefRecipe *HeaderMask = nullptr;
609 for (auto *Wide : WideCanonicalIVs) {
610 for (VPUser *U : Wide->users()) {
611 auto *VPI = dyn_cast<VPInstruction>(Val: U);
612 if (!VPI || !vputils::isHeaderMask(V: VPI, Plan))
613 continue;
614
615 assert(VPI->getOperand(0) == Wide &&
616 "WidenCanonicalIV must be the first operand of the compare");
617 assert(!HeaderMask && "Multiple header masks found?");
618 HeaderMask = VPI;
619 }
620 }
621 return HeaderMask;
622}
623
624SmallVector<VPBasicBlock *>
625VPBlockUtils::blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB,
626 VPBasicBlock *LastBB) {
627 assert(FirstBB->getParent() == LastBB->getParent() &&
628 "FirstBB and LastBB from different regions");
629#ifndef NDEBUG
630 bool InSingleSuccChain = false;
631 for (VPBlockBase *Succ = FirstBB; Succ; Succ = Succ->getSingleSuccessor())
632 InSingleSuccChain |= (Succ == LastBB);
633 assert(InSingleSuccChain &&
634 "LastBB unreachable from FirstBB in single-successor chain");
635#endif
636 auto Blocks = to_vector(
637 Range: VPBlockUtils::blocksOnly<VPBasicBlock>(Range: vp_depth_first_deep(G: FirstBB)));
638 auto *LastIt = find(Range&: Blocks, Val: LastBB);
639 assert(LastIt != Blocks.end() &&
640 "LastBB unreachable from FirstBB in depth-first traversal");
641 Blocks.erase(CS: std::next(x: LastIt), CE: Blocks.end());
642 return Blocks;
643}
644
645bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
646 const VPDominatorTree &VPDT) {
647 auto *VPBB = dyn_cast<VPBasicBlock>(Val: VPB);
648 if (!VPBB)
649 return false;
650
651 // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
652 // VPBB as its entry, i.e., free of predecessors.
653 if (auto *R = VPBB->getParent())
654 return !R->isReplicator() && !VPBB->hasPredecessors();
655
656 // A header dominates its second predecessor (the latch), with the other
657 // predecessor being the preheader
658 return VPB->getPredecessors().size() == 2 &&
659 VPDT.dominates(A: VPB, B: VPB->getPredecessors()[1]);
660}
661
662bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
663 const VPDominatorTree &VPDT) {
664 // A latch has a header as its second successor, with its other successor
665 // leaving the loop. A preheader OTOH has a header as its first (and only)
666 // successor.
667 return VPB->getNumSuccessors() == 2 &&
668 VPBlockUtils::isHeader(VPB: VPB->getSuccessors()[1], VPDT);
669}
670
671std::optional<MemoryLocation>
672vputils::getMemoryLocation(const VPRecipeBase &R) {
673 auto *M = dyn_cast<VPIRMetadata>(Val: &R);
674 if (!M)
675 return std::nullopt;
676 MemoryLocation Loc;
677 // Populate noalias metadata from VPIRMetadata.
678 if (MDNode *NoAliasMD = M->getMetadata(Kind: LLVMContext::MD_noalias))
679 Loc.AATags.NoAlias = NoAliasMD;
680 if (MDNode *AliasScopeMD = M->getMetadata(Kind: LLVMContext::MD_alias_scope))
681 Loc.AATags.Scope = AliasScopeMD;
682 return Loc;
683}
684
685/// Find the ComputeReductionResult recipe for \p PhiR, looking through selects
686/// inserted for predicated reductions or tail folding.
687VPInstruction *vputils::findComputeReductionResult(VPReductionPHIRecipe *PhiR) {
688 VPValue *BackedgeVal = PhiR->getBackedgeValue();
689 if (auto *Res = vputils::findUserOf<VPInstruction::ComputeReductionResult>(
690 V: BackedgeVal))
691 return Res;
692
693 // Look through selects inserted for tail folding or predicated reductions.
694 VPRecipeBase *SelR = vputils::findUserOf(
695 V: BackedgeVal, P: m_Select(Op0: m_VPValue(), Op1: m_VPValue(), Op2: m_VPValue()));
696 if (!SelR)
697 return nullptr;
698 return vputils::findUserOf<VPInstruction::ComputeReductionResult>(
699 V: cast<VPSingleDefRecipe>(Val: SelR));
700}
701