VPlanUtils.cpp source code [llvm_projects/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp]

1	//===- VPlanUtils.cpp - VPlan-related utilities ---------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "VPlanUtils.h"
10	#include "VPlanAnalysis.h"
11	#include "VPlanCFG.h"
12	#include "VPlanDominatorTree.h"
13	#include "VPlanPatternMatch.h"
14	#include "llvm/ADT/TypeSwitch.h"
15	#include "llvm/Analysis/MemoryLocation.h"
16	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
17	#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
18
19	using namespace llvm;
20	using namespace llvm::VPlanPatternMatch;
21	using namespace llvm::SCEVPatternMatch;
22
23	bool vputils::onlyFirstLaneUsed(const VPValue *Def) {
24	return all_of(Range: Def->users(),
25	P: [Def](const VPUser U) { return* U->usesFirstLaneOnly(Op: Def); });
26	}
27
28	bool vputils::onlyFirstPartUsed(const VPValue *Def) {
29	return all_of(Range: Def->users(),
30	P: [Def](const VPUser U) { return* U->usesFirstPartOnly(Op: Def); });
31	}
32
33	bool vputils::onlyScalarValuesUsed(const VPValue *Def) {
34	return all_of(Range: Def->users(),
35	P: [Def](const VPUser U) { return* U->usesScalars(Op: Def); });
36	}
37
38	VPValue vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const* SCEV *Expr) {
39	if (auto *E = dyn_cast<SCEVConstant>(Val: Expr))
40	return Plan.getOrAddLiveIn(V: E->getValue());
41	// Skip SCEV expansion if Expr is a SCEVUnknown wrapping a non-instruction
42	// value. Otherwise the value may be defined in a loop and using it directly
43	// will break LCSSA form. The SCEV expansion takes care of preserving LCSSA
44	// form.
45	auto *U = dyn_cast<SCEVUnknown>(Val: Expr);
46	if (U && !isa<Instruction>(Val: U->getValue()))
47	return Plan.getOrAddLiveIn(V: U->getValue());
48	auto Expanded = new* VPExpandSCEVRecipe (Expr);
49	Plan.getEntry()->appendRecipe(Recipe: Expanded);
50	return Expanded;
51	}
52
53	bool vputils::isHeaderMask(const VPValue V, const* VPlan &Plan) {
54	if (isa<VPActiveLaneMaskPHIRecipe>(Val: V))
55	return true;
56
57	auto IsWideCanonicalIV = [](VPValue *A) {
58	return isa<VPWidenCanonicalIVRecipe>(Val: A) \|\|
59	(isa<VPWidenIntOrFpInductionRecipe>(Val: A) &&
60	cast<VPWidenIntOrFpInductionRecipe>(Val: A)->isCanonical());
61	};
62
63	VPValue A, B;
64
65	auto m_CanonicalScalarIVSteps =
66	m_ScalarIVSteps(Op0: m_Specific(VPV: Plan.getVectorLoopRegion()->getCanonicalIV()),
67	Op1: m_One(), Op2: m_Specific(VPV: &Plan.getVF()));
68
69	if (match(V, P: m_ActiveLaneMask(Op0: m_VPValue(V&: A), Op1: m_VPValue(V&: B), Op2: m_One())))
70	return B == Plan.getTripCount() &&
71	(match(V: A, P: m_CanonicalScalarIVSteps) \|\| IsWideCanonicalIV (A));
72
73	// For scalar plans, the header mask uses the scalar steps.
74	if (match(V, P: m_ICmp(Op0: m_CanonicalScalarIVSteps,
75	Op1: m_Specific(VPV: Plan.getBackedgeTakenCount())))) {
76	assert(Plan.hasScalarVFOnly() &&
77	"Non-scalar VF using scalar IV steps for header mask?");
78	return true;
79	}
80
81	return match(V, P: m_ICmp(Op0: m_VPValue(V&: A), Op1: m_VPValue(V&: B))) && IsWideCanonicalIV (A) &&
82	B == Plan.getBackedgeTakenCount();
83	}
84
85	/// Returns true if \p R propagates poison from any operand to its result.
86	static bool propagatesPoisonFromRecipeOp(const VPRecipeBase *R) {
87	return TypeSwitch<const VPRecipeBase , bool*>(R)
88	.Case<VPWidenGEPRecipe, VPWidenCastRecipe>(
89	caseFn: [](const VPRecipeBase ) { return* true; })
90	.Case(caseFn: [](const VPReplicateRecipe *Rep) {
91	// GEP and casts propagate poison from all operands.
92	unsigned Opcode = Rep->getOpcode();
93	return Opcode == Instruction::GetElementPtr \|\|
94	Instruction::isCast(Opcode);
95	})
96	.Default(defaultFn: [](const VPRecipeBase ) { return* false; });
97	}
98
99	/// Returns true if \p V being poison is guaranteed to trigger UB because it
100	/// propagates to the address of a memory recipe.
101	static bool poisonGuaranteesUB(const VPValue *V) {
102	SmallPtrSet<const VPValue *, `8`> Visited;
103	SmallVector<const VPValue *, `16`> Worklist;
104
105	Worklist.push_back(Elt: V);
106
107	while (!Worklist.empty()) {
108	const VPValue *Current = Worklist.pop_back_val();
109	if (!Visited.insert(Ptr: Current).second)
110	continue;
111
112	for (VPUser *U : Current->users()) {
113	// Check if Current is used as an address operand for load/store.
114	if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(Val: U)) {
115	if (MemR->getAddr() == Current)
116	return true;
117	continue;
118	}
119	if (auto *Rep = dyn_cast<VPReplicateRecipe>(Val: U)) {
120	unsigned Opcode = Rep->getOpcode();
121	if ((Opcode == Instruction::Load && Rep->getOperand(N: `0`) == Current) \|\|
122	(Opcode == Instruction::Store && Rep->getOperand(N: `1`) == Current))
123	return true;
124	}
125
126	// Check if poison propagates through this recipe to any of its users.
127	auto *R = cast<VPRecipeBase>(Val: U);
128	for (const VPValue *Op : R->operands()) {
129	if (Op == Current && propagatesPoisonFromRecipeOp(R)) {
130	Worklist.push_back(Elt: R->getVPSingleValue());
131	break;
132	}
133	}
134	}
135	}
136
137	return false;
138	}
139
140	const SCEV vputils::getSCEVExprForVPValue(const* VPValue *V,
141	PredicatedScalarEvolution &PSE,
142	const Loop *L) {
143	ScalarEvolution &SE = *PSE.getSE();
144	if (isa<VPIRValue, VPSymbolicValue>(Val: V)) {
145	Value *LiveIn = V->getUnderlyingValue();
146	if (LiveIn && SE.isSCEVable(Ty: LiveIn->getType()))
147	return SE.getSCEV(V: LiveIn);
148	return SE.getCouldNotCompute();
149	}
150
151	// Helper to create SCEVs for binary and unary operations.
152	auto CreateSCEV = [&](ArrayRef<VPValue *> Ops,
153	function_ref<const SCEV *(ArrayRef<SCEVUse>)> CreateFn)
154	-> const SCEV * {
155	SmallVector<SCEVUse, `2`> SCEVOps;
156	for (VPValue *Op : Ops) {
157	const SCEV *S = getSCEVExprForVPValue(V: Op, PSE, L);
158	if (isa<SCEVCouldNotCompute>(Val: S))
159	return SE.getCouldNotCompute();
160	SCEVOps.push_back(Elt: S);
161	}
162	return CreateFn (SCEVOps);
163	};
164
165	VPValue LHSVal, RHSVal;
166	if (match(V, P: m_Add(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
167	return CreateSCEV ({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
168	return SE.getAddExpr(LHS: Ops [`0`], RHS: Ops [`1`], Flags: SCEV::FlagAnyWrap, Depth: `0`);
169	});
170	if (match(V, P: m_Sub(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
171	return CreateSCEV ({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
172	return SE.getMinusSCEV(LHS: Ops [`0`], RHS: Ops [`1`], Flags: SCEV::FlagAnyWrap, Depth: `0`);
173	});
174	if (match(V, P: m_Not(Op0: m_VPValue(V&: LHSVal)))) {
175	// not X = xor X, -1 = -1 - X
176	return CreateSCEV ({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
177	return SE.getMinusSCEV(LHS: SE.getMinusOne(Ty: Ops [`0`]->getType()), RHS: Ops [`0`]);
178	});
179	}
180	if (match(V, P: m_Mul(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
181	return CreateSCEV ({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
182	return SE.getMulExpr(LHS: Ops [`0`], RHS: Ops [`1`], Flags: SCEV::FlagAnyWrap, Depth: `0`);
183	});
184	if (match(V,
185	P: m_Binary<Instruction::UDiv>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
186	return CreateSCEV ({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
187	return SE.getUDivExpr(LHS: Ops [`0`], RHS: Ops [`1`]);
188	});
189	// Handle AND with constant mask: x & (2^n - 1) can be represented as x % 2^n.
190	const APInt *Mask;
191	if (match(V, P: m_c_BinaryAnd(Op0: m_VPValue(V&: LHSVal), Op1: m_APInt(C&: Mask))) &&
192	(*Mask + `1`).isPowerOf2())
193	return CreateSCEV ({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
194	return SE.getURemExpr(LHS: Ops [`0`], RHS: SE.getConstant(Val: *Mask + `1`));
195	});
196	if (match(V, P: m_Trunc(Op0: m_VPValue(V&: LHSVal)))) {
197	const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
198	Type DestTy = VPTypeAnalysis (Plan).inferScalarType(V);
199	return CreateSCEV ({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
200	return SE.getTruncateExpr(Op: Ops [`0`], Ty: DestTy);
201	});
202	}
203	if (match(V, P: m_ZExt(Op0: m_VPValue(V&: LHSVal)))) {
204	const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
205	Type DestTy = VPTypeAnalysis (Plan).inferScalarType(V);
206	return CreateSCEV ({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
207	return SE.getZeroExtendExpr(Op: Ops [`0`], Ty: DestTy);
208	});
209	}
210	if (match(V, P: m_SExt(Op0: m_VPValue(V&: LHSVal)))) {
211	const VPlan *Plan = V->getDefiningRecipe()->getParent()->getPlan();
212	Type DestTy = VPTypeAnalysis (Plan).inferScalarType(V);
213
214	// Mirror SCEV's createSCEV handling for sext(sub nsw): push sign extension
215	// onto the operands before computing the subtraction.
216	VPValue SubLHS, SubRHS;
217	auto *SubR = dyn_cast<VPRecipeWithIRFlags>(Val: LHSVal);
218	if (match(V: LHSVal, P: m_Sub(Op0: m_VPValue(V&: SubLHS), Op1: m_VPValue(V&: SubRHS))) && SubR &&
219	SubR->hasNoSignedWrap() && poisonGuaranteesUB(V: LHSVal)) {
220	const SCEV *V1 = getSCEVExprForVPValue(V: SubLHS, PSE, L);
221	const SCEV *V2 = getSCEVExprForVPValue(V: SubRHS, PSE, L);
222	if (!isa<SCEVCouldNotCompute>(Val: V1) && !isa<SCEVCouldNotCompute>(Val: V2))
223	return SE.getMinusSCEV(LHS: SE.getSignExtendExpr(Op: V1, Ty: DestTy),
224	RHS: SE.getSignExtendExpr(Op: V2, Ty: DestTy), Flags: SCEV::FlagNSW);
225	}
226
227	return CreateSCEV ({LHSVal}, [&](ArrayRef<SCEVUse> Ops) {
228	return SE.getSignExtendExpr(Op: Ops [`0`], Ty: DestTy);
229	});
230	}
231	if (match(V,
232	P: m_Intrinsic<Intrinsic::umax>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
233	return CreateSCEV ({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
234	return SE.getUMaxExpr(LHS: Ops [`0`], RHS: Ops [`1`]);
235	});
236	if (match(V,
237	P: m_Intrinsic<Intrinsic::smax>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
238	return CreateSCEV ({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
239	return SE.getSMaxExpr(LHS: Ops [`0`], RHS: Ops [`1`]);
240	});
241	if (match(V,
242	P: m_Intrinsic<Intrinsic::umin>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
243	return CreateSCEV ({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
244	return SE.getUMinExpr(LHS: Ops [`0`], RHS: Ops [`1`]);
245	});
246	if (match(V,
247	P: m_Intrinsic<Intrinsic::smin>(Op0: m_VPValue(V&: LHSVal), Op1: m_VPValue(V&: RHSVal))))
248	return CreateSCEV ({LHSVal, RHSVal}, [&](ArrayRef<SCEVUse> Ops) {
249	return SE.getSMinExpr(LHS: Ops [`0`], RHS: Ops [`1`]);
250	});
251
252	ArrayRef<VPValue *> Ops;
253	Type *SourceElementType;
254	if (match(V, P: m_GetElementPtr(SourceElementType, Operands&: Ops))) {
255	const SCEV *GEPExpr = CreateSCEV (Ops, [&](ArrayRef<SCEVUse> Ops) {
256	return SE.getGEPExpr(BaseExpr: Ops.front(), IndexExprs: Ops.drop_front(), SrcElementTy: SourceElementType);
257	});
258	return PSE.getPredicatedSCEV(Expr: GEPExpr);
259	}
260
261	// TODO: Support constructing SCEVs for more recipes as needed.
262	const VPRecipeBase *DefR = V->getDefiningRecipe();
263	const SCEV *Expr =
264	TypeSwitch<const VPRecipeBase , const* SCEV *>(DefR)
265	.Case(caseFn: [](const VPExpandSCEVRecipe R) { return* R->getSCEV(); })
266	.Case(caseFn: [&SE, &PSE, L](const VPCanonicalIVPHIRecipe *R) {
267	if (!L)
268	return SE.getCouldNotCompute();
269	const SCEV *Start = getSCEVExprForVPValue(V: R->getOperand(N: `0`), PSE, L);
270	return SE.getAddRecExpr(Start, Step: SE.getOne(Ty: Start->getType()), L,
271	Flags: SCEV::FlagAnyWrap);
272	})
273	.Case(caseFn: [&SE, &PSE, L](const VPWidenIntOrFpInductionRecipe *R) {
274	const SCEV *Step = getSCEVExprForVPValue(V: R->getStepValue(), PSE, L);
275	if (!L \|\| isa<SCEVCouldNotCompute>(Val: Step))
276	return SE.getCouldNotCompute();
277	const SCEV *Start =
278	getSCEVExprForVPValue(V: R->getStartValue(), PSE, L);
279	const SCEV *AddRec =
280	SE.getAddRecExpr(Start, Step, L, Flags: SCEV::FlagAnyWrap);
281	if (R->getTruncInst())
282	return SE.getTruncateExpr(Op: AddRec, Ty: R->getScalarType());
283	return AddRec;
284	})
285	.Case(caseFn: [&SE, &PSE, L](const VPWidenPointerInductionRecipe *R) {
286	const SCEV *Start =
287	getSCEVExprForVPValue(V: R->getStartValue(), PSE, L);
288	if (!L \|\| isa<SCEVCouldNotCompute>(Val: Start))
289	return SE.getCouldNotCompute();
290	const SCEV *Step = getSCEVExprForVPValue(V: R->getStepValue(), PSE, L);
291	if (isa<SCEVCouldNotCompute>(Val: Step))
292	return SE.getCouldNotCompute();
293	return SE.getAddRecExpr(Start, Step, L, Flags: SCEV::FlagAnyWrap);
294	})
295	.Case(caseFn: [&SE, &PSE, L](const VPDerivedIVRecipe *R) {
296	const SCEV *Start = getSCEVExprForVPValue(V: R->getOperand(N: `0`), PSE, L);
297	const SCEV *IV = getSCEVExprForVPValue(V: R->getOperand(N: `1`), PSE, L);
298	const SCEV *Scale = getSCEVExprForVPValue(V: R->getOperand(N: `2`), PSE, L);
299	if (any_of(Range: ArrayRef({Start, IV, Scale}),
300	P: IsaPred<SCEVCouldNotCompute>))
301	return SE.getCouldNotCompute();
302
303	return SE.getAddExpr(
304	LHS: SE.getTruncateOrSignExtend(V: Start, Ty: IV->getType()),
305	RHS: SE.getMulExpr(
306	LHS: IV, RHS: SE.getTruncateOrSignExtend(V: Scale, Ty: IV->getType())));
307	})
308	.Case(caseFn: [&SE, &PSE, L](const VPScalarIVStepsRecipe *R) {
309	const SCEV *IV = getSCEVExprForVPValue(V: R->getOperand(N: `0`), PSE, L);
310	const SCEV *Step = getSCEVExprForVPValue(V: R->getOperand(N: `1`), PSE, L);
311	if (isa<SCEVCouldNotCompute>(Val: IV) \|\| !isa<SCEVConstant>(Val: Step))
312	return SE.getCouldNotCompute();
313	return SE.getTruncateOrSignExtend(V: IV, Ty: Step->getType());
314	})
315	.Default(
316	defaultFn: [&SE](const VPRecipeBase ) { return* SE.getCouldNotCompute(); });
317
318	return PSE.getPredicatedSCEV(Expr);
319	}
320
321	bool vputils::isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE,
322	const Loop *L) {
323	// If address is an SCEVAddExpr, we require that all operands must be either
324	// be invariant or a (possibly sign-extend) affine AddRec.
325	if (auto *PtrAdd = dyn_cast<SCEVAddExpr>(Val: Addr)) {
326	return all_of(Range: PtrAdd->operands(), P: [&SE, L](const SCEV *Op) {
327	return SE.isLoopInvariant(S: Op, L) \|\|
328	match(S: Op, P: m_scev_SExt(Op0: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV()))) \|\|
329	match(S: Op, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV()));
330	});
331	}
332
333	// Otherwise, check if address is loop invariant or an affine add recurrence.
334	return SE.isLoopInvariant(S: Addr, L) \|\|
335	match(S: Addr, P: m_scev_AffineAddRec(Op0: m_SCEV(), Op1: m_SCEV()));
336	}
337
338	/// Returns true if \p Opcode preserves uniformity, i.e., if all operands are
339	/// uniform, the result will also be uniform.
340	static bool preservesUniformity(unsigned Opcode) {
341	if (Instruction::isBinaryOp(Opcode) \|\| Instruction::isCast(Opcode))
342	return true;
343	switch (Opcode) {
344	case Instruction::Freeze:
345	case Instruction::GetElementPtr:
346	case Instruction::ICmp:
347	case Instruction::FCmp:
348	case Instruction::Select:
349	case VPInstruction::Not:
350	case VPInstruction::Broadcast:
351	case VPInstruction::MaskedCond:
352	case VPInstruction::PtrAdd:
353	return true;
354	default:
355	return false;
356	}
357	}
358
359	bool vputils::isSingleScalar(const VPValue *VPV) {
360	// A live-in must be uniform across the scope of VPlan.
361	if (isa<VPIRValue, VPSymbolicValue>(Val: VPV))
362	return true;
363
364	if (auto *Rep = dyn_cast<VPReplicateRecipe>(Val: VPV)) {
365	const VPRegionBlock *RegionOfR = Rep->getRegion();
366	// Don't consider recipes in replicate regions as uniform yet; their first
367	// lane cannot be accessed when executing the replicate region for other
368	// lanes.
369	if (RegionOfR && RegionOfR->isReplicator())
370	return false;
371	return Rep->isSingleScalar() \|\| (preservesUniformity(Opcode: Rep->getOpcode()) &&
372	all_of(Range: Rep->operands(), P: isSingleScalar));
373	}
374	if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe, VPBlendRecipe>(Val: VPV))
375	return all_of(Range: VPV->getDefiningRecipe()->operands(), P: isSingleScalar);
376	if (auto *WidenR = dyn_cast<VPWidenRecipe>(Val: VPV)) {
377	return preservesUniformity(Opcode: WidenR->getOpcode()) &&
378	all_of(Range: WidenR->operands(), P: isSingleScalar);
379	}
380	if (auto *VPI = dyn_cast<VPInstruction>(Val: VPV))
381	return VPI->isSingleScalar() \|\| VPI->isVectorToScalar() \|\|
382	(preservesUniformity(Opcode: VPI->getOpcode()) &&
383	all_of(Range: VPI->operands(), P: isSingleScalar));
384	if (auto *RR = dyn_cast<VPReductionRecipe>(Val: VPV))
385	return !RR->isPartialReduction();
386	if (isa<VPCanonicalIVPHIRecipe, VPVectorPointerRecipe,
387	VPVectorEndPointerRecipe>(Val: VPV))
388	return true;
389	if (auto *Expr = dyn_cast<VPExpressionRecipe>(Val: VPV))
390	return Expr->isSingleScalar();
391
392	// VPExpandSCEVRecipes must be placed in the entry and are always uniform.
393	return isa<VPExpandSCEVRecipe>(Val: VPV);
394	}
395
396	bool vputils::isUniformAcrossVFsAndUFs(VPValue *V) {
397	// Live-ins are uniform.
398	if (isa<VPIRValue, VPSymbolicValue>(Val: V))
399	return true;
400
401	VPRecipeBase *R = V->getDefiningRecipe();
402	VPBasicBlock VPBB = R ? R->getParent() : nullptr*;
403	VPlan Plan = VPBB ? VPBB->getPlan() : nullptr*;
404	if (VPBB &&
405	(VPBB == Plan->getVectorPreheader() \|\| VPBB == Plan->getEntry())) {
406	if (match(V: V->getDefiningRecipe(),
407	P: m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
408	return false;
409	return all_of(Range: R->operands(), P: isUniformAcrossVFsAndUFs);
410	}
411
412	if (VPRegionBlock *EnclosingRegion = VPBB->getEnclosingLoopRegion()) {
413	// Canonical IV is uniform.
414	if (V == EnclosingRegion->getCanonicalIV())
415	return true;
416	}
417
418	return TypeSwitch<const VPRecipeBase , bool*>(R)
419	.Case(caseFn: [](const VPDerivedIVRecipe R) { return* true; })
420	.Case(caseFn: [](const VPReplicateRecipe *R) {
421	// Be conservative about side-effects, except for the
422	// known-side-effecting assumes and stores, which we know will be
423	// uniform.
424	return R->isSingleScalar() &&
425	(!R->mayHaveSideEffects() \|\|
426	isa<AssumeInst, StoreInst>(Val: R->getUnderlyingInstr())) &&
427	all_of(Range: R->operands(), P: isUniformAcrossVFsAndUFs);
428	})
429	.Case(caseFn: [](const VPWidenRecipe *R) {
430	return preservesUniformity(Opcode: R->getOpcode()) &&
431	all_of(Range: R->operands(), P: isUniformAcrossVFsAndUFs);
432	})
433	.Case(caseFn: [](const VPInstruction *VPI) {
434	return (VPI->isScalarCast() &&
435	isUniformAcrossVFsAndUFs(V: VPI->getOperand(N: `0`))) \|\|
436	(preservesUniformity(Opcode: VPI->getOpcode()) &&
437	all_of(Range: VPI->operands(), P: isUniformAcrossVFsAndUFs));
438	})
439	.Case(caseFn: [](const VPWidenCastRecipe *R) {
440	// A cast is uniform according to its operand.
441	return isUniformAcrossVFsAndUFs(V: R->getOperand(N: `0`));
442	})
443	.Default(defaultFn: [](const VPRecipeBase ) { // A value is considered non-uniform*
444	// unless proven otherwise.
445	return false;
446	});
447	}
448
449	VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) {
450	auto DepthFirst = vp_depth_first_shallow(G: Plan.getEntry());
451	auto I = find_if(Range&: DepthFirst, P: [&VPDT](VPBlockBase *VPB) {
452	return VPBlockUtils::isHeader(VPB, VPDT);
453	});
454	return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(Val: *I);
455	}
456
457	unsigned vputils::getVFScaleFactor(VPRecipeBase *R) {
458	if (!R)
459	return `1`;
460	if (auto *RR = dyn_cast<VPReductionPHIRecipe>(Val: R))
461	return RR->getVFScaleFactor();
462	if (auto *RR = dyn_cast<VPReductionRecipe>(Val: R))
463	return RR->getVFScaleFactor();
464	if (auto *ER = dyn_cast<VPExpressionRecipe>(Val: R))
465	return ER->getVFScaleFactor();
466	assert(
467	(!isa<VPInstruction>(R) \|\| cast<VPInstruction>(R)->getOpcode() !=
468	VPInstruction::ReductionStartVector) &&
469	"getting scaling factor of reduction-start-vector not implemented yet");
470	return `1`;
471	}
472
473	std::optional<VPValue *>
474	vputils::getRecipesForUncountableExit(VPlan &Plan,
475	SmallVectorImpl<VPRecipeBase *> &Recipes,
476	SmallVectorImpl<VPRecipeBase *> &GEPs) {
477	// Given a VPlan like the following (just including the recipes contributing
478	// to loop control exiting here, not the actual work), we're looking to match
479	// the recipes contributing to the uncountable exit condition comparison
480	// (here, vp<%4>) back to either live-ins or the address nodes for the load
481	// used as part of the uncountable exit comparison so that we can copy them
482	// to a preheader and rotate the address in the loop to the next vector
483	// iteration.
484	//
485	// Currently, the address of the load is restricted to a GEP with 2 operands
486	// and a live-in base address. This constraint may be relaxed later.
487	//
488	// VPlan ' for UF>=1' {
489	// Live-in vp<%0> = VF
490	// Live-in ir<64> = original trip-count
491	//
492	// entry:
493	// Successor(s): preheader, vector.ph
494	//
495	// vector.ph:
496	// Successor(s): vector loop
497	//
498	// <x1> vector loop: {
499	// vector.body:
500	// EMIT vp<%2> = CANONICAL-INDUCTION ir<0>
501	// vp<%3> = SCALAR-STEPS vp<%2>, ir<1>, vp<%0>
502	// CLONE ir<%ee.addr> = getelementptr ir<0>, vp<%3>
503	// WIDEN ir<%ee.load> = load ir<%ee.addr>
504	// WIDEN vp<%4> = icmp eq ir<%ee.load>, ir<0>
505	// EMIT vp<%5> = any-of vp<%4>
506	// EMIT vp<%6> = add vp<%2>, vp<%0>
507	// EMIT vp<%7> = icmp eq vp<%6>, ir<64>
508	// EMIT branch-on-two-conds vp<%5>, vp<%7>
509	// No successors
510	// }
511	// Successor(s): early.exit, middle.block
512	//
513	// middle.block:
514	// Successor(s): preheader
515	//
516	// preheader:
517	// No successors
518	// }
519
520	// Find the uncountable loop exit condition.
521	auto *Region = Plan.getVectorLoopRegion();
522	VPValue UncountableCondition = nullptr*;
523	if (!match(V: Region->getExitingBasicBlock()->getTerminator(),
524	P: m_BranchOnTwoConds(Op0: m_AnyOf(Op0: m_VPValue(V&: UncountableCondition)),
525	Op1: m_VPValue())))
526	return std::nullopt;
527
528	SmallVector<VPValue *, `4`> Worklist;
529	Worklist.push_back(Elt: UncountableCondition);
530	while (!Worklist.empty()) {
531	VPValue *V = Worklist.pop_back_val();
532
533	// Any value defined outside the loop does not need to be copied.
534	if (V->isDefinedOutsideLoopRegions())
535	continue;
536
537	// FIXME: Remove the single user restriction; it's here because we're
538	// starting with the simplest set of loops we can, and multiple
539	// users means needing to add PHI nodes in the transform.
540	if (V->getNumUsers() > `1`)
541	return std::nullopt;
542
543	VPValue Op1, Op2;
544	// Walk back through recipes until we find at least one load from memory.
545	if (match(V, P: m_ICmp(Op0: m_VPValue(V&: Op1), Op1: m_VPValue(V&: Op2)))) {
546	Worklist.push_back(Elt: Op1);
547	Worklist.push_back(Elt: Op2);
548	Recipes.push_back(Elt: V->getDefiningRecipe());
549	} else if (auto *Load = dyn_cast<VPWidenLoadRecipe>(Val: V)) {
550	// Reject masked loads for the time being; they make the exit condition
551	// more complex.
552	if (Load->isMasked())
553	return std::nullopt;
554
555	VPValue *GEP = Load->getAddr();
556	if (!match(V: GEP, P: m_GetElementPtr(Op0: m_LiveIn(), Op1: m_VPValue())))
557	return std::nullopt;
558
559	Recipes.push_back(Elt: Load);
560	Recipes.push_back(Elt: GEP->getDefiningRecipe());
561	GEPs.push_back(Elt: GEP->getDefiningRecipe());
562	} else
563	return std::nullopt;
564	}
565
566	return UncountableCondition;
567	}
568
569	VPSingleDefRecipe *vputils::findHeaderMask(VPlan &Plan) {
570	VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
571	SmallVector<VPValue *> WideCanonicalIVs;
572	auto *FoundWidenCanonicalIVUser = find_if(
573	Range: LoopRegion->getCanonicalIV()->users(), P: IsaPred<VPWidenCanonicalIVRecipe>);
574	assert(count_if(LoopRegion->getCanonicalIV()->users(),
575	IsaPred<VPWidenCanonicalIVRecipe>) <= `1` &&
576	"Must have at most one VPWideCanonicalIVRecipe");
577	if (FoundWidenCanonicalIVUser !=
578	LoopRegion->getCanonicalIV()->users().end()) {
579	auto *WideCanonicalIV =
580	cast<VPWidenCanonicalIVRecipe>(Val: *FoundWidenCanonicalIVUser);
581	WideCanonicalIVs.push_back(Elt: WideCanonicalIV);
582	}
583
584	// Also include VPWidenIntOrFpInductionRecipes that represent a widened
585	// version of the canonical induction.
586	VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
587	for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
588	auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(Val: &Phi);
589	if (WidenOriginalIV && WidenOriginalIV->isCanonical())
590	WideCanonicalIVs.push_back(Elt: WidenOriginalIV);
591	}
592
593	// Walk users of wide canonical IVs and find the single compare of the form
594	// (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
595	VPSingleDefRecipe HeaderMask = nullptr*;
596	for (auto *Wide : WideCanonicalIVs) {
597	for (VPUser *U : Wide->users()) {
598	auto *VPI = dyn_cast<VPInstruction>(Val: U);
599	if (!VPI \|\| !vputils::isHeaderMask(V: VPI, Plan))
600	continue;
601
602	assert(VPI->getOperand(`0`) == Wide &&
603	"WidenCanonicalIV must be the first operand of the compare");
604	assert(!HeaderMask && "Multiple header masks found?");
605	HeaderMask = VPI;
606	}
607	}
608	return HeaderMask;
609	}
610
611	bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
612	const VPDominatorTree &VPDT) {
613	auto *VPBB = dyn_cast<VPBasicBlock>(Val: VPB);
614	if (!VPBB)
615	return false;
616
617	// If VPBB is in a region R, VPBB is a loop header if R is a loop region with
618	// VPBB as its entry, i.e., free of predecessors.
619	if (auto *R = VPBB->getParent())
620	return !R->isReplicator() && !VPBB->hasPredecessors();
621
622	// A header dominates its second predecessor (the latch), with the other
623	// predecessor being the preheader
624	return VPB->getPredecessors().size() == `2` &&
625	VPDT.dominates(A: VPB, B: VPB->getPredecessors()[`1`]);
626	}
627
628	bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
629	const VPDominatorTree &VPDT) {
630	// A latch has a header as its second successor, with its other successor
631	// leaving the loop. A preheader OTOH has a header as its first (and only)
632	// successor.
633	return VPB->getNumSuccessors() == `2` &&
634	VPBlockUtils::isHeader(VPB: VPB->getSuccessors()[`1`], VPDT);
635	}
636
637	std::optional<MemoryLocation>
638	vputils::getMemoryLocation(const VPRecipeBase &R) {
639	auto *M = dyn_cast<VPIRMetadata>(Val: &R);
640	if (!M)
641	return std::nullopt;
642	MemoryLocation Loc;
643	// Populate noalias metadata from VPIRMetadata.
644	if (MDNode *NoAliasMD = M->getMetadata(Kind: LLVMContext::MD_noalias))
645	Loc.AATags.NoAlias = NoAliasMD;
646	if (MDNode *AliasScopeMD = M->getMetadata(Kind: LLVMContext::MD_alias_scope))
647	Loc.AATags.Scope = AliasScopeMD;
648	return Loc;
649	}
650
651	/// Find the ComputeReductionResult recipe for \p PhiR, looking through selects
652	/// inserted for predicated reductions or tail folding.
653	VPInstruction vputils::findComputeReductionResult(VPReductionPHIRecipe PhiR) {
654	VPValue *BackedgeVal = PhiR->getBackedgeValue();
655	if (auto *Res = vputils::findUserOf<VPInstruction::ComputeReductionResult>(
656	V: BackedgeVal))
657	return Res;
658
659	// Look through selects inserted for tail folding or predicated reductions.
660	VPRecipeBase *SelR = vputils::findUserOf(
661	V: BackedgeVal, P: m_Select(Op0: m_VPValue(), Op1: m_VPValue(), Op2: m_VPValue()));
662	if (!SelR)
663	return nullptr;
664	return vputils::findUserOf<VPInstruction::ComputeReductionResult>(
665	V: cast<VPSingleDefRecipe>(Val: SelR));
666	}
667

Browse the source code of llvm_projects/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp