VPlanPredicator.cpp source code [llvm_projects/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp]

1	//===-- VPlanPredicator.cpp - VPlan predicator ----------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file implements predication for VPlans.
11	///
12	//===----------------------------------------------------------------------===//
13
14	#include "VPRecipeBuilder.h"
15	#include "VPlan.h"
16	#include "VPlanCFG.h"
17	#include "VPlanDominatorTree.h"
18	#include "VPlanPatternMatch.h"
19	#include "VPlanTransforms.h"
20	#include "VPlanUtils.h"
21	#include "llvm/ADT/PostOrderIterator.h"
22
23	using namespace llvm;
24	using namespace VPlanPatternMatch;
25
26	namespace {
27	class VPPredicator {
28	/// Builder to construct recipes to compute masks.
29	VPBuilder Builder;
30
31	/// Dominator tree for the VPlan.
32	VPDominatorTree VPDT;
33
34	/// Post-dominator tree for the VPlan.
35	VPPostDominatorTree VPPDT;
36
37	/// When we if-convert we need to create edge masks. We have to cache values
38	/// so that we don't end up with exponential recursion/IR.
39	using EdgeMaskCacheTy =
40	DenseMap<std::pair<const VPBasicBlock , const* VPBasicBlock *>,
41	VPValue *>;
42	using BlockMaskCacheTy = DenseMap<const VPBasicBlock , VPValue >;
43	EdgeMaskCacheTy EdgeMaskCache;
44
45	BlockMaskCacheTy BlockMaskCache;
46
47	/// Create an edge mask for every destination of cases and/or default.
48	void createSwitchEdgeMasks(const VPInstruction *SI);
49
50	/// Computes and return the predicate of the edge between \p Src and \p Dst,
51	/// possibly inserting new recipes at \p Dst (using Builder's insertion point)
52	VPValue createEdgeMask(const* VPBasicBlock Src, const* VPBasicBlock *Dst);
53
54	/// Record \p Mask as the entry* mask of \p VPBB, which is expected to not*
55	/// already have a mask.
56	void setBlockInMask(const VPBasicBlock VPBB, VPValue Mask) {
57	// TODO: Include the masks as operands in the predicated VPlan directly to
58	// avoid keeping the map of masks beyond the predication transform.
59	assert(!getBlockInMask(VPBB) && "Mask already set");
60	BlockMaskCache [VPBB] = Mask;
61	}
62
63	/// Record \p Mask as the mask of the edge from \p Src to \p Dst. The edge is
64	/// expected to not have a mask already.
65	VPValue setEdgeMask(const* VPBasicBlock Src, const* VPBasicBlock *Dst,
66	VPValue *Mask) {
67	assert(Src != Dst && "Src and Dst must be different");
68	assert(!getEdgeMask(Src, Dst) && "Mask already set");
69	return EdgeMaskCache [{Src, Dst}] = Mask;
70	}
71
72	public:
73	VPPredicator(VPlan &Plan) : VPDT (Plan), VPPDT (Plan) {}
74
75	/// Returns the entry* mask for \p VPBB.*
76	VPValue getBlockInMask(const* VPBasicBlock VPBB) const* {
77	return BlockMaskCache.lookup(Val: VPBB);
78	}
79
80	/// Returns the precomputed predicate of the edge from \p Src to \p Dst.
81	VPValue getEdgeMask(const* VPBasicBlock Src, const* VPBasicBlock Dst) const* {
82	return EdgeMaskCache.lookup(Val: {Src, Dst});
83	}
84
85	/// Compute the predicate of \p VPBB.
86	void createBlockInMask(VPBasicBlock *VPBB);
87
88	/// Convert phi recipes in \p VPBB to VPBlendRecipes.
89	void convertPhisToBlends(VPBasicBlock *VPBB);
90	};
91	} // namespace
92
93	VPValue VPPredicator::createEdgeMask(const* VPBasicBlock *Src,
94	const VPBasicBlock *Dst) {
95	assert(is_contained(Dst->getPredecessors(), Src) && "Invalid edge");
96
97	// Look for cached value.
98	VPValue *EdgeMask = getEdgeMask(Src, Dst);
99	if (EdgeMask)
100	return EdgeMask;
101
102	VPValue *SrcMask = getBlockInMask(VPBB: Src);
103
104	// If there's a single successor, there's no terminator recipe.
105	if (Src->getNumSuccessors() == `1`)
106	return setEdgeMask(Src, Dst, Mask: SrcMask);
107
108	auto *Term = cast<VPInstruction>(Val: Src->getTerminator());
109	if (Term->getOpcode() == Instruction::Switch) {
110	createSwitchEdgeMasks(SI: Term);
111	return getEdgeMask(Src, Dst);
112	}
113
114	assert(Term->getOpcode() == VPInstruction::BranchOnCond &&
115	"Unsupported terminator");
116	if (Src->getSuccessors()[`0`] == Src->getSuccessors()[`1`])
117	return setEdgeMask(Src, Dst, Mask: SrcMask);
118
119	EdgeMask = Term->getOperand(N: `0`);
120	assert(EdgeMask && "No Edge Mask found for condition");
121
122	if (Src->getSuccessors()[`0`] != Dst)
123	EdgeMask = Builder.createNot(Operand: EdgeMask, DL: Term->getDebugLoc());
124
125	if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
126	// The bitwise 'And' of SrcMask and EdgeMask introduces new UB if SrcMask
127	// is false and EdgeMask is poison. Avoid that by using 'LogicalAnd'
128	// instead which generates 'select i1 SrcMask, i1 EdgeMask, i1 false'.
129	EdgeMask = Builder.createLogicalAnd(LHS: SrcMask, RHS: EdgeMask, DL: Term->getDebugLoc());
130	}
131
132	return setEdgeMask(Src, Dst, Mask: EdgeMask);
133	}
134
135	void VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
136	// Start inserting after the block's phis, which be replaced by blends later.
137	Builder.setInsertPoint(TheBB: VPBB, IP: VPBB->getFirstNonPhi());
138
139	// Reuse the mask of the immediate dominator if the VPBB post-dominates the
140	// immediate dominator.
141	auto *IDom = VPDT.getNode(BB: VPBB)->getIDom();
142	assert(IDom && "Block in loop must have immediate dominator");
143	auto *IDomBB = cast<VPBasicBlock>(Val: IDom->getBlock());
144	if (VPPDT.properlyDominates(A: VPBB, B: IDomBB)) {
145	setBlockInMask(VPBB, Mask: getBlockInMask(VPBB: IDomBB));
146	return;
147	}
148	// All-one mask is modelled as no-mask following the convention for masked
149	// load/store/gather/scatter. Initialize BlockMask to no-mask.
150	VPValue BlockMask = nullptr*;
151	// This is the block mask. We OR all unique incoming edges.
152	for (auto Predecessor : SetVector<VPBlockBase >(
153	VPBB->getPredecessors().begin(), VPBB->getPredecessors().end())) {
154	VPValue *EdgeMask = createEdgeMask(Src: cast<VPBasicBlock>(Val: Predecessor), Dst: VPBB);
155	if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is
156	// too.
157	setBlockInMask(VPBB, Mask: EdgeMask);
158	return;
159	}
160
161	if (!BlockMask) { // BlockMask has its initial nullptr value.
162	BlockMask = EdgeMask;
163	continue;
164	}
165
166	BlockMask = Builder.createOr(LHS: BlockMask, RHS: EdgeMask, DL: {});
167	}
168
169	setBlockInMask(VPBB, Mask: BlockMask);
170	}
171
172	void VPPredicator::createSwitchEdgeMasks(const VPInstruction *SI) {
173	const VPBasicBlock *Src = SI->getParent();
174
175	// Create masks where SI is a switch. We create masks for all edges from SI's
176	// parent block at the same time. This is more efficient, as we can create and
177	// collect compares for all cases once.
178	VPValue *Cond = SI->getOperand(N: `0`);
179	VPBasicBlock *DefaultDst = cast<VPBasicBlock>(Val: Src->getSuccessors()[`0`]);
180	MapVector<VPBasicBlock , SmallVector<VPValue >> Dst2Compares;
181	for (const auto &[Idx, Succ] : enumerate(First: drop_begin(RangeOrContainer: Src->getSuccessors()))) {
182	VPBasicBlock *Dst = cast<VPBasicBlock>(Val: Succ);
183	assert(!getEdgeMask(Src, Dst) && "Edge masks already created");
184	// Cases whose destination is the same as default are redundant and can
185	// be ignored - they will get there anyhow.
186	if (Dst == DefaultDst)
187	continue;
188	auto &Compares = Dst2Compares [Dst];
189	VPValue *V = SI->getOperand(N: Idx + `1`);
190	Compares.push_back(Elt: Builder.createICmp(Pred: CmpInst::ICMP_EQ, A: Cond, B: V));
191	}
192
193	// We need to handle 2 separate cases below for all entries in Dst2Compares,
194	// which excludes destinations matching the default destination.
195	VPValue *SrcMask = getBlockInMask(VPBB: Src);
196	VPValue DefaultMask = nullptr*;
197	for (const auto &[Dst, Conds] : Dst2Compares) {
198	// 1. Dst is not the default destination. Dst is reached if any of the
199	// cases with destination == Dst are taken. Join the conditions for each
200	// case whose destination == Dst using an OR.
201	VPValue *Mask = Conds [`0`];
202	for (VPValue *V : drop_begin(RangeOrContainer: Conds))
203	Mask = Builder.createOr(LHS: Mask, RHS: V);
204	if (SrcMask)
205	Mask = Builder.createLogicalAnd(LHS: SrcMask, RHS: Mask);
206	setEdgeMask(Src, Dst, Mask);
207
208	// 2. Create the mask for the default destination, which is reached if
209	// none of the cases with destination != default destination are taken.
210	// Join the conditions for each case where the destination is != Dst using
211	// an OR and negate it.
212	DefaultMask = DefaultMask ? Builder.createOr(LHS: DefaultMask, RHS: Mask) : Mask;
213	}
214
215	if (DefaultMask) {
216	DefaultMask = Builder.createNot(Operand: DefaultMask);
217	if (SrcMask)
218	DefaultMask = Builder.createLogicalAnd(LHS: SrcMask, RHS: DefaultMask);
219	} else {
220	// There are no destinations other than the default destination, so this is
221	// an unconditional branch.
222	DefaultMask = SrcMask;
223	}
224	setEdgeMask(Src, Dst: DefaultDst, Mask: DefaultMask);
225	}
226
227	void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
228	SmallVector<VPPhi *> Phis;
229	for (VPRecipeBase &R : VPBB->phis())
230	Phis.push_back(Elt: cast<VPPhi>(Val: &R));
231	for (VPPhi *PhiR : Phis) {
232	// The non-header Phi is converted into a Blend recipe below,
233	// so we don't have to worry about the insertion order and we can just use
234	// the builder. At this point we generate the predication tree. There may
235	// be duplications since this is a simple recursive scan, but future
236	// optimizations will clean it up.
237
238	auto NotPoison = make_filter_range(Range: PhiR->incoming_values(), Pred: [](VPValue *V) {
239	return !match(V, P: m_Poison());
240	});
241	if (all_equal(Range&: NotPoison)) {
242	PhiR->replaceAllUsesWith(New: NotPoison.empty() ? PhiR->getIncomingValue(Idx: `0`)
243	: *NotPoison.begin());
244	PhiR->eraseFromParent();
245	continue;
246	}
247
248	SmallVector<VPValue *, `2`> OperandsWithMask;
249	for (const auto &[InVPV, InVPBB] : PhiR->incoming_values_and_blocks()) {
250	OperandsWithMask.push_back(Elt: InVPV);
251	OperandsWithMask.push_back(Elt: createEdgeMask(Src: InVPBB, Dst: VPBB));
252	}
253	PHINode *IRPhi = cast_or_null<PHINode>(Val: PhiR->getUnderlyingValue());
254	auto *Blend =
255	new VPBlendRecipe (IRPhi, OperandsWithMask, *PhiR, PhiR->getDebugLoc());
256	Builder.insert(R: Blend);
257	PhiR->replaceAllUsesWith(New: Blend);
258	PhiR->eraseFromParent();
259	}
260	}
261
262	void VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan) {
263	VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
264	// Scan the body of the loop in a topological order to visit each basic block
265	// after having visited its predecessor basic blocks.
266	VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
267	ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
268	Header);
269	VPPredicator Predicator(Plan);
270	for (VPBlockBase *VPB : RPOT) {
271	// Non-outer regions with VPBBs only are supported at the moment.
272	auto *VPBB = cast<VPBasicBlock>(Val: VPB);
273	// Introduce the mask for VPBB, which may introduce needed edge masks, and
274	// convert all phi recipes of VPBB to blend recipes unless VPBB is the
275	// header.
276	if (VPBB != Header) {
277	Predicator.createBlockInMask(VPBB);
278	Predicator.convertPhisToBlends(VPBB);
279	}
280
281	VPValue *BlockMask = Predicator.getBlockInMask(VPBB);
282	if (!BlockMask)
283	continue;
284
285	// Mask all VPInstructions in the block.
286	for (VPRecipeBase &R : *VPBB) {
287	if (auto *VPI = dyn_cast<VPInstruction>(Val: &R))
288	VPI->addMask(Mask: BlockMask);
289	}
290	}
291
292	// Linearize the blocks of the loop into one serial chain.
293	VPBlockBase PrevVPBB = nullptr*;
294	for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Range: RPOT)) {
295	auto Successors = to_vector(Range&: VPBB->getSuccessors());
296	if (Successors.size() > `1`)
297	VPBB->getTerminator()->eraseFromParent();
298
299	// Flatten the CFG in the loop. To do so, first disconnect VPBB from its
300	// successors. Then connect VPBB to the previously visited VPBB.
301	for (auto *Succ : Successors)
302	VPBlockUtils::disconnectBlocks(From: VPBB, To: Succ);
303	if (PrevVPBB)
304	VPBlockUtils::connectBlocks(From: PrevVPBB, To: VPBB);
305
306	PrevVPBB = VPBB;
307	}
308	}
309

Browse the source code of llvm_projects/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp