RISCVCodeGenPrepare.cpp source code [llvm_projects/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp]

1	//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This is a RISC-V specific version of CodeGenPrepare.
10	// It munges the code in the input function to better prepare it for
11	// SelectionDAG-based code generation. This works around limitations in its
12	// basic-block-at-a-time approach.
13	//
14	//===----------------------------------------------------------------------===//
15
16	#include "RISCV.h"
17	#include "RISCVTargetMachine.h"
18	#include "llvm/ADT/Statistic.h"
19	#include "llvm/Analysis/ValueTracking.h"
20	#include "llvm/CodeGen/TargetPassConfig.h"
21	#include "llvm/IR/Dominators.h"
22	#include "llvm/IR/IRBuilder.h"
23	#include "llvm/IR/InstVisitor.h"
24	#include "llvm/IR/Intrinsics.h"
25	#include "llvm/IR/PatternMatch.h"
26	#include "llvm/InitializePasses.h"
27	#include "llvm/Pass.h"
28	#include "llvm/Transforms/Utils/Local.h"
29
30	using namespace llvm;
31
32	#define DEBUG_TYPE "riscv-codegenprepare"
33	#define PASS_NAME "RISC-V CodeGenPrepare"
34
35	namespace {
36	class RISCVCodeGenPrepare : public InstVisitor<RISCVCodeGenPrepare, bool> {
37	Function &F;
38	const DataLayout *DL;
39	const DominatorTree *DT;
40	const RISCVSubtarget *ST;
41
42	public:
43	RISCVCodeGenPrepare(Function &F, const DominatorTree *DT,
44	const RISCVSubtarget *ST)
45	: F(F), DL(&F.getDataLayout()), DT(DT), ST(ST) {}
46	bool run();
47	bool visitInstruction(Instruction &I) { return false; }
48	bool visitAnd(BinaryOperator &BO);
49	bool visitIntrinsicInst(IntrinsicInst &I);
50	bool expandVPStrideLoad(IntrinsicInst &I);
51	bool widenVPMerge(IntrinsicInst &I);
52	};
53	} // namespace
54
55	namespace {
56	class RISCVCodeGenPrepareLegacyPass : public FunctionPass {
57	public:
58	static char ID;
59
60	RISCVCodeGenPrepareLegacyPass() : FunctionPass (ID) {}
61
62	bool runOnFunction(Function &F) override;
63	StringRef getPassName() const override { return PASS_NAME; }
64
65	void getAnalysisUsage(AnalysisUsage &AU) const override {
66	AU.setPreservesCFG();
67	AU.addRequired<DominatorTreeWrapperPass>();
68	AU.addRequired<TargetPassConfig>();
69	}
70	};
71	} // namespace
72
73	// Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
74	// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
75	// the upper 32 bits with ones.
76	bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
77	if (!ST->is64Bit())
78	return false;
79
80	if (!BO.getType()->isIntegerTy(Bitwidth: `64`))
81	return false;
82
83	using namespace PatternMatch;
84
85	// Left hand side should be a zext nneg.
86	Value *LHSSrc;
87	if (!match(V: BO.getOperand(i_nocapture: `0`), P: m_NNegZExt(Op: m_Value(V&: LHSSrc))))
88	return false;
89
90	if (!LHSSrc->getType()->isIntegerTy(Bitwidth: `32`))
91	return false;
92
93	// Right hand side should be a constant.
94	Value *RHS = BO.getOperand(i_nocapture: `1`);
95
96	auto *CI = dyn_cast<ConstantInt>(Val: RHS);
97	if (!CI)
98	return false;
99	uint64_t C = CI->getZExtValue();
100
101	// Look for constants that fit in 32 bits but not simm12, and can be made
102	// into simm12 by sign extending bit 31. This will allow use of ANDI.
103	// TODO: Is worth making simm32?
104	if (!isUInt<`32`>(x: C) \|\| isInt<`12`>(x: C) \|\| !isInt<`12`>(x: SignExtend64<`32`>(x: C)))
105	return false;
106
107	// Sign extend the constant and replace the And operand.
108	C = SignExtend64<`32`>(x: C);
109	BO.setOperand(i_nocapture: `1`, Val_nocapture: ConstantInt::get(Ty: RHS->getType(), V: C));
110
111	return true;
112	}
113
114	// With EVL tail folding, an AnyOf reduction will generate an i1 vp.merge like
115	// follows:
116	//
117	// loop:
118	// %phi = phi <vscale x 4 x i1> [ zeroinitializer, %entry ], [ %rec, %loop ]
119	// %cmp = icmp ...
120	// %rec = call <vscale x 4 x i1> @llvm.vp.merge(%cmp, i1 true, %phi, %evl)
121	// ...
122	// middle:
123	// %res = call i1 @llvm.vector.reduce.or(<vscale x 4 x i1> %rec)
124	//
125	// However RVV doesn't have any tail undisturbed mask instructions and so we
126	// need a convoluted sequence of mask instructions to lower the i1 vp.merge: see
127	// llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll.
128	//
129	// To avoid that this widens the i1 vp.merge to an i8 vp.merge, which will
130	// generate a single vmerge.vim:
131	//
132	// loop:
133	// %phi = phi <vscale x 4 x i8> [ zeroinitializer, %entry ], [ %rec, %loop ]
134	// %cmp = icmp ...
135	// %rec = call <vscale x 4 x i8> @llvm.vp.merge(%cmp, i8 true, %phi, %evl)
136	// %trunc = trunc <vscale x 4 x i8> %rec to <vscale x 4 x i1>
137	// ...
138	// middle:
139	// %res = call i1 @llvm.vector.reduce.or(<vscale x 4 x i1> %rec)
140	//
141	// The trunc will normally be sunk outside of the loop, but even if there are
142	// users inside the loop it is still profitable.
143	bool RISCVCodeGenPrepare::widenVPMerge(IntrinsicInst &II) {
144	if (!II.getType()->getScalarType()->isIntegerTy(Bitwidth: `1`))
145	return false;
146
147	Value Mask, True, PhiV, EVL;
148	using namespace PatternMatch;
149	if (!match(V: &II,
150	P: m_Intrinsic<Intrinsic::vp_merge>(Op0: m_Value(V&: Mask), Op1: m_Value(V&: True),
151	Op2: m_Value(V&: PhiV), Op3: m_Value(V&: EVL))))
152	return false;
153
154	auto *Phi = dyn_cast<PHINode>(Val: PhiV);
155	if (!Phi \|\| !Phi->hasOneUse() \|\| Phi->getNumIncomingValues() != `2` \|\|
156	!match(V: Phi->getIncomingValue(i: `0`), P: m_Zero()) \|\|
157	Phi->getIncomingValue(i: `1`) != &II)
158	return false;
159
160	Type *WideTy =
161	VectorType::get(ElementType: IntegerType::getInt8Ty(C&: II.getContext()),
162	EC: cast<VectorType>(Val: II.getType())->getElementCount());
163
164	IRBuilder<> Builder(Phi);
165	PHINode *WidePhi = Builder.CreatePHI(Ty: WideTy, NumReservedValues: `2`);
166	WidePhi->addIncoming(V: ConstantAggregateZero::get(Ty: WideTy),
167	BB: Phi->getIncomingBlock(i: `0`));
168	Builder.SetInsertPoint(&II);
169	Value *WideTrue = Builder.CreateZExt(V: True, DestTy: WideTy);
170	Value *WideMerge = Builder.CreateIntrinsic(ID: Intrinsic::vp_merge, Types: {WideTy},
171	Args: {Mask, WideTrue, WidePhi, EVL});
172	WidePhi->addIncoming(V: WideMerge, BB: Phi->getIncomingBlock(i: `1`));
173	Value *Trunc = Builder.CreateTrunc(V: WideMerge, DestTy: II.getType());
174
175	II.replaceAllUsesWith(V: Trunc);
176
177	// Break the cycle and delete the old chain.
178	Phi->setIncomingValue(i: `1`, V: Phi->getIncomingValue(i: `0`));
179	llvm::RecursivelyDeleteTriviallyDeadInstructions(V: &II);
180
181	return true;
182	}
183
184	// LLVM vector reduction intrinsics return a scalar result, but on RISC-V vector
185	// reduction instructions write the result in the first element of a vector
186	// register. So when a reduction in a loop uses a scalar phi, we end up with
187	// unnecessary scalar moves:
188	//
189	// loop:
190	// vfmv.s.f v10, fa0
191	// vfredosum.vs v8, v8, v10
192	// vfmv.f.s fa0, v8
193	//
194	// This mainly affects ordered fadd reductions and VP reductions that have a
195	// scalar start value, since other types of reduction typically use element-wise
196	// vectorisation in the loop body. This tries to vectorize any scalar phis that
197	// feed into these reductions:
198	//
199	// loop:
200	// %phi = phi <float> [ ..., %entry ], [ %acc, %loop ]
201	// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi,
202	// <vscale x 2 x float> %vec)
203	//
204	// ->
205	//
206	// loop:
207	// %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ]
208	// %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0
209	// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %x,
210	// <vscale x 2 x float> %vec)
211	// %acc.vec = insertelement <vscale x 2 x float> poison, float %acc.next, i64 0
212	//
213	// Which eliminates the scalar -> vector -> scalar crossing during instruction
214	// selection.
215	bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
216	if (expandVPStrideLoad(I))
217	return true;
218
219	if (widenVPMerge(II&: I))
220	return true;
221
222	if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd &&
223	!isa<VPReductionIntrinsic>(Val: &I))
224	return false;
225
226	auto *PHI = dyn_cast<PHINode>(Val: I.getOperand(i_nocapture: `0`));
227	if (!PHI \|\| !PHI->hasOneUse() \|\|
228	!llvm::is_contained(Range: PHI->incoming_values(), Element: &I))
229	return false;
230
231	Type *VecTy = I.getOperand(i_nocapture: `1`)->getType();
232	IRBuilder<> Builder(PHI);
233	auto *VecPHI = Builder.CreatePHI(Ty: VecTy, NumReservedValues: PHI->getNumIncomingValues());
234
235	for (auto *BB : PHI->blocks()) {
236	Builder.SetInsertPoint(BB->getTerminator());
237	Value *InsertElt = Builder.CreateInsertElement(
238	VecTy, NewElt: PHI->getIncomingValueForBlock(BB), Idx: (uint64_t)`0`);
239	VecPHI->addIncoming(V: InsertElt, BB);
240	}
241
242	Builder.SetInsertPoint(&I);
243	I.setOperand(i_nocapture: `0`, Val_nocapture: Builder.CreateExtractElement(Vec: VecPHI, Idx: (uint64_t)`0`));
244
245	PHI->eraseFromParent();
246
247	return true;
248	}
249
250	// Always expand zero strided loads so we match more .vx splat patterns, even if
251	// we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will convert
252	// it back to a strided load if it's optimized.
253	bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
254	Value BasePtr, VL;
255
256	using namespace PatternMatch;
257	if (!match(V: &II, P: m_Intrinsic<Intrinsic::experimental_vp_strided_load>(
258	Op0: m_Value(V&: BasePtr), Op1: m_Zero(), Op2: m_AllOnes(), Op3: m_Value(V&: VL))))
259	return false;
260
261	// If SEW>XLEN then a splat will get lowered as a zero strided load anyway, so
262	// avoid expanding here.
263	if (II.getType()->getScalarSizeInBits() > ST->getXLen())
264	return false;
265
266	if (!isKnownNonZero(V: VL, Q: {DL, DT, nullptr*, &II}))
267	return false;
268
269	auto *VTy = cast<VectorType>(Val: II.getType());
270
271	IRBuilder<> Builder(&II);
272	Type *STy = VTy->getElementType();
273	Value *Val = Builder.CreateLoad(Ty: STy, Ptr: BasePtr);
274	Value *Res = Builder.CreateIntrinsic(
275	ID: Intrinsic::vp_merge, Types: VTy,
276	Args: {II.getOperand(i_nocapture: `2`), Builder.CreateVectorSplat(EC: VTy->getElementCount(), V: Val),
277	PoisonValue::get(T: VTy), VL});
278
279	II.replaceAllUsesWith(V: Res);
280	II.eraseFromParent();
281	return true;
282	}
283
284	bool RISCVCodeGenPrepare::run() {
285	bool MadeChange = false;
286	for (auto &BB : F)
287	for (Instruction &I : llvm::make_early_inc_range(Range&: BB))
288	MadeChange \|= visit(I);
289
290	return MadeChange;
291	}
292
293	bool RISCVCodeGenPrepareLegacyPass::runOnFunction(Function &F) {
294	if (skipFunction(F))
295	return false;
296
297	auto &TPC = getAnalysis<TargetPassConfig>();
298	auto &TM = TPC.getTM<RISCVTargetMachine>();
299	auto ST = &TM.getSubtarget<RISCVSubtarget>(F);
300	auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
301
302	RISCVCodeGenPrepare RVCGP(F, DT, ST);
303	return RVCGP.run();
304	}
305
306	INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepareLegacyPass, DEBUG_TYPE, PASS_NAME,
307	false, false)
308	INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
309	INITIALIZE_PASS_END(RISCVCodeGenPrepareLegacyPass, DEBUG_TYPE, PASS_NAME, false,
310	false)
311
312	char RISCVCodeGenPrepareLegacyPass::ID = `0`;
313
314	FunctionPass *llvm::createRISCVCodeGenPrepareLegacyPass() {
315	return new RISCVCodeGenPrepareLegacyPass ();
316	}
317
318	PreservedAnalyses RISCVCodeGenPreparePass::run(Function &F,
319	FunctionAnalysisManager &FAM) {
320	DominatorTree *DT = &FAM.getResult<DominatorTreeAnalysis>(IR&: F);
321	auto ST = &TM->getSubtarget<RISCVSubtarget>(F);
322	bool Changed = RISCVCodeGenPrepare (F, DT, ST).run();
323	if (!Changed)
324	return PreservedAnalyses::all();
325
326	PreservedAnalyses PA = PreservedAnalyses::none();
327	PA.preserveSet<CFGAnalyses>();
328	return PA;
329	}
330

Browse the source code of llvm_projects/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp