//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a RISC-V specific version of CodeGenPrepare.
// It munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach.
//
//===----------------------------------------------------------------------===//

16 | #include "RISCV.h" |
17 | #include "RISCVTargetMachine.h" |
18 | #include "llvm/ADT/Statistic.h" |
19 | #include "llvm/Analysis/ValueTracking.h" |
20 | #include "llvm/CodeGen/TargetPassConfig.h" |
21 | #include "llvm/IR/Dominators.h" |
22 | #include "llvm/IR/IRBuilder.h" |
23 | #include "llvm/IR/InstVisitor.h" |
24 | #include "llvm/IR/Intrinsics.h" |
25 | #include "llvm/IR/IntrinsicsRISCV.h" |
26 | #include "llvm/IR/PatternMatch.h" |
27 | #include "llvm/InitializePasses.h" |
28 | #include "llvm/Pass.h" |
29 | |
30 | using namespace llvm; |
31 | |
32 | #define DEBUG_TYPE "riscv-codegenprepare" |
33 | #define PASS_NAME "RISC-V CodeGenPrepare" |
34 | |
35 | namespace { |
36 | |
37 | class RISCVCodeGenPrepare : public FunctionPass, |
38 | public InstVisitor<RISCVCodeGenPrepare, bool> { |
39 | const DataLayout *DL; |
40 | const DominatorTree *DT; |
41 | const RISCVSubtarget *ST; |
42 | |
43 | public: |
44 | static char ID; |
45 | |
46 | RISCVCodeGenPrepare() : FunctionPass(ID) {} |
47 | |
48 | bool runOnFunction(Function &F) override; |
49 | |
50 | StringRef getPassName() const override { return PASS_NAME; } |
51 | |
52 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
53 | AU.setPreservesCFG(); |
54 | AU.addRequired<DominatorTreeWrapperPass>(); |
55 | AU.addRequired<TargetPassConfig>(); |
56 | } |
57 | |
58 | bool visitInstruction(Instruction &I) { return false; } |
59 | bool visitAnd(BinaryOperator &BO); |
60 | bool visitIntrinsicInst(IntrinsicInst &I); |
61 | bool expandVPStrideLoad(IntrinsicInst &I); |
62 | }; |
63 | |
64 | } // end anonymous namespace |
65 | |
66 | // Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set, |
67 | // but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill |
68 | // the upper 32 bits with ones. |
69 | bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) { |
70 | if (!ST->is64Bit()) |
71 | return false; |
72 | |
73 | if (!BO.getType()->isIntegerTy(Bitwidth: 64)) |
74 | return false; |
75 | |
76 | using namespace PatternMatch; |
77 | |
78 | // Left hand side should be a zext nneg. |
79 | Value *LHSSrc; |
80 | if (!match(V: BO.getOperand(i_nocapture: 0), P: m_NNegZExt(Op: m_Value(V&: LHSSrc)))) |
81 | return false; |
82 | |
83 | if (!LHSSrc->getType()->isIntegerTy(Bitwidth: 32)) |
84 | return false; |
85 | |
86 | // Right hand side should be a constant. |
87 | Value *RHS = BO.getOperand(i_nocapture: 1); |
88 | |
89 | auto *CI = dyn_cast<ConstantInt>(Val: RHS); |
90 | if (!CI) |
91 | return false; |
92 | uint64_t C = CI->getZExtValue(); |
93 | |
94 | // Look for constants that fit in 32 bits but not simm12, and can be made |
95 | // into simm12 by sign extending bit 31. This will allow use of ANDI. |
96 | // TODO: Is worth making simm32? |
97 | if (!isUInt<32>(x: C) || isInt<12>(x: C) || !isInt<12>(x: SignExtend64<32>(x: C))) |
98 | return false; |
99 | |
100 | // Sign extend the constant and replace the And operand. |
101 | C = SignExtend64<32>(x: C); |
102 | BO.setOperand(i_nocapture: 1, Val_nocapture: ConstantInt::get(Ty: RHS->getType(), V: C)); |
103 | |
104 | return true; |
105 | } |
106 | |
107 | // LLVM vector reduction intrinsics return a scalar result, but on RISC-V vector |
108 | // reduction instructions write the result in the first element of a vector |
109 | // register. So when a reduction in a loop uses a scalar phi, we end up with |
110 | // unnecessary scalar moves: |
111 | // |
112 | // loop: |
113 | // vfmv.s.f v10, fa0 |
114 | // vfredosum.vs v8, v8, v10 |
115 | // vfmv.f.s fa0, v8 |
116 | // |
117 | // This mainly affects ordered fadd reductions, since other types of reduction |
118 | // typically use element-wise vectorisation in the loop body. This tries to |
119 | // vectorize any scalar phis that feed into a fadd reduction: |
120 | // |
121 | // loop: |
122 | // %phi = phi <float> [ ..., %entry ], [ %acc, %loop ] |
123 | // %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi, |
124 | // <vscale x 2 x float> %vec) |
125 | // |
126 | // -> |
127 | // |
128 | // loop: |
129 | // %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ] |
130 | // %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0 |
131 | // %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %x, |
132 | // <vscale x 2 x float> %vec) |
133 | // %acc.vec = insertelement <vscale x 2 x float> poison, float %acc.next, i64 0 |
134 | // |
135 | // Which eliminates the scalar -> vector -> scalar crossing during instruction |
136 | // selection. |
137 | bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) { |
138 | if (expandVPStrideLoad(I)) |
139 | return true; |
140 | |
141 | if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd) |
142 | return false; |
143 | |
144 | auto *PHI = dyn_cast<PHINode>(Val: I.getOperand(i_nocapture: 0)); |
145 | if (!PHI || !PHI->hasOneUse() || |
146 | !llvm::is_contained(Range: PHI->incoming_values(), Element: &I)) |
147 | return false; |
148 | |
149 | Type *VecTy = I.getOperand(i_nocapture: 1)->getType(); |
150 | IRBuilder<> Builder(PHI); |
151 | auto *VecPHI = Builder.CreatePHI(Ty: VecTy, NumReservedValues: PHI->getNumIncomingValues()); |
152 | |
153 | for (auto *BB : PHI->blocks()) { |
154 | Builder.SetInsertPoint(BB->getTerminator()); |
155 | Value *InsertElt = Builder.CreateInsertElement( |
156 | VecTy, NewElt: PHI->getIncomingValueForBlock(BB), Idx: (uint64_t)0); |
157 | VecPHI->addIncoming(V: InsertElt, BB); |
158 | } |
159 | |
160 | Builder.SetInsertPoint(&I); |
161 | I.setOperand(i_nocapture: 0, Val_nocapture: Builder.CreateExtractElement(Vec: VecPHI, Idx: (uint64_t)0)); |
162 | |
163 | PHI->eraseFromParent(); |
164 | |
165 | return true; |
166 | } |
167 | |
168 | // Always expand zero strided loads so we match more .vx splat patterns, even if |
169 | // we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will convert |
170 | // it back to a strided load if it's optimized. |
171 | bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) { |
172 | Value *BasePtr, *VL; |
173 | |
174 | using namespace PatternMatch; |
175 | if (!match(V: &II, P: m_Intrinsic<Intrinsic::experimental_vp_strided_load>( |
176 | Op0: m_Value(V&: BasePtr), Op1: m_Zero(), Op2: m_AllOnes(), Op3: m_Value(V&: VL)))) |
177 | return false; |
178 | |
179 | // If SEW>XLEN then a splat will get lowered as a zero strided load anyway, so |
180 | // avoid expanding here. |
181 | if (II.getType()->getScalarSizeInBits() > ST->getXLen()) |
182 | return false; |
183 | |
184 | if (!isKnownNonZero(V: VL, Q: {*DL, DT, nullptr, &II})) |
185 | return false; |
186 | |
187 | auto *VTy = cast<VectorType>(Val: II.getType()); |
188 | |
189 | IRBuilder<> Builder(&II); |
190 | Type *STy = VTy->getElementType(); |
191 | Value *Val = Builder.CreateLoad(Ty: STy, Ptr: BasePtr); |
192 | Value *Res = Builder.CreateIntrinsic(ID: Intrinsic::experimental_vp_splat, Types: {VTy}, |
193 | Args: {Val, II.getOperand(i_nocapture: 2), VL}); |
194 | |
195 | II.replaceAllUsesWith(V: Res); |
196 | II.eraseFromParent(); |
197 | return true; |
198 | } |
199 | |
200 | bool RISCVCodeGenPrepare::runOnFunction(Function &F) { |
201 | if (skipFunction(F)) |
202 | return false; |
203 | |
204 | auto &TPC = getAnalysis<TargetPassConfig>(); |
205 | auto &TM = TPC.getTM<RISCVTargetMachine>(); |
206 | ST = &TM.getSubtarget<RISCVSubtarget>(F); |
207 | |
208 | DL = &F.getDataLayout(); |
209 | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
210 | |
211 | bool MadeChange = false; |
212 | for (auto &BB : F) |
213 | for (Instruction &I : llvm::make_early_inc_range(Range&: BB)) |
214 | MadeChange |= visit(I); |
215 | |
216 | return MadeChange; |
217 | } |
218 | |
219 | INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false) |
220 | INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) |
221 | INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false) |
222 | |
223 | char RISCVCodeGenPrepare::ID = 0; |
224 | |
225 | FunctionPass *llvm::createRISCVCodeGenPreparePass() { |
226 | return new RISCVCodeGenPrepare(); |
227 | } |
228 | |