//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a RISC-V specific version of CodeGenPrepare.
// It munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-codegenprepare"
#define PASS_NAME "RISC-V CodeGenPrepare"
namespace {

class RISCVCodeGenPrepare : public FunctionPass,
                            public InstVisitor<RISCVCodeGenPrepare, bool> {
  const DataLayout *DL;
  const DominatorTree *DT;
  const RISCVSubtarget *ST;

public:
  static char ID;

  RISCVCodeGenPrepare() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return PASS_NAME; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetPassConfig>();
  }

  bool visitInstruction(Instruction &I) { return false; }
  bool visitAnd(BinaryOperator &BO);
  bool visitIntrinsicInst(IntrinsicInst &I);
  bool expandVPStrideLoad(IntrinsicInst &I);
};

} // end anonymous namespace

// Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
// the upper 32 bits with ones.
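// For example (illustrative IR; the constant is chosen so that sign extending
// bit 31 yields a simm12):
//
//   %y = and i64 %x.zext, 4294965248   ; 0xFFFFF800, does not fit in simm12
// ->
//   %y = and i64 %x.zext, -2048        ; selectable as ANDI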
bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
  if (!ST->is64Bit())
    return false;

  if (!BO.getType()->isIntegerTy(64))
    return false;

  using namespace PatternMatch;

  // Left hand side should be a zext nneg.
  Value *LHSSrc;
  if (!match(BO.getOperand(0), m_NNegZExt(m_Value(LHSSrc))))
    return false;

  if (!LHSSrc->getType()->isIntegerTy(32))
    return false;

  // Right hand side should be a constant.
  Value *RHS = BO.getOperand(1);

  auto *CI = dyn_cast<ConstantInt>(RHS);
  if (!CI)
    return false;
  uint64_t C = CI->getZExtValue();

  // Look for constants that fit in 32 bits but not simm12, and can be made
  // into simm12 by sign extending bit 31. This will allow use of ANDI.
  // TODO: Is it worth doing this for simm32 as well?
  if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C)))
    return false;

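  // Because the LHS is a zext nneg of an i32, its upper 32 bits are known to
  // be zero, so setting the upper 32 bits of the mask to ones does not change
  // the result of the AND.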
  // Sign extend the constant and replace the And operand.
  C = SignExtend64<32>(C);
  BO.setOperand(1, ConstantInt::get(RHS->getType(), C));

  return true;
}

// LLVM vector reduction intrinsics return a scalar result, but on RISC-V,
// vector reduction instructions write the result in the first element of a
// vector register. So when a reduction in a loop uses a scalar phi, we end up
// with unnecessary scalar moves:
//
// loop:
// vfmv.s.f v10, fa0
// vfredosum.vs v8, v8, v10
// vfmv.f.s fa0, v8
//
// This mainly affects ordered fadd reductions, since other types of reduction
// typically use element-wise vectorisation in the loop body. This tries to
// vectorize any scalar phis that feed into a fadd reduction:
//
// loop:
// %phi = phi float [ ..., %entry ], [ %acc, %loop ]
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi,
//                                                    <vscale x 2 x float> %vec)
//
// ->
//
// loop:
// %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ]
// %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi.scalar,
//                                                    <vscale x 2 x float> %vec)
// %acc.vec = insertelement <vscale x 2 x float> poison, float %acc, i64 0
//
// Which eliminates the scalar -> vector -> scalar crossing during instruction
// selection.
bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  if (expandVPStrideLoad(I))
    return true;

  if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd)
    return false;

  auto *PHI = dyn_cast<PHINode>(I.getOperand(0));
  if (!PHI || !PHI->hasOneUse() ||
      !llvm::is_contained(PHI->incoming_values(), &I))
    return false;

  Type *VecTy = I.getOperand(1)->getType();
  IRBuilder<> Builder(PHI);
  auto *VecPHI = Builder.CreatePHI(VecTy, PHI->getNumIncomingValues());

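  // For each predecessor, insert the incoming scalar into element 0 of a
  // poison vector so the new vector phi can take it as an incoming value.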
  for (auto *BB : PHI->blocks()) {
    Builder.SetInsertPoint(BB->getTerminator());
    Value *InsertElt = Builder.CreateInsertElement(
        VecTy, PHI->getIncomingValueForBlock(BB), (uint64_t)0);
    VecPHI->addIncoming(InsertElt, BB);
  }

  Builder.SetInsertPoint(&I);
  I.setOperand(0, Builder.CreateExtractElement(VecPHI, (uint64_t)0));

  PHI->eraseFromParent();

  return true;
}

// Always expand zero strided loads so we match more .vx splat patterns, even
// if we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will
// convert it back to a strided load if it's optimized.
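// For example (illustrative IR, intrinsic type suffixes omitted):
//
//   %v = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load(
//            ptr %p, i64 0, <vscale x 2 x i1> splat (i1 true), i32 %evl)
// ->
//   %s = load i32, ptr %p
//   %v = call <vscale x 2 x i32> @llvm.experimental.vp.splat(
//            i32 %s, <vscale x 2 x i1> splat (i1 true), i32 %evl)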
bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
  Value *BasePtr, *VL;

  using namespace PatternMatch;
  if (!match(&II, m_Intrinsic<Intrinsic::experimental_vp_strided_load>(
                      m_Value(BasePtr), m_Zero(), m_AllOnes(), m_Value(VL))))
    return false;

  // If SEW>XLEN then a splat will get lowered as a zero strided load anyway,
  // so avoid expanding here.
  if (II.getType()->getScalarSizeInBits() > ST->getXLen())
    return false;

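  // The expansion below unconditionally loads one scalar element, so the EVL
  // must be known non-zero; otherwise the original intrinsic might not have
  // accessed memory at all.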
  if (!isKnownNonZero(VL, {*DL, DT, nullptr, &II}))
    return false;

  auto *VTy = cast<VectorType>(II.getType());

  IRBuilder<> Builder(&II);
  Type *STy = VTy->getElementType();
  Value *Val = Builder.CreateLoad(STy, BasePtr);
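  // Splat the loaded scalar across the vector, reusing the original mask
  // (operand 2) and EVL of the strided load.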
  Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
                                       {Val, II.getOperand(2), VL});

  II.replaceAllUsesWith(Res);
  II.eraseFromParent();
  return true;
}

bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto &TPC = getAnalysis<TargetPassConfig>();
  auto &TM = TPC.getTM<RISCVTargetMachine>();
  ST = &TM.getSubtarget<RISCVSubtarget>(F);

  DL = &F.getDataLayout();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  bool MadeChange = false;
  for (auto &BB : F)
    for (Instruction &I : llvm::make_early_inc_range(BB))
      MadeChange |= visit(I);

  return MadeChange;
}

INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)

char RISCVCodeGenPrepare::ID = 0;

FunctionPass *llvm::createRISCVCodeGenPreparePass() {
  return new RISCVCodeGenPrepare();
}