ExpandReductions.cpp source code [llvm_projects/llvm/lib/CodeGen/ExpandReductions.cpp]

1	//===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass implements IR expansion for reduction intrinsics, allowing targets
10	// to enable the intrinsics until just before codegen.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "llvm/CodeGen/ExpandReductions.h"
15	#include "llvm/Analysis/LoopInfo.h"
16	#include "llvm/Analysis/TargetTransformInfo.h"
17	#include "llvm/CodeGen/Passes.h"
18	#include "llvm/IR/Dominators.h"
19	#include "llvm/IR/IRBuilder.h"
20	#include "llvm/IR/InstIterator.h"
21	#include "llvm/IR/IntrinsicInst.h"
22	#include "llvm/IR/Intrinsics.h"
23	#include "llvm/InitializePasses.h"
24	#include "llvm/Pass.h"
25	#include "llvm/Transforms/Utils/LoopUtils.h"
26
27	using namespace llvm;
28
29	namespace {
30
31	bool expandReductions(Function &F, const TargetTransformInfo *TTI,
32	DominatorTree DT, LoopInfo LI) {
33	bool Changed = false;
34	SmallVector<IntrinsicInst *, `4`> Worklist;
35	for (auto &I : instructions(F)) {
36	if (auto *II = dyn_cast<IntrinsicInst>(Val: &I)) {
37	switch (II->getIntrinsicID()) {
38	default:
39	break;
40	case Intrinsic::vector_reduce_fadd:
41	case Intrinsic::vector_reduce_fmul:
42	case Intrinsic::vector_reduce_add:
43	case Intrinsic::vector_reduce_mul:
44	case Intrinsic::vector_reduce_and:
45	case Intrinsic::vector_reduce_or:
46	case Intrinsic::vector_reduce_xor:
47	case Intrinsic::vector_reduce_smax:
48	case Intrinsic::vector_reduce_smin:
49	case Intrinsic::vector_reduce_umax:
50	case Intrinsic::vector_reduce_umin:
51	case Intrinsic::vector_reduce_fmax:
52	case Intrinsic::vector_reduce_fmin:
53	case Intrinsic::vector_reduce_fmaximum:
54	case Intrinsic::vector_reduce_fminimum: {
55	// Only expand if the target doesn't support this operation natively.
56	if (TTI->shouldExpandReduction(II))
57	Worklist.push_back(Elt: II);
58	break;
59	}
60	}
61	}
62	}
63
64	for (auto *II : Worklist) {
65	FastMathFlags FMF = II->getFastMathFlagsOrNone();
66	Intrinsic::ID ID = II->getIntrinsicID();
67	RecurKind RK = getMinMaxReductionRecurKind(RdxID: ID);
68	TargetTransformInfo::ReductionShuffle RS =
69	TTI->getPreferredExpandedReductionShuffle(II);
70
71	Value Rdx = nullptr*;
72	IRBuilder<> Builder(II);
73	IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
74	Builder.setFastMathFlags(FMF);
75	switch (ID) {
76	default:
77	llvm_unreachable("Unexpected intrinsic!");
78	case Intrinsic::vector_reduce_fadd:
79	case Intrinsic::vector_reduce_fmul: {
80	// FMFs must be attached to the call, otherwise it's an ordered reduction
81	// and it can't be handled by generating a shuffle sequence.
82	Value *Acc = II->getArgOperand(i: `0`);
83	Value *Vec = II->getArgOperand(i: `1`);
84	unsigned RdxOpcode = getArithmeticReductionInstruction(RdxID: ID);
85	if (isa<ScalableVectorType>(Val: Vec->getType())) {
86	Rdx = expandReductionViaLoop(Builder, Vec, RdxOpcode, Acc, DT, LI);
87	break;
88	}
89	if (!FMF.allowReassoc())
90	Rdx = getOrderedReduction(Builder, Acc, Src: Vec, Op: RdxOpcode, MinMaxKind: RK);
91	else {
92	if (!isPowerOf2_32(
93	Value: cast<FixedVectorType>(Val: Vec->getType())->getNumElements()))
94	continue;
95	Rdx = getShuffleReduction(Builder, Src: Vec, Op: RdxOpcode, RS, MinMaxKind: RK);
96	Rdx = Builder.CreateBinOp(Opc: (Instruction::BinaryOps)RdxOpcode, LHS: Acc, RHS: Rdx,
97	Name: "bin.rdx");
98	}
99	break;
100	}
101	case Intrinsic::vector_reduce_and:
102	case Intrinsic::vector_reduce_or: {
103	// Canonicalize logical or/and reductions:
104	// Or reduction for i1 is represented as:
105	// %val = bitcast <ReduxWidth x i1> to iReduxWidth
106	// %res = cmp ne iReduxWidth %val, 0
107	// And reduction for i1 is represented as:
108	// %val = bitcast <ReduxWidth x i1> to iReduxWidth
109	// %res = cmp eq iReduxWidth %val, 11111
110	Value *Vec = II->getArgOperand(i: `0`);
111	auto *FTy = cast<FixedVectorType>(Val: Vec->getType());
112	unsigned NumElts = FTy->getNumElements();
113	if (!isPowerOf2_32(Value: NumElts))
114	continue;
115
116	if (FTy->getElementType() == Builder.getInt1Ty()) {
117	Rdx = Builder.CreateBitCast(V: Vec, DestTy: Builder.getIntNTy(N: NumElts));
118	if (ID == Intrinsic::vector_reduce_and) {
119	Rdx = Builder.CreateICmpEQ(
120	LHS: Rdx, RHS: ConstantInt::getAllOnesValue(Ty: Rdx->getType()));
121	} else {
122	assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction.");
123	Rdx = Builder.CreateIsNotNull(Arg: Rdx);
124	}
125	break;
126	}
127	unsigned RdxOpcode = getArithmeticReductionInstruction(RdxID: ID);
128	Rdx = getShuffleReduction(Builder, Src: Vec, Op: RdxOpcode, RS, MinMaxKind: RK);
129	break;
130	}
131	case Intrinsic::vector_reduce_add:
132	case Intrinsic::vector_reduce_mul:
133	case Intrinsic::vector_reduce_xor:
134	case Intrinsic::vector_reduce_smax:
135	case Intrinsic::vector_reduce_smin:
136	case Intrinsic::vector_reduce_umax:
137	case Intrinsic::vector_reduce_umin: {
138	Value *Vec = II->getArgOperand(i: `0`);
139	unsigned RdxOpcode = getArithmeticReductionInstruction(RdxID: ID);
140	if (isa<ScalableVectorType>(Val: Vec->getType())) {
141	Type *EltTy = Vec->getType()->getScalarType();
142	Value *Ident = getReductionIdentity(RdxID: ID, Ty: EltTy, FMF);
143	Rdx = expandReductionViaLoop(Builder, Vec, RdxOpcode, Acc: Ident, DT, LI);
144	break;
145	}
146	if (!isPowerOf2_32(
147	Value: cast<FixedVectorType>(Val: Vec->getType())->getNumElements()))
148	continue;
149	Rdx = getShuffleReduction(Builder, Src: Vec, Op: RdxOpcode, RS, MinMaxKind: RK);
150	break;
151	}
152	case Intrinsic::vector_reduce_fmax:
153	case Intrinsic::vector_reduce_fmin: {
154	// We require "nnan" to use a shuffle reduction; "nsz" is implied by the
155	// semantics of the reduction.
156	Value *Vec = II->getArgOperand(i: `0`);
157	if (!isPowerOf2_32(
158	Value: cast<FixedVectorType>(Val: Vec->getType())->getNumElements()) \|\|
159	!FMF.noNaNs())
160	continue;
161	unsigned RdxOpcode = getArithmeticReductionInstruction(RdxID: ID);
162	Rdx = getShuffleReduction(Builder, Src: Vec, Op: RdxOpcode, RS, MinMaxKind: RK);
163	break;
164	}
165	case Intrinsic::vector_reduce_fmaximum:
166	case Intrinsic::vector_reduce_fminimum: {
167	Value *Vec = II->getArgOperand(i: `0`);
168	if (!isPowerOf2_32(
169	Value: cast<FixedVectorType>(Val: Vec->getType())->getNumElements()))
170	continue;
171	unsigned RdxOpcode = getArithmeticReductionInstruction(RdxID: ID);
172	Rdx = getShuffleReduction(Builder, Src: Vec, Op: RdxOpcode, RS, MinMaxKind: RK);
173	break;
174	}
175	}
176	II->replaceAllUsesWith(V: Rdx);
177	II->eraseFromParent();
178	Changed = true;
179	}
180	return Changed;
181	}
182
183	class ExpandReductions : public FunctionPass {
184	public:
185	static char ID;
186	ExpandReductions() : FunctionPass (ID) {}
187
188	bool runOnFunction(Function &F) override {
189	const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
190	auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
191	auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
192	auto DT = DTWP ? &DTWP->getDomTree() : nullptr*;
193	auto LI = LIWP ? &LIWP->getLoopInfo() : nullptr*;
194	return expandReductions(F, TTI, DT, LI);
195	}
196
197	void getAnalysisUsage(AnalysisUsage &AU) const override {
198	AU.addRequired<TargetTransformInfoWrapperPass>();
199	AU.addPreserved<DominatorTreeWrapperPass>();
200	AU.addPreserved<LoopInfoWrapperPass>();
201	}
202	};
203	}
204
205	char ExpandReductions::ID;
206	INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
207	"Expand reduction intrinsics", false, false)
208	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
209	INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
210	"Expand reduction intrinsics", false, false)
211
212	FunctionPass *llvm::createExpandReductionsPass() {
213	return new ExpandReductions ();
214	}
215
216	PreservedAnalyses ExpandReductionsPass::run(Function &F,
217	FunctionAnalysisManager &AM) {
218	const auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
219	auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(IR&: F);
220	auto *LI = AM.getCachedResult<LoopAnalysis>(IR&: F);
221	if (!expandReductions(F, TTI: &TTI, DT, LI))
222	return PreservedAnalyses::all();
223	PreservedAnalyses PA;
224	PA.preserve<DominatorTreeAnalysis>();
225	PA.preserve<LoopAnalysis>();
226	return PA;
227	}
228

Browse the source code of llvm_projects/llvm/lib/CodeGen/ExpandReductions.cpp