ExpandReductions.cpp source code [llvm_projects/llvm/lib/CodeGen/ExpandReductions.cpp]

1	//===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass implements IR expansion for reduction intrinsics, allowing targets
10	// to enable the intrinsics until just before codegen.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "llvm/CodeGen/ExpandReductions.h"
15	#include "llvm/Analysis/TargetTransformInfo.h"
16	#include "llvm/CodeGen/Passes.h"
17	#include "llvm/IR/IRBuilder.h"
18	#include "llvm/IR/InstIterator.h"
19	#include "llvm/IR/IntrinsicInst.h"
20	#include "llvm/IR/Intrinsics.h"
21	#include "llvm/InitializePasses.h"
22	#include "llvm/Pass.h"
23	#include "llvm/Transforms/Utils/LoopUtils.h"
24
25	using namespace llvm;
26
27	namespace {
28
29	bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
30	bool Changed = false;
31	SmallVector<IntrinsicInst *, `4`> Worklist;
32	for (auto &I : instructions(F)) {
33	if (auto *II = dyn_cast<IntrinsicInst>(Val: &I)) {
34	switch (II->getIntrinsicID()) {
35	default: break;
36	case Intrinsic::vector_reduce_fadd:
37	case Intrinsic::vector_reduce_fmul:
38	case Intrinsic::vector_reduce_add:
39	case Intrinsic::vector_reduce_mul:
40	case Intrinsic::vector_reduce_and:
41	case Intrinsic::vector_reduce_or:
42	case Intrinsic::vector_reduce_xor:
43	case Intrinsic::vector_reduce_smax:
44	case Intrinsic::vector_reduce_smin:
45	case Intrinsic::vector_reduce_umax:
46	case Intrinsic::vector_reduce_umin:
47	case Intrinsic::vector_reduce_fmax:
48	case Intrinsic::vector_reduce_fmin:
49	if (TTI->shouldExpandReduction(II))
50	Worklist.push_back(Elt: II);
51
52	break;
53	}
54	}
55	}
56
57	for (auto *II : Worklist) {
58	FastMathFlags FMF =
59	isa<FPMathOperator>(Val: II) ? II->getFastMathFlags() : FastMathFlags {};
60	Intrinsic::ID ID = II->getIntrinsicID();
61	RecurKind RK = getMinMaxReductionRecurKind(RdxID: ID);
62	TargetTransformInfo::ReductionShuffle RS =
63	TTI->getPreferredExpandedReductionShuffle(II);
64
65	Value Rdx = nullptr*;
66	IRBuilder<> Builder(II);
67	IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
68	Builder.setFastMathFlags(FMF);
69	switch (ID) {
70	default: llvm_unreachable("Unexpected intrinsic!");
71	case Intrinsic::vector_reduce_fadd:
72	case Intrinsic::vector_reduce_fmul: {
73	// FMFs must be attached to the call, otherwise it's an ordered reduction
74	// and it can't be handled by generating a shuffle sequence.
75	Value *Acc = II->getArgOperand(i: `0`);
76	Value *Vec = II->getArgOperand(i: `1`);
77	unsigned RdxOpcode = getArithmeticReductionInstruction(RdxID: ID);
78	if (!FMF.allowReassoc())
79	Rdx = getOrderedReduction(Builder, Acc, Src: Vec, Op: RdxOpcode, MinMaxKind: RK);
80	else {
81	if (!isPowerOf2_32(
82	Value: cast<FixedVectorType>(Val: Vec->getType())->getNumElements()))
83	continue;
84	Rdx = getShuffleReduction(Builder, Src: Vec, Op: RdxOpcode, RS, MinMaxKind: RK);
85	Rdx = Builder.CreateBinOp(Opc: (Instruction::BinaryOps)RdxOpcode, LHS: Acc, RHS: Rdx,
86	Name: "bin.rdx");
87	}
88	break;
89	}
90	case Intrinsic::vector_reduce_and:
91	case Intrinsic::vector_reduce_or: {
92	// Canonicalize logical or/and reductions:
93	// Or reduction for i1 is represented as:
94	// %val = bitcast <ReduxWidth x i1> to iReduxWidth
95	// %res = cmp ne iReduxWidth %val, 0
96	// And reduction for i1 is represented as:
97	// %val = bitcast <ReduxWidth x i1> to iReduxWidth
98	// %res = cmp eq iReduxWidth %val, 11111
99	Value *Vec = II->getArgOperand(i: `0`);
100	auto *FTy = cast<FixedVectorType>(Val: Vec->getType());
101	unsigned NumElts = FTy->getNumElements();
102	if (!isPowerOf2_32(Value: NumElts))
103	continue;
104
105	if (FTy->getElementType() == Builder.getInt1Ty()) {
106	Rdx = Builder.CreateBitCast(V: Vec, DestTy: Builder.getIntNTy(N: NumElts));
107	if (ID == Intrinsic::vector_reduce_and) {
108	Rdx = Builder.CreateICmpEQ(
109	LHS: Rdx, RHS: ConstantInt::getAllOnesValue(Ty: Rdx->getType()));
110	} else {
111	assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction.");
112	Rdx = Builder.CreateIsNotNull(Arg: Rdx);
113	}
114	break;
115	}
116	unsigned RdxOpcode = getArithmeticReductionInstruction(RdxID: ID);
117	Rdx = getShuffleReduction(Builder, Src: Vec, Op: RdxOpcode, RS, MinMaxKind: RK);
118	break;
119	}
120	case Intrinsic::vector_reduce_add:
121	case Intrinsic::vector_reduce_mul:
122	case Intrinsic::vector_reduce_xor:
123	case Intrinsic::vector_reduce_smax:
124	case Intrinsic::vector_reduce_smin:
125	case Intrinsic::vector_reduce_umax:
126	case Intrinsic::vector_reduce_umin: {
127	Value *Vec = II->getArgOperand(i: `0`);
128	if (!isPowerOf2_32(
129	Value: cast<FixedVectorType>(Val: Vec->getType())->getNumElements()))
130	continue;
131	unsigned RdxOpcode = getArithmeticReductionInstruction(RdxID: ID);
132	Rdx = getShuffleReduction(Builder, Src: Vec, Op: RdxOpcode, RS, MinMaxKind: RK);
133	break;
134	}
135	case Intrinsic::vector_reduce_fmax:
136	case Intrinsic::vector_reduce_fmin: {
137	// We require "nnan" to use a shuffle reduction; "nsz" is implied by the
138	// semantics of the reduction.
139	Value *Vec = II->getArgOperand(i: `0`);
140	if (!isPowerOf2_32(
141	Value: cast<FixedVectorType>(Val: Vec->getType())->getNumElements()) \|\|
142	!FMF.noNaNs())
143	continue;
144	unsigned RdxOpcode = getArithmeticReductionInstruction(RdxID: ID);
145	Rdx = getShuffleReduction(Builder, Src: Vec, Op: RdxOpcode, RS, MinMaxKind: RK);
146	break;
147	}
148	}
149	II->replaceAllUsesWith(V: Rdx);
150	II->eraseFromParent();
151	Changed = true;
152	}
153	return Changed;
154	}
155
156	class ExpandReductions : public FunctionPass {
157	public:
158	static char ID;
159	ExpandReductions() : FunctionPass (ID) {
160	initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
161	}
162
163	bool runOnFunction(Function &F) override {
164	const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
165	return expandReductions(F, TTI);
166	}
167
168	void getAnalysisUsage(AnalysisUsage &AU) const override {
169	AU.addRequired<TargetTransformInfoWrapperPass>();
170	AU.setPreservesCFG();
171	}
172	};
173	}
174
175	char ExpandReductions::ID;
176	INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
177	"Expand reduction intrinsics", false, false)
178	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
179	INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
180	"Expand reduction intrinsics", false, false)
181
182	FunctionPass *llvm::createExpandReductionsPass() {
183	return new ExpandReductions ();
184	}
185
186	PreservedAnalyses ExpandReductionsPass::run(Function &F,
187	FunctionAnalysisManager &AM) {
188	const auto &TTI = AM.getResult<TargetIRAnalysis>(IR&: F);
189	if (!expandReductions(F, TTI: &TTI))
190	return PreservedAnalyses::all();
191	PreservedAnalyses PA;
192	PA.preserveSet<CFGAnalyses>();
193	return PA;
194	}
195

Browse the source code of llvm_projects/llvm/lib/CodeGen/ExpandReductions.cpp