Reassociate.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/Reassociate.cpp]

1	//===- Reassociate.cpp - Reassociate binary expressions -------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass reassociates commutative expressions in an order that is designed
10	// to promote better constant propagation, GCSE, LICM, PRE, etc.
11	//
12	// For example: 4 + (x + 5) -> x + (4 + 5)
13	//
14	// In the implementation of this algorithm, constants are assigned rank = 0,
15	// function arguments are rank = 1, and other values are assigned ranks
16	// corresponding to the reverse post order traversal of current function
17	// (starting at 2), which effectively gives values in deep loops higher rank
18	// than values not in loops.
19	//
20	//===----------------------------------------------------------------------===//
21
22	#include "llvm/Transforms/Scalar/Reassociate.h"
23	#include "llvm/ADT/APFloat.h"
24	#include "llvm/ADT/APInt.h"
25	#include "llvm/ADT/DenseMap.h"
26	#include "llvm/ADT/PostOrderIterator.h"
27	#include "llvm/ADT/SmallPtrSet.h"
28	#include "llvm/ADT/SmallSet.h"
29	#include "llvm/ADT/SmallVector.h"
30	#include "llvm/ADT/Statistic.h"
31	#include "llvm/Analysis/BasicAliasAnalysis.h"
32	#include "llvm/Analysis/ConstantFolding.h"
33	#include "llvm/Analysis/GlobalsModRef.h"
34	#include "llvm/Analysis/ValueTracking.h"
35	#include "llvm/IR/Argument.h"
36	#include "llvm/IR/BasicBlock.h"
37	#include "llvm/IR/CFG.h"
38	#include "llvm/IR/Constant.h"
39	#include "llvm/IR/Constants.h"
40	#include "llvm/IR/Function.h"
41	#include "llvm/IR/IRBuilder.h"
42	#include "llvm/IR/InstrTypes.h"
43	#include "llvm/IR/Instruction.h"
44	#include "llvm/IR/Instructions.h"
45	#include "llvm/IR/Operator.h"
46	#include "llvm/IR/PassManager.h"
47	#include "llvm/IR/PatternMatch.h"
48	#include "llvm/IR/Type.h"
49	#include "llvm/IR/User.h"
50	#include "llvm/IR/Value.h"
51	#include "llvm/IR/ValueHandle.h"
52	#include "llvm/InitializePasses.h"
53	#include "llvm/Pass.h"
54	#include "llvm/Support/Casting.h"
55	#include "llvm/Support/CommandLine.h"
56	#include "llvm/Support/Debug.h"
57	#include "llvm/Support/raw_ostream.h"
58	#include "llvm/Transforms/Scalar.h"
59	#include "llvm/Transforms/Utils/Local.h"
60	#include <algorithm>
61	#include <cassert>
62	#include <utility>
63
64	using namespace llvm;
65	using namespace reassociate;
66	using namespace PatternMatch;
67
68	#define DEBUG_TYPE "reassociate"
69
70	STATISTIC(NumChanged, "Number of insts reassociated");
71	STATISTIC(NumAnnihil, "Number of expr tree annihilated");
72	STATISTIC(NumFactor , "Number of multiplies factored");
73
74	static cl::opt<bool>
75	UseCSELocalOpt(DEBUG_TYPE "-use-cse-local",
76	cl::desc("Only reorder expressions within a basic block "
77	"when exposing CSE opportunities"),
78	cl::init(Val: true), cl::Hidden);
79
80	#ifndef NDEBUG
81	/// Print out the expression identified in the Ops list.
82	static void PrintOps(Instruction I, const* SmallVectorImpl<ValueEntry> &Ops) {
83	Module *M = I->getModule();
84	dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
85	<< *Ops[`0`].Op->getType() << `'\t'`;
86	for (const ValueEntry &Op : Ops) {
87	dbgs() << "[ ";
88	Op.Op->printAsOperand(dbgs(), false, M);
89	dbgs() << ", #" << Op.Rank << "] ";
90	}
91	}
92	#endif
93
94	/// Utility class representing a non-constant Xor-operand. We classify
95	/// non-constant Xor-Operands into two categories:
96	/// C1) The operand is in the form "X & C", where C is a constant and C != ~0
97	/// C2)
98	/// C2.1) The operand is in the form of "X \| C", where C is a non-zero
99	/// constant.
100	/// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this
101	/// operand as "E \| 0"
102	class llvm::reassociate::XorOpnd {
103	public:
104	XorOpnd(Value *V);
105
106	bool isInvalid() const { return SymbolicPart == nullptr; }
107	bool isOrExpr() const { return isOr; }
108	Value getValue() const* { return OrigVal; }
109	Value getSymbolicPart() const* { return SymbolicPart; }
110	unsigned getSymbolicRank() const { return SymbolicRank; }
111	const APInt &getConstPart() const { return ConstPart; }
112
113	void Invalidate() { SymbolicPart = OrigVal = nullptr; }
114	void setSymbolicRank(unsigned R) { SymbolicRank = R; }
115
116	private:
117	Value *OrigVal;
118	Value *SymbolicPart;
119	APInt ConstPart;
120	unsigned SymbolicRank;
121	bool isOr;
122	};
123
124	XorOpnd::XorOpnd(Value *V) {
125	assert(!isa<ConstantInt>(V) && "No ConstantInt");
126	OrigVal = V;
127	Instruction *I = dyn_cast<Instruction>(Val: V);
128	SymbolicRank = `0`;
129
130	if (I && (I->getOpcode() == Instruction::Or \|\|
131	I->getOpcode() == Instruction::And)) {
132	Value *V0 = I->getOperand(i: `0`);
133	Value *V1 = I->getOperand(i: `1`);
134	const APInt *C;
135	if (match(V: V0, P: m_APInt(Res&: C)))
136	std::swap(a&: V0, b&: V1);
137
138	if (match(V: V1, P: m_APInt(Res&: C))) {
139	ConstPart = *C;
140	SymbolicPart = V0;
141	isOr = (I->getOpcode() == Instruction::Or);
142	return;
143	}
144	}
145
146	// view the operand as "V \| 0"
147	SymbolicPart = V;
148	ConstPart = APInt::getZero(numBits: V->getType()->getScalarSizeInBits());
149	isOr = true;
150	}
151
152	/// Return true if I is an instruction with the FastMathFlags that are needed
153	/// for general reassociation set. This is not the same as testing
154	/// Instruction::isAssociative() because it includes operations like fsub.
155	/// (This routine is only intended to be called for floating-point operations.)
156	static bool hasFPAssociativeFlags(Instruction *I) {
157	assert(I && isa<FPMathOperator>(I) && "Should only check FP ops");
158	return I->hasAllowReassoc() && I->hasNoSignedZeros();
159	}
160
161	/// Return true if V is an instruction of the specified opcode and if it
162	/// only has one use.
163	static BinaryOperator isReassociableOp(Value V, unsigned Opcode) {
164	auto *BO = dyn_cast<BinaryOperator>(Val: V);
165	if (BO && BO->hasOneUse() && BO->getOpcode() == Opcode)
166	if (!isa<FPMathOperator>(Val: BO) \|\| hasFPAssociativeFlags(I: BO))
167	return BO;
168	return nullptr;
169	}
170
171	static BinaryOperator isReassociableOp(Value V, unsigned Opcode1,
172	unsigned Opcode2) {
173	auto *BO = dyn_cast<BinaryOperator>(Val: V);
174	if (BO && BO->hasOneUse() &&
175	(BO->getOpcode() == Opcode1 \|\| BO->getOpcode() == Opcode2))
176	if (!isa<FPMathOperator>(Val: BO) \|\| hasFPAssociativeFlags(I: BO))
177	return BO;
178	return nullptr;
179	}
180
181	void ReassociatePass::BuildRankMap(Function &F,
182	ReversePostOrderTraversal<Function*> &RPOT) {
183	unsigned Rank = `2`;
184
185	// Assign distinct ranks to function arguments.
186	for (auto &Arg : F.args()) {
187	ValueRankMap [&Arg] = ++Rank;
188	LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank
189	<< "\n");
190	}
191
192	// Traverse basic blocks in ReversePostOrder.
193	for (BasicBlock *BB : RPOT) {
194	unsigned BBRank = RankMap [BB] = ++Rank << `16`;
195
196	// Walk the basic block, adding precomputed ranks for any instructions that
197	// we cannot move. This ensures that the ranks for these instructions are
198	// all different in the block.
199	for (Instruction &I : *BB)
200	if (mayHaveNonDefUseDependency(I))
201	ValueRankMap [&I] = ++BBRank;
202	}
203	}
204
205	unsigned ReassociatePass::getRank(Value *V) {
206	// Return 1+MAX(rank(LHS), rank(RHS)) for expressions so we can reassociate
207	// expressions for code motion. Use an explicit worklist rather than native
208	// recursion so long acyclic use-def chains do not overflow the stack.
209	struct RankWorkItem {
210	Value *V;
211	unsigned OpNo;
212	unsigned Rank;
213	};
214
215	// Each item is one suspended recursive getRank() call.
216	// Completed ranks are folded back into the parent.
217	SmallVector<RankWorkItem, `16`> Worklist;
218	Worklist.push_back(Elt: RankWorkItem{.V: V, .OpNo: `0`, .Rank: `0`});
219
220	while (true) {
221	RankWorkItem &Item = Worklist.back();
222	Instruction *I = dyn_cast<Instruction>(Val: Item.V);
223	unsigned Rank = `0`;
224	if (!I) {
225	// Function argument, global or constant
226	Rank = isa<Argument>(Val: Item.V) ? ValueRankMap [Item.V] : `0`;
227	} else if (ValueRankMap [I]) {
228	// Instruction that is not movable.
229	Rank = ValueRankMap [I];
230	} else if (Item.OpNo == I->getNumOperands() \|\|
231	Item.Rank == RankMap [I->getParent()]) {
232	// All operands were visited or the max block rank was reached.
233	Rank = Item.Rank;
234	// If this is a 'not' or 'neg' instruction, do not count it for rank.
235	// This assures us that X and ~X will have the same rank.
236	if (!match(V: I, P: m_Not(V: m_Value())) && !match(V: I, P: m_Neg(V: m_Value())) &&
237	!match(V: I, P: m_FNeg(X: m_Value())))
238	++Rank;
239
240	LLVM_DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << Rank
241	<< "\n");
242
243	ValueRankMap [I] = Rank;
244	} else {
245	Worklist.push_back(Elt: RankWorkItem{.V: I->getOperand(i: Item.OpNo), .OpNo: `0`, .Rank: `0`});
246	continue;
247	}
248
249	// Once the current use-def node has a known rank, carry that rank back to
250	// the parent expression and advance past the operand that led here.
251	Worklist.pop_back();
252	if (Worklist.empty())
253	return Rank;
254
255	RankWorkItem &Parent = Worklist.back();
256	Parent.Rank = std::max(a: Parent.Rank, b: Rank);
257	++Parent.OpNo;
258	}
259	}
260
261	// Canonicalize constants to RHS. Otherwise, sort the operands by rank.
262	void ReassociatePass::canonicalizeOperands(Instruction *I) {
263	assert(isa<BinaryOperator>(I) && "Expected binary operator.");
264	assert(I->isCommutative() && "Expected commutative operator.");
265
266	Value *LHS = I->getOperand(i: `0`);
267	Value *RHS = I->getOperand(i: `1`);
268	if (LHS == RHS \|\| isa<Constant>(Val: RHS))
269	return;
270	if (isa<Constant>(Val: LHS) \|\| getRank(V: RHS) < getRank(V: LHS)) {
271	cast<BinaryOperator>(Val: I)->swapOperands();
272	MadeChange = true;
273	}
274	}
275
276	static BinaryOperator CreateAdd(Value S1, Value S2, const* Twine &Name,
277	BasicBlock::iterator InsertBefore,
278	Value *FlagsOp) {
279	if (S1->getType()->isIntOrIntVectorTy())
280	return BinaryOperator::CreateAdd(V1: S1, V2: S2, Name, InsertBefore);
281	else {
282	BinaryOperator *Res =
283	BinaryOperator::CreateFAdd(V1: S1, V2: S2, Name, InsertBefore);
284	Res->setFastMathFlags(cast<FPMathOperator>(Val: FlagsOp)->getFastMathFlags());
285	return Res;
286	}
287	}
288
289	static BinaryOperator CreateMul(Value S1, Value S2, const* Twine &Name,
290	BasicBlock::iterator InsertBefore,
291	Value *FlagsOp) {
292	if (S1->getType()->isIntOrIntVectorTy())
293	return BinaryOperator::CreateMul(V1: S1, V2: S2, Name, InsertBefore);
294	else {
295	BinaryOperator *Res =
296	BinaryOperator::CreateFMul(V1: S1, V2: S2, Name, InsertBefore);
297	Res->setFastMathFlags(cast<FPMathOperator>(Val: FlagsOp)->getFastMathFlags());
298	return Res;
299	}
300	}
301
302	static Instruction CreateNeg(Value S1, const Twine &Name,
303	BasicBlock::iterator InsertBefore,
304	Value *FlagsOp) {
305	if (S1->getType()->isIntOrIntVectorTy())
306	return BinaryOperator::CreateNeg(Op: S1, Name, InsertBefore);
307
308	if (auto *FMFSource = dyn_cast<Instruction>(Val: FlagsOp))
309	return UnaryOperator::CreateFNegFMF(Op: S1, FMFSource, Name, InsertBefore);
310
311	return UnaryOperator::CreateFNeg(V: S1, Name, InsertBefore);
312	}
313
314	/// Replace 0-X with X-1.*
315	static BinaryOperator LowerNegateToMultiply(Instruction Neg) {
316	assert((isa<UnaryOperator>(Neg) \|\| isa<BinaryOperator>(Neg)) &&
317	"Expected a Negate!");
318	// FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0.
319	unsigned OpNo = isa<BinaryOperator>(Val: Neg) ? `1` : `0`;
320	Type *Ty = Neg->getType();
321	Constant *NegOne = Ty->isIntOrIntVectorTy() ?
322	ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, V: -`1.0`);
323
324	BinaryOperator *Res =
325	CreateMul(S1: Neg->getOperand(i: OpNo), S2: NegOne, Name: "", InsertBefore: Neg->getIterator(), FlagsOp: Neg);
326	Neg->setOperand(i: OpNo, Val: Constant::getNullValue(Ty)); // Drop use of op.
327	Res->takeName(V: Neg);
328	Neg->replaceAllUsesWith(V: Res);
329	Res->setDebugLoc(Neg->getDebugLoc());
330	return Res;
331	}
332
333	using RepeatedValue = std::pair<Value *, uint64_t>;
334
335	/// Given an associative binary expression, return the leaf
336	/// nodes in Ops along with their weights (how many times the leaf occurs). The
337	/// original expression is the same as
338	/// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times
339	/// op
340	/// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times
341	/// op
342	/// ...
343	/// op
344	/// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times
345	///
346	/// Note that the values Ops[0].first, ..., Ops[N].first are all distinct.
347	///
348	/// This routine may modify the function, in which case it returns 'true'. The
349	/// changes it makes may well be destructive, changing the value computed by 'I'
350	/// to something completely different. Thus if the routine returns 'true' then
351	/// you MUST either replace I with a new expression computed from the Ops array,
352	/// or use RewriteExprTree to put the values back in.
353	///
354	/// A leaf node is either not a binary operation of the same kind as the root
355	/// node 'I' (i.e. is not a binary operator at all, or is, but with a different
356	/// opcode), or is the same kind of binary operator but has a use which either
357	/// does not belong to the expression, or does belong to the expression but is
358	/// a leaf node. Every leaf node has at least one use that is a non-leaf node
359	/// of the expression, while for non-leaf nodes (except for the root 'I') every
360	/// use is a non-leaf node of the expression.
361	///
362	/// For example:
363	/// expression graph node names
364	///
365	/// + \| I
366	/// / \ \|
367	/// + + \| A, B
368	/// / \ / \ \|
369	/// + * \| C, D, E*
370	/// / \ / \ / \ \|
371	/// + \| F, G*
372	///
373	/// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in
374	/// that order) (C, 1), (E, 1), (F, 2), (G, 2).
375	///
376	/// The expression is maximal: if some instruction is a binary operator of the
377	/// same kind as 'I', and all of its uses are non-leaf nodes of the expression,
378	/// then the instruction also belongs to the expression, is not a leaf node of
379	/// it, and its operands also belong to the expression (but may be leaf nodes).
380	///
381	/// NOTE: This routine will set operands of non-leaf non-root nodes to undef in
382	/// order to ensure that every non-root node in the expression has exactly one
383	/// use by a non-leaf node of the expression. This destruction means that the
384	/// caller MUST either replace 'I' with a new expression or use something like
385	/// RewriteExprTree to put the values back in if the routine indicates that it
386	/// made a change by returning 'true'.
387	///
388	/// In the above example either the right operand of A or the left operand of B
389	/// will be replaced by undef. If it is B's operand then this gives:
390	///
391	/// + \| I
392	/// / \ \|
393	/// + + \| A, B - operand of B replaced with undef
394	/// / \ \ \|
395	/// + * \| C, D, E*
396	/// / \ / \ / \ \|
397	/// + \| F, G*
398	///
399	/// Note that such undef operands can only be reached by passing through 'I'.
400	/// For example, if you visit operands recursively starting from a leaf node
401	/// then you will never see such an undef operand unless you get back to 'I',
402	/// which requires passing through a phi node.
403	///
404	/// Note that this routine may also mutate binary operators of the wrong type
405	/// that have all uses inside the expression (i.e. only used by non-leaf nodes
406	/// of the expression) if it can turn them into binary operators of the right
407	/// type and thus make the expression bigger.
408	static bool LinearizeExprTree(Instruction *I,
409	SmallVectorImpl<RepeatedValue> &Ops,
410	ReassociatePass::OrderedSet &ToRedo,
411	OverflowTracking &Flags) {
412	assert((isa<UnaryOperator>(I) \|\| isa<BinaryOperator>(I)) &&
413	"Expected a UnaryOperator or BinaryOperator!");
414	LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << `'\n'`);
415	unsigned Opcode = I->getOpcode();
416	assert(I->isAssociative() && I->isCommutative() &&
417	"Expected an associative and commutative operation!");
418
419	// Visit all operands of the expression, keeping track of their weight (the
420	// number of paths from the expression root to the operand, or if you like
421	// the number of times that operand occurs in the linearized expression).
422	// For example, if I = X + A, where X = A + B, then I, X and B have weight 1
423	// while A has weight two.
424
425	// Worklist of non-leaf nodes (their operands are in the expression too) along
426	// with their weights, representing a certain number of paths to the operator.
427	// If an operator occurs in the worklist multiple times then we found multiple
428	// ways to get to it.
429	SmallVector<std::pair<Instruction , uint64_t>, `8`> Worklist; // (Op, Weight)*
430	Worklist.push_back(Elt: std::make_pair(x&: I, y: `1`));
431	bool Changed = false;
432
433	// Leaves of the expression are values that either aren't the right kind of
434	// operation (eg: a constant, or a multiply in an add tree), or are, but have
435	// some uses that are not inside the expression. For example, in I = X + X,
436	// X = A + B, the value X has two uses (by I) that are in the expression. If
437	// X has any other uses, for example in a return instruction, then we consider
438	// X to be a leaf, and won't analyze it further. When we first visit a value,
439	// if it has more than one use then at first we conservatively consider it to
440	// be a leaf. Later, as the expression is explored, we may discover some more
441	// uses of the value from inside the expression. If all uses turn out to be
442	// from within the expression (and the value is a binary operator of the right
443	// kind) then the value is no longer considered to be a leaf, and its operands
444	// are explored.
445
446	// Leaves - Keeps track of the set of putative leaves as well as the number of
447	// paths to each leaf seen so far.
448	using LeafMap = DenseMap<Value *, uint64_t>;
449	LeafMap Leaves; // Leaf -> Total weight so far.
450	SmallVector<Value , `8`> LeafOrder; // Ensure deterministic leaf output order.*
451	const DataLayout &DL = I->getDataLayout();
452
453	#ifndef NDEBUG
454	SmallPtrSet<Value , `8`> Visited; // For checking the iteration scheme.*
455	#endif
456	while (!Worklist.empty()) {
457	// We examine the operands of this binary operator.
458	auto [I, Weight] = Worklist.pop_back_val();
459
460	Flags.mergeFlags(I&: *I);
461
462	for (unsigned OpIdx = `0`; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands.
463	Value *Op = I->getOperand(i: OpIdx);
464	LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n");
465	assert((!Op->hasUseList() \|\| !Op->use_empty()) &&
466	"No uses, so how did we get to it?!");
467
468	// If this is a binary operation of the right kind with only one use then
469	// add its operands to the expression.
470	if (BinaryOperator *BO = isReassociableOp(V: Op, Opcode)) {
471	assert(Visited.insert(Op).second && "Not first visit!");
472	LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n");
473	Worklist.push_back(Elt: std::make_pair(x&: BO, y&: Weight));
474	continue;
475	}
476
477	// Appears to be a leaf. Is the operand already in the set of leaves?
478	LeafMap::iterator It = Leaves.find(Val: Op);
479	if (It == Leaves.end()) {
480	// Not in the leaf map. Must be the first time we saw this operand.
481	assert(Visited.insert(Op).second && "Not first visit!");
482	if (!Op->hasOneUse()) {
483	// This value has uses not accounted for by the expression, so it is
484	// not safe to modify. Mark it as being a leaf.
485	LLVM_DEBUG(dbgs()
486	<< "ADD USES LEAF: " << *Op << " (" << Weight << ")\n");
487	LeafOrder.push_back(Elt: Op);
488	Leaves [Op] = Weight;
489	continue;
490	}
491	// No uses outside the expression, try morphing it.
492	} else {
493	// Already in the leaf map.
494	assert(It != Leaves.end() && Visited.count(Op) &&
495	"In leaf map but not visited!");
496
497	// Update the number of paths to the leaf.
498	It ->second += Weight;
499	assert(It->second >= Weight && "Weight overflows");
500
501	// If we still have uses that are not accounted for by the expression
502	// then it is not safe to modify the value.
503	if (!Op->hasOneUse())
504	continue;
505
506	// No uses outside the expression, try morphing it.
507	Weight = It ->second;
508	Leaves.erase(I: It); // Since the value may be morphed below.
509	}
510
511	// At this point we have a value which, first of all, is not a binary
512	// expression of the right kind, and secondly, is only used inside the
513	// expression. This means that it can safely be modified. See if we
514	// can usefully morph it into an expression of the right kind.
515	assert((!isa<Instruction>(Op) \|\|
516	cast<Instruction>(Op)->getOpcode() != Opcode
517	\|\| (isa<FPMathOperator>(Op) &&
518	!hasFPAssociativeFlags(cast<Instruction>(Op)))) &&
519	"Should have been handled above!");
520	assert(Op->hasOneUse() && "Has uses outside the expression tree!");
521
522	// If this is a multiply expression, turn any internal negations into
523	// multiplies by -1 so they can be reassociated. Add any users of the
524	// newly created multiplication by -1 to the redo list, so any
525	// reassociation opportunities that are exposed will be reassociated
526	// further.
527	Instruction *Neg;
528	if (((Opcode == Instruction::Mul && match(V: Op, P: m_Neg(V: m_Value()))) \|\|
529	(Opcode == Instruction::FMul && match(V: Op, P: m_FNeg(X: m_Value())))) &&
530	match(V: Op, P: m_Instruction(I&: Neg))) {
531	LLVM_DEBUG(dbgs()
532	<< "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
533	Instruction *Mul = LowerNegateToMultiply(Neg);
534	LLVM_DEBUG(dbgs() << *Mul << `'\n'`);
535	Worklist.push_back(Elt: std::make_pair(x&: Mul, y&: Weight));
536	for (User *U : Mul->users()) {
537	if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(Val: U))
538	ToRedo.insert(X: UserBO);
539	}
540	ToRedo.insert(X: Neg);
541	Changed = true;
542	continue;
543	}
544
545	// Failed to morph into an expression of the right type. This really is
546	// a leaf.
547	LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n");
548	assert(!isReassociableOp(Op, Opcode) && "Value was morphed?");
549	LeafOrder.push_back(Elt: Op);
550	Leaves [Op] = Weight;
551	}
552	}
553
554	// The leaves, repeated according to their weights, represent the linearized
555	// form of the expression.
556	for (Value *V : LeafOrder) {
557	LeafMap::iterator It = Leaves.find(Val: V);
558	if (It == Leaves.end())
559	// Node initially thought to be a leaf wasn't.
560	continue;
561	assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!");
562	uint64_t Weight = It ->second;
563	// Ensure the leaf is only output once.
564	It ->second = `0`;
565	Ops.push_back(Elt: std::make_pair(x&: V, y&: Weight));
566	if (Opcode == Instruction::Add && Flags.AllKnownNonNegative && Flags.HasNSW)
567	Flags.AllKnownNonNegative &= isKnownNonNegative(V, SQ: SimplifyQuery (DL));
568	else if (Opcode == Instruction::Mul) {
569	// To preserve NUW we need all inputs non-zero.
570	// To preserve NSW we need all inputs strictly positive.
571	if (Flags.AllKnownNonZero &&
572	(Flags.HasNUW \|\| (Flags.HasNSW && Flags.AllKnownNonNegative))) {
573	Flags.AllKnownNonZero &= isKnownNonZero(V, Q: SimplifyQuery (DL));
574	if (Flags.HasNSW && Flags.AllKnownNonNegative)
575	Flags.AllKnownNonNegative &= isKnownNonNegative(V, SQ: SimplifyQuery (DL));
576	}
577	}
578	}
579
580	// For nilpotent operations or addition there may be no operands, for example
581	// because the expression was "X xor X" or consisted of 2^Bitwidth additions:
582	// in both cases the weight reduces to 0 causing the value to be skipped.
583	if (Ops.empty()) {
584	Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, Ty: I->getType());
585	assert(Identity && "Associative operation without identity!");
586	Ops.emplace_back(Args&: Identity, Args: `1`);
587	}
588
589	return Changed;
590	}
591
592	/// Now that the operands for this expression tree are
593	/// linearized and optimized, emit them in-order.
594	void ReassociatePass::RewriteExprTree(BinaryOperator *I,
595	SmallVectorImpl<ValueEntry> &Ops,
596	OverflowTracking Flags) {
597	assert(Ops.size() > `1` && "Single values should be used directly!");
598
599	// Since our optimizations should never increase the number of operations, the
600	// new expression can usually be written reusing the existing binary operators
601	// from the original expression tree, without creating any new instructions,
602	// though the rewritten expression may have a completely different topology.
603	// We take care to not change anything if the new expression will be the same
604	// as the original. If more than trivial changes (like commuting operands)
605	// were made then we are obliged to clear out any optional subclass data like
606	// nsw flags.
607
608	/// NodesToRewrite - Nodes from the original expression available for writing
609	/// the new expression into.
610	SmallVector<BinaryOperator*, `8`> NodesToRewrite;
611	unsigned Opcode = I->getOpcode();
612	BinaryOperator *Op = I;
613
614	/// NotRewritable - The operands being written will be the leaves of the new
615	/// expression and must not be used as inner nodes (via NodesToRewrite) by
616	/// mistake. Inner nodes are always reassociable, and usually leaves are not
617	/// (if they were they would have been incorporated into the expression and so
618	/// would not be leaves), so most of the time there is no danger of this. But
619	/// in rare cases a leaf may become reassociable if an optimization kills uses
620	/// of it, or it may momentarily become reassociable during rewriting (below)
621	/// due it being removed as an operand of one of its uses. Ensure that misuse
622	/// of leaf nodes as inner nodes cannot occur by remembering all of the future
623	/// leaves and refusing to reuse any of them as inner nodes.
624	SmallPtrSet<Value*, `8`> NotRewritable;
625	for (const ValueEntry &Op : Ops)
626	NotRewritable.insert(Ptr: Op.Op);
627
628	// ExpressionChangedStart - Non-null if the rewritten expression differs from
629	// the original in some non-trivial way, requiring the clearing of optional
630	// flags. Flags are cleared from the operator in ExpressionChangedStart up to
631	// ExpressionChangedEnd inclusive.
632	BinaryOperator ExpressionChangedStart = nullptr*,
633	ExpressionChangedEnd = nullptr*;
634	for (unsigned i = `0`; ; ++i) {
635	// The last operation (which comes earliest in the IR) is special as both
636	// operands will come from Ops, rather than just one with the other being
637	// a subexpression.
638	if (i+`2` == Ops.size()) {
639	Value *NewLHS = Ops [i].Op;
640	Value *NewRHS = Ops [i+`1`].Op;
641	Value *OldLHS = Op->getOperand(i_nocapture: `0`);
642	Value *OldRHS = Op->getOperand(i_nocapture: `1`);
643
644	if (NewLHS == OldLHS && NewRHS == OldRHS)
645	// Nothing changed, leave it alone.
646	break;
647
648	if (NewLHS == OldRHS && NewRHS == OldLHS) {
649	// The order of the operands was reversed. Swap them.
650	LLVM_DEBUG(dbgs() << "RA: " << *Op << `'\n'`);
651	Op->swapOperands();
652	LLVM_DEBUG(dbgs() << "TO: " << *Op << `'\n'`);
653	MadeChange = true;
654	++NumChanged;
655	break;
656	}
657
658	// The new operation differs non-trivially from the original. Overwrite
659	// the old operands with the new ones.
660	LLVM_DEBUG(dbgs() << "RA: " << *Op << `'\n'`);
661	if (NewLHS != OldLHS) {
662	BinaryOperator *BO = isReassociableOp(V: OldLHS, Opcode);
663	if (BO && !NotRewritable.count(Ptr: BO))
664	NodesToRewrite.push_back(Elt: BO);
665	salvageDebugInfo(I&: *Op);
666	Op->setOperand(i_nocapture: `0`, Val_nocapture: NewLHS);
667	}
668	if (NewRHS != OldRHS) {
669	BinaryOperator *BO = isReassociableOp(V: OldRHS, Opcode);
670	if (BO && !NotRewritable.count(Ptr: BO))
671	NodesToRewrite.push_back(Elt: BO);
672	salvageDebugInfo(I&: *Op);
673	Op->setOperand(i_nocapture: `1`, Val_nocapture: NewRHS);
674	}
675	LLVM_DEBUG(dbgs() << "TO: " << *Op << `'\n'`);
676
677	ExpressionChangedStart = Op;
678	if (!ExpressionChangedEnd)
679	ExpressionChangedEnd = Op;
680	MadeChange = true;
681	++NumChanged;
682
683	break;
684	}
685
686	// Not the last operation. The left-hand side will be a sub-expression
687	// while the right-hand side will be the current element of Ops.
688	Value *NewRHS = Ops [i].Op;
689	if (NewRHS != Op->getOperand(i_nocapture: `1`)) {
690	LLVM_DEBUG(dbgs() << "RA: " << *Op << `'\n'`);
691	if (NewRHS == Op->getOperand(i_nocapture: `0`)) {
692	// The new right-hand side was already present as the left operand. If
693	// we are lucky then swapping the operands will sort out both of them.
694	Op->swapOperands();
695	} else {
696	// Overwrite with the new right-hand side.
697	BinaryOperator *BO = isReassociableOp(V: Op->getOperand(i_nocapture: `1`), Opcode);
698	if (BO && !NotRewritable.count(Ptr: BO))
699	NodesToRewrite.push_back(Elt: BO);
700	salvageDebugInfo(I&: *Op);
701	Op->setOperand(i_nocapture: `1`, Val_nocapture: NewRHS);
702	ExpressionChangedStart = Op;
703	if (!ExpressionChangedEnd)
704	ExpressionChangedEnd = Op;
705	}
706	LLVM_DEBUG(dbgs() << "TO: " << *Op << `'\n'`);
707	MadeChange = true;
708	++NumChanged;
709	}
710
711	// Now deal with the left-hand side. If this is already an operation node
712	// from the original expression then just rewrite the rest of the expression
713	// into it.
714	BinaryOperator *BO = isReassociableOp(V: Op->getOperand(i_nocapture: `0`), Opcode);
715	if (BO && !NotRewritable.count(Ptr: BO)) {
716	Op = BO;
717	continue;
718	}
719
720	// Otherwise, grab a spare node from the original expression and use that as
721	// the left-hand side. If there are no nodes left then the optimizers made
722	// an expression with more nodes than the original! This usually means that
723	// they did something stupid but it might mean that the problem was just too
724	// hard (finding the mimimal number of multiplications needed to realize a
725	// multiplication expression is NP-complete). Whatever the reason, smart or
726	// stupid, create a new node if there are none left.
727	BinaryOperator *NewOp;
728	if (NodesToRewrite.empty()) {
729	Constant *Poison = PoisonValue::get(T: I->getType());
730	NewOp = BinaryOperator::Create(Op: Instruction::BinaryOps(Opcode), S1: Poison,
731	S2: Poison, Name: "", InsertBefore: I->getIterator());
732	if (isa<FPMathOperator>(Val: NewOp))
733	NewOp->setFastMathFlags(I->getFastMathFlags());
734	} else {
735	NewOp = NodesToRewrite.pop_back_val();
736	}
737
738	LLVM_DEBUG(dbgs() << "RA: " << *Op << `'\n'`);
739	salvageDebugInfo(I&: *Op);
740	Op->setOperand(i_nocapture: `0`, Val_nocapture: NewOp);
741	LLVM_DEBUG(dbgs() << "TO: " << *Op << `'\n'`);
742	ExpressionChangedStart = Op;
743	if (!ExpressionChangedEnd)
744	ExpressionChangedEnd = Op;
745	MadeChange = true;
746	++NumChanged;
747	Op = NewOp;
748	}
749
750	// If the expression changed non-trivially then clear out all subclass data
751	// starting from the operator specified in ExpressionChanged, and compactify
752	// the operators to just before the expression root to guarantee that the
753	// expression tree is dominated by all of Ops.
754	if (ExpressionChangedStart) {
755	bool ClearFlags = true;
756	do {
757	// Preserve flags.
758	if (ClearFlags) {
759	if (isa<FPMathOperator>(Val: I)) {
760	ExpressionChangedStart->copyFastMathFlags(FMF: I->getFastMathFlags());
761	} else {
762	Flags.applyFlags(I&: *ExpressionChangedStart);
763	}
764	}
765
766	if (ExpressionChangedStart == ExpressionChangedEnd)
767	ClearFlags = false;
768	if (ExpressionChangedStart == I)
769	break;
770
771	ExpressionChangedStart->moveBefore(InsertPos: I->getIterator());
772	ExpressionChangedStart =
773	cast<BinaryOperator>(Val: *ExpressionChangedStart->user_begin());
774	} while (true);
775	}
776
777	// Throw away any left over nodes from the original expression.
778	RedoInsts.insert_range(R&: NodesToRewrite);
779	}
780
781	/// Insert instructions before the instruction pointed to by BI,
782	/// that computes the negative version of the value specified. The negative
783	/// version of the value is returned, and BI is left pointing at the instruction
784	/// that should be processed next by the reassociation pass.
785	/// Also add intermediate instructions to the redo list that are modified while
786	/// pushing the negates through adds. These will be revisited to see if
787	/// additional opportunities have been exposed.
788	static Value NegateValue(Value V, Instruction *BI,
789	ReassociatePass::OrderedSet &ToRedo) {
790	if (auto *C = dyn_cast<Constant>(Val: V)) {
791	const DataLayout &DL = BI->getDataLayout();
792	Constant *Res = C->getType()->isFPOrFPVectorTy()
793	? ConstantFoldUnaryOpOperand(Opcode: Instruction::FNeg, Op: C, DL)
794	: ConstantExpr::getNeg(C);
795	if (Res)
796	return Res;
797	}
798
799	// We are trying to expose opportunity for reassociation. One of the things
800	// that we want to do to achieve this is to push a negation as deep into an
801	// expression chain as possible, to expose the add instructions. In practice,
802	// this means that we turn this:
803	// X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
804	// so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
805	// the constants. We assume that instcombine will clean up the mess later if
806	// we introduce tons of unnecessary negation instructions.
807	//
808	if (BinaryOperator *I =
809	isReassociableOp(V, Opcode1: Instruction::Add, Opcode2: Instruction::FAdd)) {
810	// Push the negates through the add.
811	I->setOperand(i_nocapture: `0`, Val_nocapture: NegateValue(V: I->getOperand(i_nocapture: `0`), BI, ToRedo));
812	I->setOperand(i_nocapture: `1`, Val_nocapture: NegateValue(V: I->getOperand(i_nocapture: `1`), BI, ToRedo));
813	if (I->getOpcode() == Instruction::Add) {
814	I->setHasNoUnsignedWrap(false);
815	I->setHasNoSignedWrap(false);
816	}
817
818	// We must move the add instruction here, because the neg instructions do
819	// not dominate the old add instruction in general. By moving it, we are
820	// assured that the neg instructions we just inserted dominate the
821	// instruction we are about to insert after them.
822	//
823	I->moveBefore(InsertPos: BI->getIterator());
824	I->setName(I->getName()+".neg");
825
826	// Add the intermediate negates to the redo list as processing them later
827	// could expose more reassociating opportunities.
828	ToRedo.insert(X: I);
829	return I;
830	}
831
832	// Okay, we need to materialize a negated version of V with an instruction.
833	// Scan the use lists of V to see if we have one already.
834	for (User *U : V->users()) {
835	if (!match(V: U, P: m_Neg(V: m_Value())) && !match(V: U, P: m_FNeg(X: m_Value())))
836	continue;
837
838	// We found one! Now we have to make sure that the definition dominates
839	// this use. We do this by moving it to the entry block (if it is a
840	// non-instruction value) or right after the definition. These negates will
841	// be zapped by reassociate later, so we don't need much finesse here.
842	Instruction *TheNeg = dyn_cast<Instruction>(Val: U);
843
844	// We can't safely propagate a vector zero constant with poison/undef lanes.
845	Constant *C;
846	if (match(V: TheNeg, P: m_BinOp(L: m_Constant(C), R: m_Value())) &&
847	C->containsUndefOrPoisonElement())
848	continue;
849
850	// Verify that the negate is in this function, V might be a constant expr.
851	if (!TheNeg \|\|
852	TheNeg->getParent()->getParent() != BI->getParent()->getParent())
853	continue;
854
855	BasicBlock::iterator InsertPt;
856	if (Instruction *InstInput = dyn_cast<Instruction>(Val: V)) {
857	auto InsertPtOpt = InstInput->getInsertionPointAfterDef();
858	if (!InsertPtOpt)
859	continue;
860	InsertPt = *InsertPtOpt;
861	} else {
862	InsertPt = TheNeg->getFunction()
863	->getEntryBlock()
864	.getFirstNonPHIOrDbg()
865	->getIterator();
866	}
867
868	// Check that if TheNeg is moved out of its parent block, we drop its
869	// debug location to avoid extra coverage.
870	// See test dropping_debugloc_the_neg.ll for a detailed example.
871	if (TheNeg->getParent() != InsertPt ->getParent())
872	TheNeg->dropLocation();
873	TheNeg->moveBefore(BB&: *InsertPt ->getParent(), I: InsertPt);
874
875	if (TheNeg->getOpcode() == Instruction::Sub) {
876	TheNeg->setHasNoUnsignedWrap(false);
877	TheNeg->setHasNoSignedWrap(false);
878	} else {
879	TheNeg->andIRFlags(V: BI);
880	}
881	ToRedo.insert(X: TheNeg);
882	return TheNeg;
883	}
884
885	// Insert a 'neg' instruction that subtracts the value from zero to get the
886	// negation.
887	Instruction *NewNeg =
888	CreateNeg(S1: V, Name: V->getName() + ".neg", InsertBefore: BI->getIterator(), FlagsOp: BI);
889	// NewNeg is generated to potentially replace BI, so use its DebugLoc.
890	NewNeg->setDebugLoc(BI->getDebugLoc());
891	ToRedo.insert(X: NewNeg);
892	return NewNeg;
893	}
894
895	// See if this `or` looks like an load widening reduction, i.e. that it
896	// consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't
897	// ensure that the pattern is really* a load widening reduction,*
898	// we do not ensure that it can really be replaced with a widened load,
899	// only that it mostly looks like one.
900	static bool isLoadCombineCandidate(Instruction *Or) {
901	SmallVector<Instruction *, `8`> Worklist;
902	SmallPtrSet<Instruction *, `8`> Visited;
903
904	auto Enqueue = [&](Value *V) {
905	auto *I = dyn_cast<Instruction>(Val: V);
906	// Each node of an `or` reduction must be an instruction,
907	if (!I)
908	return false; // Node is certainly not part of an `or` load reduction.
909	// Only process instructions we have never processed before.
910	if (Visited.insert(Ptr: I).second)
911	Worklist.emplace_back(Args&: I);
912	return true; // Will need to look at parent nodes.
913	};
914
915	if (!Enqueue (Or))
916	return false; // Not an `or` reduction pattern.
917
918	while (!Worklist.empty()) {
919	auto *I = Worklist.pop_back_val();
920
921	// Okay, which instruction is this node?
922	switch (I->getOpcode()) {
923	case Instruction::Or:
924	// Got an `or` node. That's fine, just recurse into it's operands.
925	for (Value *Op : I->operands())
926	if (!Enqueue (Op))
927	return false; // Not an `or` reduction pattern.
928	continue;
929
930	case Instruction::Shl:
931	case Instruction::ZExt:
932	// `shl`/`zext` nodes are fine, just recurse into their base operand.
933	if (!Enqueue (I->getOperand(i: `0`)))
934	return false; // Not an `or` reduction pattern.
935	continue;
936
937	case Instruction::Load:
938	// Perfect, `load` node means we've reached an edge of the graph.
939	continue;
940
941	default: // Unknown node.
942	return false; // Not an `or` reduction pattern.
943	}
944	}
945
946	return true;
947	}
948
949	/// Return true if it may be profitable to convert this (X\|Y) into (X+Y).
950	static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) {
951	// Don't bother to convert this up unless either the LHS is an associable add
952	// or subtract or mul or if this is only used by one of the above.
953	// This is only a compile-time improvement, it is not needed for correctness!
954	auto isInteresting = [](Value *V) {
955	for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul,
956	Instruction::Shl})
957	if (isReassociableOp(V, Opcode: Op))
958	return true;
959	return false;
960	};
961
962	if (any_of(Range: Or->operands(), P: isInteresting))
963	return true;
964
965	Value *VB = Or->user_back();
966	if (Or->hasOneUse() && isInteresting (VB))
967	return true;
968
969	return false;
970	}
971
972	/// If we have (X\|Y), and iff X and Y have no common bits set,
973	/// transform this into (X+Y) to allow arithmetics reassociation.
974	static BinaryOperator convertOrWithNoCommonBitsToAdd(Instruction Or) {
975	// Convert an or into an add.
976	BinaryOperator *New = CreateAdd(S1: Or->getOperand(i: `0`), S2: Or->getOperand(i: `1`), Name: "",
977	InsertBefore: Or->getIterator(), FlagsOp: Or);
978	New->setHasNoSignedWrap();
979	New->setHasNoUnsignedWrap();
980	New->takeName(V: Or);
981
982	// Everyone now refers to the add instruction.
983	Or->replaceAllUsesWith(V: New);
984	New->setDebugLoc(Or->getDebugLoc());
985
986	LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << `'\n'`);
987	return New;
988	}
989
990	/// Return true if Mul is of the form (X+Y)C or (X-Y)C where C is a
991	/// constant, and there exists a sibling instruction of the form XC' or YC'
992	/// in the same expression — indicating that distribution followed by
993	/// factoring will reduce the instruction count.
994	static bool ShouldBreakUpDistribution(Instruction *Mul) {
995	Value A, B;
996	if (!match(V: Mul, P: m_OneUse(SubPattern: m_Mul(
997	L: m_OneUse(SubPattern: m_CombineOr(Ps: m_Add(L: m_Value(V&: A), R: m_Value(V&: B)),
998	Ps: m_Sub(L: m_Value(V&: A), R: m_Value(V&: B)))),
999	R: m_ImmConstant()))))
1000	return false;
1001
1002	auto *MulUser = cast<Instruction>(Val: Mul->user_back());
1003	// The parent MUST be an Add or Sub to ensure the tree is flattened
1004	if (MulUser->getOpcode() != Instruction::Add &&
1005	MulUser->getOpcode() != Instruction::Sub)
1006	return false;
1007
1008	for (Value *Sibling : MulUser->operands()) {
1009	if (Sibling == Mul \|\| !Sibling->hasOneUse())
1010	continue;
1011
1012	// Sibling must be NonConst C'.*
1013	Value *SibNC;
1014	if (match(V: Sibling, P: m_Mul(L: m_Value(V&: SibNC), R: m_ImmConstant())) &&
1015	(SibNC == A \|\| SibNC == B) && !isa<Constant>(Val: SibNC))
1016	return true;
1017	}
1018	return false;
1019	}
1020
1021	/// Distribute Mul of the form (X+Y)C into XC + YC.*
1022	/// For the sub case (X-Y)C, the second term uses -C to avoid*
1023	/// introducing a negation instruction.
1024	static BinaryOperator BreakUpDistribute(Instruction Mul,
1025	ReassociatePass::OrderedSet &ToRedo) {
1026	Instruction *AddSub = cast<Instruction>(Val: Mul->getOperand(i: `0`));
1027	Constant *C = cast<Constant>(Val: Mul->getOperand(i: `1`));
1028	Constant *C2 =
1029	AddSub->getOpcode() == Instruction::Sub ? ConstantExpr::getNeg(C) : C;
1030
1031	BinaryOperator *M1 = BinaryOperator::CreateMul(V1: AddSub->getOperand(i: `0`), V2: C,
1032	Name: "Mul1", InsertBefore: Mul->getIterator());
1033	BinaryOperator *M2 = BinaryOperator::CreateMul(V1: AddSub->getOperand(i: `1`), V2: C2,
1034	Name: "Mul2", InsertBefore: Mul->getIterator());
1035	BinaryOperator *Result =
1036	BinaryOperator::CreateAdd(V1: M1, V2: M2, Name: "DistAdd", InsertBefore: Mul->getIterator());
1037
1038	Mul->replaceAllUsesWith(V: Result);
1039	Result->setDebugLoc(Mul->getDebugLoc());
1040
1041	ToRedo.insert(X: M1);
1042	ToRedo.insert(X: M2);
1043	ToRedo.insert(X: Result);
1044
1045	return Result;
1046	}
1047
1048	/// Return true if we should break up this subtract of X-Y into (X + -Y).
1049	static bool ShouldBreakUpSubtract(Instruction *Sub) {
1050	// If this is a negation, we can't split it up!
1051	if (match(V: Sub, P: m_Neg(V: m_Value())) \|\| match(V: Sub, P: m_FNeg(X: m_Value())))
1052	return false;
1053
1054	// Don't breakup X - undef.
1055	if (isa<UndefValue>(Val: Sub->getOperand(i: `1`)))
1056	return false;
1057
1058	// Don't bother to break this up unless either the LHS is an associable add or
1059	// subtract or if this is only used by one.
1060	Value *V0 = Sub->getOperand(i: `0`);
1061	if (isReassociableOp(V: V0, Opcode1: Instruction::Add, Opcode2: Instruction::FAdd) \|\|
1062	isReassociableOp(V: V0, Opcode1: Instruction::Sub, Opcode2: Instruction::FSub))
1063	return true;
1064	Value *V1 = Sub->getOperand(i: `1`);
1065	if (isReassociableOp(V: V1, Opcode1: Instruction::Add, Opcode2: Instruction::FAdd) \|\|
1066	isReassociableOp(V: V1, Opcode1: Instruction::Sub, Opcode2: Instruction::FSub))
1067	return true;
1068	Value *VB = Sub->user_back();
1069	if (Sub->hasOneUse() &&
1070	(isReassociableOp(V: VB, Opcode1: Instruction::Add, Opcode2: Instruction::FAdd) \|\|
1071	isReassociableOp(V: VB, Opcode1: Instruction::Sub, Opcode2: Instruction::FSub)))
1072	return true;
1073
1074	return false;
1075	}
1076
1077	/// If we have (X-Y), and if either X is an add, or if this is only used by an
1078	/// add, transform this into (X+(0-Y)) to promote better reassociation.
1079	static BinaryOperator BreakUpSubtract(Instruction Sub,
1080	ReassociatePass::OrderedSet &ToRedo) {
1081	// Convert a subtract into an add and a neg instruction. This allows sub
1082	// instructions to be commuted with other add instructions.
1083	//
1084	// Calculate the negative value of Operand 1 of the sub instruction,
1085	// and set it as the RHS of the add instruction we just made.
1086	Value *NegVal = NegateValue(V: Sub->getOperand(i: `1`), BI: Sub, ToRedo);
1087	BinaryOperator *New =
1088	CreateAdd(S1: Sub->getOperand(i: `0`), S2: NegVal, Name: "", InsertBefore: Sub->getIterator(), FlagsOp: Sub);
1089	Sub->setOperand(i: `0`, Val: Constant::getNullValue(Ty: Sub->getType())); // Drop use of op.
1090	Sub->setOperand(i: `1`, Val: Constant::getNullValue(Ty: Sub->getType())); // Drop use of op.
1091	New->takeName(V: Sub);
1092
1093	// Everyone now refers to the add instruction.
1094	Sub->replaceAllUsesWith(V: New);
1095	New->setDebugLoc(Sub->getDebugLoc());
1096
1097	LLVM_DEBUG(dbgs() << "Negated: " << *New << `'\n'`);
1098	return New;
1099	}
1100
1101	/// If this is a shift of a reassociable multiply or is used by one, change
1102	/// this into a multiply by a constant to assist with further reassociation.
1103	static BinaryOperator ConvertShiftToMul(Instruction Shl) {
1104	Constant *MulCst = ConstantInt::get(Ty: Shl->getType(), V: `1`);
1105	auto *SA = cast<ConstantInt>(Val: Shl->getOperand(i: `1`));
1106	MulCst = ConstantFoldBinaryInstruction(Opcode: Instruction::Shl, V1: MulCst, V2: SA);
1107	assert(MulCst && "Constant folding of immediate constants failed");
1108
1109	BinaryOperator *Mul = BinaryOperator::CreateMul(V1: Shl->getOperand(i: `0`), V2: MulCst,
1110	Name: "", InsertBefore: Shl->getIterator());
1111	Shl->setOperand(i: `0`, Val: PoisonValue::get(T: Shl->getType())); // Drop use of op.
1112	Mul->takeName(V: Shl);
1113
1114	// Everyone now refers to the mul instruction.
1115	Shl->replaceAllUsesWith(V: Mul);
1116	Mul->setDebugLoc(Shl->getDebugLoc());
1117
1118	// We can safely preserve the nuw flag in all cases. It's also safe to turn a
1119	// nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special
1120	// handling. It can be preserved as long as we're not left shifting by
1121	// bitwidth - 1.
1122	bool NSW = cast<BinaryOperator>(Val: Shl)->hasNoSignedWrap();
1123	bool NUW = cast<BinaryOperator>(Val: Shl)->hasNoUnsignedWrap();
1124	unsigned BitWidth = Shl->getType()->getScalarSizeInBits();
1125	if (NSW && (NUW \|\| SA->getValue().ult(RHS: BitWidth - `1`)))
1126	Mul->setHasNoSignedWrap(true);
1127	Mul->setHasNoUnsignedWrap(NUW);
1128	return Mul;
1129	}
1130
1131	/// Scan backwards and forwards among values with the same rank as element i
1132	/// to see if X exists. If X does not exist, return i. This is useful when
1133	/// scanning for 'x' when we see '-x' because they both get the same rank.
1134	static unsigned FindInOperandList(const SmallVectorImpl<ValueEntry> &Ops,
1135	unsigned i, Value *X) {
1136	unsigned XRank = Ops [i].Rank;
1137	unsigned e = Ops.size();
1138	for (unsigned j = i+`1`; j != e && Ops [j].Rank == XRank; ++j) {
1139	if (Ops [j].Op == X)
1140	return j;
1141	if (Instruction *I1 = dyn_cast<Instruction>(Val: Ops [j].Op))
1142	if (Instruction *I2 = dyn_cast<Instruction>(Val: X))
1143	if (I1->isIdenticalTo(I: I2))
1144	return j;
1145	}
1146	// Scan backwards.
1147	for (unsigned j = i-`1`; j != ~`0U` && Ops [j].Rank == XRank; --j) {
1148	if (Ops [j].Op == X)
1149	return j;
1150	if (Instruction *I1 = dyn_cast<Instruction>(Val: Ops [j].Op))
1151	if (Instruction *I2 = dyn_cast<Instruction>(Val: X))
1152	if (I1->isIdenticalTo(I: I2))
1153	return j;
1154	}
1155	return i;
1156	}
1157
1158	/// Emit a tree of add instructions, summing Ops together
1159	/// and returning the result. Insert the tree before I.
1160	static Value EmitAddTreeOfValues(Instruction I,
1161	SmallVectorImpl<WeakTrackingVH> &Ops) {
1162	if (Ops.size() == `1`) return Ops.back();
1163
1164	Value *V1 = Ops.pop_back_val();
1165	Value *V2 = EmitAddTreeOfValues(I, Ops);
1166	auto *NewAdd = CreateAdd(S1: V2, S2: V1, Name: "reass.add", InsertBefore: I->getIterator(), FlagsOp: I);
1167	NewAdd->setDebugLoc(I->getDebugLoc());
1168	return NewAdd;
1169	}
1170
1171	/// If V is an expression tree that is a multiplication sequence,
1172	/// and if this sequence contains a multiply by Factor,
1173	/// remove Factor from the tree and return the new tree.
1174	/// If new instructions are inserted to generate this tree, DL should be used
1175	/// as the DebugLoc for these instructions.
1176	Value ReassociatePass::RemoveFactorFromExpression(Value V, Value *Factor,
1177	DebugLoc DL) {
1178	BinaryOperator *BO = isReassociableOp(V, Opcode1: Instruction::Mul, Opcode2: Instruction::FMul);
1179	if (!BO)
1180	return nullptr;
1181
1182	SmallVector<RepeatedValue, `8`> Tree;
1183	OverflowTracking Flags;
1184	MadeChange \|= LinearizeExprTree(I: BO, Ops&: Tree, ToRedo&: RedoInsts, Flags);
1185	SmallVector<ValueEntry, `8`> Factors;
1186	Factors.reserve(N: Tree.size());
1187	for (const RepeatedValue &E : Tree)
1188	Factors.append(NumInputs: E.second, Elt: ValueEntry (getRank(V: E.first), E.first));
1189
1190	bool FoundFactor = false;
1191	bool NeedsNegate = false;
1192	for (unsigned i = `0`, e = Factors.size(); i != e; ++i) {
1193	if (Factors [i].Op == Factor) {
1194	FoundFactor = true;
1195	Factors.erase(CI: Factors.begin()+i);
1196	break;
1197	}
1198
1199	// If this is a negative version of this factor, remove it.
1200	if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Val: Factor)) {
1201	if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Val: Factors [i].Op))
1202	if (FC1->getValue() == -FC2->getValue()) {
1203	FoundFactor = NeedsNegate = true;
1204	Factors.erase(CI: Factors.begin()+i);
1205	break;
1206	}
1207	} else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Val: Factor)) {
1208	if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Val: Factors [i].Op)) {
1209	const APFloat &F1 = FC1->getValueAPF();
1210	APFloat F2(FC2->getValueAPF());
1211	F2.changeSign();
1212	if (F1 == F2) {
1213	FoundFactor = NeedsNegate = true;
1214	Factors.erase(CI: Factors.begin() + i);
1215	break;
1216	}
1217	}
1218	}
1219	}
1220
1221	if (!FoundFactor) {
1222	// Make sure to restore the operands to the expression tree.
1223	RewriteExprTree(I: BO, Ops&: Factors, Flags);
1224	return nullptr;
1225	}
1226
1227	BasicBlock::iterator InsertPt = ++BO->getIterator();
1228
1229	// If this was just a single multiply, remove the multiply and return the only
1230	// remaining operand.
1231	if (Factors.size() == `1`) {
1232	RedoInsts.insert(X: BO);
1233	V = Factors [`0`].Op;
1234	} else {
1235	RewriteExprTree(I: BO, Ops&: Factors, Flags);
1236	V = BO;
1237	}
1238
1239	if (NeedsNegate) {
1240	V = CreateNeg(S1: V, Name: "neg", InsertBefore: InsertPt, FlagsOp: BO);
1241	cast<Instruction>(Val: V)->setDebugLoc(DL);
1242	}
1243
1244	return V;
1245	}
1246
1247	/// If V is a single-use multiply, recursively add its operands as factors,
1248	/// otherwise add V to the list of factors.
1249	///
1250	/// Ops is the top-level list of add operands we're trying to factor.
1251	static void FindSingleUseMultiplyFactors(Value *V,
1252	SmallVectorImpl<Value*> &Factors) {
1253	BinaryOperator *BO = isReassociableOp(V, Opcode1: Instruction::Mul, Opcode2: Instruction::FMul);
1254	if (!BO) {
1255	Factors.push_back(Elt: V);
1256	return;
1257	}
1258
1259	// Otherwise, add the LHS and RHS to the list of factors.
1260	FindSingleUseMultiplyFactors(V: BO->getOperand(i_nocapture: `1`), Factors);
1261	FindSingleUseMultiplyFactors(V: BO->getOperand(i_nocapture: `0`), Factors);
1262	}
1263
1264	/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
1265	/// This optimizes based on identities. If it can be reduced to a single Value,
1266	/// it is returned, otherwise the Ops list is mutated as necessary.
1267	static Value OptimizeAndOrXor(unsigned* Opcode,
1268	SmallVectorImpl<ValueEntry> &Ops) {
1269	// Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
1270	// If we find any, we can simplify the expression. X&~X == 0, X\|~X == -1.
1271	for (unsigned i = `0`, e = Ops.size(); i != e; ++i) {
1272	// First, check for X and ~X in the operand list.
1273	assert(i < Ops.size());
1274	Value *X;
1275	if (match(V: Ops [i].Op, P: m_Not(V: m_Value(V&: X)))) { // Cannot occur for ^.
1276	unsigned FoundX = FindInOperandList(Ops, i, X);
1277	if (FoundX != i) {
1278	if (Opcode == Instruction::And) // ...&X&~X = 0
1279	return Constant::getNullValue(Ty: X->getType());
1280
1281	if (Opcode == Instruction::Or) // ...\|X\|~X = -1
1282	return Constant::getAllOnesValue(Ty: X->getType());
1283	}
1284	}
1285
1286	// Next, check for duplicate pairs of values, which we assume are next to
1287	// each other, due to our sorting criteria.
1288	assert(i < Ops.size());
1289	if (i+`1` != Ops.size() && Ops [i+`1`].Op == Ops [i].Op) {
1290	if (Opcode == Instruction::And \|\| Opcode == Instruction::Or) {
1291	// Drop duplicate values for And and Or.
1292	Ops.erase(CI: Ops.begin()+i);
1293	--i; --e;
1294	++NumAnnihil;
1295	continue;
1296	}
1297
1298	// Drop pairs of values for Xor.
1299	assert(Opcode == Instruction::Xor);
1300	if (e == `2`)
1301	return Constant::getNullValue(Ty: Ops [`0`].Op->getType());
1302
1303	// Y ^ X^X -> Y
1304	Ops.erase(CS: Ops.begin()+i, CE: Ops.begin()+i+`2`);
1305	i -= `1`; e -= `2`;
1306	++NumAnnihil;
1307	}
1308	}
1309	return nullptr;
1310	}
1311
1312	/// Helper function of CombineXorOpnd(). It creates a bitwise-and
1313	/// instruction with the given two operands, and return the resulting
1314	/// instruction. There are two special cases: 1) if the constant operand is 0,
1315	/// it will return NULL. 2) if the constant is ~0, the symbolic operand will
1316	/// be returned.
1317	static Value createAndInstr(BasicBlock::iterator InsertBefore, Value Opnd,
1318	const APInt &ConstOpnd) {
1319	if (ConstOpnd.isZero())
1320	return nullptr;
1321
1322	if (ConstOpnd.isAllOnes())
1323	return Opnd;
1324
1325	Instruction *I = BinaryOperator::CreateAnd(
1326	V1: Opnd, V2: ConstantInt::get(Ty: Opnd->getType(), V: ConstOpnd), Name: "and.ra",
1327	InsertBefore);
1328	I->setDebugLoc(InsertBefore ->getDebugLoc());
1329	return I;
1330	}
1331
1332	// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
1333	// into "R ^ C", where C would be 0, and R is a symbolic value.
1334	//
1335	// If it was successful, true is returned, and the "R" and "C" is returned
1336	// via "Res" and "ConstOpnd", respectively; otherwise, false is returned,
1337	// and both "Res" and "ConstOpnd" remain unchanged.
1338	bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
1339	APInt &ConstOpnd, Value *&Res) {
1340	// Xor-Rule 1: (x \| c1) ^ c2 = (x \| c1) ^ (c1 ^ c1) ^ c2
1341	// = ((x \| c1) ^ c1) ^ (c1 ^ c2)
1342	// = (x & ~c1) ^ (c1 ^ c2)
1343	// It is useful only when c1 == c2.
1344	if (!Opnd1->isOrExpr() \|\| Opnd1->getConstPart().isZero())
1345	return false;
1346
1347	if (!Opnd1->getValue()->hasOneUse())
1348	return false;
1349
1350	const APInt &C1 = Opnd1->getConstPart();
1351	if (C1 != ConstOpnd)
1352	return false;
1353
1354	Value *X = Opnd1->getSymbolicPart();
1355	Res = createAndInstr(InsertBefore: It, Opnd: X, ConstOpnd: ~C1);
1356	// ConstOpnd was C2, now C1 ^ C2.
1357	ConstOpnd ^= C1;
1358
1359	if (Instruction *T = dyn_cast<Instruction>(Val: Opnd1->getValue()))
1360	RedoInsts.insert(X: T);
1361	return true;
1362	}
1363
1364	// Helper function of OptimizeXor(). It tries to simplify
1365	// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a
1366	// symbolic value.
1367	//
1368	// If it was successful, true is returned, and the "R" and "C" is returned
1369	// via "Res" and "ConstOpnd", respectively (If the entire expression is
1370	// evaluated to a constant, the Res is set to NULL); otherwise, false is
1371	// returned, and both "Res" and "ConstOpnd" remain unchanged.
1372	bool ReassociatePass::CombineXorOpnd(BasicBlock::iterator It, XorOpnd *Opnd1,
1373	XorOpnd *Opnd2, APInt &ConstOpnd,
1374	Value *&Res) {
1375	Value *X = Opnd1->getSymbolicPart();
1376	if (X != Opnd2->getSymbolicPart())
1377	return false;
1378
1379	// This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
1380	int DeadInstNum = `1`;
1381	if (Opnd1->getValue()->hasOneUse())
1382	DeadInstNum++;
1383	if (Opnd2->getValue()->hasOneUse())
1384	DeadInstNum++;
1385
1386	// Xor-Rule 2:
1387	// (x \| c1) ^ (x & c2)
1388	// = (x\|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x\|c1) ^ c1) ^ (x & c2) ^ c1
1389	// = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1
1390	// = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3
1391	//
1392	if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) {
1393	if (Opnd2->isOrExpr())
1394	std::swap(a&: Opnd1, b&: Opnd2);
1395
1396	const APInt &C1 = Opnd1->getConstPart();
1397	const APInt &C2 = Opnd2->getConstPart();
1398	APInt C3((~C1) ^ C2);
1399
1400	// Do not increase code size!
1401	if (!C3.isZero() && !C3.isAllOnes()) {
1402	int NewInstNum = ConstOpnd.getBoolValue() ? `1` : `2`;
1403	if (NewInstNum > DeadInstNum)
1404	return false;
1405	}
1406
1407	Res = createAndInstr(InsertBefore: It, Opnd: X, ConstOpnd: C3);
1408	ConstOpnd ^= C1;
1409	} else if (Opnd1->isOrExpr()) {
1410	// Xor-Rule 3: (x \| c1) ^ (x \| c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
1411	//
1412	const APInt &C1 = Opnd1->getConstPart();
1413	const APInt &C2 = Opnd2->getConstPart();
1414	APInt C3 = C1 ^ C2;
1415
1416	// Do not increase code size
1417	if (!C3.isZero() && !C3.isAllOnes()) {
1418	int NewInstNum = ConstOpnd.getBoolValue() ? `1` : `2`;
1419	if (NewInstNum > DeadInstNum)
1420	return false;
1421	}
1422
1423	Res = createAndInstr(InsertBefore: It, Opnd: X, ConstOpnd: C3);
1424	ConstOpnd ^= C3;
1425	} else {
1426	// Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
1427	//
1428	const APInt &C1 = Opnd1->getConstPart();
1429	const APInt &C2 = Opnd2->getConstPart();
1430	APInt C3 = C1 ^ C2;
1431	Res = createAndInstr(InsertBefore: It, Opnd: X, ConstOpnd: C3);
1432	}
1433
1434	// Put the original operands in the Redo list; hope they will be deleted
1435	// as dead code.
1436	if (Instruction *T = dyn_cast<Instruction>(Val: Opnd1->getValue()))
1437	RedoInsts.insert(X: T);
1438	if (Instruction *T = dyn_cast<Instruction>(Val: Opnd2->getValue()))
1439	RedoInsts.insert(X: T);
1440
1441	return true;
1442	}
1443
1444	/// Optimize a series of operands to an 'xor' instruction. If it can be reduced
1445	/// to a single Value, it is returned, otherwise the Ops list is mutated as
1446	/// necessary.
1447	Value ReassociatePass::OptimizeXor(Instruction I,
1448	SmallVectorImpl<ValueEntry> &Ops) {
1449	if (Value *V = OptimizeAndOrXor(Opcode: Instruction::Xor, Ops))
1450	return V;
1451
1452	if (Ops.size() == `1`)
1453	return nullptr;
1454
1455	SmallVector<XorOpnd, `8`> Opnds;
1456	SmallVector<XorOpnd*, `8`> OpndPtrs;
1457	Type *Ty = Ops [`0`].Op->getType();
1458	APInt ConstOpnd(Ty->getScalarSizeInBits(), `0`);
1459
1460	// Step 1: Convert ValueEntry to XorOpnd
1461	for (const ValueEntry &Op : Ops) {
1462	Value *V = Op.Op;
1463	const APInt *C;
1464	// TODO: Support non-splat vectors.
1465	if (match(V, P: m_APInt(Res&: C))) {
1466	ConstOpnd ^= *C;
1467	} else {
1468	XorOpnd O(V);
1469	O.setSymbolicRank(getRank(V: O.getSymbolicPart()));
1470	Opnds.push_back(Elt: O);
1471	}
1472	}
1473
1474	// NOTE: From this point on, do NOT* add/delete element to/from "Opnds".*
1475	// It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate
1476	// the "OpndPtrs" as well. For the similar reason, do not fuse this loop
1477	// with the previous loop --- the iterator of the "Opnds" may be invalidated
1478	// when new elements are added to the vector.
1479	for (XorOpnd &Op : Opnds)
1480	OpndPtrs.push_back(Elt: &Op);
1481
1482	// Step 2: Sort the Xor-Operands in a way such that the operands containing
1483	// the same symbolic value cluster together. For instance, the input operand
1484	// sequence ("x \| 123", "y & 456", "x & 789") will be sorted into:
1485	// ("x \| 123", "x & 789", "y & 456").
1486	//
1487	// The purpose is twofold:
1488	// 1) Cluster together the operands sharing the same symbolic-value.
1489	// 2) Operand having smaller symbolic-value-rank is permuted earlier, which
1490	// could potentially shorten crital path, and expose more loop-invariants.
1491	// Note that values' rank are basically defined in RPO order (FIXME).
1492	// So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier
1493	// than Y which is defined earlier than Z. Permute "x \| 1", "Y & 2",
1494	// "z" in the order of X-Y-Z is better than any other orders.
1495	llvm::stable_sort(Range&: OpndPtrs, C: [](XorOpnd LHS, XorOpnd RHS) {
1496	return LHS->getSymbolicRank() < RHS->getSymbolicRank();
1497	});
1498
1499	// Step 3: Combine adjacent operands
1500	XorOpnd PrevOpnd = nullptr*;
1501	bool Changed = false;
1502	for (unsigned i = `0`, e = Opnds.size(); i < e; i++) {
1503	XorOpnd *CurrOpnd = OpndPtrs [i];
1504	// The combined value
1505	Value *CV;
1506
1507	// Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
1508	if (!ConstOpnd.isZero() &&
1509	CombineXorOpnd(It: I->getIterator(), Opnd1: CurrOpnd, ConstOpnd, Res&: CV)) {
1510	Changed = true;
1511	if (CV)
1512	*CurrOpnd = XorOpnd (CV);
1513	else {
1514	CurrOpnd->Invalidate();
1515	continue;
1516	}
1517	}
1518
1519	if (!PrevOpnd \|\| CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) {
1520	PrevOpnd = CurrOpnd;
1521	continue;
1522	}
1523
1524	// step 3.2: When previous and current operands share the same symbolic
1525	// value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd"
1526	if (CombineXorOpnd(It: I->getIterator(), Opnd1: CurrOpnd, Opnd2: PrevOpnd, ConstOpnd, Res&: CV)) {
1527	// Remove previous operand
1528	PrevOpnd->Invalidate();
1529	if (CV) {
1530	*CurrOpnd = XorOpnd (CV);
1531	PrevOpnd = CurrOpnd;
1532	} else {
1533	CurrOpnd->Invalidate();
1534	PrevOpnd = nullptr;
1535	}
1536	Changed = true;
1537	}
1538	}
1539
1540	// Step 4: Reassemble the Ops
1541	if (Changed) {
1542	Ops.clear();
1543	for (const XorOpnd &O : Opnds) {
1544	if (O.isInvalid())
1545	continue;
1546	ValueEntry VE(getRank(V: O.getValue()), O.getValue());
1547	Ops.push_back(Elt: VE);
1548	}
1549	if (!ConstOpnd.isZero()) {
1550	Value *C = ConstantInt::get(Ty, V: ConstOpnd);
1551	ValueEntry VE(getRank(V: C), C);
1552	Ops.push_back(Elt: VE);
1553	}
1554	unsigned Sz = Ops.size();
1555	if (Sz == `1`)
1556	return Ops.back().Op;
1557	if (Sz == `0`) {
1558	assert(ConstOpnd.isZero());
1559	return ConstantInt::get(Ty, V: ConstOpnd);
1560	}
1561	}
1562
1563	return nullptr;
1564	}
1565
1566	/// Optimize a series of operands to an 'add' instruction. This
1567	/// optimizes based on identities. If it can be reduced to a single Value, it
1568	/// is returned, otherwise the Ops list is mutated as necessary.
1569	Value ReassociatePass::OptimizeAdd(Instruction I,
1570	SmallVectorImpl<ValueEntry> &Ops) {
1571	// Scan the operand lists looking for X and -X pairs. If we find any, we
1572	// can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it,
1573	// scan for any
1574	// duplicates. We want to canonicalize Y+Y+Y+Z -> 3Y+Z.*
1575
1576	for (unsigned i = `0`, e = Ops.size(); i != e; ++i) {
1577	Value *TheOp = Ops [i].Op;
1578	// Check to see if we've seen this operand before. If so, we factor all
1579	// instances of the operand together. Due to our sorting criteria, we know
1580	// that these need to be next to each other in the vector.
1581	if (i+`1` != Ops.size() && Ops [i+`1`].Op == TheOp) {
1582	// Rescan the list, remove all instances of this operand from the expr.
1583	unsigned NumFound = `0`;
1584	do {
1585	Ops.erase(CI: Ops.begin()+i);
1586	++NumFound;
1587	} while (i != Ops.size() && Ops [i].Op == TheOp);
1588
1589	LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp
1590	<< `'\n'`);
1591	++NumFactor;
1592
1593	// Insert a new multiply.
1594	Type *Ty = TheOp->getType();
1595	// Truncate if NumFound overflows the type.
1596	Constant *C = Ty->isIntOrIntVectorTy()
1597	? ConstantInt::get(Ty, V: NumFound, /IsSigned=/false,
1598	/ImplicitTrunc=/true)
1599	: ConstantFP::get(Ty, V: NumFound);
1600	Instruction *Mul = CreateMul(S1: TheOp, S2: C, Name: "factor", InsertBefore: I->getIterator(), FlagsOp: I);
1601	Mul->setDebugLoc(I->getDebugLoc());
1602
1603	// Now that we have inserted a multiply, optimize it. This allows us to
1604	// handle cases that require multiple factoring steps, such as this:
1605	// (X2) + (X2) + (X2) -> (X2)3 -> X6
1606	RedoInsts.insert(X: Mul);
1607
1608	// If every add operand was a duplicate, return the multiply.
1609	if (Ops.empty())
1610	return Mul;
1611
1612	// Otherwise, we had some input that didn't have the dupe, such as
1613	// "A + A + B" -> "A2 + B". Add the new multiply to the list of*
1614	// things being added by this operation.
1615	Ops.insert(I: Ops.begin(), Elt: ValueEntry (getRank(V: Mul), Mul));
1616
1617	--i;
1618	e = Ops.size();
1619	continue;
1620	}
1621
1622	// Check for X and -X or X and ~X in the operand list.
1623	Value *X;
1624	if (!match(V: TheOp, P: m_Neg(V: m_Value(V&: X))) && !match(V: TheOp, P: m_Not(V: m_Value(V&: X))) &&
1625	!match(V: TheOp, P: m_FNeg(X: m_Value(V&: X))))
1626	continue;
1627
1628	unsigned FoundX = FindInOperandList(Ops, i, X);
1629	if (FoundX == i)
1630	continue;
1631
1632	// Remove X and -X from the operand list.
1633	if (Ops.size() == `2` &&
1634	(match(V: TheOp, P: m_Neg(V: m_Value())) \|\| match(V: TheOp, P: m_FNeg(X: m_Value()))))
1635	return Constant::getNullValue(Ty: X->getType());
1636
1637	// Remove X and ~X from the operand list.
1638	if (Ops.size() == `2` && match(V: TheOp, P: m_Not(V: m_Value())))
1639	return Constant::getAllOnesValue(Ty: X->getType());
1640
1641	Ops.erase(CI: Ops.begin()+i);
1642	if (i < FoundX)
1643	--FoundX;
1644	else
1645	--i; // Need to back up an extra one.
1646	Ops.erase(CI: Ops.begin()+FoundX);
1647	++NumAnnihil;
1648	--i; // Revisit element.
1649	e -= `2`; // Removed two elements.
1650
1651	// if X and ~X we append -1 to the operand list.
1652	if (match(V: TheOp, P: m_Not(V: m_Value()))) {
1653	Value *V = Constant::getAllOnesValue(Ty: X->getType());
1654	Ops.insert(I: Ops.end(), Elt: ValueEntry (getRank(V), V));
1655	e += `1`;
1656	}
1657	}
1658
1659	// Scan the operand list, checking to see if there are any common factors
1660	// between operands. Consider something like AA+ABC+D. We would like to*
1661	// reassociate this to A(A+BC)+D, which reduces the number of multiplies.
1662	// To efficiently find this, we count the number of times a factor occurs
1663	// for any ADD operands that are MULs.
1664	DenseMap<Value, unsigned*> FactorOccurrences;
1665
1666	// Keep track of each multiply we see, to avoid triggering on (X4)+(X4)
1667	// where they are actually the same multiply.
1668	unsigned MaxOcc = `0`;
1669	Value MaxOccVal = nullptr*;
1670
1671	// Prefer a non-constant factor over a constant when occurrence counts
1672	// tie. Factoring out a variable (e.g., X from XC1 + XC2) exposes
1673	// downstream constant folding; factoring out a constant does not.
1674	auto IsBetterFactor = [](Value Factor, Value MaxOccVal, unsigned Occ,
1675	unsigned MaxOcc) {
1676	return Occ > MaxOcc \|\|
1677	(Occ == MaxOcc &&
1678	(isa<Instruction>(Val: Factor) \|\| isa<Argument>(Val: Factor)) &&
1679	isa<Constant>(Val: MaxOccVal) && !isa<UndefValue>(Val: MaxOccVal));
1680	};
1681	for (const ValueEntry &Op : Ops) {
1682	BinaryOperator *BOp =
1683	isReassociableOp(V: Op.Op, Opcode1: Instruction::Mul, Opcode2: Instruction::FMul);
1684	if (!BOp)
1685	continue;
1686
1687	// Compute all of the factors of this added value.
1688	SmallVector<Value*, `8`> Factors;
1689	FindSingleUseMultiplyFactors(V: BOp, Factors);
1690	assert(Factors.size() > `1` && "Bad linearize!");
1691
1692	// Add one to FactorOccurrences for each unique factor in this op.
1693	SmallPtrSet<Value*, `8`> Duplicates;
1694	for (Value *Factor : Factors) {
1695	if (!Duplicates.insert(Ptr: Factor).second)
1696	continue;
1697
1698	unsigned Occ = ++FactorOccurrences [Factor];
1699	if (IsBetterFactor (Factor, MaxOccVal, Occ, MaxOcc)) {
1700	MaxOcc = Occ;
1701	MaxOccVal = Factor;
1702	}
1703
1704	// If Factor is a negative constant, add the negated value as a factor
1705	// because we can percolate the negate out. Watch for minint, which
1706	// cannot be positivified.
1707	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Factor)) {
1708	if (CI->isNegative() && !CI->isMinValue(IsSigned: true)) {
1709	Factor = ConstantInt::get(Context&: CI->getContext(), V: -CI->getValue());
1710	if (!Duplicates.insert(Ptr: Factor).second)
1711	continue;
1712	unsigned Occ = ++FactorOccurrences [Factor];
1713	if (IsBetterFactor (Factor, MaxOccVal, Occ, MaxOcc)) {
1714	MaxOcc = Occ;
1715	MaxOccVal = Factor;
1716	}
1717	}
1718	} else if (ConstantFP *CF = dyn_cast<ConstantFP>(Val: Factor)) {
1719	if (CF->isNegative()) {
1720	APFloat F(CF->getValueAPF());
1721	F.changeSign();
1722	Factor = ConstantFP::get(Ty: CF->getType(), V: F);
1723	if (!Duplicates.insert(Ptr: Factor).second)
1724	continue;
1725	unsigned Occ = ++FactorOccurrences [Factor];
1726	if (IsBetterFactor (Factor, MaxOccVal, Occ, MaxOcc)) {
1727	MaxOcc = Occ;
1728	MaxOccVal = Factor;
1729	}
1730	}
1731	}
1732	}
1733	}
1734
1735	// If any factor occurred more than one time, we can pull it out.
1736	if (MaxOcc > `1`) {
1737	LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal
1738	<< `'\n'`);
1739	++NumFactor;
1740
1741	// Create a new instruction that uses the MaxOccVal twice. If we don't do
1742	// this, we could otherwise run into situations where removing a factor
1743	// from an expression will drop a use of maxocc, and this can cause
1744	// RemoveFactorFromExpression on successive values to behave differently.
1745	Instruction *DummyInst =
1746	I->getType()->isIntOrIntVectorTy()
1747	? BinaryOperator::CreateAdd(V1: MaxOccVal, V2: MaxOccVal)
1748	: BinaryOperator::CreateFAdd(V1: MaxOccVal, V2: MaxOccVal);
1749
1750	SmallVector<WeakTrackingVH, `4`> NewMulOps;
1751	for (unsigned i = `0`; i != Ops.size(); ++i) {
1752	// Only try to remove factors from expressions we're allowed to.
1753	BinaryOperator *BOp =
1754	isReassociableOp(V: Ops [i].Op, Opcode1: Instruction::Mul, Opcode2: Instruction::FMul);
1755	if (!BOp)
1756	continue;
1757
1758	if (Value *V = RemoveFactorFromExpression(V: Ops [i].Op, Factor: MaxOccVal,
1759	DL: I->getDebugLoc())) {
1760	// The factorized operand may occur several times. Convert them all in
1761	// one fell swoop.
1762	for (unsigned j = Ops.size(); j != i;) {
1763	--j;
1764	if (Ops [j].Op == Ops [i].Op) {
1765	NewMulOps.push_back(Elt: V);
1766	Ops.erase(CI: Ops.begin()+j);
1767	}
1768	}
1769	--i;
1770	}
1771	}
1772
1773	// No need for extra uses anymore.
1774	DummyInst->deleteValue();
1775
1776	unsigned NumAddedValues = NewMulOps.size();
1777	Value *V = EmitAddTreeOfValues(I, Ops&: NewMulOps);
1778
1779	// Now that we have inserted the add tree, optimize it. This allows us to
1780	// handle cases that require multiple factoring steps, such as this:
1781	// AAB + AAC --> A(AB+AC) --> A(A(B+C))*
1782	assert(NumAddedValues > `1` && "Each occurrence should contribute a value");
1783	(void)NumAddedValues;
1784	if (Instruction *VI = dyn_cast<Instruction>(Val: V))
1785	RedoInsts.insert(X: VI);
1786
1787	// Create the multiply.
1788	Instruction *V2 = CreateMul(S1: V, S2: MaxOccVal, Name: "reass.mul", InsertBefore: I->getIterator(), FlagsOp: I);
1789	V2->setDebugLoc(I->getDebugLoc());
1790
1791	// Rerun associate on the multiply in case the inner expression turned into
1792	// a multiply. We want to make sure that we keep things in canonical form.
1793	RedoInsts.insert(X: V2);
1794
1795	// If every add operand included the factor (e.g. "AB + AC"), then the
1796	// entire result expression is just the multiply "A(B+C)".*
1797	if (Ops.empty())
1798	return V2;
1799
1800	// Otherwise, we had some input that didn't have the factor, such as
1801	// "AB + AC + D" -> "A(B+C) + D". Add the new multiply to the list of*
1802	// things being added by this operation.
1803	Ops.insert(I: Ops.begin(), Elt: ValueEntry (getRank(V: V2), V2));
1804	}
1805
1806	return nullptr;
1807	}
1808
1809	/// Build up a vector of value/power pairs factoring a product.
1810	///
1811	/// Given a series of multiplication operands, build a vector of factors and
1812	/// the powers each is raised to when forming the final product. Sort them in
1813	/// the order of descending power.
1814	///
1815	/// (xx) -> [(x, 2)]*
1816	/// ((xx)x) -> [(x, 3)]
1817	/// ((((xy)x)y)x) -> [(x, 3), (y, 2)]
1818	///
1819	/// \returns Whether any factors have a power greater than one.
1820	static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
1821	SmallVectorImpl<Factor> &Factors) {
1822	// FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this.
1823	// Compute the sum of powers of simplifiable factors.
1824	unsigned FactorPowerSum = `0`;
1825	for (unsigned Idx = `1`, Size = Ops.size(); Idx < Size; ++Idx) {
1826	Value *Op = Ops [Idx-`1`].Op;
1827
1828	// Count the number of occurrences of this value.
1829	unsigned Count = `1`;
1830	for (; Idx < Size && Ops [Idx].Op == Op; ++Idx)
1831	++Count;
1832	// Track for simplification all factors which occur 2 or more times.
1833	if (Count > `1`)
1834	FactorPowerSum += Count;
1835	}
1836
1837	// We can only simplify factors if the sum of the powers of our simplifiable
1838	// factors is 4 or higher. When that is the case, we will always* have*
1839	// a simplification. This is an important invariant to prevent cyclicly
1840	// trying to simplify already minimal formations.
1841	if (FactorPowerSum < `4`)
1842	return false;
1843
1844	// Now gather the simplifiable factors, removing them from Ops.
1845	FactorPowerSum = `0`;
1846	for (unsigned Idx = `1`; Idx < Ops.size(); ++Idx) {
1847	Value *Op = Ops [Idx-`1`].Op;
1848
1849	// Count the number of occurrences of this value.
1850	unsigned Count = `1`;
1851	for (; Idx < Ops.size() && Ops [Idx].Op == Op; ++Idx)
1852	++Count;
1853	if (Count == `1`)
1854	continue;
1855	// Move an even number of occurrences to Factors.
1856	Count &= ~`1U`;
1857	Idx -= Count;
1858	FactorPowerSum += Count;
1859	Factors.push_back(Elt: Factor (Op, Count));
1860	Ops.erase(CS: Ops.begin()+Idx, CE: Ops.begin()+Idx+Count);
1861	}
1862
1863	// None of the adjustments above should have reduced the sum of factor powers
1864	// below our mininum of '4'.
1865	assert(FactorPowerSum >= `4`);
1866
1867	llvm::stable_sort(Range&: Factors, C: [](const Factor &LHS, const Factor &RHS) {
1868	return LHS.Power > RHS.Power;
1869	});
1870	return true;
1871	}
1872
1873	/// Build a tree of multiplies, computing the product of Ops.
1874	static Value *buildMultiplyTree(IRBuilderBase &Builder,
1875	SmallVectorImpl<Value*> &Ops) {
1876	if (Ops.size() == `1`)
1877	return Ops.back();
1878
1879	Value *LHS = Ops.pop_back_val();
1880	do {
1881	if (LHS->getType()->isIntOrIntVectorTy())
1882	LHS = Builder.CreateMul(LHS, RHS: Ops.pop_back_val());
1883	else
1884	LHS = Builder.CreateFMul(L: LHS, R: Ops.pop_back_val());
1885	} while (!Ops.empty());
1886
1887	return LHS;
1888	}
1889
1890	/// Build a minimal multiplication DAG for (a^x)(b^y)(c^z)...*
1891	///
1892	/// Given a vector of values raised to various powers, where no two values are
1893	/// equal and the powers are sorted in decreasing order, compute the minimal
1894	/// DAG of multiplies to compute the final product, and return that product
1895	/// value.
1896	Value *
1897	ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder,
1898	SmallVectorImpl<Factor> &Factors) {
1899	assert(Factors[`0`].Power);
1900	SmallVector<Value *, `4`> OuterProduct;
1901	for (unsigned LastIdx = `0`, Idx = `1`, Size = Factors.size();
1902	Idx < Size && Factors [Idx].Power > `0`; ++Idx) {
1903	if (Factors [Idx].Power != Factors [LastIdx].Power) {
1904	LastIdx = Idx;
1905	continue;
1906	}
1907
1908	// We want to multiply across all the factors with the same power so that
1909	// we can raise them to that power as a single entity. Build a mini tree
1910	// for that.
1911	SmallVector<Value *, `4`> InnerProduct;
1912	InnerProduct.push_back(Elt: Factors [LastIdx].Base);
1913	do {
1914	InnerProduct.push_back(Elt: Factors [Idx].Base);
1915	++Idx;
1916	} while (Idx < Size && Factors [Idx].Power == Factors [LastIdx].Power);
1917
1918	// Reset the base value of the first factor to the new expression tree.
1919	// We'll remove all the factors with the same power in a second pass.
1920	Value *M = Factors [LastIdx].Base = buildMultiplyTree(Builder, Ops&: InnerProduct);
1921	if (Instruction *MI = dyn_cast<Instruction>(Val: M))
1922	RedoInsts.insert(X: MI);
1923
1924	LastIdx = Idx;
1925	}
1926	// Unique factors with equal powers -- we've folded them into the first one's
1927	// base.
1928	Factors.erase(CS: llvm::unique(R&: Factors,
1929	P: [](const Factor &LHS, const Factor &RHS) {
1930	return LHS.Power == RHS.Power;
1931	}),
1932	CE: Factors.end());
1933
1934	// Iteratively collect the base of each factor with an add power into the
1935	// outer product, and halve each power in preparation for squaring the
1936	// expression.
1937	for (Factor &F : Factors) {
1938	if (F.Power & `1`)
1939	OuterProduct.push_back(Elt: F.Base);
1940	F.Power >>= `1`;
1941	}
1942	if (Factors [`0`].Power) {
1943	Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
1944	OuterProduct.push_back(Elt: SquareRoot);
1945	OuterProduct.push_back(Elt: SquareRoot);
1946	}
1947	if (OuterProduct.size() == `1`)
1948	return OuterProduct.front();
1949
1950	Value *V = buildMultiplyTree(Builder, Ops&: OuterProduct);
1951	return V;
1952	}
1953
1954	Value ReassociatePass::OptimizeMul(BinaryOperator I,
1955	SmallVectorImpl<ValueEntry> &Ops) {
1956	// We can only optimize the multiplies when there is a chain of more than
1957	// three, such that a balanced tree might require fewer total multiplies.
1958	if (Ops.size() < `4`)
1959	return nullptr;
1960
1961	// Try to turn linear trees of multiplies without other uses of the
1962	// intermediate stages into minimal multiply DAGs with perfect sub-expression
1963	// re-use.
1964	SmallVector<Factor, `4`> Factors;
1965	if (!collectMultiplyFactors(Ops, Factors))
1966	return nullptr; // All distinct factors, so nothing left for us to do.
1967
1968	IRBuilder<> Builder(I);
1969	// The reassociate transformation for FP operations is performed only
1970	// if unsafe algebra is permitted by FastMathFlags. Propagate those flags
1971	// to the newly generated operations.
1972	if (auto FPI = dyn_cast<FPMathOperator>(Val: I))
1973	Builder.setFastMathFlags(FPI->getFastMathFlags());
1974
1975	Value *V = buildMinimalMultiplyDAG(Builder, Factors);
1976	if (Ops.empty())
1977	return V;
1978
1979	ValueEntry NewEntry = ValueEntry (getRank(V), V);
1980	Ops.insert(I: llvm::lower_bound(Range&: Ops, Value&: NewEntry), Elt: NewEntry);
1981	return nullptr;
1982	}
1983
1984	Value ReassociatePass::OptimizeExpression(BinaryOperator I,
1985	SmallVectorImpl<ValueEntry> &Ops) {
1986	// Now that we have the linearized expression tree, try to optimize it.
1987	// Start by folding any constants that we found.
1988	const DataLayout &DL = I->getDataLayout();
1989	Constant Cst = nullptr*;
1990	unsigned Opcode = I->getOpcode();
1991	while (!Ops.empty()) {
1992	if (auto *C = dyn_cast<Constant>(Val: Ops.back().Op)) {
1993	if (!Cst) {
1994	Ops.pop_back();
1995	Cst = C;
1996	continue;
1997	}
1998	if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, LHS: C, RHS: Cst, DL)) {
1999	Ops.pop_back();
2000	Cst = Res;
2001	continue;
2002	}
2003	}
2004	break;
2005	}
2006	// If there was nothing but constants then we are done.
2007	if (Ops.empty())
2008	return Cst;
2009
2010	// Put the combined constant back at the end of the operand list, except if
2011	// there is no point. For example, an add of 0 gets dropped here, while a
2012	// multiplication by zero turns the whole expression into zero.
2013	if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, Ty: I->getType())) {
2014	if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, Ty: I->getType()))
2015	return Cst;
2016	Ops.push_back(Elt: ValueEntry (`0`, Cst));
2017	}
2018
2019	if (Ops.size() == `1`) return Ops [`0`].Op;
2020
2021	// Handle destructive annihilation due to identities between elements in the
2022	// argument list here.
2023	unsigned NumOps = Ops.size();
2024	switch (Opcode) {
2025	default: break;
2026	case Instruction::And:
2027	case Instruction::Or:
2028	if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
2029	return Result;
2030	break;
2031
2032	case Instruction::Xor:
2033	if (Value *Result = OptimizeXor(I, Ops))
2034	return Result;
2035	break;
2036
2037	case Instruction::Add:
2038	case Instruction::FAdd:
2039	if (Value *Result = OptimizeAdd(I, Ops))
2040	return Result;
2041	break;
2042
2043	case Instruction::Mul:
2044	case Instruction::FMul:
2045	if (Value *Result = OptimizeMul(I, Ops))
2046	return Result;
2047	break;
2048	}
2049
2050	if (Ops.size() != NumOps)
2051	return OptimizeExpression(I, Ops);
2052	return nullptr;
2053	}
2054
2055	// Remove dead instructions and if any operands are trivially dead add them to
2056	// Insts so they will be removed as well.
2057	void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I,
2058	OrderedSet &Insts) {
2059	assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
2060	SmallVector<Value *, `4`> Ops(I->operands());
2061	ValueRankMap.erase(Val: I);
2062	Insts.remove(X: I);
2063	RedoInsts.remove(X: I);
2064	llvm::salvageDebugInfo(I&: *I);
2065	I->eraseFromParent();
2066	for (auto *Op : Ops)
2067	if (Instruction *OpInst = dyn_cast<Instruction>(Val: Op))
2068	if (OpInst->use_empty())
2069	Insts.insert(X: OpInst);
2070	}
2071
2072	/// Zap the given instruction, adding interesting operands to the work list.
2073	void ReassociatePass::EraseInst(Instruction *I) {
2074	assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
2075	LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
2076
2077	SmallVector<Value *, `8`> Ops(I->operands());
2078	// Erase the dead instruction.
2079	ValueRankMap.erase(Val: I);
2080	RedoInsts.remove(X: I);
2081	llvm::salvageDebugInfo(I&: *I);
2082	I->eraseFromParent();
2083	// Optimize its operands.
2084	SmallPtrSet<Instruction , `8`> Visited; // Detect self-referential nodes.*
2085	for (Value *V : Ops)
2086	if (Instruction *Op = dyn_cast<Instruction>(Val: V)) {
2087	// If this is a node in an expression tree, climb to the expression root
2088	// and add that since that's where optimization actually happens.
2089	unsigned Opcode = Op->getOpcode();
2090	while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode &&
2091	Visited.insert(Ptr: Op).second)
2092	Op = Op->user_back();
2093
2094	// The instruction we're going to push may be coming from a
2095	// dead block, and Reassociate skips the processing of unreachable
2096	// blocks because it's a waste of time and also because it can
2097	// lead to infinite loop due to LLVM's non-standard definition
2098	// of dominance.
2099	if (ValueRankMap.contains(Val: Op))
2100	RedoInsts.insert(X: Op);
2101	}
2102
2103	MadeChange = true;
2104	}
2105
2106	/// Recursively analyze an expression to build a list of instructions that have
2107	/// negative floating-point constant operands. The caller can then transform
2108	/// the list to create positive constants for better reassociation and CSE.
2109	static void getNegatibleInsts(Value *V,
2110	SmallVectorImpl<Instruction *> &Candidates) {
2111	// Handle only one-use instructions. Combining negations does not justify
2112	// replicating instructions.
2113	Instruction *I;
2114	if (!match(V, P: m_OneUse(SubPattern: m_Instruction(I))))
2115	return;
2116
2117	// Handle expressions of multiplications and divisions.
2118	// TODO: This could look through floating-point casts.
2119	const APFloat *C;
2120	switch (I->getOpcode()) {
2121	case Instruction::FMul:
2122	// Not expecting non-canonical code here. Bail out and wait.
2123	if (match(V: I->getOperand(i: `0`), P: m_Constant()))
2124	break;
2125
2126	if (match(V: I->getOperand(i: `1`), P: m_APFloat(Res&: C)) && C->isNegative()) {
2127	Candidates.push_back(Elt: I);
2128	LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << `'\n'`);
2129	}
2130	getNegatibleInsts(V: I->getOperand(i: `0`), Candidates);
2131	getNegatibleInsts(V: I->getOperand(i: `1`), Candidates);
2132	break;
2133	case Instruction::FDiv:
2134	// Not expecting non-canonical code here. Bail out and wait.
2135	if (match(V: I->getOperand(i: `0`), P: m_Constant()) &&
2136	match(V: I->getOperand(i: `1`), P: m_Constant()))
2137	break;
2138
2139	if ((match(V: I->getOperand(i: `0`), P: m_APFloat(Res&: C)) && C->isNegative()) \|\|
2140	(match(V: I->getOperand(i: `1`), P: m_APFloat(Res&: C)) && C->isNegative())) {
2141	Candidates.push_back(Elt: I);
2142	LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << `'\n'`);
2143	}
2144	getNegatibleInsts(V: I->getOperand(i: `0`), Candidates);
2145	getNegatibleInsts(V: I->getOperand(i: `1`), Candidates);
2146	break;
2147	default:
2148	break;
2149	}
2150	}
2151
2152	/// Given an fadd/fsub with an operand that is a one-use instruction
2153	/// (the fadd/fsub), try to change negative floating-point constants into
2154	/// positive constants to increase potential for reassociation and CSE.
2155	Instruction ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction I,
2156	Instruction *Op,
2157	Value *OtherOp) {
2158	assert((I->getOpcode() == Instruction::FAdd \|\|
2159	I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub");
2160
2161	// Collect instructions with negative FP constants from the subtree that ends
2162	// in Op.
2163	SmallVector<Instruction *, `4`> Candidates;
2164	getNegatibleInsts(V: Op, Candidates);
2165	if (Candidates.empty())
2166	return nullptr;
2167
2168	// Don't canonicalize x + (-Constant y) -> x - (Constant * y), if the*
2169	// resulting subtract will be broken up later. This can get us into an
2170	// infinite loop during reassociation.
2171	bool IsFSub = I->getOpcode() == Instruction::FSub;
2172	bool NeedsSubtract = !IsFSub && Candidates.size() % `2` == `1`;
2173	if (NeedsSubtract && ShouldBreakUpSubtract(Sub: I))
2174	return nullptr;
2175
2176	for (Instruction *Negatible : Candidates) {
2177	const APFloat *C;
2178	if (match(V: Negatible->getOperand(i: `0`), P: m_APFloat(Res&: C))) {
2179	assert(!match(Negatible->getOperand(`1`), m_Constant()) &&
2180	"Expecting only 1 constant operand");
2181	assert(C->isNegative() && "Expected negative FP constant");
2182	Negatible->setOperand(i: `0`, Val: ConstantFP::get(Ty: Negatible->getType(), V: abs(X: *C)));
2183	MadeChange = true;
2184	}
2185	if (match(V: Negatible->getOperand(i: `1`), P: m_APFloat(Res&: C))) {
2186	assert(!match(Negatible->getOperand(`0`), m_Constant()) &&
2187	"Expecting only 1 constant operand");
2188	assert(C->isNegative() && "Expected negative FP constant");
2189	Negatible->setOperand(i: `1`, Val: ConstantFP::get(Ty: Negatible->getType(), V: abs(X: *C)));
2190	MadeChange = true;
2191	}
2192	}
2193	assert(MadeChange == true && "Negative constant candidate was not changed");
2194
2195	// Negations cancelled out.
2196	if (Candidates.size() % `2` == `0`)
2197	return I;
2198
2199	// Negate the final operand in the expression by flipping the opcode of this
2200	// fadd/fsub.
2201	assert(Candidates.size() % `2` == `1` && "Expected odd number");
2202	IRBuilder<> Builder(I);
2203	Value *NewInst = IsFSub ? Builder.CreateFAddFMF(L: OtherOp, R: Op, FMFSource: I)
2204	: Builder.CreateFSubFMF(L: OtherOp, R: Op, FMFSource: I);
2205	I->replaceAllUsesWith(V: NewInst);
2206	RedoInsts.insert(X: I);
2207	return dyn_cast<Instruction>(Val: NewInst);
2208	}
2209
2210	/// Canonicalize expressions that contain a negative floating-point constant
2211	/// of the following form:
2212	/// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree)
2213	/// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree)
2214	/// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree)
2215	///
2216	/// The fadd/fsub opcode may be switched to allow folding a negation into the
2217	/// input instruction.
2218	Instruction ReassociatePass::canonicalizeNegFPConstants(Instruction I) {
2219	LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << `'\n'`);
2220	Value *X;
2221	Instruction *Op;
2222	if (match(V: I, P: m_FAdd(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Instruction(I&: Op)))))
2223	if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, OtherOp: X))
2224	I = R;
2225	if (match(V: I, P: m_FAdd(L: m_OneUse(SubPattern: m_Instruction(I&: Op)), R: m_Value(V&: X))))
2226	if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, OtherOp: X))
2227	I = R;
2228	if (match(V: I, P: m_FSub(L: m_Value(V&: X), R: m_OneUse(SubPattern: m_Instruction(I&: Op)))))
2229	if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, OtherOp: X))
2230	I = R;
2231	return I;
2232	}
2233
2234	/// Inspect and optimize the given instruction. Note that erasing
2235	/// instructions is not allowed.
2236	void ReassociatePass::OptimizeInst(Instruction *I) {
2237	// Only consider operations that we understand.
2238	if (!isa<UnaryOperator>(Val: I) && !isa<BinaryOperator>(Val: I))
2239	return;
2240
2241	if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(Val: I->getOperand(i: `1`)))
2242	// If an operand of this shift is a reassociable multiply, or if the shift
2243	// is used by a reassociable multiply or add, turn into a multiply.
2244	if (isReassociableOp(V: I->getOperand(i: `0`), Opcode: Instruction::Mul) \|\|
2245	(I->hasOneUse() &&
2246	(isReassociableOp(V: I->user_back(), Opcode: Instruction::Mul) \|\|
2247	isReassociableOp(V: I->user_back(), Opcode: Instruction::Add)))) {
2248	Instruction *NI = ConvertShiftToMul(Shl: I);
2249	RedoInsts.insert(X: I);
2250	MadeChange = true;
2251	I = NI;
2252	}
2253
2254	// Commute binary operators, to canonicalize the order of their operands.
2255	// This can potentially expose more CSE opportunities, and makes writing other
2256	// transformations simpler.
2257	if (I->isCommutative())
2258	canonicalizeOperands(I);
2259
2260	// Canonicalize negative constants out of expressions.
2261	if (Instruction *Res = canonicalizeNegFPConstants(I))
2262	I = Res;
2263
2264	// Don't optimize floating-point instructions unless they have the
2265	// appropriate FastMathFlags for reassociation enabled.
2266	if (isa<FPMathOperator>(Val: I) && !hasFPAssociativeFlags(I))
2267	return;
2268
2269	// Do not reassociate boolean (i1/vXi1) expressions. We want to preserve the
2270	// original order of evaluation for short-circuited comparisons that
2271	// SimplifyCFG has folded to AND/OR expressions. If the expression
2272	// is not further optimized, it is likely to be transformed back to a
2273	// short-circuited form for code gen, and the source order may have been
2274	// optimized for the most likely conditions. For vector boolean expressions,
2275	// we should be optimizing for ILP and not serializing the logical operations.
2276	if (I->getType()->isIntOrIntVectorTy(BitWidth: `1`))
2277	return;
2278
2279	// If this is a bitwise or instruction of operands
2280	// with no common bits set, convert it to X+Y.
2281	if (I->getOpcode() == Instruction::Or &&
2282	shouldConvertOrWithNoCommonBitsToAdd(Or: I) && !isLoadCombineCandidate(Or: I) &&
2283	(cast<PossiblyDisjointInst>(Val: I)->isDisjoint() \|\|
2284	haveNoCommonBitsSet(LHSCache: I->getOperand(i: `0`), RHSCache: I->getOperand(i: `1`),
2285	SQ: SimplifyQuery (I->getDataLayout(),
2286	/DT=/nullptr, /AC=/nullptr, I)))) {
2287	Instruction *NI = convertOrWithNoCommonBitsToAdd(Or: I);
2288	RedoInsts.insert(X: I);
2289	MadeChange = true;
2290	I = NI;
2291	}
2292
2293	if (I->getOpcode() == Instruction::Mul && ShouldBreakUpDistribution(Mul: I)) {
2294	Instruction *MulUser = cast<Instruction>(Val: I->user_back());
2295	Instruction *NI = BreakUpDistribute(Mul: I, ToRedo&: RedoInsts);
2296	RedoInsts.insert(X: I);
2297	RedoInsts.insert(X: MulUser);
2298	MadeChange = true;
2299	I = NI;
2300	}
2301
2302	// If this is a subtract instruction which is not already in negate form,
2303	// see if we can convert it to X+-Y.
2304	if (I->getOpcode() == Instruction::Sub) {
2305	if (ShouldBreakUpSubtract(Sub: I)) {
2306	Instruction *NI = BreakUpSubtract(Sub: I, ToRedo&: RedoInsts);
2307	RedoInsts.insert(X: I);
2308	MadeChange = true;
2309	I = NI;
2310	} else if (match(V: I, P: m_Neg(V: m_Value()))) {
2311	// Otherwise, this is a negation. See if the operand is a multiply tree
2312	// and if this is not an inner node of a multiply tree.
2313	if (isReassociableOp(V: I->getOperand(i: `1`), Opcode: Instruction::Mul) &&
2314	(!I->hasOneUse() \|\|
2315	!isReassociableOp(V: I->user_back(), Opcode: Instruction::Mul))) {
2316	Instruction *NI = LowerNegateToMultiply(Neg: I);
2317	// If the negate was simplified, revisit the users to see if we can
2318	// reassociate further.
2319	for (User *U : NI->users()) {
2320	if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(Val: U))
2321	RedoInsts.insert(X: Tmp);
2322	}
2323	RedoInsts.insert(X: I);
2324	MadeChange = true;
2325	I = NI;
2326	}
2327	}
2328	} else if (I->getOpcode() == Instruction::FNeg \|\|
2329	I->getOpcode() == Instruction::FSub) {
2330	if (ShouldBreakUpSubtract(Sub: I)) {
2331	Instruction *NI = BreakUpSubtract(Sub: I, ToRedo&: RedoInsts);
2332	RedoInsts.insert(X: I);
2333	MadeChange = true;
2334	I = NI;
2335	} else if (match(V: I, P: m_FNeg(X: m_Value()))) {
2336	// Otherwise, this is a negation. See if the operand is a multiply tree
2337	// and if this is not an inner node of a multiply tree.
2338	Value *Op = isa<BinaryOperator>(Val: I) ? I->getOperand(i: `1`) :
2339	I->getOperand(i: `0`);
2340	if (isReassociableOp(V: Op, Opcode: Instruction::FMul) &&
2341	(!I->hasOneUse() \|\|
2342	!isReassociableOp(V: I->user_back(), Opcode: Instruction::FMul))) {
2343	// If the negate was simplified, revisit the users to see if we can
2344	// reassociate further.
2345	Instruction *NI = LowerNegateToMultiply(Neg: I);
2346	for (User *U : NI->users()) {
2347	if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(Val: U))
2348	RedoInsts.insert(X: Tmp);
2349	}
2350	RedoInsts.insert(X: I);
2351	MadeChange = true;
2352	I = NI;
2353	}
2354	}
2355	}
2356
2357	// If this instruction is an associative binary operator, process it.
2358	if (!I->isAssociative()) return;
2359	BinaryOperator *BO = cast<BinaryOperator>(Val: I);
2360
2361	// If this is an interior node of a reassociable tree, ignore it until we
2362	// get to the root of the tree, to avoid N^2 analysis.
2363	unsigned Opcode = BO->getOpcode();
2364	if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
2365	// During the initial run we will get to the root of the tree.
2366	// But if we get here while we are redoing instructions, there is no
2367	// guarantee that the root will be visited. So Redo later
2368	if (BO->user_back() != BO &&
2369	BO->getParent() == BO->user_back()->getParent())
2370	RedoInsts.insert(X: BO->user_back());
2371	return;
2372	}
2373
2374	// If this is an add tree that is used by a sub instruction, ignore it
2375	// until we process the subtract.
2376	if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add &&
2377	cast<Instruction>(Val: BO->user_back())->getOpcode() == Instruction::Sub)
2378	return;
2379	if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd &&
2380	cast<Instruction>(Val: BO->user_back())->getOpcode() == Instruction::FSub)
2381	return;
2382
2383	ReassociateExpression(I: BO);
2384	}
2385
2386	void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
2387	// First, walk the expression tree, linearizing the tree, collecting the
2388	// operand information.
2389	SmallVector<RepeatedValue, `8`> Tree;
2390	OverflowTracking Flags;
2391	MadeChange \|= LinearizeExprTree(I, Ops&: Tree, ToRedo&: RedoInsts, Flags);
2392	SmallVector<ValueEntry, `8`> Ops;
2393	Ops.reserve(N: Tree.size());
2394	for (const RepeatedValue &E : Tree)
2395	Ops.append(NumInputs: E.second, Elt: ValueEntry (getRank(V: E.first), E.first));
2396
2397	LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << `'\n'`);
2398
2399	// Now that we have linearized the tree to a list and have gathered all of
2400	// the operands and their ranks, sort the operands by their rank. Use a
2401	// stable_sort so that values with equal ranks will have their relative
2402	// positions maintained (and so the compiler is deterministic). Note that
2403	// this sorts so that the highest ranking values end up at the beginning of
2404	// the vector.
2405	llvm::stable_sort(Range&: Ops);
2406
2407	// Now that we have the expression tree in a convenient
2408	// sorted form, optimize it globally if possible.
2409	if (Value *V = OptimizeExpression(I, Ops)) {
2410	if (V == I)
2411	// Self-referential expression in unreachable code.
2412	return;
2413	// This expression tree simplified to something that isn't a tree,
2414	// eliminate it.
2415	LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << `'\n'`);
2416	I->replaceAllUsesWith(V);
2417	if (Instruction *VI = dyn_cast<Instruction>(Val: V))
2418	if (I->getDebugLoc())
2419	VI->setDebugLoc(I->getDebugLoc());
2420	RedoInsts.insert(X: I);
2421	++NumAnnihil;
2422	return;
2423	}
2424
2425	// We want to sink immediates as deeply as possible except in the case where
2426	// this is a multiply tree used only by an add, and the immediate is a -1.
2427	// In this case we reassociate to put the negation on the outside so that we
2428	// can fold the negation into the add: (-X)Y + Z -> Z-XY
2429	if (I->hasOneUse()) {
2430	if (I->getOpcode() == Instruction::Mul &&
2431	cast<Instruction>(Val: I->user_back())->getOpcode() == Instruction::Add &&
2432	isa<ConstantInt>(Val: Ops.back().Op) &&
2433	cast<ConstantInt>(Val: Ops.back().Op)->isMinusOne()) {
2434	ValueEntry Tmp = Ops.pop_back_val();
2435	Ops.insert(I: Ops.begin(), Elt: Tmp);
2436	} else if (I->getOpcode() == Instruction::FMul &&
2437	cast<Instruction>(Val: I->user_back())->getOpcode() ==
2438	Instruction::FAdd &&
2439	isa<ConstantFP>(Val: Ops.back().Op) &&
2440	cast<ConstantFP>(Val: Ops.back().Op)->isMinusOne()) {
2441	ValueEntry Tmp = Ops.pop_back_val();
2442	Ops.insert(I: Ops.begin(), Elt: Tmp);
2443	}
2444	}
2445
2446	LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << `'\n'`);
2447
2448	if (Ops.size() == `1`) {
2449	if (Ops [`0`].Op == I)
2450	// Self-referential expression in unreachable code.
2451	return;
2452
2453	// This expression tree simplified to something that isn't a tree,
2454	// eliminate it.
2455	I->replaceAllUsesWith(V: Ops [`0`].Op);
2456	if (Instruction *OI = dyn_cast<Instruction>(Val: Ops [`0`].Op))
2457	OI->setDebugLoc(I->getDebugLoc());
2458	RedoInsts.insert(X: I);
2459	return;
2460	}
2461
2462	if (Ops.size() > `2` && Ops.size() <= GlobalReassociateLimit) {
2463	// Find the pair with the highest count in the pairmap and move it to the
2464	// back of the list so that it can later be CSE'd.
2465	// example:
2466	// abcde
2467	// if ce is the most "popular" pair, we can express this as*
2468	// (((ce)d)b)a
2469	unsigned Max = `1`;
2470	unsigned BestRank = `0`;
2471	std::pair<unsigned, unsigned> BestPair;
2472	unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
2473	unsigned LimitIdx = `0`;
2474	// With the CSE-driven heuristic, we are about to slap two values at the
2475	// beginning of the expression whereas they could live very late in the CFG.
2476	// When using the CSE-local heuristic we avoid creating dependences from
2477	// completely unrelated part of the CFG by limiting the expression
2478	// reordering on the values that live in the first seen basic block.
2479	// The main idea is that we want to avoid forming expressions that would
2480	// become loop dependent.
2481	if (UseCSELocalOpt) {
2482	const BasicBlock FirstSeenBB = nullptr*;
2483	int StartIdx = Ops.size() - `1`;
2484	// Skip the first value of the expression since we need at least two
2485	// values to materialize an expression. I.e., even if this value is
2486	// anchored in a different basic block, the actual first sub expression
2487	// will be anchored on the second value.
2488	for (int i = StartIdx - `1`; i != -`1`; --i) {
2489	const Value *Val = Ops [i].Op;
2490	const auto *CurrLeafInstr = dyn_cast<Instruction>(Val);
2491	const BasicBlock SeenBB = nullptr*;
2492	if (!CurrLeafInstr) {
2493	// The value is free of any CFG dependencies.
2494	// Do as if it lives in the entry block.
2495	//
2496	// We do this to make sure all the values falling on this path are
2497	// seen through the same anchor point. The rationale is these values
2498	// can be combined together to from a sub expression free of any CFG
2499	// dependencies so we want them to stay together.
2500	// We could be cleverer and postpone the anchor down to the first
2501	// anchored value, but that's likely complicated to get right.
2502	// E.g., we wouldn't want to do that if that means being stuck in a
2503	// loop.
2504	//
2505	// For instance, we wouldn't want to change:
2506	// res = arg1 op arg2 op arg3 op ... op loop_val1 op loop_val2 ...
2507	// into
2508	// res = loop_val1 op arg1 op arg2 op arg3 op ... op loop_val2 ...
2509	// Because all the sub expressions with arg2..N would be stuck between
2510	// two loop dependent values.
2511	SeenBB = &I->getParent()->getParent()->getEntryBlock();
2512	} else {
2513	SeenBB = CurrLeafInstr->getParent();
2514	}
2515
2516	if (!FirstSeenBB) {
2517	FirstSeenBB = SeenBB;
2518	continue;
2519	}
2520	if (FirstSeenBB != SeenBB) {
2521	// ith value is in a different basic block.
2522	// Rewind the index once to point to the last value on the same basic
2523	// block.
2524	LimitIdx = i + `1`;
2525	LLVM_DEBUG(dbgs() << "CSE reordering: Consider values between ["
2526	<< LimitIdx << ", " << StartIdx << "]\n");
2527	break;
2528	}
2529	}
2530	}
2531	for (unsigned i = Ops.size() - `1`; i > LimitIdx; --i) {
2532	// We must use int type to go below zero when LimitIdx is 0.
2533	for (int j = i - `1`; j >= (int)LimitIdx; --j) {
2534	unsigned Score = `0`;
2535	Value *Op0 = Ops [i].Op;
2536	Value *Op1 = Ops [j].Op;
2537	if (std::less<Value *>()(Op1, Op0))
2538	std::swap(a&: Op0, b&: Op1);
2539	auto it = PairMap[Idx].find(Val: {Op0, Op1});
2540	if (it != PairMap[Idx].end()) {
2541	// Functions like BreakUpSubtract() can erase the Values we're using
2542	// as keys and create new Values after we built the PairMap. There's a
2543	// small chance that the new nodes can have the same address as
2544	// something already in the table. We shouldn't accumulate the stored
2545	// score in that case as it refers to the wrong Value.
2546	if (it ->second.isValid())
2547	Score += it ->second.Score;
2548	}
2549
2550	unsigned MaxRank = std::max(a: Ops [i].Rank, b: Ops [j].Rank);
2551
2552	// By construction, the operands are sorted in reverse order of their
2553	// topological order.
2554	// So we tend to form (sub) expressions with values that are close to
2555	// each other.
2556	//
2557	// Now to expose more CSE opportunities we want to expose the pair of
2558	// operands that occur the most (as statically computed in
2559	// BuildPairMap.) as the first sub-expression.
2560	//
2561	// If two pairs occur as many times, we pick the one with the
2562	// lowest rank, meaning the one with both operands appearing first in
2563	// the topological order.
2564	if (Score > Max \|\| (Score == Max && MaxRank < BestRank)) {
2565	BestPair = {j, i};
2566	Max = Score;
2567	BestRank = MaxRank;
2568	}
2569	}
2570	}
2571	if (Max > `1`) {
2572	auto Op0 = Ops [BestPair.first];
2573	auto Op1 = Ops [BestPair.second];
2574	Ops.erase(CI: &Ops [BestPair.second]);
2575	Ops.erase(CI: &Ops [BestPair.first]);
2576	Ops.push_back(Elt: Op0);
2577	Ops.push_back(Elt: Op1);
2578	}
2579	}
2580	LLVM_DEBUG(dbgs() << "RAOut after CSE reorder:\t"; PrintOps(I, Ops);
2581	dbgs() << `'\n'`);
2582	// Now that we ordered and optimized the expressions, splat them back into
2583	// the expression tree, removing any unneeded nodes.
2584	RewriteExprTree(I, Ops, Flags);
2585	}
2586
2587	void
2588	ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
2589	// Make a "pairmap" of how often each operand pair occurs.
2590	for (BasicBlock *BI : RPOT) {
2591	for (Instruction &I : *BI) {
2592	if (!I.isAssociative() \|\| !I.isBinaryOp())
2593	continue;
2594
2595	// Ignore nodes that aren't at the root of trees.
2596	if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode())
2597	continue;
2598
2599	// Collect all operands in a single reassociable expression.
2600	// Since Reassociate has already been run once, we can assume things
2601	// are already canonical according to Reassociation's regime.
2602	SmallVector<Value *, `8`> Worklist = { I.getOperand(i: `0`), I.getOperand(i: `1`) };
2603	SmallVector<Value *, `8`> Ops;
2604	while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) {
2605	Value *Op = Worklist.pop_back_val();
2606	Instruction *OpI = dyn_cast<Instruction>(Val: Op);
2607	if (!OpI \|\| OpI->getOpcode() != I.getOpcode() \|\| !OpI->hasOneUse()) {
2608	Ops.push_back(Elt: Op);
2609	continue;
2610	}
2611	// Be paranoid about self-referencing expressions in unreachable code.
2612	if (OpI->getOperand(i: `0`) != OpI)
2613	Worklist.push_back(Elt: OpI->getOperand(i: `0`));
2614	if (OpI->getOperand(i: `1`) != OpI)
2615	Worklist.push_back(Elt: OpI->getOperand(i: `1`));
2616	}
2617	// Skip extremely long expressions.
2618	if (Ops.size() > GlobalReassociateLimit)
2619	continue;
2620
2621	// Add all pairwise combinations of operands to the pair map.
2622	unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin;
2623	SmallSet<std::pair<Value , Value>, `32`> Visited;
2624	for (unsigned i = `0`; i < Ops.size() - `1`; ++i) {
2625	for (unsigned j = i + `1`; j < Ops.size(); ++j) {
2626	// Canonicalize operand orderings.
2627	Value *Op0 = Ops [i];
2628	Value *Op1 = Ops [j];
2629	if (std::less<Value *>()(Op1, Op0))
2630	std::swap(a&: Op0, b&: Op1);
2631	if (!Visited.insert(V: {Op0, Op1}).second)
2632	continue;
2633	auto res = PairMap[BinaryIdx].insert(KV: {{Op0, Op1}, {.Value1: Op0, .Value2: Op1, .Score: `1`}});
2634	if (!res.second) {
2635	// If either key value has been erased then we've got the same
2636	// address by coincidence. That can't happen here because nothing is
2637	// erasing values but it can happen by the time we're querying the
2638	// map.
2639	assert(res.first->second.isValid() && "WeakVH invalidated");
2640	++res.first ->second.Score;
2641	}
2642	}
2643	}
2644	}
2645	}
2646	}
2647
2648	PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
2649	// Get the functions basic blocks in Reverse Post Order. This order is used by
2650	// BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
2651	// blocks (it has been seen that the analysis in this pass could hang when
2652	// analysing dead basic blocks).
2653	ReversePostOrderTraversal<Function *> RPOT(&F);
2654
2655	// Calculate the rank map for F.
2656	BuildRankMap(F, RPOT);
2657
2658	// Build the pair map before running reassociate.
2659	// Technically this would be more accurate if we did it after one round
2660	// of reassociation, but in practice it doesn't seem to help much on
2661	// real-world code, so don't waste the compile time running reassociate
2662	// twice.
2663	// If a user wants, they could expicitly run reassociate twice in their
2664	// pass pipeline for further potential gains.
2665	// It might also be possible to update the pair map during runtime, but the
2666	// overhead of that may be large if there's many reassociable chains.
2667	BuildPairMap(RPOT);
2668
2669	MadeChange = false;
2670
2671	// Traverse the same blocks that were analysed by BuildRankMap.
2672	for (BasicBlock *BI : RPOT) {
2673	assert(RankMap.count(&*BI) && "BB should be ranked.");
2674	// Optimize every instruction in the basic block.
2675	for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
2676	if (isInstructionTriviallyDead(I: &*II)) {
2677	EraseInst(I: &*II ++);
2678	} else {
2679	OptimizeInst(I: &*II);
2680	assert(II->getParent() == &*BI && "Moved to a different block!");
2681	++II;
2682	}
2683
2684	// Make a copy of all the instructions to be redone so we can remove dead
2685	// instructions.
2686	OrderedSet ToRedo(RedoInsts);
2687	// Iterate over all instructions to be reevaluated and remove trivially dead
2688	// instructions. If any operand of the trivially dead instruction becomes
2689	// dead mark it for deletion as well. Continue this process until all
2690	// trivially dead instructions have been removed.
2691	while (!ToRedo.empty()) {
2692	Instruction *I = ToRedo.pop_back_val();
2693	if (isInstructionTriviallyDead(I)) {
2694	RecursivelyEraseDeadInsts(I, Insts&: ToRedo);
2695	MadeChange = true;
2696	}
2697	}
2698
2699	// Now that we have removed dead instructions, we can reoptimize the
2700	// remaining instructions.
2701	while (!RedoInsts.empty()) {
2702	Instruction *I = RedoInsts.front();
2703	RedoInsts.erase(I: RedoInsts.begin());
2704	if (isInstructionTriviallyDead(I))
2705	EraseInst(I);
2706	else
2707	OptimizeInst(I);
2708	}
2709	}
2710
2711	// We are done with the rank map and pair map.
2712	RankMap.clear();
2713	ValueRankMap.clear();
2714	for (auto &Entry : PairMap)
2715	Entry.clear();
2716
2717	if (MadeChange) {
2718	PreservedAnalyses PA;
2719	PA.preserveSet<CFGAnalyses>();
2720	return PA;
2721	}
2722
2723	return PreservedAnalyses::all();
2724	}
2725
2726	namespace {
2727
2728	class ReassociateLegacyPass : public FunctionPass {
2729	ReassociatePass Impl;
2730
2731	public:
2732	static char ID; // Pass identification, replacement for typeid
2733
2734	ReassociateLegacyPass() : FunctionPass (ID) {
2735	initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
2736	}
2737
2738	bool runOnFunction(Function &F) override {
2739	if (skipFunction(F))
2740	return false;
2741
2742	FunctionAnalysisManager DummyFAM;
2743	auto PA = Impl.run(F, DummyFAM);
2744	return !PA.areAllPreserved();
2745	}
2746
2747	void getAnalysisUsage(AnalysisUsage &AU) const override {
2748	AU.setPreservesCFG();
2749	AU.addPreserved<AAResultsWrapperPass>();
2750	AU.addPreserved<BasicAAWrapperPass>();
2751	AU.addPreserved<GlobalsAAWrapperPass>();
2752	}
2753	};
2754
2755	} // end anonymous namespace
2756
2757	char ReassociateLegacyPass::ID = `0`;
2758
2759	INITIALIZE_PASS(ReassociateLegacyPass, "reassociate",
2760	"Reassociate expressions", false, false)
2761
2762	// Public interface to the Reassociate pass
2763	FunctionPass *llvm::createReassociatePass() {
2764	return new ReassociateLegacyPass ();
2765	}
2766

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/Reassociate.cpp