SeparateConstOffsetFromGEP.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp]

1	//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Loop unrolling may create many similar GEPs for array accesses.
10	// e.g., a 2-level loop
11	//
12	// float a[32][32]; // global variable
13	//
14	// for (int i = 0; i < 2; ++i) {
15	// for (int j = 0; j < 2; ++j) {
16	// ...
17	// ... = a[x + i][y + j];
18	// ...
19	// }
20	// }
21	//
22	// will probably be unrolled to:
23	//
24	// gep %a, 0, %x, %y; load
25	// gep %a, 0, %x, %y + 1; load
26	// gep %a, 0, %x + 1, %y; load
27	// gep %a, 0, %x + 1, %y + 1; load
28	//
29	// LLVM's GVN does not use partial redundancy elimination yet, and is thus
30	// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
31	// significant slowdown in targets with limited addressing modes. For instance,
32	// because the PTX target does not support the reg+reg addressing mode, the
33	// NVPTX backend emits PTX code that literally computes the pointer address of
34	// each GEP, wasting tons of registers. It emits the following PTX for the
35	// first load and similar PTX for other loads.
36	//
37	// mov.u32 %r1, %x;
38	// mov.u32 %r2, %y;
39	// mul.wide.u32 %rl2, %r1, 128;
40	// mov.u64 %rl3, a;
41	// add.s64 %rl4, %rl3, %rl2;
42	// mul.wide.u32 %rl5, %r2, 4;
43	// add.s64 %rl6, %rl4, %rl5;
44	// ld.global.f32 %f1, [%rl6];
45	//
46	// To reduce the register pressure, the optimization implemented in this file
47	// merges the common part of a group of GEPs, so we can compute each pointer
48	// address by adding a simple offset to the common part, saving many registers.
49	//
50	// It works by splitting each GEP into a variadic base and a constant offset.
51	// The variadic base can be computed once and reused by multiple GEPs, and the
52	// constant offsets can be nicely folded into the reg+immediate addressing mode
53	// (supported by most targets) without using any extra register.
54	//
55	// For instance, we transform the four GEPs and four loads in the above example
56	// into:
57	//
58	// base = gep a, 0, x, y
59	// load base
60	// load base + 1 sizeof(float)*
61	// load base + 32 sizeof(float)*
62	// load base + 33 sizeof(float)*
63	//
64	// Given the transformed IR, a backend that supports the reg+immediate
65	// addressing mode can easily fold the pointer arithmetics into the loads. For
66	// example, the NVPTX backend can easily fold the pointer arithmetics into the
67	// ld.global.f32 instructions, and the resultant PTX uses much fewer registers.
68	//
69	// mov.u32 %r1, %tid.x;
70	// mov.u32 %r2, %tid.y;
71	// mul.wide.u32 %rl2, %r1, 128;
72	// mov.u64 %rl3, a;
73	// add.s64 %rl4, %rl3, %rl2;
74	// mul.wide.u32 %rl5, %r2, 4;
75	// add.s64 %rl6, %rl4, %rl5;
76	// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
77	// ld.global.f32 %f2, [%rl6+4]; // much better
78	// ld.global.f32 %f3, [%rl6+128]; // much better
79	// ld.global.f32 %f4, [%rl6+132]; // much better
80	//
81	// Another improvement enabled by the LowerGEP flag is to lower a GEP with
82	// multiple indices to either multiple GEPs with a single index or arithmetic
83	// operations (depending on whether the target uses alias analysis in codegen).
84	// Such transformation can have following benefits:
85	// (1) It can always extract constants in the indices of structure type.
86	// (2) After such Lowering, there are more optimization opportunities such as
87	// CSE, LICM and CGP.
88	//
89	// E.g. The following GEPs have multiple indices:
90	// BB1:
91	// %p = getelementptr [10 x %struct] %ptr, i64 %i, i64 %j1, i32 3*
92	// load %p
93	// ...
94	// BB2:
95	// %p2 = getelementptr [10 x %struct] %ptr, i64 %i, i64 %j1, i32 2*
96	// load %p2
97	// ...
98	//
99	// We can not do CSE to the common part related to index "i64 %i". Lowering
100	// GEPs can achieve such goals.
101	// If the target does not use alias analysis in codegen, this pass will
102	// lower a GEP with multiple indices into arithmetic operations:
103	// BB1:
104	// %1 = ptrtoint [10 x %struct] %ptr to i64 ; CSE opportunity*
105	// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
106	// %3 = add i64 %1, %2 ; CSE opportunity
107	// %4 = mul i64 %j1, length_of_struct
108	// %5 = add i64 %3, %4
109	// %6 = add i64 %3, struct_field_3 ; Constant offset
110	// %p = inttoptr i64 %6 to i32*
111	// load %p
112	// ...
113	// BB2:
114	// %7 = ptrtoint [10 x %struct] %ptr to i64 ; CSE opportunity*
115	// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
116	// %9 = add i64 %7, %8 ; CSE opportunity
117	// %10 = mul i64 %j2, length_of_struct
118	// %11 = add i64 %9, %10
119	// %12 = add i64 %11, struct_field_2 ; Constant offset
120	// %p = inttoptr i64 %12 to i32*
121	// load %p2
122	// ...
123	//
124	// If the target uses alias analysis in codegen, this pass will lower a GEP
125	// with multiple indices into multiple GEPs with a single index:
126	// BB1:
127	// %1 = bitcast [10 x %struct] %ptr to i8* ; CSE opportunity*
128	// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
129	// %3 = getelementptr i8 %1, i64 %2 ; CSE opportunity*
130	// %4 = mul i64 %j1, length_of_struct
131	// %5 = getelementptr i8 %3, i64 %4*
132	// %6 = getelementptr i8 %5, struct_field_3 ; Constant offset*
133	// %p = bitcast i8* %6 to i32*
134	// load %p
135	// ...
136	// BB2:
137	// %7 = bitcast [10 x %struct] %ptr to i8* ; CSE opportunity*
138	// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
139	// %9 = getelementptr i8 %7, i64 %8 ; CSE opportunity*
140	// %10 = mul i64 %j2, length_of_struct
141	// %11 = getelementptr i8 %9, i64 %10*
142	// %12 = getelementptr i8 %11, struct_field_2 ; Constant offset*
143	// %p2 = bitcast i8* %12 to i32*
144	// load %p2
145	// ...
146	//
147	// Lowering GEPs can also benefit other passes such as LICM and CGP.
148	// LICM (Loop Invariant Code Motion) can not hoist/sink a GEP of multiple
149	// indices if one of the index is variant. If we lower such GEP into invariant
150	// parts and variant parts, LICM can hoist/sink those invariant parts.
151	// CGP (CodeGen Prepare) tries to sink address calculations that match the
152	// target's addressing modes. A GEP with multiple indices may not match and will
153	// not be sunk. If we lower such GEP into smaller parts, CGP may sink some of
154	// them. So we end up with a better addressing mode.
155	//
156	//===----------------------------------------------------------------------===//
157
158	#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
159	#include "llvm/ADT/APInt.h"
160	#include "llvm/ADT/DenseMap.h"
161	#include "llvm/ADT/DepthFirstIterator.h"
162	#include "llvm/ADT/SmallVector.h"
163	#include "llvm/Analysis/LoopInfo.h"
164	#include "llvm/Analysis/MemoryBuiltins.h"
165	#include "llvm/Analysis/TargetLibraryInfo.h"
166	#include "llvm/Analysis/TargetTransformInfo.h"
167	#include "llvm/Analysis/ValueTracking.h"
168	#include "llvm/IR/BasicBlock.h"
169	#include "llvm/IR/Constant.h"
170	#include "llvm/IR/Constants.h"
171	#include "llvm/IR/DataLayout.h"
172	#include "llvm/IR/DerivedTypes.h"
173	#include "llvm/IR/Dominators.h"
174	#include "llvm/IR/Function.h"
175	#include "llvm/IR/GetElementPtrTypeIterator.h"
176	#include "llvm/IR/IRBuilder.h"
177	#include "llvm/IR/InstrTypes.h"
178	#include "llvm/IR/Instruction.h"
179	#include "llvm/IR/Instructions.h"
180	#include "llvm/IR/Module.h"
181	#include "llvm/IR/PassManager.h"
182	#include "llvm/IR/PatternMatch.h"
183	#include "llvm/IR/Type.h"
184	#include "llvm/IR/User.h"
185	#include "llvm/IR/Value.h"
186	#include "llvm/InitializePasses.h"
187	#include "llvm/Pass.h"
188	#include "llvm/Support/Casting.h"
189	#include "llvm/Support/CommandLine.h"
190	#include "llvm/Support/ErrorHandling.h"
191	#include "llvm/Support/raw_ostream.h"
192	#include "llvm/Transforms/Scalar.h"
193	#include "llvm/Transforms/Utils/Local.h"
194	#include <cassert>
195	#include <cstdint>
196	#include <string>
197
198	using namespace llvm;
199	using namespace llvm::PatternMatch;
200
201	static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
202	"disable-separate-const-offset-from-gep", cl::init(Val: false),
203	cl::desc ("Do not separate the constant offset from a GEP instruction"),
204	cl::Hidden);
205
206	// Setting this flag may emit false positives when the input module already
207	// contains dead instructions. Therefore, we set it only in unit tests that are
208	// free of dead code.
209	static cl::opt<bool>
210	VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(Val: false),
211	cl::desc ("Verify this pass produces no dead code"),
212	cl::Hidden);
213
214	namespace {
215
216	/// A helper class for separating a constant offset from a GEP index.
217	///
218	/// In real programs, a GEP index may be more complicated than a simple addition
219	/// of something and a constant integer which can be trivially splitted. For
220	/// example, to split ((a << 3) \| 5) + b, we need to search deeper for the
221	/// constant offset, so that we can separate the index to (a << 3) + b and 5.
222	///
223	/// Therefore, this class looks into the expression that computes a given GEP
224	/// index, and tries to find a constant integer that can be hoisted to the
225	/// outermost level of the expression as an addition. Not every constant in an
226	/// expression can jump out. e.g., we cannot transform (b (a + 5)) to (b * a +*
227	/// 5); nor can we transform (3 (a + 5)) to (3 * a + 5), however in this case,*
228	/// -instcombine probably already optimized (3 (a + 5)) to (3 * a + 15).*
229	class ConstantOffsetExtractor {
230	public:
231	/// Extracts a constant offset from the given GEP index. It returns the
232	/// new index representing the remainder (equal to the original index minus
233	/// the constant offset), or nullptr if we cannot extract a constant offset.
234	/// \p Idx The given GEP index
235	/// \p GEP The given GEP
236	/// \p UserChainTail Outputs the tail of UserChain so that we can
237	/// garbage-collect unused instructions in UserChain.
238	static Value Extract(Value Idx, GetElementPtrInst *GEP,
239	User *&UserChainTail);
240
241	/// Looks for a constant offset from the given GEP index without extracting
242	/// it. It returns the numeric value of the extracted constant offset (0 if
243	/// failed). The meaning of the arguments are the same as Extract.
244	static int64_t Find(Value Idx, GetElementPtrInst GEP);
245
246	private:
247	ConstantOffsetExtractor(BasicBlock::iterator InsertionPt)
248	: IP (InsertionPt), DL(InsertionPt ->getDataLayout()) {}
249
250	/// Searches the expression that computes V for a non-zero constant C s.t.
251	/// V can be reassociated into the form V' + C. If the searching is
252	/// successful, returns C and update UserChain as a def-use chain from C to V;
253	/// otherwise, UserChain is empty.
254	///
255	/// \p V The given expression
256	/// \p SignExtended Whether V will be sign-extended in the computation of the
257	/// GEP index
258	/// \p ZeroExtended Whether V will be zero-extended in the computation of the
259	/// GEP index
260	/// \p NonNegative Whether V is guaranteed to be non-negative. For example,
261	/// an index of an inbounds GEP is guaranteed to be
262	/// non-negative. Levaraging this, we can better split
263	/// inbounds GEPs.
264	APInt find(Value V, bool* SignExtended, bool ZeroExtended, bool NonNegative);
265
266	/// A helper function to look into both operands of a binary operator.
267	APInt findInEitherOperand(BinaryOperator BO, bool* SignExtended,
268	bool ZeroExtended);
269
270	/// After finding the constant offset C from the GEP index I, we build a new
271	/// index I' s.t. I' + C = I. This function builds and returns the new
272	/// index I' according to UserChain produced by function "find".
273	///
274	/// The building conceptually takes two steps:
275	/// 1) iteratively distribute s/zext towards the leaves of the expression tree
276	/// that computes I
277	/// 2) reassociate the expression tree to the form I' + C.
278	///
279	/// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
280	/// sext to a, b and 5 so that we have
281	/// sext(a) + (sext(b) + 5).
282	/// Then, we reassociate it to
283	/// (sext(a) + sext(b)) + 5.
284	/// Given this form, we know I' is sext(a) + sext(b).
285	Value *rebuildWithoutConstOffset();
286
287	/// After the first step of rebuilding the GEP index without the constant
288	/// offset, distribute s/zext to the operands of all operators in UserChain.
289	/// e.g., zext(sext(a + (b + 5)) (assuming no overflow) =>
290	/// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
291	///
292	/// The function also updates UserChain to point to new subexpressions after
293	/// distributing s/zext. e.g., the old UserChain of the above example is
294	/// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
295	/// and the new UserChain is
296	/// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
297	/// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))
298	///
299	/// \p ChainIndex The index to UserChain. ChainIndex is initially
300	/// UserChain.size() - 1, and is decremented during
301	/// the recursion.
302	Value distributeExtsAndCloneChain(unsigned* ChainIndex);
303
304	/// Reassociates the GEP index to the form I' + C and returns I'.
305	Value removeConstOffset(unsigned* ChainIndex);
306
307	/// A helper function to apply ExtInsts, a list of s/zext, to value V.
308	/// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
309	/// returns "sext i32 (zext i16 V to i32) to i64".
310	Value applyExts(Value V);
311
312	/// A helper function that returns whether we can trace into the operands
313	/// of binary operator BO for a constant offset.
314	///
315	/// \p SignExtended Whether BO is surrounded by sext
316	/// \p ZeroExtended Whether BO is surrounded by zext
317	/// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound
318	/// array index.
319	bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
320	bool NonNegative);
321
322	/// The path from the constant offset to the old GEP index. e.g., if the GEP
323	/// index is "a b + (c + 5)". After running function find, UserChain[0] will*
324	/// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
325	/// UserChain[2] will be the entire expression "a b + (c + 5)".*
326	///
327	/// This path helps to rebuild the new GEP index.
328	SmallVector<User *, `8`> UserChain;
329
330	/// A data structure used in rebuildWithoutConstOffset. Contains all
331	/// sext/zext instructions along UserChain.
332	SmallVector<CastInst *, `16`> ExtInsts;
333
334	/// Insertion position of cloned instructions.
335	BasicBlock::iterator IP;
336
337	const DataLayout &DL;
338	};
339
340	/// A pass that tries to split every GEP in the function into a variadic
341	/// base and a constant offset. It is a FunctionPass because searching for the
342	/// constant offset may inspect other basic blocks.
343	class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
344	public:
345	static char ID;
346
347	SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
348	: FunctionPass (ID), LowerGEP(LowerGEP) {
349	initializeSeparateConstOffsetFromGEPLegacyPassPass(
350	*PassRegistry::getPassRegistry());
351	}
352
353	void getAnalysisUsage(AnalysisUsage &AU) const override {
354	AU.addRequired<DominatorTreeWrapperPass>();
355	AU.addRequired<TargetTransformInfoWrapperPass>();
356	AU.addRequired<LoopInfoWrapperPass>();
357	AU.setPreservesCFG();
358	AU.addRequired<TargetLibraryInfoWrapperPass>();
359	}
360
361	bool runOnFunction(Function &F) override;
362
363	private:
364	bool LowerGEP;
365	};
366
367	/// A pass that tries to split every GEP in the function into a variadic
368	/// base and a constant offset. It is a FunctionPass because searching for the
369	/// constant offset may inspect other basic blocks.
370	class SeparateConstOffsetFromGEP {
371	public:
372	SeparateConstOffsetFromGEP(
373	DominatorTree DT, LoopInfo LI, TargetLibraryInfo *TLI,
374	function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
375	: DT(DT), LI(LI), TLI(TLI), GetTTI (GetTTI), LowerGEP(LowerGEP) {}
376
377	bool run(Function &F);
378
379	private:
380	/// Track the operands of an add or sub.
381	using ExprKey = std::pair<Value , Value >;
382
383	/// Create a pair for use as a map key for a commutable operation.
384	static ExprKey createNormalizedCommutablePair(Value A, Value B) {
385	if (A < B)
386	return {A, B};
387	return {B, A};
388	}
389
390	/// Tries to split the given GEP into a variadic base and a constant offset,
391	/// and returns true if the splitting succeeds.
392	bool splitGEP(GetElementPtrInst *GEP);
393
394	/// Tries to reorder the given GEP with the GEP that produces the base if
395	/// doing so results in producing a constant offset as the outermost
396	/// index.
397	bool reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI);
398
399	/// Lower a GEP with multiple indices into multiple GEPs with a single index.
400	/// Function splitGEP already split the original GEP into a variadic part and
401	/// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
402	/// variadic part into a set of GEPs with a single index and applies
403	/// AccumulativeByteOffset to it.
404	/// \p Variadic The variadic part of the original GEP.
405	/// \p AccumulativeByteOffset The constant offset.
406	void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
407	int64_t AccumulativeByteOffset);
408
409	/// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
410	/// Function splitGEP already split the original GEP into a variadic part and
411	/// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
412	/// variadic part into a set of arithmetic operations and applies
413	/// AccumulativeByteOffset to it.
414	/// \p Variadic The variadic part of the original GEP.
415	/// \p AccumulativeByteOffset The constant offset.
416	void lowerToArithmetics(GetElementPtrInst *Variadic,
417	int64_t AccumulativeByteOffset);
418
419	/// Finds the constant offset within each index and accumulates them. If
420	/// LowerGEP is true, it finds in indices of both sequential and structure
421	/// types, otherwise it only finds in sequential indices. The output
422	/// NeedsExtraction indicates whether we successfully find a non-zero constant
423	/// offset.
424	int64_t accumulateByteOffset(GetElementPtrInst GEP, bool* &NeedsExtraction);
425
426	/// Canonicalize array indices to pointer-size integers. This helps to
427	/// simplify the logic of splitting a GEP. For example, if a + b is a
428	/// pointer-size integer, we have
429	/// gep base, a + b = gep (gep base, a), b
430	/// However, this equality may not hold if the size of a + b is smaller than
431	/// the pointer size, because LLVM conceptually sign-extends GEP indices to
432	/// pointer size before computing the address
433	/// (http://llvm.org/docs/LangRef.html#id181).
434	///
435	/// This canonicalization is very likely already done in clang and
436	/// instcombine. Therefore, the program will probably remain the same.
437	///
438	/// Returns true if the module changes.
439	///
440	/// Verified in @i32_add in split-gep.ll
441	bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP);
442
443	/// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
444	/// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
445	/// the constant offset. After extraction, it becomes desirable to reunion the
446	/// distributed sexts. For example,
447	///
448	/// &a[sext(i +nsw (j +nsw 5)]
449	/// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)]
450	/// => constant extraction &a[sext(i) + sext(j)] + 5
451	/// => reunion &a[sext(i +nsw j)] + 5
452	bool reuniteExts(Function &F);
453
454	/// A helper that reunites sexts in an instruction.
455	bool reuniteExts(Instruction *I);
456
457	/// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
458	Instruction *findClosestMatchingDominator(
459	ExprKey Key, Instruction *Dominatee,
460	DenseMap<ExprKey, SmallVector<Instruction *, `2`>> &DominatingExprs);
461
462	/// Verify F is free of dead code.
463	void verifyNoDeadCode(Function &F);
464
465	bool hasMoreThanOneUseInLoop(Value v, Loop L);
466
467	// Swap the index operand of two GEP.
468	void swapGEPOperand(GetElementPtrInst First, GetElementPtrInst Second);
469
470	// Check if it is safe to swap operand of two GEP.
471	bool isLegalToSwapOperand(GetElementPtrInst First, GetElementPtrInst Second,
472	Loop *CurLoop);
473
474	const DataLayout DL = nullptr*;
475	DominatorTree DT = nullptr*;
476	LoopInfo *LI;
477	TargetLibraryInfo *TLI;
478	// Retrieved lazily since not always used.
479	function_ref<TargetTransformInfo &(Function &)> GetTTI;
480
481	/// Whether to lower a GEP with multiple indices into arithmetic operations or
482	/// multiple GEPs with a single index.
483	bool LowerGEP;
484
485	DenseMap<ExprKey, SmallVector<Instruction *, `2`>> DominatingAdds;
486	DenseMap<ExprKey, SmallVector<Instruction *, `2`>> DominatingSubs;
487	};
488
489	} // end anonymous namespace
490
491	char SeparateConstOffsetFromGEPLegacyPass::ID = `0`;
492
493	INITIALIZE_PASS_BEGIN(
494	SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
495	"Split GEPs to a variadic base and a constant offset for better CSE", false,
496	false)
497	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
498	INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
499	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
500	INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
501	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
502	INITIALIZE_PASS_END(
503	SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
504	"Split GEPs to a variadic base and a constant offset for better CSE", false,
505	false)
506
507	FunctionPass llvm::createSeparateConstOffsetFromGEPPass(bool* LowerGEP) {
508	return new SeparateConstOffsetFromGEPLegacyPass (LowerGEP);
509	}
510
511	bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
512	bool ZeroExtended,
513	BinaryOperator *BO,
514	bool NonNegative) {
515	// We only consider ADD, SUB and OR, because a non-zero constant found in
516	// expressions composed of these operations can be easily hoisted as a
517	// constant offset by reassociation.
518	if (BO->getOpcode() != Instruction::Add &&
519	BO->getOpcode() != Instruction::Sub &&
520	BO->getOpcode() != Instruction::Or) {
521	return false;
522	}
523
524	Value LHS = BO->getOperand(i_nocapture: `0`), RHS = BO->getOperand(i_nocapture: `1`);
525	// Do not trace into "or" unless it is equivalent to "add".
526	// This is the case if the or's disjoint flag is set.
527	if (BO->getOpcode() == Instruction::Or &&
528	!cast<PossiblyDisjointInst>(Val: BO)->isDisjoint())
529	return false;
530
531	// FIXME: We don't currently support constants from the RHS of subs,
532	// when we are zero-extended, because we need a way to zero-extended
533	// them before they are negated.
534	if (ZeroExtended && !SignExtended && BO->getOpcode() == Instruction::Sub)
535	return false;
536
537	// In addition, tracing into BO requires that its surrounding s/zext (if
538	// any) is distributable to both operands.
539	//
540	// Suppose BO = A op B.
541	// SignExtended \| ZeroExtended \| Distributable?
542	// --------------+--------------+----------------------------------
543	// 0 \| 0 \| true because no s/zext exists
544	// 0 \| 1 \| zext(BO) == zext(A) op zext(B)
545	// 1 \| 0 \| sext(BO) == sext(A) op sext(B)
546	// 1 \| 1 \| zext(sext(BO)) ==
547	// \| \| zext(sext(A)) op zext(sext(B))
548	if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
549	// If a + b >= 0 and (a >= 0 or b >= 0), then
550	// sext(a + b) = sext(a) + sext(b)
551	// even if the addition is not marked nsw.
552	//
553	// Leveraging this invariant, we can trace into an sext'ed inbound GEP
554	// index if the constant offset is non-negative.
555	//
556	// Verified in @sext_add in split-gep.ll.
557	if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(Val: LHS)) {
558	if (!ConstLHS->isNegative())
559	return true;
560	}
561	if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(Val: RHS)) {
562	if (!ConstRHS->isNegative())
563	return true;
564	}
565	}
566
567	// sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
568	// zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
569	if (BO->getOpcode() == Instruction::Add \|\|
570	BO->getOpcode() == Instruction::Sub) {
571	if (SignExtended && !BO->hasNoSignedWrap())
572	return false;
573	if (ZeroExtended && !BO->hasNoUnsignedWrap())
574	return false;
575	}
576
577	return true;
578	}
579
580	APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
581	bool SignExtended,
582	bool ZeroExtended) {
583	// Save off the current height of the chain, in case we need to restore it.
584	size_t ChainLength = UserChain.size();
585
586	// BO being non-negative does not shed light on whether its operands are
587	// non-negative. Clear the NonNegative flag here.
588	APInt ConstantOffset = find(V: BO->getOperand(i_nocapture: `0`), SignExtended, ZeroExtended,
589	/ NonNegative / false);
590	// If we found a constant offset in the left operand, stop and return that.
591	// This shortcut might cause us to miss opportunities of combining the
592	// constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
593	// However, such cases are probably already handled by -instcombine,
594	// given this pass runs after the standard optimizations.
595	if (ConstantOffset != `0`) return ConstantOffset;
596
597	// Reset the chain back to where it was when we started exploring this node,
598	// since visiting the LHS didn't pan out.
599	UserChain.resize(N: ChainLength);
600
601	ConstantOffset = find(V: BO->getOperand(i_nocapture: `1`), SignExtended, ZeroExtended,
602	/ NonNegative / false);
603	// If U is a sub operator, negate the constant offset found in the right
604	// operand.
605	if (BO->getOpcode() == Instruction::Sub)
606	ConstantOffset = -ConstantOffset;
607
608	// If RHS wasn't a suitable candidate either, reset the chain again.
609	if (ConstantOffset == `0`)
610	UserChain.resize(N: ChainLength);
611
612	return ConstantOffset;
613	}
614
615	APInt ConstantOffsetExtractor::find(Value V, bool* SignExtended,
616	bool ZeroExtended, bool NonNegative) {
617	// TODO(jingyue): We could trace into integer/pointer casts, such as
618	// inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
619	// integers because it gives good enough results for our benchmarks.
620	unsigned BitWidth = cast<IntegerType>(Val: V->getType())->getBitWidth();
621
622	// We cannot do much with Values that are not a User, such as an Argument.
623	User *U = dyn_cast<User>(Val: V);
624	if (U == nullptr) return APInt (BitWidth, `0`);
625
626	APInt ConstantOffset(BitWidth, `0`);
627	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: V)) {
628	// Hooray, we found it!
629	ConstantOffset = CI->getValue();
630	} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Val: V)) {
631	// Trace into subexpressions for more hoisting opportunities.
632	if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
633	ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
634	} else if (isa<TruncInst>(Val: V)) {
635	ConstantOffset =
636	find(V: U->getOperand(i: `0`), SignExtended, ZeroExtended, NonNegative)
637	.trunc(width: BitWidth);
638	} else if (isa<SExtInst>(Val: V)) {
639	ConstantOffset = find(V: U->getOperand(i: `0`), / SignExtended / true,
640	ZeroExtended, NonNegative).sext(width: BitWidth);
641	} else if (isa<ZExtInst>(Val: V)) {
642	// As an optimization, we can clear the SignExtended flag because
643	// sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
644	//
645	// Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
646	ConstantOffset =
647	find(V: U->getOperand(i: `0`), / SignExtended / false,
648	/ ZeroExtended / true, / NonNegative / false).zext(width: BitWidth);
649	}
650
651	// If we found a non-zero constant offset, add it to the path for
652	// rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
653	// help this optimization.
654	if (ConstantOffset != `0`)
655	UserChain.push_back(Elt: U);
656	return ConstantOffset;
657	}
658
659	Value ConstantOffsetExtractor::applyExts(Value V) {
660	Value *Current = V;
661	// ExtInsts is built in the use-def order. Therefore, we apply them to V
662	// in the reversed order.
663	for (CastInst *I : llvm::reverse(C&: ExtInsts)) {
664	if (Constant *C = dyn_cast<Constant>(Val: Current)) {
665	// Try to constant fold the cast.
666	Current = ConstantFoldCastOperand(Opcode: I->getOpcode(), C, DestTy: I->getType(), DL);
667	if (Current)
668	continue;
669	}
670
671	Instruction *Ext = I->clone();
672	Ext->setOperand(i: `0`, Val: Current);
673	Ext->insertBefore(BB&: *IP ->getParent(), InsertPos: IP);
674	Current = Ext;
675	}
676	return Current;
677	}
678
679	Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
680	distributeExtsAndCloneChain(ChainIndex: UserChain.size() - `1`);
681	// Remove all nullptrs (used to be s/zext) from UserChain.
682	unsigned NewSize = `0`;
683	for (User *I : UserChain) {
684	if (I != nullptr) {
685	UserChain [NewSize] = I;
686	NewSize++;
687	}
688	}
689	UserChain.resize(N: NewSize);
690	return removeConstOffset(ChainIndex: UserChain.size() - `1`);
691	}
692
693	Value *
694	ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
695	User *U = UserChain [ChainIndex];
696	if (ChainIndex == `0`) {
697	assert(isa<ConstantInt>(U));
698	// If U is a ConstantInt, applyExts will return a ConstantInt as well.
699	return UserChain [ChainIndex] = cast<ConstantInt>(Val: applyExts(V: U));
700	}
701
702	if (CastInst *Cast = dyn_cast<CastInst>(Val: U)) {
703	assert(
704	(isa<SExtInst>(Cast) \|\| isa<ZExtInst>(Cast) \|\| isa<TruncInst>(Cast)) &&
705	"Only following instructions can be traced: sext, zext & trunc");
706	ExtInsts.push_back(Elt: Cast);
707	UserChain [ChainIndex] = nullptr;
708	return distributeExtsAndCloneChain(ChainIndex: ChainIndex - `1`);
709	}
710
711	// Function find only trace into BinaryOperator and CastInst.
712	BinaryOperator *BO = cast<BinaryOperator>(Val: U);
713	// OpNo = which operand of BO is UserChain[ChainIndex - 1]
714	unsigned OpNo = (BO->getOperand(i_nocapture: `0`) == UserChain [ChainIndex - `1`] ? `0` : `1`);
715	Value *TheOther = applyExts(V: BO->getOperand(i_nocapture: `1` - OpNo));
716	Value *NextInChain = distributeExtsAndCloneChain(ChainIndex: ChainIndex - `1`);
717
718	BinaryOperator NewBO = nullptr*;
719	if (OpNo == `0`) {
720	NewBO = BinaryOperator::Create(Op: BO->getOpcode(), S1: NextInChain, S2: TheOther,
721	Name: BO->getName(), InsertBefore: IP);
722	} else {
723	NewBO = BinaryOperator::Create(Op: BO->getOpcode(), S1: TheOther, S2: NextInChain,
724	Name: BO->getName(), InsertBefore: IP);
725	}
726	return UserChain [ChainIndex] = NewBO;
727	}
728
729	Value ConstantOffsetExtractor::removeConstOffset(unsigned* ChainIndex) {
730	if (ChainIndex == `0`) {
731	assert(isa<ConstantInt>(UserChain[ChainIndex]));
732	return ConstantInt::getNullValue(Ty: UserChain [ChainIndex]->getType());
733	}
734
735	BinaryOperator *BO = cast<BinaryOperator>(Val: UserChain [ChainIndex]);
736	assert((BO->use_empty() \|\| BO->hasOneUse()) &&
737	"distributeExtsAndCloneChain clones each BinaryOperator in "
738	"UserChain, so no one should be used more than "
739	"once");
740
741	unsigned OpNo = (BO->getOperand(i_nocapture: `0`) == UserChain [ChainIndex - `1`] ? `0` : `1`);
742	assert(BO->getOperand(OpNo) == UserChain[ChainIndex - `1`]);
743	Value *NextInChain = removeConstOffset(ChainIndex: ChainIndex - `1`);
744	Value *TheOther = BO->getOperand(i_nocapture: `1` - OpNo);
745
746	// If NextInChain is 0 and not the LHS of a sub, we can simplify the
747	// sub-expression to be just TheOther.
748	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: NextInChain)) {
749	if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == `0`))
750	return TheOther;
751	}
752
753	BinaryOperator::BinaryOps NewOp = BO->getOpcode();
754	if (BO->getOpcode() == Instruction::Or) {
755	// Rebuild "or" as "add", because "or" may be invalid for the new
756	// expression.
757	//
758	// For instance, given
759	// a \| (b + 5) where a and b + 5 have no common bits,
760	// we can extract 5 as the constant offset.
761	//
762	// However, reusing the "or" in the new index would give us
763	// (a \| b) + 5
764	// which does not equal a \| (b + 5).
765	//
766	// Replacing the "or" with "add" is fine, because
767	// a \| (b + 5) = a + (b + 5) = (a + b) + 5
768	NewOp = Instruction::Add;
769	}
770
771	BinaryOperator *NewBO;
772	if (OpNo == `0`) {
773	NewBO = BinaryOperator::Create(Op: NewOp, S1: NextInChain, S2: TheOther, Name: "", InsertBefore: IP);
774	} else {
775	NewBO = BinaryOperator::Create(Op: NewOp, S1: TheOther, S2: NextInChain, Name: "", InsertBefore: IP);
776	}
777	NewBO->takeName(V: BO);
778	return NewBO;
779	}
780
781	Value ConstantOffsetExtractor::Extract(Value Idx, GetElementPtrInst *GEP,
782	User *&UserChainTail) {
783	ConstantOffsetExtractor Extractor(GEP->getIterator());
784	// Find a non-zero constant offset first.
785	APInt ConstantOffset =
786	Extractor.find(V: Idx, / SignExtended / false, / ZeroExtended / false,
787	NonNegative: GEP->isInBounds());
788	if (ConstantOffset == `0`) {
789	UserChainTail = nullptr;
790	return nullptr;
791	}
792	// Separates the constant offset from the GEP index.
793	Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
794	UserChainTail = Extractor.UserChain.back();
795	return IdxWithoutConstOffset;
796	}
797
798	int64_t ConstantOffsetExtractor::Find(Value Idx, GetElementPtrInst GEP) {
799	// If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
800	return ConstantOffsetExtractor (GEP->getIterator())
801	.find(V: Idx, / SignExtended / false, / ZeroExtended / false,
802	NonNegative: GEP->isInBounds())
803	.getSExtValue();
804	}
805
806	bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize(
807	GetElementPtrInst *GEP) {
808	bool Changed = false;
809	Type *PtrIdxTy = DL->getIndexType(PtrTy: GEP->getType());
810	gep_type_iterator GTI = gep_type_begin(GEP: *GEP);
811	for (User::op_iterator I = GEP->op_begin() + `1`, E = GEP->op_end();
812	I != E; ++I, ++GTI) {
813	// Skip struct member indices which must be i32.
814	if (GTI.isSequential()) {
815	if ((*I)->getType() != PtrIdxTy) {
816	I = CastInst::CreateIntegerCast(S: I, Ty: PtrIdxTy, isSigned: true, Name: "idxprom",
817	InsertBefore: GEP->getIterator());
818	Changed = true;
819	}
820	}
821	}
822	return Changed;
823	}
824
825	int64_t
826	SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
827	bool &NeedsExtraction) {
828	NeedsExtraction = false;
829	int64_t AccumulativeByteOffset = `0`;
830	gep_type_iterator GTI = gep_type_begin(GEP: *GEP);
831	for (unsigned I = `1`, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
832	if (GTI.isSequential()) {
833	// Constant offsets of scalable types are not really constant.
834	if (GTI.getIndexedType()->isScalableTy())
835	continue;
836
837	// Tries to extract a constant offset from this GEP index.
838	int64_t ConstantOffset =
839	ConstantOffsetExtractor::Find(Idx: GEP->getOperand(i_nocapture: I), GEP);
840	if (ConstantOffset != `0`) {
841	NeedsExtraction = true;
842	// A GEP may have multiple indices. We accumulate the extracted
843	// constant offset to a byte offset, and later offset the remainder of
844	// the original GEP with this byte offset.
845	AccumulativeByteOffset +=
846	ConstantOffset * GTI.getSequentialElementStride(DL: *DL);
847	}
848	} else if (LowerGEP) {
849	StructType *StTy = GTI.getStructType();
850	uint64_t Field = cast<ConstantInt>(Val: GEP->getOperand(i_nocapture: I))->getZExtValue();
851	// Skip field 0 as the offset is always 0.
852	if (Field != `0`) {
853	NeedsExtraction = true;
854	AccumulativeByteOffset +=
855	DL->getStructLayout(Ty: StTy)->getElementOffset(Idx: Field);
856	}
857	}
858	}
859	return AccumulativeByteOffset;
860	}
861
862	void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
863	GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
864	IRBuilder<> Builder(Variadic);
865	Type *PtrIndexTy = DL->getIndexType(PtrTy: Variadic->getType());
866
867	Value *ResultPtr = Variadic->getOperand(i_nocapture: `0`);
868	Loop *L = LI->getLoopFor(BB: Variadic->getParent());
869	// Check if the base is not loop invariant or used more than once.
870	bool isSwapCandidate =
871	L && L->isLoopInvariant(V: ResultPtr) &&
872	!hasMoreThanOneUseInLoop(v: ResultPtr, L);
873	Value FirstResult = nullptr*;
874
875	gep_type_iterator GTI = gep_type_begin(GEP: *Variadic);
876	// Create an ugly GEP for each sequential index. We don't create GEPs for
877	// structure indices, as they are accumulated in the constant offset index.
878	for (unsigned I = `1`, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
879	if (GTI.isSequential()) {
880	Value *Idx = Variadic->getOperand(i_nocapture: I);
881	// Skip zero indices.
882	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Idx))
883	if (CI->isZero())
884	continue;
885
886	APInt ElementSize = APInt (PtrIndexTy->getIntegerBitWidth(),
887	GTI.getSequentialElementStride(DL: *DL));
888	// Scale the index by element size.
889	if (ElementSize != `1`) {
890	if (ElementSize.isPowerOf2()) {
891	Idx = Builder.CreateShl(
892	LHS: Idx, RHS: ConstantInt::get(Ty: PtrIndexTy, V: ElementSize.logBase2()));
893	} else {
894	Idx =
895	Builder.CreateMul(LHS: Idx, RHS: ConstantInt::get(Ty: PtrIndexTy, V: ElementSize));
896	}
897	}
898	// Create an ugly GEP with a single index for each index.
899	ResultPtr = Builder.CreatePtrAdd(Ptr: ResultPtr, Offset: Idx, Name: "uglygep");
900	if (FirstResult == nullptr)
901	FirstResult = ResultPtr;
902	}
903	}
904
905	// Create a GEP with the constant offset index.
906	if (AccumulativeByteOffset != `0`) {
907	Value *Offset = ConstantInt::get(Ty: PtrIndexTy, V: AccumulativeByteOffset);
908	ResultPtr = Builder.CreatePtrAdd(Ptr: ResultPtr, Offset, Name: "uglygep");
909	} else
910	isSwapCandidate = false;
911
912	// If we created a GEP with constant index, and the base is loop invariant,
913	// then we swap the first one with it, so LICM can move constant GEP out
914	// later.
915	auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(Val: FirstResult);
916	auto *SecondGEP = dyn_cast<GetElementPtrInst>(Val: ResultPtr);
917	if (isSwapCandidate && isLegalToSwapOperand(First: FirstGEP, Second: SecondGEP, CurLoop: L))
918	swapGEPOperand(First: FirstGEP, Second: SecondGEP);
919
920	Variadic->replaceAllUsesWith(V: ResultPtr);
921	Variadic->eraseFromParent();
922	}
923
924	void
925	SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
926	int64_t AccumulativeByteOffset) {
927	IRBuilder<> Builder(Variadic);
928	Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
929	assert(IntPtrTy == DL->getIndexType(Variadic->getType()) &&
930	"Pointer type must match index type for arithmetic-based lowering of "
931	"split GEPs");
932
933	Value *ResultPtr = Builder.CreatePtrToInt(V: Variadic->getOperand(i_nocapture: `0`), DestTy: IntPtrTy);
934	gep_type_iterator GTI = gep_type_begin(GEP: *Variadic);
935	// Create ADD/SHL/MUL arithmetic operations for each sequential indices. We
936	// don't create arithmetics for structure indices, as they are accumulated
937	// in the constant offset index.
938	for (unsigned I = `1`, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
939	if (GTI.isSequential()) {
940	Value *Idx = Variadic->getOperand(i_nocapture: I);
941	// Skip zero indices.
942	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Idx))
943	if (CI->isZero())
944	continue;
945
946	APInt ElementSize = APInt (IntPtrTy->getIntegerBitWidth(),
947	GTI.getSequentialElementStride(DL: *DL));
948	// Scale the index by element size.
949	if (ElementSize != `1`) {
950	if (ElementSize.isPowerOf2()) {
951	Idx = Builder.CreateShl(
952	LHS: Idx, RHS: ConstantInt::get(Ty: IntPtrTy, V: ElementSize.logBase2()));
953	} else {
954	Idx = Builder.CreateMul(LHS: Idx, RHS: ConstantInt::get(Ty: IntPtrTy, V: ElementSize));
955	}
956	}
957	// Create an ADD for each index.
958	ResultPtr = Builder.CreateAdd(LHS: ResultPtr, RHS: Idx);
959	}
960	}
961
962	// Create an ADD for the constant offset index.
963	if (AccumulativeByteOffset != `0`) {
964	ResultPtr = Builder.CreateAdd(
965	LHS: ResultPtr, RHS: ConstantInt::get(Ty: IntPtrTy, V: AccumulativeByteOffset));
966	}
967
968	ResultPtr = Builder.CreateIntToPtr(V: ResultPtr, DestTy: Variadic->getType());
969	Variadic->replaceAllUsesWith(V: ResultPtr);
970	Variadic->eraseFromParent();
971	}
972
973	bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
974	TargetTransformInfo &TTI) {
975	auto PtrGEP = dyn_cast<GetElementPtrInst>(Val: GEP->getPointerOperand());
976	if (!PtrGEP)
977	return false;
978
979	bool NestedNeedsExtraction;
980	int64_t NestedByteOffset =
981	accumulateByteOffset(GEP: PtrGEP, NeedsExtraction&: NestedNeedsExtraction);
982	if (!NestedNeedsExtraction)
983	return false;
984
985	unsigned AddrSpace = PtrGEP->getPointerAddressSpace();
986	if (!TTI.isLegalAddressingMode(Ty: GEP->getResultElementType(),
987	/BaseGV=/nullptr, BaseOffset: NestedByteOffset,
988	/HasBaseReg=/true, /Scale=/`0`, AddrSpace))
989	return false;
990
991	bool GEPInBounds = GEP->isInBounds();
992	bool PtrGEPInBounds = PtrGEP->isInBounds();
993	bool IsChainInBounds = GEPInBounds && PtrGEPInBounds;
994	if (IsChainInBounds) {
995	auto IsKnownNonNegative = [this](Value *V) {
996	return isKnownNonNegative(V, SQ: *DL);
997	};
998	IsChainInBounds &= all_of(Range: GEP->indices(), P: IsKnownNonNegative);
999	if (IsChainInBounds)
1000	IsChainInBounds &= all_of(Range: PtrGEP->indices(), P: IsKnownNonNegative);
1001	}
1002
1003	IRBuilder<> Builder(GEP);
1004	// For trivial GEP chains, we can swap the indices.
1005	Value *NewSrc = Builder.CreateGEP(
1006	Ty: GEP->getSourceElementType(), Ptr: PtrGEP->getPointerOperand(),
1007	IdxList: SmallVector<Value *, `4`>(GEP->indices()), Name: "", NW: IsChainInBounds);
1008	Value *NewGEP = Builder.CreateGEP(Ty: PtrGEP->getSourceElementType(), Ptr: NewSrc,
1009	IdxList: SmallVector<Value *, `4`>(PtrGEP->indices()),
1010	Name: "", NW: IsChainInBounds);
1011	GEP->replaceAllUsesWith(V: NewGEP);
1012	RecursivelyDeleteTriviallyDeadInstructions(V: GEP);
1013	return true;
1014	}
1015
1016	bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
1017	// Skip vector GEPs.
1018	if (GEP->getType()->isVectorTy())
1019	return false;
1020
1021	// The backend can already nicely handle the case where all indices are
1022	// constant.
1023	if (GEP->hasAllConstantIndices())
1024	return false;
1025
1026	bool Changed = canonicalizeArrayIndicesToIndexSize(GEP);
1027
1028	bool NeedsExtraction;
1029	int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
1030
1031	TargetTransformInfo &TTI = GetTTI (*GEP->getFunction());
1032
1033	if (!NeedsExtraction) {
1034	Changed \|= reorderGEP(GEP, TTI);
1035	return Changed;
1036	}
1037
1038	// If LowerGEP is disabled, before really splitting the GEP, check whether the
1039	// backend supports the addressing mode we are about to produce. If no, this
1040	// splitting probably won't be beneficial.
1041	// If LowerGEP is enabled, even the extracted constant offset can not match
1042	// the addressing mode, we can still do optimizations to other lowered parts
1043	// of variable indices. Therefore, we don't check for addressing modes in that
1044	// case.
1045	if (!LowerGEP) {
1046	unsigned AddrSpace = GEP->getPointerAddressSpace();
1047	if (!TTI.isLegalAddressingMode(Ty: GEP->getResultElementType(),
1048	/BaseGV=/nullptr, BaseOffset: AccumulativeByteOffset,
1049	/HasBaseReg=/true, /Scale=/`0`,
1050	AddrSpace)) {
1051	return Changed;
1052	}
1053	}
1054
1055	// Remove the constant offset in each sequential index. The resultant GEP
1056	// computes the variadic base.
1057	// Notice that we don't remove struct field indices here. If LowerGEP is
1058	// disabled, a structure index is not accumulated and we still use the old
1059	// one. If LowerGEP is enabled, a structure index is accumulated in the
1060	// constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
1061	// handle the constant offset and won't need a new structure index.
1062	gep_type_iterator GTI = gep_type_begin(GEP: *GEP);
1063	for (unsigned I = `1`, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
1064	if (GTI.isSequential()) {
1065	// Constant offsets of scalable types are not really constant.
1066	if (GTI.getIndexedType()->isScalableTy())
1067	continue;
1068
1069	// Splits this GEP index into a variadic part and a constant offset, and
1070	// uses the variadic part as the new index.
1071	Value *OldIdx = GEP->getOperand(i_nocapture: I);
1072	User *UserChainTail;
1073	Value *NewIdx =
1074	ConstantOffsetExtractor::Extract(Idx: OldIdx, GEP, UserChainTail);
1075	if (NewIdx != nullptr) {
1076	// Switches to the index with the constant offset removed.
1077	GEP->setOperand(i_nocapture: I, Val_nocapture: NewIdx);
1078	// After switching to the new index, we can garbage-collect UserChain
1079	// and the old index if they are not used.
1080	RecursivelyDeleteTriviallyDeadInstructions(V: UserChainTail);
1081	RecursivelyDeleteTriviallyDeadInstructions(V: OldIdx);
1082	}
1083	}
1084	}
1085
1086	// Clear the inbounds attribute because the new index may be off-bound.
1087	// e.g.,
1088	//
1089	// b = add i64 a, 5
1090	// addr = gep inbounds float, float p, i64 b*
1091	//
1092	// is transformed to:
1093	//
1094	// addr2 = gep float, float p, i64 a ; inbounds removed*
1095	// addr = gep inbounds float, float addr2, i64 5*
1096	//
1097	// If a is -4, although the old index b is in bounds, the new index a is
1098	// off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
1099	// inbounds keyword is not present, the offsets are added to the base
1100	// address with silently-wrapping two's complement arithmetic".
1101	// Therefore, the final code will be a semantically equivalent.
1102	//
1103	// TODO(jingyue): do some range analysis to keep as many inbounds as
1104	// possible. GEPs with inbounds are more friendly to alias analysis.
1105	// TODO(gep_nowrap): Preserve nuw at least.
1106	bool GEPWasInBounds = GEP->isInBounds();
1107	GEP->setNoWrapFlags(GEPNoWrapFlags::none());
1108
1109	// Lowers a GEP to either GEPs with a single index or arithmetic operations.
1110	if (LowerGEP) {
1111	// As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
1112	// arithmetic operations if the target uses alias analysis in codegen.
1113	// Additionally, pointers that aren't integral (and so can't be safely
1114	// converted to integers) or those whose offset size is different from their
1115	// pointer size (which means that doing integer arithmetic on them could
1116	// affect that data) can't be lowered in this way.
1117	unsigned AddrSpace = GEP->getPointerAddressSpace();
1118	bool PointerHasExtraData = DL->getPointerSizeInBits(AS: AddrSpace) !=
1119	DL->getIndexSizeInBits(AS: AddrSpace);
1120	if (TTI.useAA() \|\| DL->isNonIntegralAddressSpace(AddrSpace) \|\|
1121	PointerHasExtraData)
1122	lowerToSingleIndexGEPs(Variadic: GEP, AccumulativeByteOffset);
1123	else
1124	lowerToArithmetics(Variadic: GEP, AccumulativeByteOffset);
1125	return true;
1126	}
1127
1128	// No need to create another GEP if the accumulative byte offset is 0.
1129	if (AccumulativeByteOffset == `0`)
1130	return true;
1131
1132	// Offsets the base with the accumulative byte offset.
1133	//
1134	// %gep ; the base
1135	// ... %gep ...
1136	//
1137	// => add the offset
1138	//
1139	// %gep2 ; clone of %gep
1140	// %new.gep = gep i8, %gep2, %offset
1141	// %gep ; will be removed
1142	// ... %gep ...
1143	//
1144	// => replace all uses of %gep with %new.gep and remove %gep
1145	//
1146	// %gep2 ; clone of %gep
1147	// %new.gep = gep i8, %gep2, %offset
1148	// ... %new.gep ...
1149	Instruction *NewGEP = GEP->clone();
1150	NewGEP->insertBefore(InsertPos: GEP);
1151
1152	Type *PtrIdxTy = DL->getIndexType(PtrTy: GEP->getType());
1153	IRBuilder<> Builder(GEP);
1154	NewGEP = cast<Instruction>(Val: Builder.CreatePtrAdd(
1155	Ptr: NewGEP, Offset: ConstantInt::get(Ty: PtrIdxTy, V: AccumulativeByteOffset, IsSigned: true),
1156	Name: GEP->getName(), NW: GEPWasInBounds));
1157	NewGEP->copyMetadata(SrcInst: *GEP);
1158
1159	GEP->replaceAllUsesWith(V: NewGEP);
1160	GEP->eraseFromParent();
1161
1162	return true;
1163	}
1164
1165	bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
1166	if (skipFunction(F))
1167	return false;
1168	auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1169	auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1170	auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1171	auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
1172	return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1173	};
1174	SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
1175	return Impl.run(F);
1176	}
1177
1178	bool SeparateConstOffsetFromGEP::run(Function &F) {
1179	if (DisableSeparateConstOffsetFromGEP)
1180	return false;
1181
1182	DL = &F.getDataLayout();
1183	bool Changed = false;
1184	for (BasicBlock &B : F) {
1185	if (!DT->isReachableFromEntry(A: &B))
1186	continue;
1187
1188	for (Instruction &I : llvm::make_early_inc_range(Range&: B))
1189	if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: &I))
1190	Changed \|= splitGEP(GEP);
1191	// No need to split GEP ConstantExprs because all its indices are constant
1192	// already.
1193	}
1194
1195	Changed \|= reuniteExts(F);
1196
1197	if (VerifyNoDeadCode)
1198	verifyNoDeadCode(F);
1199
1200	return Changed;
1201	}
1202
1203	Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
1204	ExprKey Key, Instruction *Dominatee,
1205	DenseMap<ExprKey, SmallVector<Instruction *, `2`>> &DominatingExprs) {
1206	auto Pos = DominatingExprs.find(Val: Key);
1207	if (Pos == DominatingExprs.end())
1208	return nullptr;
1209
1210	auto &Candidates = Pos ->second;
1211	// Because we process the basic blocks in pre-order of the dominator tree, a
1212	// candidate that doesn't dominate the current instruction won't dominate any
1213	// future instruction either. Therefore, we pop it out of the stack. This
1214	// optimization makes the algorithm O(n).
1215	while (!Candidates.empty()) {
1216	Instruction *Candidate = Candidates.back();
1217	if (DT->dominates(Def: Candidate, User: Dominatee))
1218	return Candidate;
1219	Candidates.pop_back();
1220	}
1221	return nullptr;
1222	}
1223
1224	bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
1225	if (!I->getType()->isIntOrIntVectorTy())
1226	return false;
1227
1228	// Dom: LHS+RHS
1229	// I: sext(LHS)+sext(RHS)
1230	// If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
1231	// TODO: handle zext
1232	Value LHS = nullptr, RHS = nullptr;
1233	if (match(V: I, P: m_Add(L: m_SExt(Op: m_Value(V&: LHS)), R: m_SExt(Op: m_Value(V&: RHS))))) {
1234	if (LHS->getType() == RHS->getType()) {
1235	ExprKey Key = createNormalizedCommutablePair(A: LHS, B: RHS);
1236	if (auto *Dom = findClosestMatchingDominator(Key, Dominatee: I, DominatingExprs&: DominatingAdds)) {
1237	Instruction *NewSExt =
1238	new SExtInst (Dom, I->getType(), "", I->getIterator());
1239	NewSExt->takeName(V: I);
1240	I->replaceAllUsesWith(V: NewSExt);
1241	NewSExt->setDebugLoc(I->getDebugLoc());
1242	RecursivelyDeleteTriviallyDeadInstructions(V: I);
1243	return true;
1244	}
1245	}
1246	} else if (match(V: I, P: m_Sub(L: m_SExt(Op: m_Value(V&: LHS)), R: m_SExt(Op: m_Value(V&: RHS))))) {
1247	if (LHS->getType() == RHS->getType()) {
1248	if (auto *Dom =
1249	findClosestMatchingDominator(Key: {LHS, RHS}, Dominatee: I, DominatingExprs&: DominatingSubs)) {
1250	Instruction *NewSExt =
1251	new SExtInst (Dom, I->getType(), "", I->getIterator());
1252	NewSExt->takeName(V: I);
1253	I->replaceAllUsesWith(V: NewSExt);
1254	NewSExt->setDebugLoc(I->getDebugLoc());
1255	RecursivelyDeleteTriviallyDeadInstructions(V: I);
1256	return true;
1257	}
1258	}
1259	}
1260
1261	// Add I to DominatingExprs if it's an add/sub that can't sign overflow.
1262	if (match(V: I, P: m_NSWAdd(L: m_Value(V&: LHS), R: m_Value(V&: RHS)))) {
1263	if (programUndefinedIfPoison(Inst: I)) {
1264	ExprKey Key = createNormalizedCommutablePair(A: LHS, B: RHS);
1265	DominatingAdds [Key].push_back(Elt: I);
1266	}
1267	} else if (match(V: I, P: m_NSWSub(L: m_Value(V&: LHS), R: m_Value(V&: RHS)))) {
1268	if (programUndefinedIfPoison(Inst: I))
1269	DominatingSubs [{LHS, RHS}].push_back(Elt: I);
1270	}
1271	return false;
1272	}
1273
1274	bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
1275	bool Changed = false;
1276	DominatingAdds.clear();
1277	DominatingSubs.clear();
1278	for (const auto Node : depth_first(G: DT)) {
1279	BasicBlock *BB = Node->getBlock();
1280	for (Instruction &I : llvm::make_early_inc_range(Range&: *BB))
1281	Changed \|= reuniteExts(I: &I);
1282	}
1283	return Changed;
1284	}
1285
1286	void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
1287	for (BasicBlock &B : F) {
1288	for (Instruction &I : B) {
1289	if (isInstructionTriviallyDead(I: &I)) {
1290	std::string ErrMessage;
1291	raw_string_ostream RSO(ErrMessage);
1292	RSO << "Dead instruction detected!\n" << I << "\n";
1293	llvm_unreachable(RSO.str().c_str());
1294	}
1295	}
1296	}
1297	}
1298
1299	bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
1300	GetElementPtrInst FirstGEP, GetElementPtrInst SecondGEP, Loop *CurLoop) {
1301	if (!FirstGEP \|\| !FirstGEP->hasOneUse())
1302	return false;
1303
1304	if (!SecondGEP \|\| FirstGEP->getParent() != SecondGEP->getParent())
1305	return false;
1306
1307	if (FirstGEP == SecondGEP)
1308	return false;
1309
1310	unsigned FirstNum = FirstGEP->getNumOperands();
1311	unsigned SecondNum = SecondGEP->getNumOperands();
1312	// Give up if the number of operands are not 2.
1313	if (FirstNum != SecondNum \|\| FirstNum != `2`)
1314	return false;
1315
1316	Value *FirstBase = FirstGEP->getOperand(i_nocapture: `0`);
1317	Value *SecondBase = SecondGEP->getOperand(i_nocapture: `0`);
1318	Value *FirstOffset = FirstGEP->getOperand(i_nocapture: `1`);
1319	// Give up if the index of the first GEP is loop invariant.
1320	if (CurLoop->isLoopInvariant(V: FirstOffset))
1321	return false;
1322
1323	// Give up if base doesn't have same type.
1324	if (FirstBase->getType() != SecondBase->getType())
1325	return false;
1326
1327	Instruction *FirstOffsetDef = dyn_cast<Instruction>(Val: FirstOffset);
1328
1329	// Check if the second operand of first GEP has constant coefficient.
1330	// For an example, for the following code, we won't gain anything by
1331	// hoisting the second GEP out because the second GEP can be folded away.
1332	// %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
1333	// %67 = shl i64 %scevgep.sum.ur159, 2
1334	// %uglygep160 = getelementptr i8 %65, i64 %67*
1335	// %uglygep161 = getelementptr i8 %uglygep160, i64 -1024*
1336
1337	// Skip constant shift instruction which may be generated by Splitting GEPs.
1338	if (FirstOffsetDef && FirstOffsetDef->isShift() &&
1339	isa<ConstantInt>(Val: FirstOffsetDef->getOperand(i: `1`)))
1340	FirstOffsetDef = dyn_cast<Instruction>(Val: FirstOffsetDef->getOperand(i: `0`));
1341
1342	// Give up if FirstOffsetDef is an Add or Sub with constant.
1343	// Because it may not profitable at all due to constant folding.
1344	if (FirstOffsetDef)
1345	if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Val: FirstOffsetDef)) {
1346	unsigned opc = BO->getOpcode();
1347	if ((opc == Instruction::Add \|\| opc == Instruction::Sub) &&
1348	(isa<ConstantInt>(Val: BO->getOperand(i_nocapture: `0`)) \|\|
1349	isa<ConstantInt>(Val: BO->getOperand(i_nocapture: `1`))))
1350	return false;
1351	}
1352	return true;
1353	}
1354
1355	bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value V, Loop L) {
1356	int UsesInLoop = `0`;
1357	for (User *U : V->users()) {
1358	if (Instruction *User = dyn_cast<Instruction>(Val: U))
1359	if (L->contains(Inst: User))
1360	if (++UsesInLoop > `1`)
1361	return true;
1362	}
1363	return false;
1364	}
1365
1366	void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
1367	GetElementPtrInst *Second) {
1368	Value *Offset1 = First->getOperand(i_nocapture: `1`);
1369	Value *Offset2 = Second->getOperand(i_nocapture: `1`);
1370	First->setOperand(i_nocapture: `1`, Val_nocapture: Offset2);
1371	Second->setOperand(i_nocapture: `1`, Val_nocapture: Offset1);
1372
1373	// We changed p+o+c to p+c+o, p+c may not be inbound anymore.
1374	const DataLayout &DAL = First->getDataLayout();
1375	APInt Offset(DAL.getIndexSizeInBits(
1376	AS: cast<PointerType>(Val: First->getType())->getAddressSpace()),
1377	`0`);
1378	Value *NewBase =
1379	First->stripAndAccumulateInBoundsConstantOffsets(DL: DAL, Offset);
1380	uint64_t ObjectSize;
1381	if (!getObjectSize(Ptr: NewBase, Size&: ObjectSize, DL: DAL, TLI) \|\|
1382	Offset.ugt(RHS: ObjectSize)) {
1383	// TODO(gep_nowrap): Make flag preservation more precise.
1384	First->setNoWrapFlags(GEPNoWrapFlags::none());
1385	Second->setNoWrapFlags(GEPNoWrapFlags::none());
1386	} else
1387	First->setIsInBounds(true);
1388	}
1389
1390	void SeparateConstOffsetFromGEPPass::printPipeline(
1391	raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1392	static_cast<PassInfoMixin<SeparateConstOffsetFromGEPPass> >(this*)
1393	->printPipeline(OS, MapClassName2PassName);
1394	OS << `'<'`;
1395	if (LowerGEP)
1396	OS << "lower-gep";
1397	OS << `'>'`;
1398	}
1399
1400	PreservedAnalyses
1401	SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
1402	auto *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F);
1403	auto *LI = &AM.getResult<LoopAnalysis>(IR&: F);
1404	auto *TLI = &AM.getResult<TargetLibraryAnalysis>(IR&: F);
1405	auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
1406	return AM.getResult<TargetIRAnalysis>(IR&: F);
1407	};
1408	SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
1409	if (!Impl.run(F))
1410	return PreservedAnalyses::all();
1411	PreservedAnalyses PA;
1412	PA.preserveSet<CFGAnalyses>();
1413	return PA;
1414	}
1415

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp