SeparateConstOffsetFromGEP.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp]

1	//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Loop unrolling may create many similar GEPs for array accesses.
10	// e.g., a 2-level loop
11	//
12	// float a[32][32]; // global variable
13	//
14	// for (int i = 0; i < 2; ++i) {
15	// for (int j = 0; j < 2; ++j) {
16	// ...
17	// ... = a[x + i][y + j];
18	// ...
19	// }
20	// }
21	//
22	// will probably be unrolled to:
23	//
24	// gep %a, 0, %x, %y; load
25	// gep %a, 0, %x, %y + 1; load
26	// gep %a, 0, %x + 1, %y; load
27	// gep %a, 0, %x + 1, %y + 1; load
28	//
29	// LLVM's GVN does not use partial redundancy elimination yet, and is thus
30	// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
31	// significant slowdown in targets with limited addressing modes. For instance,
32	// because the PTX target does not support the reg+reg addressing mode, the
33	// NVPTX backend emits PTX code that literally computes the pointer address of
34	// each GEP, wasting tons of registers. It emits the following PTX for the
35	// first load and similar PTX for other loads.
36	//
37	// mov.u32 %r1, %x;
38	// mov.u32 %r2, %y;
39	// mul.wide.u32 %rl2, %r1, 128;
40	// mov.u64 %rl3, a;
41	// add.s64 %rl4, %rl3, %rl2;
42	// mul.wide.u32 %rl5, %r2, 4;
43	// add.s64 %rl6, %rl4, %rl5;
44	// ld.global.f32 %f1, [%rl6];
45	//
46	// To reduce the register pressure, the optimization implemented in this file
47	// merges the common part of a group of GEPs, so we can compute each pointer
48	// address by adding a simple offset to the common part, saving many registers.
49	//
50	// It works by splitting each GEP into a variadic base and a constant offset.
51	// The variadic base can be computed once and reused by multiple GEPs, and the
52	// constant offsets can be nicely folded into the reg+immediate addressing mode
53	// (supported by most targets) without using any extra register.
54	//
55	// For instance, we transform the four GEPs and four loads in the above example
56	// into:
57	//
58	// base = gep a, 0, x, y
59	// load base
60	// load base + 1 * sizeof(float)
61	// load base + 32 * sizeof(float)
62	// load base + 33 * sizeof(float)
63	//
64	// Given the transformed IR, a backend that supports the reg+immediate
65	// addressing mode can easily fold the pointer arithmetics into the loads. For
66	// example, the NVPTX backend can easily fold the pointer arithmetics into the
67	// ld.global.f32 instructions, and the resultant PTX uses much fewer registers.
68	//
69	// mov.u32 %r1, %tid.x;
70	// mov.u32 %r2, %tid.y;
71	// mul.wide.u32 %rl2, %r1, 128;
72	// mov.u64 %rl3, a;
73	// add.s64 %rl4, %rl3, %rl2;
74	// mul.wide.u32 %rl5, %r2, 4;
75	// add.s64 %rl6, %rl4, %rl5;
76	// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
77	// ld.global.f32 %f2, [%rl6+4]; // much better
78	// ld.global.f32 %f3, [%rl6+128]; // much better
79	// ld.global.f32 %f4, [%rl6+132]; // much better
80	//
81	// Another improvement enabled by the LowerGEP flag is to lower a GEP with
82	// multiple indices to either multiple GEPs with a single index or arithmetic
83	// operations (depending on whether the target uses alias analysis in codegen).
84	// Such transformation can have following benefits:
85	// (1) It can always extract constants in the indices of structure type.
86	// (2) After such Lowering, there are more optimization opportunities such as
87	// CSE, LICM and CGP.
88	//
89	// E.g. The following GEPs have multiple indices:
90	// BB1:
91	// %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3
92	// load %p
93	// ...
94	// BB2:
95	// %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j2, i32 2
96	// load %p2
97	// ...
98	//
99	// We can not do CSE to the common part related to index "i64 %i". Lowering
100	// GEPs can achieve such goals.
101	// If the target does not use alias analysis in codegen, this pass will
102	// lower a GEP with multiple indices into arithmetic operations:
103	// BB1:
104	// %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
105	// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
106	// %3 = add i64 %1, %2 ; CSE opportunity
107	// %4 = mul i64 %j1, length_of_struct
108	// %5 = add i64 %3, %4
109	// %6 = add i64 %5, struct_field_3 ; Constant offset
110	// %p = inttoptr i64 %6 to i32*
111	// load %p
112	// ...
113	// BB2:
114	// %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity
115	// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
116	// %9 = add i64 %7, %8 ; CSE opportunity
117	// %10 = mul i64 %j2, length_of_struct
118	// %11 = add i64 %9, %10
119	// %12 = add i64 %11, struct_field_2 ; Constant offset
120	// %p2 = inttoptr i64 %12 to i32*
121	// load %p2
122	// ...
123	//
124	// If the target uses alias analysis in codegen, this pass will lower a GEP
125	// with multiple indices into multiple GEPs with a single index:
126	// BB1:
127	// %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
128	// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
129	// %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity
130	// %4 = mul i64 %j1, length_of_struct
131	// %5 = getelementptr i8* %3, i64 %4
132	// %6 = getelementptr i8* %5, struct_field_3 ; Constant offset
133	// %p = bitcast i8* %6 to i32*
134	// load %p
135	// ...
136	// BB2:
137	// %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity
138	// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
139	// %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity
140	// %10 = mul i64 %j2, length_of_struct
141	// %11 = getelementptr i8* %9, i64 %10
142	// %12 = getelementptr i8* %11, struct_field_2 ; Constant offset
143	// %p2 = bitcast i8* %12 to i32*
144	// load %p2
145	// ...
146	//
147	// Lowering GEPs can also benefit other passes such as LICM and CGP.
148	// LICM (Loop Invariant Code Motion) can not hoist/sink a GEP of multiple
149	// indices if one of the indices is variant. If we lower such GEP into invariant
150	// parts and variant parts, LICM can hoist/sink those invariant parts.
151	// CGP (CodeGen Prepare) tries to sink address calculations that match the
152	// target's addressing modes. A GEP with multiple indices may not match and will
153	// not be sunk. If we lower such GEP into smaller parts, CGP may sink some of
154	// them. So we end up with a better addressing mode.
155	//
156	//===----------------------------------------------------------------------===//
157
158	#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
159	#include "llvm/ADT/APInt.h"
160	#include "llvm/ADT/DenseMap.h"
161	#include "llvm/ADT/DepthFirstIterator.h"
162	#include "llvm/ADT/SmallVector.h"
163	#include "llvm/Analysis/LoopInfo.h"
164	#include "llvm/Analysis/MemoryBuiltins.h"
165	#include "llvm/Analysis/TargetLibraryInfo.h"
166	#include "llvm/Analysis/TargetTransformInfo.h"
167	#include "llvm/Analysis/ValueTracking.h"
168	#include "llvm/IR/BasicBlock.h"
169	#include "llvm/IR/Constant.h"
170	#include "llvm/IR/Constants.h"
171	#include "llvm/IR/DataLayout.h"
172	#include "llvm/IR/DerivedTypes.h"
173	#include "llvm/IR/Dominators.h"
174	#include "llvm/IR/Function.h"
175	#include "llvm/IR/GetElementPtrTypeIterator.h"
176	#include "llvm/IR/IRBuilder.h"
177	#include "llvm/IR/InstrTypes.h"
178	#include "llvm/IR/Instruction.h"
179	#include "llvm/IR/Instructions.h"
180	#include "llvm/IR/Module.h"
181	#include "llvm/IR/PassManager.h"
182	#include "llvm/IR/PatternMatch.h"
183	#include "llvm/IR/Type.h"
184	#include "llvm/IR/User.h"
185	#include "llvm/IR/Value.h"
186	#include "llvm/InitializePasses.h"
187	#include "llvm/Pass.h"
188	#include "llvm/Support/Casting.h"
189	#include "llvm/Support/CommandLine.h"
190	#include "llvm/Support/ErrorHandling.h"
191	#include "llvm/Support/raw_ostream.h"
192	#include "llvm/Transforms/Scalar.h"
193	#include "llvm/Transforms/Utils/Local.h"
194	#include <cassert>
195	#include <cstdint>
196	#include <string>
197
198	using namespace llvm;
199	using namespace llvm::PatternMatch;
200
201	static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
202	"disable-separate-const-offset-from-gep", cl::init(Val: false),
203	cl::desc ("Do not separate the constant offset from a GEP instruction"),
204	cl::Hidden);
205
206	// Setting this flag may emit false positives when the input module already
207	// contains dead instructions. Therefore, we set it only in unit tests that are
208	// free of dead code.
209	static cl::opt<bool>
210	VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(Val: false),
211	cl::desc ("Verify this pass produces no dead code"),
212	cl::Hidden);
213
214	namespace {
215
216	/// A helper class for separating a constant offset from a GEP index.
217	///
218	/// In real programs, a GEP index may be more complicated than a simple addition
219	/// of something and a constant integer which can be trivially splitted. For
220	/// example, to split ((a << 3) \| 5) + b, we need to search deeper for the
221	/// constant offset, so that we can separate the index to (a << 3) + b and 5.
222	///
223	/// Therefore, this class looks into the expression that computes a given GEP
224	/// index, and tries to find a constant integer that can be hoisted to the
225	/// outermost level of the expression as an addition. Not every constant in an
226	/// expression can jump out. e.g., we cannot transform (b (a + 5)) to (b * a +*
227	/// 5); nor can we transform (3 (a + 5)) to (3 * a + 5), however in this case,*
228	/// -instcombine probably already optimized (3 (a + 5)) to (3 * a + 15).*
229	class ConstantOffsetExtractor {
230	public:
231	/// Extracts a constant offset from the given GEP index. It returns the
232	/// new index representing the remainder (equal to the original index minus
233	/// the constant offset), or nullptr if we cannot extract a constant offset.
234	/// \p Idx The given GEP index
235	/// \p GEP The given GEP
236	/// \p UserChainTail Outputs the tail of UserChain so that we can
237	/// garbage-collect unused instructions in UserChain.
238	/// \p PreservesNUW Outputs whether the extraction allows preserving the
239	/// GEP's nuw flag, if it has one.
240	static Value Extract(Value Idx, GetElementPtrInst *GEP,
241	User &UserChainTail, bool* &PreservesNUW);
242
243	/// Looks for a constant offset from the given GEP index without extracting
244	/// it. It returns the numeric value of the extracted constant offset (0 if
245	/// failed). The meaning of the arguments are the same as Extract.
246	static int64_t Find(Value Idx, GetElementPtrInst GEP);
247
248	private:
249	ConstantOffsetExtractor(BasicBlock::iterator InsertionPt)
250	: IP (InsertionPt), DL(InsertionPt ->getDataLayout()) {}
251
252	/// Searches the expression that computes V for a non-zero constant C s.t.
253	/// V can be reassociated into the form V' + C. If the searching is
254	/// successful, returns C and update UserChain as a def-use chain from C to V;
255	/// otherwise, UserChain is empty.
256	///
257	/// \p V The given expression
258	/// \p SignExtended Whether V will be sign-extended in the computation of the
259	/// GEP index
260	/// \p ZeroExtended Whether V will be zero-extended in the computation of the
261	/// GEP index
262	/// \p NonNegative Whether V is guaranteed to be non-negative. For example,
263	/// an index of an inbounds GEP is guaranteed to be
264	/// non-negative. Levaraging this, we can better split
265	/// inbounds GEPs.
266	APInt find(Value V, bool* SignExtended, bool ZeroExtended, bool NonNegative);
267
268	/// A helper function to look into both operands of a binary operator.
269	APInt findInEitherOperand(BinaryOperator BO, bool* SignExtended,
270	bool ZeroExtended);
271
272	/// After finding the constant offset C from the GEP index I, we build a new
273	/// index I' s.t. I' + C = I. This function builds and returns the new
274	/// index I' according to UserChain produced by function "find".
275	///
276	/// The building conceptually takes two steps:
277	/// 1) iteratively distribute s/zext towards the leaves of the expression tree
278	/// that computes I
279	/// 2) reassociate the expression tree to the form I' + C.
280	///
281	/// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
282	/// sext to a, b and 5 so that we have
283	/// sext(a) + (sext(b) + 5).
284	/// Then, we reassociate it to
285	/// (sext(a) + sext(b)) + 5.
286	/// Given this form, we know I' is sext(a) + sext(b).
287	Value *rebuildWithoutConstOffset();
288
289	/// After the first step of rebuilding the GEP index without the constant
290	/// offset, distribute s/zext to the operands of all operators in UserChain.
291	/// e.g., zext(sext(a + (b + 5)) (assuming no overflow) =>
292	/// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
293	///
294	/// The function also updates UserChain to point to new subexpressions after
295	/// distributing s/zext. e.g., the old UserChain of the above example is
296	/// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
297	/// and the new UserChain is
298	/// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
299	/// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))
300	///
301	/// \p ChainIndex The index to UserChain. ChainIndex is initially
302	/// UserChain.size() - 1, and is decremented during
303	/// the recursion.
304	Value distributeExtsAndCloneChain(unsigned* ChainIndex);
305
306	/// Reassociates the GEP index to the form I' + C and returns I'.
307	Value removeConstOffset(unsigned* ChainIndex);
308
309	/// A helper function to apply ExtInsts, a list of s/zext, to value V.
310	/// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
311	/// returns "sext i32 (zext i16 V to i32) to i64".
312	Value applyExts(Value V);
313
314	/// A helper function that returns whether we can trace into the operands
315	/// of binary operator BO for a constant offset.
316	///
317	/// \p SignExtended Whether BO is surrounded by sext
318	/// \p ZeroExtended Whether BO is surrounded by zext
319	/// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound
320	/// array index.
321	bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
322	bool NonNegative);
323
324	/// The path from the constant offset to the old GEP index. e.g., if the GEP
325	/// index is "a b + (c + 5)". After running function find, UserChain[0] will*
326	/// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
327	/// UserChain[2] will be the entire expression "a b + (c + 5)".*
328	///
329	/// This path helps to rebuild the new GEP index.
330	SmallVector<User *, `8`> UserChain;
331
332	/// A data structure used in rebuildWithoutConstOffset. Contains all
333	/// sext/zext instructions along UserChain.
334	SmallVector<CastInst *, `16`> ExtInsts;
335
336	/// Insertion position of cloned instructions.
337	BasicBlock::iterator IP;
338
339	const DataLayout &DL;
340	};
341
342	/// A pass that tries to split every GEP in the function into a variadic
343	/// base and a constant offset. It is a FunctionPass because searching for the
344	/// constant offset may inspect other basic blocks.
345	class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
346	public:
347	static char ID;
348
349	SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
350	: FunctionPass (ID), LowerGEP(LowerGEP) {
351	initializeSeparateConstOffsetFromGEPLegacyPassPass(
352	*PassRegistry::getPassRegistry());
353	}
354
355	void getAnalysisUsage(AnalysisUsage &AU) const override {
356	AU.addRequired<DominatorTreeWrapperPass>();
357	AU.addRequired<TargetTransformInfoWrapperPass>();
358	AU.addRequired<LoopInfoWrapperPass>();
359	AU.setPreservesCFG();
360	AU.addRequired<TargetLibraryInfoWrapperPass>();
361	}
362
363	bool runOnFunction(Function &F) override;
364
365	private:
366	bool LowerGEP;
367	};
368
369	/// A pass that tries to split every GEP in the function into a variadic
370	/// base and a constant offset. It is a FunctionPass because searching for the
371	/// constant offset may inspect other basic blocks.
372	class SeparateConstOffsetFromGEP {
373	public:
374	SeparateConstOffsetFromGEP(
375	DominatorTree DT, LoopInfo LI, TargetLibraryInfo *TLI,
376	function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
377	: DT(DT), LI(LI), TLI(TLI), GetTTI (GetTTI), LowerGEP(LowerGEP) {}
378
379	bool run(Function &F);
380
381	private:
382	/// Track the operands of an add or sub.
383	using ExprKey = std::pair<Value , Value >;
384
385	/// Create a pair for use as a map key for a commutable operation.
386	static ExprKey createNormalizedCommutablePair(Value A, Value B) {
387	if (A < B)
388	return {A, B};
389	return {B, A};
390	}
391
392	/// Tries to split the given GEP into a variadic base and a constant offset,
393	/// and returns true if the splitting succeeds.
394	bool splitGEP(GetElementPtrInst *GEP);
395
396	/// Tries to reorder the given GEP with the GEP that produces the base if
397	/// doing so results in producing a constant offset as the outermost
398	/// index.
399	bool reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI);
400
401	/// Lower a GEP with multiple indices into multiple GEPs with a single index.
402	/// Function splitGEP already split the original GEP into a variadic part and
403	/// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
404	/// variadic part into a set of GEPs with a single index and applies
405	/// AccumulativeByteOffset to it.
406	/// \p Variadic The variadic part of the original GEP.
407	/// \p AccumulativeByteOffset The constant offset.
408	void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
409	int64_t AccumulativeByteOffset);
410
411	/// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form.
412	/// Function splitGEP already split the original GEP into a variadic part and
413	/// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
414	/// variadic part into a set of arithmetic operations and applies
415	/// AccumulativeByteOffset to it.
416	/// \p Variadic The variadic part of the original GEP.
417	/// \p AccumulativeByteOffset The constant offset.
418	void lowerToArithmetics(GetElementPtrInst *Variadic,
419	int64_t AccumulativeByteOffset);
420
421	/// Finds the constant offset within each index and accumulates them. If
422	/// LowerGEP is true, it finds in indices of both sequential and structure
423	/// types, otherwise it only finds in sequential indices. The output
424	/// NeedsExtraction indicates whether we successfully find a non-zero constant
425	/// offset.
426	int64_t accumulateByteOffset(GetElementPtrInst GEP, bool* &NeedsExtraction);
427
428	/// Canonicalize array indices to pointer-size integers. This helps to
429	/// simplify the logic of splitting a GEP. For example, if a + b is a
430	/// pointer-size integer, we have
431	/// gep base, a + b = gep (gep base, a), b
432	/// However, this equality may not hold if the size of a + b is smaller than
433	/// the pointer size, because LLVM conceptually sign-extends GEP indices to
434	/// pointer size before computing the address
435	/// (http://llvm.org/docs/LangRef.html#id181).
436	///
437	/// This canonicalization is very likely already done in clang and
438	/// instcombine. Therefore, the program will probably remain the same.
439	///
440	/// Returns true if the module changes.
441	///
442	/// Verified in @i32_add in split-gep.ll
443	bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP);
444
445	/// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
446	/// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
447	/// the constant offset. After extraction, it becomes desirable to reunion the
448	/// distributed sexts. For example,
449	///
450	/// &a[sext(i +nsw (j +nsw 5)]
451	/// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)]
452	/// => constant extraction &a[sext(i) + sext(j)] + 5
453	/// => reunion &a[sext(i +nsw j)] + 5
454	bool reuniteExts(Function &F);
455
456	/// A helper that reunites sexts in an instruction.
457	bool reuniteExts(Instruction *I);
458
459	/// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
460	Instruction *findClosestMatchingDominator(
461	ExprKey Key, Instruction *Dominatee,
462	DenseMap<ExprKey, SmallVector<Instruction *, `2`>> &DominatingExprs);
463
464	/// Verify F is free of dead code.
465	void verifyNoDeadCode(Function &F);
466
467	bool hasMoreThanOneUseInLoop(Value v, Loop L);
468
469	// Swap the index operand of two GEP.
470	void swapGEPOperand(GetElementPtrInst First, GetElementPtrInst Second);
471
472	// Check if it is safe to swap operand of two GEP.
473	bool isLegalToSwapOperand(GetElementPtrInst First, GetElementPtrInst Second,
474	Loop *CurLoop);
475
476	const DataLayout DL = nullptr*;
477	DominatorTree DT = nullptr*;
478	LoopInfo *LI;
479	TargetLibraryInfo *TLI;
480	// Retrieved lazily since not always used.
481	function_ref<TargetTransformInfo &(Function &)> GetTTI;
482
483	/// Whether to lower a GEP with multiple indices into arithmetic operations or
484	/// multiple GEPs with a single index.
485	bool LowerGEP;
486
487	DenseMap<ExprKey, SmallVector<Instruction *, `2`>> DominatingAdds;
488	DenseMap<ExprKey, SmallVector<Instruction *, `2`>> DominatingSubs;
489	};
490
491	} // end anonymous namespace
492
493	char SeparateConstOffsetFromGEPLegacyPass::ID = `0`;
494
495	INITIALIZE_PASS_BEGIN(
496	SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
497	"Split GEPs to a variadic base and a constant offset for better CSE", false,
498	false)
499	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
500	INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
501	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
502	INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
503	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
504	INITIALIZE_PASS_END(
505	SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
506	"Split GEPs to a variadic base and a constant offset for better CSE", false,
507	false)
508
509	FunctionPass llvm::createSeparateConstOffsetFromGEPPass(bool* LowerGEP) {
510	return new SeparateConstOffsetFromGEPLegacyPass (LowerGEP);
511	}
512
513	bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
514	bool ZeroExtended,
515	BinaryOperator *BO,
516	bool NonNegative) {
517	// We only consider ADD, SUB and OR, because a non-zero constant found in
518	// expressions composed of these operations can be easily hoisted as a
519	// constant offset by reassociation.
520	if (BO->getOpcode() != Instruction::Add &&
521	BO->getOpcode() != Instruction::Sub &&
522	BO->getOpcode() != Instruction::Or) {
523	return false;
524	}
525
526	Value LHS = BO->getOperand(i_nocapture: `0`), RHS = BO->getOperand(i_nocapture: `1`);
527	// Do not trace into "or" unless it is equivalent to "add".
528	// This is the case if the or's disjoint flag is set.
529	if (BO->getOpcode() == Instruction::Or &&
530	!cast<PossiblyDisjointInst>(Val: BO)->isDisjoint())
531	return false;
532
533	// FIXME: We don't currently support constants from the RHS of subs,
534	// when we are zero-extended, because we need a way to zero-extended
535	// them before they are negated.
536	if (ZeroExtended && !SignExtended && BO->getOpcode() == Instruction::Sub)
537	return false;
538
539	// In addition, tracing into BO requires that its surrounding s/zext (if
540	// any) is distributable to both operands.
541	//
542	// Suppose BO = A op B.
543	// SignExtended \| ZeroExtended \| Distributable?
544	// --------------+--------------+----------------------------------
545	// 0 \| 0 \| true because no s/zext exists
546	// 0 \| 1 \| zext(BO) == zext(A) op zext(B)
547	// 1 \| 0 \| sext(BO) == sext(A) op sext(B)
548	// 1 \| 1 \| zext(sext(BO)) ==
549	// \| \| zext(sext(A)) op zext(sext(B))
550	if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
551	// If a + b >= 0 and (a >= 0 or b >= 0), then
552	// sext(a + b) = sext(a) + sext(b)
553	// even if the addition is not marked nsw.
554	//
555	// Leveraging this invariant, we can trace into an sext'ed inbound GEP
556	// index if the constant offset is non-negative.
557	//
558	// Verified in @sext_add in split-gep.ll.
559	if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(Val: LHS)) {
560	if (!ConstLHS->isNegative())
561	return true;
562	}
563	if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(Val: RHS)) {
564	if (!ConstRHS->isNegative())
565	return true;
566	}
567	}
568
569	// sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
570	// zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
571	if (BO->getOpcode() == Instruction::Add \|\|
572	BO->getOpcode() == Instruction::Sub) {
573	if (SignExtended && !BO->hasNoSignedWrap())
574	return false;
575	if (ZeroExtended && !BO->hasNoUnsignedWrap())
576	return false;
577	}
578
579	return true;
580	}
581
582	APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
583	bool SignExtended,
584	bool ZeroExtended) {
585	// Save off the current height of the chain, in case we need to restore it.
586	size_t ChainLength = UserChain.size();
587
588	// BO being non-negative does not shed light on whether its operands are
589	// non-negative. Clear the NonNegative flag here.
590	APInt ConstantOffset = find(V: BO->getOperand(i_nocapture: `0`), SignExtended, ZeroExtended,
591	/ NonNegative / false);
592	// If we found a constant offset in the left operand, stop and return that.
593	// This shortcut might cause us to miss opportunities of combining the
594	// constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
595	// However, such cases are probably already handled by -instcombine,
596	// given this pass runs after the standard optimizations.
597	if (ConstantOffset != `0`) return ConstantOffset;
598
599	// Reset the chain back to where it was when we started exploring this node,
600	// since visiting the LHS didn't pan out.
601	UserChain.resize(N: ChainLength);
602
603	ConstantOffset = find(V: BO->getOperand(i_nocapture: `1`), SignExtended, ZeroExtended,
604	/ NonNegative / false);
605	// If U is a sub operator, negate the constant offset found in the right
606	// operand.
607	if (BO->getOpcode() == Instruction::Sub)
608	ConstantOffset = -ConstantOffset;
609
610	// If RHS wasn't a suitable candidate either, reset the chain again.
611	if (ConstantOffset == `0`)
612	UserChain.resize(N: ChainLength);
613
614	return ConstantOffset;
615	}
616
617	APInt ConstantOffsetExtractor::find(Value V, bool* SignExtended,
618	bool ZeroExtended, bool NonNegative) {
619	// TODO(jingyue): We could trace into integer/pointer casts, such as
620	// inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
621	// integers because it gives good enough results for our benchmarks.
622	unsigned BitWidth = cast<IntegerType>(Val: V->getType())->getBitWidth();
623
624	// We cannot do much with Values that are not a User, such as an Argument.
625	User *U = dyn_cast<User>(Val: V);
626	if (U == nullptr) return APInt (BitWidth, `0`);
627
628	APInt ConstantOffset(BitWidth, `0`);
629	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: V)) {
630	// Hooray, we found it!
631	ConstantOffset = CI->getValue();
632	} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Val: V)) {
633	// Trace into subexpressions for more hoisting opportunities.
634	if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
635	ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
636	} else if (isa<TruncInst>(Val: V)) {
637	ConstantOffset =
638	find(V: U->getOperand(i: `0`), SignExtended, ZeroExtended, NonNegative)
639	.trunc(width: BitWidth);
640	} else if (isa<SExtInst>(Val: V)) {
641	ConstantOffset = find(V: U->getOperand(i: `0`), / SignExtended / true,
642	ZeroExtended, NonNegative).sext(width: BitWidth);
643	} else if (isa<ZExtInst>(Val: V)) {
644	// As an optimization, we can clear the SignExtended flag because
645	// sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
646	//
647	// Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
648	ConstantOffset =
649	find(V: U->getOperand(i: `0`), / SignExtended / false,
650	/ ZeroExtended / true, / NonNegative / false).zext(width: BitWidth);
651	}
652
653	// If we found a non-zero constant offset, add it to the path for
654	// rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
655	// help this optimization.
656	if (ConstantOffset != `0`)
657	UserChain.push_back(Elt: U);
658	return ConstantOffset;
659	}
660
661	Value ConstantOffsetExtractor::applyExts(Value V) {
662	Value *Current = V;
663	// ExtInsts is built in the use-def order. Therefore, we apply them to V
664	// in the reversed order.
665	for (CastInst *I : llvm::reverse(C&: ExtInsts)) {
666	if (Constant *C = dyn_cast<Constant>(Val: Current)) {
667	// Try to constant fold the cast.
668	Current = ConstantFoldCastOperand(Opcode: I->getOpcode(), C, DestTy: I->getType(), DL);
669	if (Current)
670	continue;
671	}
672
673	Instruction *Ext = I->clone();
674	Ext->setOperand(i: `0`, Val: Current);
675	Ext->insertBefore(BB&: *IP ->getParent(), InsertPos: IP);
676	Current = Ext;
677	}
678	return Current;
679	}
680
681	Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
682	distributeExtsAndCloneChain(ChainIndex: UserChain.size() - `1`);
683	// Remove all nullptrs (used to be s/zext) from UserChain.
684	unsigned NewSize = `0`;
685	for (User *I : UserChain) {
686	if (I != nullptr) {
687	UserChain [NewSize] = I;
688	NewSize++;
689	}
690	}
691	UserChain.resize(N: NewSize);
692	return removeConstOffset(ChainIndex: UserChain.size() - `1`);
693	}
694
695	Value *
696	ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
697	User *U = UserChain [ChainIndex];
698	if (ChainIndex == `0`) {
699	assert(isa<ConstantInt>(U));
700	// If U is a ConstantInt, applyExts will return a ConstantInt as well.
701	return UserChain [ChainIndex] = cast<ConstantInt>(Val: applyExts(V: U));
702	}
703
704	if (CastInst *Cast = dyn_cast<CastInst>(Val: U)) {
705	assert(
706	(isa<SExtInst>(Cast) \|\| isa<ZExtInst>(Cast) \|\| isa<TruncInst>(Cast)) &&
707	"Only following instructions can be traced: sext, zext & trunc");
708	ExtInsts.push_back(Elt: Cast);
709	UserChain [ChainIndex] = nullptr;
710	return distributeExtsAndCloneChain(ChainIndex: ChainIndex - `1`);
711	}
712
713	// Function find only trace into BinaryOperator and CastInst.
714	BinaryOperator *BO = cast<BinaryOperator>(Val: U);
715	// OpNo = which operand of BO is UserChain[ChainIndex - 1]
716	unsigned OpNo = (BO->getOperand(i_nocapture: `0`) == UserChain [ChainIndex - `1`] ? `0` : `1`);
717	Value *TheOther = applyExts(V: BO->getOperand(i_nocapture: `1` - OpNo));
718	Value *NextInChain = distributeExtsAndCloneChain(ChainIndex: ChainIndex - `1`);
719
720	BinaryOperator NewBO = nullptr*;
721	if (OpNo == `0`) {
722	NewBO = BinaryOperator::Create(Op: BO->getOpcode(), S1: NextInChain, S2: TheOther,
723	Name: BO->getName(), InsertBefore: IP);
724	} else {
725	NewBO = BinaryOperator::Create(Op: BO->getOpcode(), S1: TheOther, S2: NextInChain,
726	Name: BO->getName(), InsertBefore: IP);
727	}
728	return UserChain [ChainIndex] = NewBO;
729	}
730
731	Value ConstantOffsetExtractor::removeConstOffset(unsigned* ChainIndex) {
732	if (ChainIndex == `0`) {
733	assert(isa<ConstantInt>(UserChain[ChainIndex]));
734	return ConstantInt::getNullValue(Ty: UserChain [ChainIndex]->getType());
735	}
736
737	BinaryOperator *BO = cast<BinaryOperator>(Val: UserChain [ChainIndex]);
738	assert((BO->use_empty() \|\| BO->hasOneUse()) &&
739	"distributeExtsAndCloneChain clones each BinaryOperator in "
740	"UserChain, so no one should be used more than "
741	"once");
742
743	unsigned OpNo = (BO->getOperand(i_nocapture: `0`) == UserChain [ChainIndex - `1`] ? `0` : `1`);
744	assert(BO->getOperand(OpNo) == UserChain[ChainIndex - `1`]);
745	Value *NextInChain = removeConstOffset(ChainIndex: ChainIndex - `1`);
746	Value *TheOther = BO->getOperand(i_nocapture: `1` - OpNo);
747
748	// If NextInChain is 0 and not the LHS of a sub, we can simplify the
749	// sub-expression to be just TheOther.
750	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: NextInChain)) {
751	if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == `0`))
752	return TheOther;
753	}
754
755	BinaryOperator::BinaryOps NewOp = BO->getOpcode();
756	if (BO->getOpcode() == Instruction::Or) {
757	// Rebuild "or" as "add", because "or" may be invalid for the new
758	// expression.
759	//
760	// For instance, given
761	// a \| (b + 5) where a and b + 5 have no common bits,
762	// we can extract 5 as the constant offset.
763	//
764	// However, reusing the "or" in the new index would give us
765	// (a \| b) + 5
766	// which does not equal a \| (b + 5).
767	//
768	// Replacing the "or" with "add" is fine, because
769	// a \| (b + 5) = a + (b + 5) = (a + b) + 5
770	NewOp = Instruction::Add;
771	}
772
773	BinaryOperator *NewBO;
774	if (OpNo == `0`) {
775	NewBO = BinaryOperator::Create(Op: NewOp, S1: NextInChain, S2: TheOther, Name: "", InsertBefore: IP);
776	} else {
777	NewBO = BinaryOperator::Create(Op: NewOp, S1: TheOther, S2: NextInChain, Name: "", InsertBefore: IP);
778	}
779	NewBO->takeName(V: BO);
780	return NewBO;
781	}
782
783	/// A helper function to check if reassociating through an entry in the user
784	/// chain would invalidate the GEP's nuw flag.
785	static bool allowsPreservingNUW(const User *U) {
786	if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(Val: U)) {
787	// Binary operations need to be effectively add nuw.
788	auto Opcode = BO->getOpcode();
789	if (Opcode == BinaryOperator::Or) {
790	// Ors are only considered here if they are disjoint. The addition that
791	// they represent in this case is NUW.
792	assert(cast<PossiblyDisjointInst>(BO)->isDisjoint());
793	return true;
794	}
795	return Opcode == BinaryOperator::Add && BO->hasNoUnsignedWrap();
796	}
797	// UserChain can only contain ConstantInt, CastInst, or BinaryOperator.
798	// Among the possible CastInsts, only trunc without nuw is a problem: If it
799	// is distributed through an add nuw, wrapping may occur:
800	// "add nuw trunc(a), trunc(b)" is more poisonous than "trunc(add nuw a, b)"
801	if (const TruncInst *TI = dyn_cast<TruncInst>(Val: U))
802	return TI->hasNoUnsignedWrap();
803	return isa<CastInst>(Val: U) \|\| isa<ConstantInt>(Val: U);
804	}
805
806	Value ConstantOffsetExtractor::Extract(Value Idx, GetElementPtrInst *GEP,
807	User *&UserChainTail,
808	bool &PreservesNUW) {
809	ConstantOffsetExtractor Extractor(GEP->getIterator());
810	// Find a non-zero constant offset first.
811	APInt ConstantOffset =
812	Extractor.find(V: Idx, / SignExtended / false, / ZeroExtended / false,
813	NonNegative: GEP->isInBounds());
814	if (ConstantOffset == `0`) {
815	UserChainTail = nullptr;
816	PreservesNUW = true;
817	return nullptr;
818	}
819
820	PreservesNUW = all_of(Range&: Extractor.UserChain, P: allowsPreservingNUW);
821
822	// Separates the constant offset from the GEP index.
823	Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
824	UserChainTail = Extractor.UserChain.back();
825	return IdxWithoutConstOffset;
826	}
827
828	int64_t ConstantOffsetExtractor::Find(Value Idx, GetElementPtrInst GEP) {
829	// If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
830	return ConstantOffsetExtractor (GEP->getIterator())
831	.find(V: Idx, / SignExtended / false, / ZeroExtended / false,
832	NonNegative: GEP->isInBounds())
833	.getSExtValue();
834	}
835
836	bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize(
837	GetElementPtrInst *GEP) {
838	bool Changed = false;
839	Type *PtrIdxTy = DL->getIndexType(PtrTy: GEP->getType());
840	gep_type_iterator GTI = gep_type_begin(GEP: *GEP);
841	for (User::op_iterator I = GEP->op_begin() + `1`, E = GEP->op_end();
842	I != E; ++I, ++GTI) {
843	// Skip struct member indices which must be i32.
844	if (GTI.isSequential()) {
845	if ((*I)->getType() != PtrIdxTy) {
846	I = CastInst::CreateIntegerCast(S: I, Ty: PtrIdxTy, isSigned: true, Name: "idxprom",
847	InsertBefore: GEP->getIterator());
848	Changed = true;
849	}
850	}
851	}
852	return Changed;
853	}
854
855	int64_t
856	SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
857	bool &NeedsExtraction) {
858	NeedsExtraction = false;
859	int64_t AccumulativeByteOffset = `0`;
860	gep_type_iterator GTI = gep_type_begin(GEP: *GEP);
861	for (unsigned I = `1`, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
862	if (GTI.isSequential()) {
863	// Constant offsets of scalable types are not really constant.
864	if (GTI.getIndexedType()->isScalableTy())
865	continue;
866
867	// Tries to extract a constant offset from this GEP index.
868	int64_t ConstantOffset =
869	ConstantOffsetExtractor::Find(Idx: GEP->getOperand(i_nocapture: I), GEP);
870	if (ConstantOffset != `0`) {
871	NeedsExtraction = true;
872	// A GEP may have multiple indices. We accumulate the extracted
873	// constant offset to a byte offset, and later offset the remainder of
874	// the original GEP with this byte offset.
875	AccumulativeByteOffset +=
876	ConstantOffset * GTI.getSequentialElementStride(DL: *DL);
877	}
878	} else if (LowerGEP) {
879	StructType *StTy = GTI.getStructType();
880	uint64_t Field = cast<ConstantInt>(Val: GEP->getOperand(i_nocapture: I))->getZExtValue();
881	// Skip field 0 as the offset is always 0.
882	if (Field != `0`) {
883	NeedsExtraction = true;
884	AccumulativeByteOffset +=
885	DL->getStructLayout(Ty: StTy)->getElementOffset(Idx: Field);
886	}
887	}
888	}
889	return AccumulativeByteOffset;
890	}
891
892	void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
893	GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
894	IRBuilder<> Builder(Variadic);
895	Type *PtrIndexTy = DL->getIndexType(PtrTy: Variadic->getType());
896
897	Value *ResultPtr = Variadic->getOperand(i_nocapture: `0`);
898	Loop *L = LI->getLoopFor(BB: Variadic->getParent());
899	// Check if the base is not loop invariant or used more than once.
900	bool isSwapCandidate =
901	L && L->isLoopInvariant(V: ResultPtr) &&
902	!hasMoreThanOneUseInLoop(v: ResultPtr, L);
903	Value FirstResult = nullptr*;
904
905	gep_type_iterator GTI = gep_type_begin(GEP: *Variadic);
906	// Create an ugly GEP for each sequential index. We don't create GEPs for
907	// structure indices, as they are accumulated in the constant offset index.
908	for (unsigned I = `1`, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
909	if (GTI.isSequential()) {
910	Value *Idx = Variadic->getOperand(i_nocapture: I);
911	// Skip zero indices.
912	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Idx))
913	if (CI->isZero())
914	continue;
915
916	APInt ElementSize = APInt (PtrIndexTy->getIntegerBitWidth(),
917	GTI.getSequentialElementStride(DL: *DL));
918	// Scale the index by element size.
919	if (ElementSize != `1`) {
920	if (ElementSize.isPowerOf2()) {
921	Idx = Builder.CreateShl(
922	LHS: Idx, RHS: ConstantInt::get(Ty: PtrIndexTy, V: ElementSize.logBase2()));
923	} else {
924	Idx =
925	Builder.CreateMul(LHS: Idx, RHS: ConstantInt::get(Ty: PtrIndexTy, V: ElementSize));
926	}
927	}
928	// Create an ugly GEP with a single index for each index.
929	ResultPtr = Builder.CreatePtrAdd(Ptr: ResultPtr, Offset: Idx, Name: "uglygep");
930	if (FirstResult == nullptr)
931	FirstResult = ResultPtr;
932	}
933	}
934
935	// Create a GEP with the constant offset index.
936	if (AccumulativeByteOffset != `0`) {
937	Value *Offset = ConstantInt::get(Ty: PtrIndexTy, V: AccumulativeByteOffset);
938	ResultPtr = Builder.CreatePtrAdd(Ptr: ResultPtr, Offset, Name: "uglygep");
939	} else
940	isSwapCandidate = false;
941
942	// If we created a GEP with constant index, and the base is loop invariant,
943	// then we swap the first one with it, so LICM can move constant GEP out
944	// later.
945	auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(Val: FirstResult);
946	auto *SecondGEP = dyn_cast<GetElementPtrInst>(Val: ResultPtr);
947	if (isSwapCandidate && isLegalToSwapOperand(First: FirstGEP, Second: SecondGEP, CurLoop: L))
948	swapGEPOperand(First: FirstGEP, Second: SecondGEP);
949
950	Variadic->replaceAllUsesWith(V: ResultPtr);
951	Variadic->eraseFromParent();
952	}
953
954	void
955	SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
956	int64_t AccumulativeByteOffset) {
957	IRBuilder<> Builder(Variadic);
958	Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
959	assert(IntPtrTy == DL->getIndexType(Variadic->getType()) &&
960	"Pointer type must match index type for arithmetic-based lowering of "
961	"split GEPs");
962
963	Value *ResultPtr = Builder.CreatePtrToInt(V: Variadic->getOperand(i_nocapture: `0`), DestTy: IntPtrTy);
964	gep_type_iterator GTI = gep_type_begin(GEP: *Variadic);
965	// Create ADD/SHL/MUL arithmetic operations for each sequential indices. We
966	// don't create arithmetics for structure indices, as they are accumulated
967	// in the constant offset index.
968	for (unsigned I = `1`, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
969	if (GTI.isSequential()) {
970	Value *Idx = Variadic->getOperand(i_nocapture: I);
971	// Skip zero indices.
972	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Idx))
973	if (CI->isZero())
974	continue;
975
976	APInt ElementSize = APInt (IntPtrTy->getIntegerBitWidth(),
977	GTI.getSequentialElementStride(DL: *DL));
978	// Scale the index by element size.
979	if (ElementSize != `1`) {
980	if (ElementSize.isPowerOf2()) {
981	Idx = Builder.CreateShl(
982	LHS: Idx, RHS: ConstantInt::get(Ty: IntPtrTy, V: ElementSize.logBase2()));
983	} else {
984	Idx = Builder.CreateMul(LHS: Idx, RHS: ConstantInt::get(Ty: IntPtrTy, V: ElementSize));
985	}
986	}
987	// Create an ADD for each index.
988	ResultPtr = Builder.CreateAdd(LHS: ResultPtr, RHS: Idx);
989	}
990	}
991
992	// Create an ADD for the constant offset index.
993	if (AccumulativeByteOffset != `0`) {
994	ResultPtr = Builder.CreateAdd(
995	LHS: ResultPtr, RHS: ConstantInt::get(Ty: IntPtrTy, V: AccumulativeByteOffset));
996	}
997
998	ResultPtr = Builder.CreateIntToPtr(V: ResultPtr, DestTy: Variadic->getType());
999	Variadic->replaceAllUsesWith(V: ResultPtr);
1000	Variadic->eraseFromParent();
1001	}
1002
1003	bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
1004	TargetTransformInfo &TTI) {
1005	auto PtrGEP = dyn_cast<GetElementPtrInst>(Val: GEP->getPointerOperand());
1006	if (!PtrGEP)
1007	return false;
1008
1009	bool NestedNeedsExtraction;
1010	int64_t NestedByteOffset =
1011	accumulateByteOffset(GEP: PtrGEP, NeedsExtraction&: NestedNeedsExtraction);
1012	if (!NestedNeedsExtraction)
1013	return false;
1014
1015	unsigned AddrSpace = PtrGEP->getPointerAddressSpace();
1016	if (!TTI.isLegalAddressingMode(Ty: GEP->getResultElementType(),
1017	/BaseGV=/nullptr, BaseOffset: NestedByteOffset,
1018	/HasBaseReg=/true, /Scale=/`0`, AddrSpace))
1019	return false;
1020
1021	bool GEPInBounds = GEP->isInBounds();
1022	bool PtrGEPInBounds = PtrGEP->isInBounds();
1023	bool IsChainInBounds = GEPInBounds && PtrGEPInBounds;
1024	if (IsChainInBounds) {
1025	auto IsKnownNonNegative = [this](Value *V) {
1026	return isKnownNonNegative(V, SQ: *DL);
1027	};
1028	IsChainInBounds &= all_of(Range: GEP->indices(), P: IsKnownNonNegative);
1029	if (IsChainInBounds)
1030	IsChainInBounds &= all_of(Range: PtrGEP->indices(), P: IsKnownNonNegative);
1031	}
1032
1033	IRBuilder<> Builder(GEP);
1034	// For trivial GEP chains, we can swap the indices.
1035	Value *NewSrc = Builder.CreateGEP(
1036	Ty: GEP->getSourceElementType(), Ptr: PtrGEP->getPointerOperand(),
1037	IdxList: SmallVector<Value *, `4`>(GEP->indices()), Name: "", NW: IsChainInBounds);
1038	Value *NewGEP = Builder.CreateGEP(Ty: PtrGEP->getSourceElementType(), Ptr: NewSrc,
1039	IdxList: SmallVector<Value *, `4`>(PtrGEP->indices()),
1040	Name: "", NW: IsChainInBounds);
1041	GEP->replaceAllUsesWith(V: NewGEP);
1042	RecursivelyDeleteTriviallyDeadInstructions(V: GEP);
1043	return true;
1044	}
1045
1046	bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
1047	// Skip vector GEPs.
1048	if (GEP->getType()->isVectorTy())
1049	return false;
1050
1051	// The backend can already nicely handle the case where all indices are
1052	// constant.
1053	if (GEP->hasAllConstantIndices())
1054	return false;
1055
1056	bool Changed = canonicalizeArrayIndicesToIndexSize(GEP);
1057
1058	bool NeedsExtraction;
1059	int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
1060
1061	TargetTransformInfo &TTI = GetTTI (*GEP->getFunction());
1062
1063	if (!NeedsExtraction) {
1064	Changed \|= reorderGEP(GEP, TTI);
1065	return Changed;
1066	}
1067
1068	// If LowerGEP is disabled, before really splitting the GEP, check whether the
1069	// backend supports the addressing mode we are about to produce. If no, this
1070	// splitting probably won't be beneficial.
1071	// If LowerGEP is enabled, even the extracted constant offset can not match
1072	// the addressing mode, we can still do optimizations to other lowered parts
1073	// of variable indices. Therefore, we don't check for addressing modes in that
1074	// case.
1075	if (!LowerGEP) {
1076	unsigned AddrSpace = GEP->getPointerAddressSpace();
1077	if (!TTI.isLegalAddressingMode(Ty: GEP->getResultElementType(),
1078	/BaseGV=/nullptr, BaseOffset: AccumulativeByteOffset,
1079	/HasBaseReg=/true, /Scale=/`0`,
1080	AddrSpace)) {
1081	return Changed;
1082	}
1083	}
1084
1085	// Track information for preserving GEP flags.
1086	bool AllOffsetsNonNegative = AccumulativeByteOffset >= `0`;
1087	bool AllNUWPreserved = true;
1088
1089	// Remove the constant offset in each sequential index. The resultant GEP
1090	// computes the variadic base.
1091	// Notice that we don't remove struct field indices here. If LowerGEP is
1092	// disabled, a structure index is not accumulated and we still use the old
1093	// one. If LowerGEP is enabled, a structure index is accumulated in the
1094	// constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later
1095	// handle the constant offset and won't need a new structure index.
1096	gep_type_iterator GTI = gep_type_begin(GEP: *GEP);
1097	for (unsigned I = `1`, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
1098	if (GTI.isSequential()) {
1099	// Constant offsets of scalable types are not really constant.
1100	if (GTI.getIndexedType()->isScalableTy())
1101	continue;
1102
1103	// Splits this GEP index into a variadic part and a constant offset, and
1104	// uses the variadic part as the new index.
1105	Value *OldIdx = GEP->getOperand(i_nocapture: I);
1106	User *UserChainTail;
1107	bool PreservesNUW;
1108	Value *NewIdx = ConstantOffsetExtractor::Extract(
1109	Idx: OldIdx, GEP, UserChainTail, PreservesNUW);
1110	if (NewIdx != nullptr) {
1111	// Switches to the index with the constant offset removed.
1112	GEP->setOperand(i_nocapture: I, Val_nocapture: NewIdx);
1113	// After switching to the new index, we can garbage-collect UserChain
1114	// and the old index if they are not used.
1115	RecursivelyDeleteTriviallyDeadInstructions(V: UserChainTail);
1116	RecursivelyDeleteTriviallyDeadInstructions(V: OldIdx);
1117	AllOffsetsNonNegative =
1118	AllOffsetsNonNegative && isKnownNonNegative(V: NewIdx, SQ: *DL);
1119	AllNUWPreserved &= PreservesNUW;
1120	}
1121	}
1122	}
1123
1124	// Clear the inbounds attribute because the new index may be off-bound.
1125	// e.g.,
1126	//
1127	// b = add i64 a, 5
1128	// addr = gep inbounds float, float p, i64 b*
1129	//
1130	// is transformed to:
1131	//
1132	// addr2 = gep float, float p, i64 a ; inbounds removed*
1133	// addr = gep float, float addr2, i64 5 ; inbounds removed*
1134	//
1135	// If a is -4, although the old index b is in bounds, the new index a is
1136	// off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
1137	// inbounds keyword is not present, the offsets are added to the base
1138	// address with silently-wrapping two's complement arithmetic".
1139	// Therefore, the final code will be a semantically equivalent.
1140	GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none();
1141
1142	// If the initial GEP was inbounds/nusw and all variable indices and the
1143	// accumulated offsets are non-negative, they can be added in any order and
1144	// the intermediate results are in bounds and don't overflow in a nusw sense.
1145	// So, we can preserve the inbounds/nusw flag for both GEPs.
1146	bool CanPreserveInBoundsNUSW = AllOffsetsNonNegative;
1147
1148	// If the initial GEP was NUW and all operations that we reassociate were NUW
1149	// additions, the resulting GEPs are also NUW.
1150	if (GEP->hasNoUnsignedWrap() && AllNUWPreserved) {
1151	NewGEPFlags \|= GEPNoWrapFlags::noUnsignedWrap();
1152	// If the initial GEP additionally had NUSW (or inbounds, which implies
1153	// NUSW), we know that the indices in the initial GEP must all have their
1154	// signbit not set. For indices that are the result of NUW adds, the
1155	// add-operands therefore also don't have their signbit set. Therefore, all
1156	// indices of the resulting GEPs are non-negative -> we can preserve
1157	// the inbounds/nusw flag.
1158	CanPreserveInBoundsNUSW \|= GEP->hasNoUnsignedSignedWrap();
1159	}
1160
1161	if (CanPreserveInBoundsNUSW) {
1162	if (GEP->isInBounds())
1163	NewGEPFlags \|= GEPNoWrapFlags::inBounds();
1164	else if (GEP->hasNoUnsignedSignedWrap())
1165	NewGEPFlags \|= GEPNoWrapFlags::noUnsignedSignedWrap();
1166	}
1167
1168	GEP->setNoWrapFlags(NewGEPFlags);
1169
1170	// Lowers a GEP to either GEPs with a single index or arithmetic operations.
1171	if (LowerGEP) {
1172	// As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
1173	// arithmetic operations if the target uses alias analysis in codegen.
1174	// Additionally, pointers that aren't integral (and so can't be safely
1175	// converted to integers) or those whose offset size is different from their
1176	// pointer size (which means that doing integer arithmetic on them could
1177	// affect that data) can't be lowered in this way.
1178	unsigned AddrSpace = GEP->getPointerAddressSpace();
1179	bool PointerHasExtraData = DL->getPointerSizeInBits(AS: AddrSpace) !=
1180	DL->getIndexSizeInBits(AS: AddrSpace);
1181	if (TTI.useAA() \|\| DL->isNonIntegralAddressSpace(AddrSpace) \|\|
1182	PointerHasExtraData)
1183	lowerToSingleIndexGEPs(Variadic: GEP, AccumulativeByteOffset);
1184	else
1185	lowerToArithmetics(Variadic: GEP, AccumulativeByteOffset);
1186	return true;
1187	}
1188
1189	// No need to create another GEP if the accumulative byte offset is 0.
1190	if (AccumulativeByteOffset == `0`)
1191	return true;
1192
1193	// Offsets the base with the accumulative byte offset.
1194	//
1195	// %gep ; the base
1196	// ... %gep ...
1197	//
1198	// => add the offset
1199	//
1200	// %gep2 ; clone of %gep
1201	// %new.gep = gep i8, %gep2, %offset
1202	// %gep ; will be removed
1203	// ... %gep ...
1204	//
1205	// => replace all uses of %gep with %new.gep and remove %gep
1206	//
1207	// %gep2 ; clone of %gep
1208	// %new.gep = gep i8, %gep2, %offset
1209	// ... %new.gep ...
1210	Instruction *NewGEP = GEP->clone();
1211	NewGEP->insertBefore(InsertPos: GEP->getIterator());
1212
1213	Type *PtrIdxTy = DL->getIndexType(PtrTy: GEP->getType());
1214	IRBuilder<> Builder(GEP);
1215	NewGEP = cast<Instruction>(Val: Builder.CreatePtrAdd(
1216	Ptr: NewGEP, Offset: ConstantInt::get(Ty: PtrIdxTy, V: AccumulativeByteOffset, IsSigned: true),
1217	Name: GEP->getName(), NW: NewGEPFlags));
1218	NewGEP->copyMetadata(SrcInst: *GEP);
1219
1220	GEP->replaceAllUsesWith(V: NewGEP);
1221	GEP->eraseFromParent();
1222
1223	return true;
1224	}
1225
1226	bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
1227	if (skipFunction(F))
1228	return false;
1229	auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1230	auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1231	auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1232	auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
1233	return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1234	};
1235	SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
1236	return Impl.run(F);
1237	}
1238
1239	bool SeparateConstOffsetFromGEP::run(Function &F) {
1240	if (DisableSeparateConstOffsetFromGEP)
1241	return false;
1242
1243	DL = &F.getDataLayout();
1244	bool Changed = false;
1245	for (BasicBlock &B : F) {
1246	if (!DT->isReachableFromEntry(A: &B))
1247	continue;
1248
1249	for (Instruction &I : llvm::make_early_inc_range(Range&: B))
1250	if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: &I))
1251	Changed \|= splitGEP(GEP);
1252	// No need to split GEP ConstantExprs because all its indices are constant
1253	// already.
1254	}
1255
1256	Changed \|= reuniteExts(F);
1257
1258	if (VerifyNoDeadCode)
1259	verifyNoDeadCode(F);
1260
1261	return Changed;
1262	}
1263
1264	Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
1265	ExprKey Key, Instruction *Dominatee,
1266	DenseMap<ExprKey, SmallVector<Instruction *, `2`>> &DominatingExprs) {
1267	auto Pos = DominatingExprs.find(Val: Key);
1268	if (Pos == DominatingExprs.end())
1269	return nullptr;
1270
1271	auto &Candidates = Pos ->second;
1272	// Because we process the basic blocks in pre-order of the dominator tree, a
1273	// candidate that doesn't dominate the current instruction won't dominate any
1274	// future instruction either. Therefore, we pop it out of the stack. This
1275	// optimization makes the algorithm O(n).
1276	while (!Candidates.empty()) {
1277	Instruction *Candidate = Candidates.back();
1278	if (DT->dominates(Def: Candidate, User: Dominatee))
1279	return Candidate;
1280	Candidates.pop_back();
1281	}
1282	return nullptr;
1283	}
1284
1285	bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
1286	if (!I->getType()->isIntOrIntVectorTy())
1287	return false;
1288
1289	// Dom: LHS+RHS
1290	// I: sext(LHS)+sext(RHS)
1291	// If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
1292	// TODO: handle zext
1293	Value LHS = nullptr, RHS = nullptr;
1294	if (match(V: I, P: m_Add(L: m_SExt(Op: m_Value(V&: LHS)), R: m_SExt(Op: m_Value(V&: RHS))))) {
1295	if (LHS->getType() == RHS->getType()) {
1296	ExprKey Key = createNormalizedCommutablePair(A: LHS, B: RHS);
1297	if (auto *Dom = findClosestMatchingDominator(Key, Dominatee: I, DominatingExprs&: DominatingAdds)) {
1298	Instruction *NewSExt =
1299	new SExtInst (Dom, I->getType(), "", I->getIterator());
1300	NewSExt->takeName(V: I);
1301	I->replaceAllUsesWith(V: NewSExt);
1302	NewSExt->setDebugLoc(I->getDebugLoc());
1303	RecursivelyDeleteTriviallyDeadInstructions(V: I);
1304	return true;
1305	}
1306	}
1307	} else if (match(V: I, P: m_Sub(L: m_SExt(Op: m_Value(V&: LHS)), R: m_SExt(Op: m_Value(V&: RHS))))) {
1308	if (LHS->getType() == RHS->getType()) {
1309	if (auto *Dom =
1310	findClosestMatchingDominator(Key: {LHS, RHS}, Dominatee: I, DominatingExprs&: DominatingSubs)) {
1311	Instruction *NewSExt =
1312	new SExtInst (Dom, I->getType(), "", I->getIterator());
1313	NewSExt->takeName(V: I);
1314	I->replaceAllUsesWith(V: NewSExt);
1315	NewSExt->setDebugLoc(I->getDebugLoc());
1316	RecursivelyDeleteTriviallyDeadInstructions(V: I);
1317	return true;
1318	}
1319	}
1320	}
1321
1322	// Add I to DominatingExprs if it's an add/sub that can't sign overflow.
1323	if (match(V: I, P: m_NSWAdd(L: m_Value(V&: LHS), R: m_Value(V&: RHS)))) {
1324	if (programUndefinedIfPoison(Inst: I)) {
1325	ExprKey Key = createNormalizedCommutablePair(A: LHS, B: RHS);
1326	DominatingAdds [Key].push_back(Elt: I);
1327	}
1328	} else if (match(V: I, P: m_NSWSub(L: m_Value(V&: LHS), R: m_Value(V&: RHS)))) {
1329	if (programUndefinedIfPoison(Inst: I))
1330	DominatingSubs [{LHS, RHS}].push_back(Elt: I);
1331	}
1332	return false;
1333	}
1334
1335	bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
1336	bool Changed = false;
1337	DominatingAdds.clear();
1338	DominatingSubs.clear();
1339	for (const auto Node : depth_first(G: DT)) {
1340	BasicBlock *BB = Node->getBlock();
1341	for (Instruction &I : llvm::make_early_inc_range(Range&: *BB))
1342	Changed \|= reuniteExts(I: &I);
1343	}
1344	return Changed;
1345	}
1346
1347	void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
1348	for (BasicBlock &B : F) {
1349	for (Instruction &I : B) {
1350	if (isInstructionTriviallyDead(I: &I)) {
1351	std::string ErrMessage;
1352	raw_string_ostream RSO(ErrMessage);
1353	RSO << "Dead instruction detected!\n" << I << "\n";
1354	llvm_unreachable(RSO.str().c_str());
1355	}
1356	}
1357	}
1358	}
1359
1360	bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
1361	GetElementPtrInst FirstGEP, GetElementPtrInst SecondGEP, Loop *CurLoop) {
1362	if (!FirstGEP \|\| !FirstGEP->hasOneUse())
1363	return false;
1364
1365	if (!SecondGEP \|\| FirstGEP->getParent() != SecondGEP->getParent())
1366	return false;
1367
1368	if (FirstGEP == SecondGEP)
1369	return false;
1370
1371	unsigned FirstNum = FirstGEP->getNumOperands();
1372	unsigned SecondNum = SecondGEP->getNumOperands();
1373	// Give up if the number of operands are not 2.
1374	if (FirstNum != SecondNum \|\| FirstNum != `2`)
1375	return false;
1376
1377	Value *FirstBase = FirstGEP->getOperand(i_nocapture: `0`);
1378	Value *SecondBase = SecondGEP->getOperand(i_nocapture: `0`);
1379	Value *FirstOffset = FirstGEP->getOperand(i_nocapture: `1`);
1380	// Give up if the index of the first GEP is loop invariant.
1381	if (CurLoop->isLoopInvariant(V: FirstOffset))
1382	return false;
1383
1384	// Give up if base doesn't have same type.
1385	if (FirstBase->getType() != SecondBase->getType())
1386	return false;
1387
1388	Instruction *FirstOffsetDef = dyn_cast<Instruction>(Val: FirstOffset);
1389
1390	// Check if the second operand of first GEP has constant coefficient.
1391	// For an example, for the following code, we won't gain anything by
1392	// hoisting the second GEP out because the second GEP can be folded away.
1393	// %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
1394	// %67 = shl i64 %scevgep.sum.ur159, 2
1395	// %uglygep160 = getelementptr i8 %65, i64 %67*
1396	// %uglygep161 = getelementptr i8 %uglygep160, i64 -1024*
1397
1398	// Skip constant shift instruction which may be generated by Splitting GEPs.
1399	if (FirstOffsetDef && FirstOffsetDef->isShift() &&
1400	isa<ConstantInt>(Val: FirstOffsetDef->getOperand(i: `1`)))
1401	FirstOffsetDef = dyn_cast<Instruction>(Val: FirstOffsetDef->getOperand(i: `0`));
1402
1403	// Give up if FirstOffsetDef is an Add or Sub with constant.
1404	// Because it may not profitable at all due to constant folding.
1405	if (FirstOffsetDef)
1406	if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Val: FirstOffsetDef)) {
1407	unsigned opc = BO->getOpcode();
1408	if ((opc == Instruction::Add \|\| opc == Instruction::Sub) &&
1409	(isa<ConstantInt>(Val: BO->getOperand(i_nocapture: `0`)) \|\|
1410	isa<ConstantInt>(Val: BO->getOperand(i_nocapture: `1`))))
1411	return false;
1412	}
1413	return true;
1414	}
1415
1416	bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value V, Loop L) {
1417	// TODO: Could look at uses of globals, but we need to make sure we are
1418	// looking at the correct function.
1419	if (isa<Constant>(Val: V))
1420	return false;
1421
1422	int UsesInLoop = `0`;
1423	for (User *U : V->users()) {
1424	if (Instruction *User = dyn_cast<Instruction>(Val: U))
1425	if (L->contains(Inst: User))
1426	if (++UsesInLoop > `1`)
1427	return true;
1428	}
1429	return false;
1430	}
1431
1432	void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
1433	GetElementPtrInst *Second) {
1434	Value *Offset1 = First->getOperand(i_nocapture: `1`);
1435	Value *Offset2 = Second->getOperand(i_nocapture: `1`);
1436	First->setOperand(i_nocapture: `1`, Val_nocapture: Offset2);
1437	Second->setOperand(i_nocapture: `1`, Val_nocapture: Offset1);
1438
1439	// We changed p+o+c to p+c+o, p+c may not be inbound anymore.
1440	const DataLayout &DAL = First->getDataLayout();
1441	APInt Offset(DAL.getIndexSizeInBits(
1442	AS: cast<PointerType>(Val: First->getType())->getAddressSpace()),
1443	`0`);
1444	Value *NewBase =
1445	First->stripAndAccumulateInBoundsConstantOffsets(DL: DAL, Offset);
1446	uint64_t ObjectSize;
1447	if (!getObjectSize(Ptr: NewBase, Size&: ObjectSize, DL: DAL, TLI) \|\|
1448	Offset.ugt(RHS: ObjectSize)) {
1449	// TODO(gep_nowrap): Make flag preservation more precise.
1450	First->setNoWrapFlags(GEPNoWrapFlags::none());
1451	Second->setNoWrapFlags(GEPNoWrapFlags::none());
1452	} else
1453	First->setIsInBounds(true);
1454	}
1455
1456	void SeparateConstOffsetFromGEPPass::printPipeline(
1457	raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1458	static_cast<PassInfoMixin<SeparateConstOffsetFromGEPPass> >(this*)
1459	->printPipeline(OS, MapClassName2PassName);
1460	OS << `'<'`;
1461	if (LowerGEP)
1462	OS << "lower-gep";
1463	OS << `'>'`;
1464	}
1465
1466	PreservedAnalyses
1467	SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
1468	auto *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F);
1469	auto *LI = &AM.getResult<LoopAnalysis>(IR&: F);
1470	auto *TLI = &AM.getResult<TargetLibraryAnalysis>(IR&: F);
1471	auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
1472	return AM.getResult<TargetIRAnalysis>(IR&: F);
1473	};
1474	SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
1475	if (!Impl.run(F))
1476	return PreservedAnalyses::all();
1477	PreservedAnalyses PA;
1478	PA.preserveSet<CFGAnalyses>();
1479	return PA;
1480	}
1481

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp