SeparateConstOffsetFromGEP.cpp source code [llvm_projects/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp]

1	//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Loop unrolling may create many similar GEPs for array accesses.
10	// e.g., a 2-level loop
11	//
12	// float a[32][32]; // global variable
13	//
14	// for (int i = 0; i < 2; ++i) {
15	// for (int j = 0; j < 2; ++j) {
16	// ...
17	// ... = a[x + i][y + j];
18	// ...
19	// }
20	// }
21	//
22	// will probably be unrolled to:
23	//
24	// gep %a, 0, %x, %y; load
25	// gep %a, 0, %x, %y + 1; load
26	// gep %a, 0, %x + 1, %y; load
27	// gep %a, 0, %x + 1, %y + 1; load
28	//
29	// LLVM's GVN does not use partial redundancy elimination yet, and is thus
30	// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
31	// significant slowdown in targets with limited addressing modes. For instance,
32	// because the PTX target does not support the reg+reg addressing mode, the
33	// NVPTX backend emits PTX code that literally computes the pointer address of
34	// each GEP, wasting tons of registers. It emits the following PTX for the
35	// first load and similar PTX for other loads.
36	//
37	// mov.u32 %r1, %x;
38	// mov.u32 %r2, %y;
39	// mul.wide.u32 %rl2, %r1, 128;
40	// mov.u64 %rl3, a;
41	// add.s64 %rl4, %rl3, %rl2;
42	// mul.wide.u32 %rl5, %r2, 4;
43	// add.s64 %rl6, %rl4, %rl5;
44	// ld.global.f32 %f1, [%rl6];
45	//
46	// To reduce the register pressure, the optimization implemented in this file
47	// merges the common part of a group of GEPs, so we can compute each pointer
48	// address by adding a simple offset to the common part, saving many registers.
49	//
50	// It works by splitting each GEP into a variadic base and a constant offset.
51	// The variadic base can be computed once and reused by multiple GEPs, and the
52	// constant offsets can be nicely folded into the reg+immediate addressing mode
53	// (supported by most targets) without using any extra register.
54	//
55	// For instance, we transform the four GEPs and four loads in the above example
56	// into:
57	//
58	// base = gep a, 0, x, y
59	// load base
60	// load base + 1 * sizeof(float)
61	// load base + 32 * sizeof(float)
62	// load base + 33 * sizeof(float)
63	//
64	// Given the transformed IR, a backend that supports the reg+immediate
65	// addressing mode can easily fold the pointer arithmetics into the loads. For
66	// example, the NVPTX backend can easily fold the pointer arithmetics into the
67	// ld.global.f32 instructions, and the resultant PTX uses much fewer registers.
68	//
69	// mov.u32 %r1, %tid.x;
70	// mov.u32 %r2, %tid.y;
71	// mul.wide.u32 %rl2, %r1, 128;
72	// mov.u64 %rl3, a;
73	// add.s64 %rl4, %rl3, %rl2;
74	// mul.wide.u32 %rl5, %r2, 4;
75	// add.s64 %rl6, %rl4, %rl5;
76	// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
77	// ld.global.f32 %f2, [%rl6+4]; // much better
78	// ld.global.f32 %f3, [%rl6+128]; // much better
79	// ld.global.f32 %f4, [%rl6+132]; // much better
80	//
81	// Another improvement enabled by the LowerGEP flag is to lower a GEP with
82	// multiple indices to multiple GEPs with a single index.
83	// Such transformation can have following benefits:
84	// (1) It can always extract constants in the indices of structure type.
85	// (2) After such Lowering, there are more optimization opportunities such as
86	// CSE, LICM and CGP.
87	//
88	// E.g. The following GEPs have multiple indices:
89	// BB1:
90	// %p = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 3
91	// load %p
92	// ...
93	// BB2:
94	// %p2 = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j2, i32 2
95	// load %p2
96	// ...
97	//
98	// We can not do CSE to the common part related to index "i64 %i". Lowering
99	// GEPs can achieve such goals.
100	//
101	// This pass will lower a GEP with multiple indices into multiple GEPs with a
102	// single index:
103	// BB1:
104	// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity
105	// %3 = getelementptr i8, ptr %ptr, i64 %2 ; CSE opportunity
106	// %4 = mul i64 %j1, length_of_struct
107	// %5 = getelementptr i8, ptr %3, i64 %4
108	// %p = getelementptr i8, ptr %5, struct_field_3 ; Constant offset
109	// load %p
110	// ...
111	// BB2:
112	// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity
113	// %9 = getelementptr i8, ptr %ptr, i64 %8 ; CSE opportunity
114	// %10 = mul i64 %j2, length_of_struct
115	// %11 = getelementptr i8, ptr %9, i64 %10
116	// %p2 = getelementptr i8, ptr %11, struct_field_2 ; Constant offset
117	// load %p2
118	// ...
119	//
120	// Lowering GEPs can also benefit other passes such as LICM and CGP.
121	// LICM (Loop Invariant Code Motion) can not hoist/sink a GEP of multiple
122	// indices if one of the index is variant. If we lower such GEP into invariant
123	// parts and variant parts, LICM can hoist/sink those invariant parts.
124	// CGP (CodeGen Prepare) tries to sink address calculations that match the
125	// target's addressing modes. A GEP with multiple indices may not match and will
126	// not be sunk. If we lower such GEP into smaller parts, CGP may sink some of
127	// them. So we end up with a better addressing mode.
128	//
129	//===----------------------------------------------------------------------===//
130
131	#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
132	#include "llvm/ADT/APInt.h"
133	#include "llvm/ADT/DenseMap.h"
134	#include "llvm/ADT/DepthFirstIterator.h"
135	#include "llvm/ADT/SmallVector.h"
136	#include "llvm/Analysis/LoopInfo.h"
137	#include "llvm/Analysis/MemoryBuiltins.h"
138	#include "llvm/Analysis/TargetLibraryInfo.h"
139	#include "llvm/Analysis/TargetTransformInfo.h"
140	#include "llvm/Analysis/ValueTracking.h"
141	#include "llvm/IR/BasicBlock.h"
142	#include "llvm/IR/Constant.h"
143	#include "llvm/IR/Constants.h"
144	#include "llvm/IR/DataLayout.h"
145	#include "llvm/IR/DerivedTypes.h"
146	#include "llvm/IR/Dominators.h"
147	#include "llvm/IR/Function.h"
148	#include "llvm/IR/GetElementPtrTypeIterator.h"
149	#include "llvm/IR/IRBuilder.h"
150	#include "llvm/IR/InstrTypes.h"
151	#include "llvm/IR/Instruction.h"
152	#include "llvm/IR/Instructions.h"
153	#include "llvm/IR/Module.h"
154	#include "llvm/IR/PassManager.h"
155	#include "llvm/IR/PatternMatch.h"
156	#include "llvm/IR/Type.h"
157	#include "llvm/IR/User.h"
158	#include "llvm/IR/Value.h"
159	#include "llvm/InitializePasses.h"
160	#include "llvm/Pass.h"
161	#include "llvm/Support/Casting.h"
162	#include "llvm/Support/CommandLine.h"
163	#include "llvm/Support/ErrorHandling.h"
164	#include "llvm/Support/raw_ostream.h"
165	#include "llvm/Transforms/Scalar.h"
166	#include "llvm/Transforms/Utils/Local.h"
167	#include <cassert>
168	#include <cstdint>
169	#include <string>
170
171	using namespace llvm;
172	using namespace llvm::PatternMatch;
173
174	static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
175	"disable-separate-const-offset-from-gep", cl::init(Val: false),
176	cl::desc ("Do not separate the constant offset from a GEP instruction"),
177	cl::Hidden);
178
179	// Setting this flag may emit false positives when the input module already
180	// contains dead instructions. Therefore, we set it only in unit tests that are
181	// free of dead code.
182	static cl::opt<bool>
183	VerifyNoDeadCode("reassociate-geps-verify-no-dead-code", cl::init(Val: false),
184	cl::desc ("Verify this pass produces no dead code"),
185	cl::Hidden);
186
187	namespace {
188
189	/// A helper class for separating a constant offset from a GEP index.
190	///
191	/// In real programs, a GEP index may be more complicated than a simple addition
192	/// of something and a constant integer which can be trivially splitted. For
193	/// example, to split ((a << 3) \| 5) + b, we need to search deeper for the
194	/// constant offset, so that we can separate the index to (a << 3) + b and 5.
195	///
196	/// Therefore, this class looks into the expression that computes a given GEP
197	/// index, and tries to find a constant integer that can be hoisted to the
198	/// outermost level of the expression as an addition. Not every constant in an
199	/// expression can jump out. e.g., we cannot transform (b (a + 5)) to (b * a +*
200	/// 5); nor can we transform (3 (a + 5)) to (3 * a + 5), however in this case,*
201	/// -instcombine probably already optimized (3 (a + 5)) to (3 * a + 15).*
202	class ConstantOffsetExtractor {
203	public:
204	/// Extracts a constant offset from the given GEP index. It returns the
205	/// new index representing the remainder (equal to the original index minus
206	/// the constant offset), or nullptr if we cannot extract a constant offset.
207	/// \p Idx The given GEP index
208	/// \p GEP The given GEP
209	/// \p UserChainTail Outputs the tail of UserChain so that we can
210	/// garbage-collect unused instructions in UserChain.
211	/// \p PreservesNUW Outputs whether the extraction allows preserving the
212	/// GEP's nuw flag, if it has one.
213	static Value Extract(Value Idx, GetElementPtrInst *GEP,
214	User &UserChainTail, bool* &PreservesNUW);
215
216	/// Looks for a constant offset from the given GEP index without extracting
217	/// it. It returns the numeric value of the extracted constant offset (0 if
218	/// failed). The meaning of the arguments are the same as Extract.
219	static APInt Find(Value Idx, GetElementPtrInst GEP);
220
221	private:
222	ConstantOffsetExtractor(BasicBlock::iterator InsertionPt)
223	: IP (InsertionPt), DL(InsertionPt ->getDataLayout()) {}
224
225	/// Searches the expression that computes V for a non-zero constant C s.t.
226	/// V can be reassociated into the form V' + C. If the searching is
227	/// successful, returns C and update UserChain as a def-use chain from C to V;
228	/// otherwise, UserChain is empty.
229	///
230	/// \p V The given expression
231	/// \p GEP The base GEP instruction, used for determining relevant
232	/// types, flags, and non-negativity needed for safe
233	/// reassociation
234	/// \p Idx The original index of the GEP
235	/// \p SignExtended Whether V will be sign-extended in the computation of
236	/// the GEP index
237	/// \p ZeroExtended Whether V will be zero-extended in the computation of
238	/// the GEP index
239	APInt find(Value V, GetElementPtrInst GEP, Value Idx, bool* SignExtended,
240	bool ZeroExtended);
241
242	/// A helper function to look into both operands of a binary operator.
243	APInt findInEitherOperand(BinaryOperator BO, bool* SignExtended,
244	bool ZeroExtended);
245
246	/// After finding the constant offset C from the GEP index I, we build a new
247	/// index I' s.t. I' + C = I. This function builds and returns the new
248	/// index I' according to UserChain produced by function "find".
249	///
250	/// The building conceptually takes two steps:
251	/// 1) iteratively distribute sext/zext/trunc towards the leaves of the
252	/// expression tree that computes I
253	/// 2) reassociate the expression tree to the form I' + C.
254	///
255	/// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
256	/// sext to a, b and 5 so that we have
257	/// sext(a) + (sext(b) + 5).
258	/// Then, we reassociate it to
259	/// (sext(a) + sext(b)) + 5.
260	/// Given this form, we know I' is sext(a) + sext(b).
261	Value *rebuildWithoutConstOffset();
262
263	/// After the first step of rebuilding the GEP index without the constant
264	/// offset, distribute sext/zext/trunc to the operands of all operators in
265	/// UserChain. e.g., zext(sext(a + (b + 5)) (assuming no overflow) =>
266	/// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
267	///
268	/// The function also updates UserChain to point to new subexpressions after
269	/// distributing sext/zext/trunc. e.g., the old UserChain of the above example
270	/// is
271	/// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
272	/// and the new UserChain is
273	/// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
274	/// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))
275	///
276	/// \p ChainIndex The index to UserChain. ChainIndex is initially
277	/// UserChain.size() - 1, and is decremented during
278	/// the recursion.
279	Value distributeCastsAndCloneChain(unsigned* ChainIndex);
280
281	/// Reassociates the GEP index to the form I' + C and returns I'.
282	Value removeConstOffset(unsigned* ChainIndex);
283
284	/// A helper function to apply CastInsts, a list of sext/zext/trunc, to value
285	/// V. e.g., if CastInsts = [sext i32 to i64, zext i16 to i32], this function
286	/// returns "sext i32 (zext i16 V to i32) to i64".
287	Value applyCasts(Value V);
288
289	/// A helper function that returns whether we can trace into the operands
290	/// of binary operator BO for a constant offset.
291	///
292	/// \p SignExtended Whether BO is surrounded by sext
293	/// \p ZeroExtended Whether BO is surrounded by zext
294	/// \p GEP The base GEP instruction, used for determining relevant
295	/// types and flags needed for safe reassociation.
296	/// \p Idx The original index of the GEP
297	bool canTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
298	GetElementPtrInst GEP, Value Idx);
299
300	/// The path from the constant offset to the old GEP index. e.g., if the GEP
301	/// index is "a b + (c + 5)". After running function find, UserChain[0] will*
302	/// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
303	/// UserChain[2] will be the entire expression "a b + (c + 5)".*
304	///
305	/// This path helps to rebuild the new GEP index.
306	SmallVector<User *, `8`> UserChain;
307
308	/// A data structure used in rebuildWithoutConstOffset. Contains all
309	/// sext/zext/trunc instructions along UserChain.
310	SmallVector<CastInst *, `16`> CastInsts;
311
312	/// Insertion position of cloned instructions.
313	BasicBlock::iterator IP;
314
315	const DataLayout &DL;
316	};
317
318	/// A pass that tries to split every GEP in the function into a variadic
319	/// base and a constant offset. It is a FunctionPass because searching for the
320	/// constant offset may inspect other basic blocks.
321	class SeparateConstOffsetFromGEPLegacyPass : public FunctionPass {
322	public:
323	static char ID;
324
325	SeparateConstOffsetFromGEPLegacyPass(bool LowerGEP = false)
326	: FunctionPass (ID), LowerGEP(LowerGEP) {
327	initializeSeparateConstOffsetFromGEPLegacyPassPass(
328	*PassRegistry::getPassRegistry());
329	}
330
331	void getAnalysisUsage(AnalysisUsage &AU) const override {
332	AU.addRequired<DominatorTreeWrapperPass>();
333	AU.addRequired<TargetTransformInfoWrapperPass>();
334	AU.addRequired<LoopInfoWrapperPass>();
335	AU.setPreservesCFG();
336	AU.addRequired<TargetLibraryInfoWrapperPass>();
337	}
338
339	bool runOnFunction(Function &F) override;
340
341	private:
342	bool LowerGEP;
343	};
344
345	/// A pass that tries to split every GEP in the function into a variadic
346	/// base and a constant offset. It is a FunctionPass because searching for the
347	/// constant offset may inspect other basic blocks.
348	class SeparateConstOffsetFromGEP {
349	public:
350	SeparateConstOffsetFromGEP(
351	DominatorTree DT, LoopInfo LI, TargetLibraryInfo *TLI,
352	function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
353	: DT(DT), LI(LI), TLI(TLI), GetTTI (GetTTI), LowerGEP(LowerGEP) {}
354
355	bool run(Function &F);
356
357	private:
358	/// Track the operands of an add or sub.
359	using ExprKey = std::pair<Value , Value >;
360
361	/// Create a pair for use as a map key for a commutable operation.
362	static ExprKey createNormalizedCommutablePair(Value A, Value B) {
363	if (A < B)
364	return {A, B};
365	return {B, A};
366	}
367
368	/// Tries to split the given GEP into a variadic base and a constant offset,
369	/// and returns true if the splitting succeeds.
370	bool splitGEP(GetElementPtrInst *GEP);
371
372	/// Tries to reorder the given GEP with the GEP that produces the base if
373	/// doing so results in producing a constant offset as the outermost
374	/// index.
375	bool reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI);
376
377	/// Lower a GEP with multiple indices into multiple GEPs with a single index.
378	/// Function splitGEP already split the original GEP into a variadic part and
379	/// a constant offset (i.e., AccumulativeByteOffset). This function lowers the
380	/// variadic part into a set of GEPs with a single index and applies
381	/// AccumulativeByteOffset to it.
382	/// \p Variadic The variadic part of the original GEP.
383	/// \p AccumulativeByteOffset The constant offset.
384	void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic,
385	const APInt &AccumulativeByteOffset);
386
387	/// Finds the constant offset within each index and accumulates them. If
388	/// LowerGEP is true, it finds in indices of both sequential and structure
389	/// types, otherwise it only finds in sequential indices. The output
390	/// NeedsExtraction indicates whether we successfully find a non-zero constant
391	/// offset.
392	APInt accumulateByteOffset(GetElementPtrInst GEP, bool* &NeedsExtraction);
393
394	/// Canonicalize array indices to pointer-size integers. This helps to
395	/// simplify the logic of splitting a GEP. For example, if a + b is a
396	/// pointer-size integer, we have
397	/// gep base, a + b = gep (gep base, a), b
398	/// However, this equality may not hold if the size of a + b is smaller than
399	/// the pointer size, because LLVM conceptually sign-extends GEP indices to
400	/// pointer size before computing the address
401	/// (http://llvm.org/docs/LangRef.html#id181).
402	///
403	/// This canonicalization is very likely already done in clang and
404	/// instcombine. Therefore, the program will probably remain the same.
405	///
406	/// Returns true if the module changes.
407	///
408	/// Verified in @i32_add in split-gep.ll
409	bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP);
410
411	/// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
412	/// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
413	/// the constant offset. After extraction, it becomes desirable to reunion the
414	/// distributed sexts. For example,
415	///
416	/// &a[sext(i +nsw (j +nsw 5)]
417	/// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)]
418	/// => constant extraction &a[sext(i) + sext(j)] + 5
419	/// => reunion &a[sext(i +nsw j)] + 5
420	bool reuniteExts(Function &F);
421
422	/// A helper that reunites sexts in an instruction.
423	bool reuniteExts(Instruction *I);
424
425	/// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
426	Instruction *findClosestMatchingDominator(
427	ExprKey Key, Instruction *Dominatee,
428	DenseMap<ExprKey, SmallVector<Instruction *, `2`>> &DominatingExprs);
429
430	/// Verify F is free of dead code.
431	void verifyNoDeadCode(Function &F);
432
433	bool hasMoreThanOneUseInLoop(Value v, Loop L);
434
435	// Swap the index operand of two GEP.
436	void swapGEPOperand(GetElementPtrInst First, GetElementPtrInst Second);
437
438	// Check if it is safe to swap operand of two GEP.
439	bool isLegalToSwapOperand(GetElementPtrInst First, GetElementPtrInst Second,
440	Loop *CurLoop);
441
442	const DataLayout DL = nullptr*;
443	DominatorTree DT = nullptr*;
444	LoopInfo *LI;
445	TargetLibraryInfo *TLI;
446	// Retrieved lazily since not always used.
447	function_ref<TargetTransformInfo &(Function &)> GetTTI;
448
449	/// Whether to lower a GEP with multiple indices into arithmetic operations or
450	/// multiple GEPs with a single index.
451	bool LowerGEP;
452
453	DenseMap<ExprKey, SmallVector<Instruction *, `2`>> DominatingAdds;
454	DenseMap<ExprKey, SmallVector<Instruction *, `2`>> DominatingSubs;
455	};
456
457	} // end anonymous namespace
458
459	char SeparateConstOffsetFromGEPLegacyPass::ID = `0`;
460
461	INITIALIZE_PASS_BEGIN(
462	SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
463	"Split GEPs to a variadic base and a constant offset for better CSE", false,
464	false)
465	INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
466	INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
467	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
468	INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
469	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
470	INITIALIZE_PASS_END(
471	SeparateConstOffsetFromGEPLegacyPass, "separate-const-offset-from-gep",
472	"Split GEPs to a variadic base and a constant offset for better CSE", false,
473	false)
474
475	FunctionPass llvm::createSeparateConstOffsetFromGEPPass(bool* LowerGEP) {
476	return new SeparateConstOffsetFromGEPLegacyPass (LowerGEP);
477	}
478
479	// Checks if it is safe to reorder an add/sext result used in a GEP.
480	//
481	// An inbounds GEP does not guarantee that the index is non-negative.
482	// This helper checks first if the index is known non-negative. If the index is
483	// non-negative, the transform is always safe.
484	// Second, it checks whether the GEP is inbounds and directly based on a global
485	// or an alloca, which are required to prove futher transform validity.
486	// If the GEP:
487	// - Has a zero offset from the base, the index is non-negative (any negative
488	// value would produce poison/UB)
489	// - Has ObjectSize < (2^(N-1) - C + 1) stride, where C is a constant from the*
490	// add, stride is the element size of Idx, and N is bitwidth of Idx.
491	// This is because with this pattern:
492	// %add = add iN %val, C
493	// %sext = sext iN %add to i64
494	// %gep = getelementptr inbounds TYPE, %sext
495	// The worst-case is when %val sign-flips to produce the smallest magnitude
496	// negative value, at 2^(N-1)-1. In this case, the add/sext is -(2^(N-1)-C+1),
497	// and the sext/add is 2^(N-1)+C-1 (2^N difference). The original add/sext
498	// only produces a defined GEP when -(2^(N-1)-C+1) is inbounds. So, if
499	// ObjectSize < (2^(N-1) - C + 1) stride, it is impossible for the*
500	// worst-case sign-flip to be defined.
501	// Note that in this case the GEP is not neccesarily non-negative, but any
502	// negative results will still produce the same behavior in the reordered
503	// version with a defined GEP.
504	// This can also work for negative C, but the threshold is instead
505	// (2^(N-1)+C)stride, since the sign-flip is done in reverse and is instead*
506	// producing a large positive value that still needs to be inbounds to the
507	// object size. If C is negative, we cannot make any useful assumptions based
508	// on the offset, since it would need to be extremely large.
509	static bool canReorderAddSextToGEP(const GetElementPtrInst *GEP,
510	const Value Idx, const* BinaryOperator *Add,
511	const DataLayout &DL) {
512	if (isKnownNonNegative(V: Idx, SQ: DL))
513	return true;
514
515	if (!GEP->isInBounds())
516	return false;
517
518	const Value *Ptr = GEP->getPointerOperand();
519	int64_t Offset = `0`;
520	const Value *Base =
521	GetPointerBaseWithConstantOffset(Ptr: const_cast<Value *>(Ptr), Offset, DL);
522
523	// We need one of the operands to be a constant to be able to trace into the
524	// operator.
525	const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Add->getOperand(i_nocapture: `0`));
526	if (!CI)
527	CI = dyn_cast<ConstantInt>(Val: Add->getOperand(i_nocapture: `1`));
528	if (!CI)
529	return false;
530	// Calculate the threshold
531	APInt Threshold;
532	unsigned N = Add->getType()->getIntegerBitWidth();
533	TypeSize ElemSize = DL.getTypeAllocSize(Ty: GEP->getSourceElementType());
534	if (ElemSize.isScalable())
535	return false;
536	uint64_t Stride = ElemSize.getFixedValue();
537	if (!CI->isNegative()) {
538	// (2^(N-1) - C + 1) stride*
539	Threshold = (APInt::getSignedMinValue(numBits: N).zext(width: `128`) -
540	CI->getValue().zextOrTrunc(width: `128`) + `1`) *
541	APInt (`128`, Stride);
542	} else {
543	// (2^(N-1) + C) stride*
544	Threshold = (APInt::getSignedMinValue(numBits: N).zext(width: `128`) +
545	CI->getValue().sextOrTrunc(width: `128`)) *
546	APInt (`128`, Stride);
547	}
548
549	if (Base && (isa<AllocaInst>(Val: Base) \|\| isa<GlobalObject>(Val: Base)) &&
550	!CI->isNegative()) {
551	// If the offset is zero from an alloca or global, inbounds is sufficient to
552	// prove non-negativity if one add operand is non-negative
553	if (Offset == `0`)
554	return true;
555
556	// Check if the Offset < Threshold (positive CI only) otherwise
557	if (Offset < `0`)
558	return true;
559	if (APInt (`128`, (uint64_t)Offset).ult(RHS: Threshold))
560	return true;
561	} else {
562	// If we can't determine the offset from the base object, we can still use
563	// the underlying object and type size constraints
564	Base = getUnderlyingObject(V: Ptr);
565	// Can only prove non-negativity if the base object is known
566	if (!(isa<AllocaInst>(Val: Base) \|\| isa<GlobalObject>(Val: Base)))
567	return false;
568	}
569
570	// Check if the ObjectSize < Threshold (for both positive or negative C)
571	uint64_t ObjSize = `0`;
572	if (const auto *AI = dyn_cast<AllocaInst>(Val: Base)) {
573	if (auto AllocSize = AI->getAllocationSize(DL))
574	if (!AllocSize ->isScalable())
575	ObjSize = AllocSize ->getFixedValue();
576	} else if (const auto *GV = dyn_cast<GlobalVariable>(Val: Base)) {
577	TypeSize GVSize = DL.getTypeAllocSize(Ty: GV->getValueType());
578	if (!GVSize.isScalable())
579	ObjSize = GVSize.getFixedValue();
580	}
581	if (ObjSize > `0` && APInt (`128`, ObjSize).ult(RHS: Threshold))
582	return true;
583
584	return false;
585	}
586
587	bool ConstantOffsetExtractor::canTraceInto(bool SignExtended, bool ZeroExtended,
588	BinaryOperator *BO,
589	GetElementPtrInst GEP, Value Idx) {
590	// We only consider ADD, SUB and OR, because a non-zero constant found in
591	// expressions composed of these operations can be easily hoisted as a
592	// constant offset by reassociation.
593	if (BO->getOpcode() != Instruction::Add &&
594	BO->getOpcode() != Instruction::Sub &&
595	BO->getOpcode() != Instruction::Or) {
596	return false;
597	}
598
599	// Do not trace into "or" unless it is equivalent to "add nuw nsw".
600	// This is the case if the or's disjoint flag is set.
601	if (BO->getOpcode() == Instruction::Or &&
602	!cast<PossiblyDisjointInst>(Val: BO)->isDisjoint())
603	return false;
604
605	// FIXME: We don't currently support constants from the RHS of subs,
606	// when we are zero-extended, because we need a way to zero-extended
607	// them before they are negated.
608	if (ZeroExtended && !SignExtended && BO->getOpcode() == Instruction::Sub)
609	return false;
610
611	// In addition, tracing into BO requires that its surrounding sext/zext/trunc
612	// (if any) is distributable to both operands.
613	//
614	// Suppose BO = A op B.
615	// SignExtended \| ZeroExtended \| Distributable?
616	// --------------+--------------+----------------------------------
617	// 0 \| 0 \| true because no s/zext exists
618	// 0 \| 1 \| zext(BO) == zext(A) op zext(B)
619	// 1 \| 0 \| sext(BO) == sext(A) op sext(B)
620	// 1 \| 1 \| zext(sext(BO)) ==
621	// \| \| zext(sext(A)) op zext(sext(B))
622	if (BO->getOpcode() == Instruction::Add && !ZeroExtended && GEP) {
623	// If a + b >= 0 and (a >= 0 or b >= 0), then
624	// sext(a + b) = sext(a) + sext(b)
625	// even if the addition is not marked nsw.
626	//
627	// Leveraging this invariant, we can trace into an sext'ed inbound GEP
628	// index under certain conditions (see canReorderAddSextToGEP).
629	//
630	// Verified in @sext_add in split-gep.ll.
631	if (canReorderAddSextToGEP(GEP, Idx, Add: BO, DL))
632	return true;
633	}
634
635	// For a sext(add nuw), allow tracing through when the enclosing GEP is both
636	// inbounds and nuw.
637	bool GEPInboundsNUW =
638	GEP ? (GEP->isInBounds() && GEP->hasNoUnsignedWrap()) : false;
639	if (BO->getOpcode() == Instruction::Add && SignExtended && !ZeroExtended &&
640	GEPInboundsNUW && BO->hasNoUnsignedWrap())
641	return true;
642
643	// sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
644	// zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
645	if (BO->getOpcode() == Instruction::Add \|\|
646	BO->getOpcode() == Instruction::Sub) {
647	if (SignExtended && !BO->hasNoSignedWrap())
648	return false;
649	if (ZeroExtended && !BO->hasNoUnsignedWrap())
650	return false;
651	}
652
653	return true;
654	}
655
656	APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
657	bool SignExtended,
658	bool ZeroExtended) {
659	// Save off the current height of the chain, in case we need to restore it.
660	size_t ChainLength = UserChain.size();
661
662	// BO cannot use information from the base GEP at this point, so clear it.
663	APInt ConstantOffset =
664	find(V: BO->getOperand(i_nocapture: `0`), GEP: nullptr, Idx: nullptr, SignExtended, ZeroExtended);
665	// If we found a constant offset in the left operand, stop and return that.
666	// This shortcut might cause us to miss opportunities of combining the
667	// constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
668	// However, such cases are probably already handled by -instcombine,
669	// given this pass runs after the standard optimizations.
670	if (ConstantOffset != `0`) return ConstantOffset;
671
672	// Reset the chain back to where it was when we started exploring this node,
673	// since visiting the LHS didn't pan out.
674	UserChain.resize(N: ChainLength);
675
676	ConstantOffset =
677	find(V: BO->getOperand(i_nocapture: `1`), GEP: nullptr, Idx: nullptr, SignExtended, ZeroExtended);
678	// If U is a sub operator, negate the constant offset found in the right
679	// operand.
680	if (BO->getOpcode() == Instruction::Sub)
681	ConstantOffset = -ConstantOffset;
682
683	// If RHS wasn't a suitable candidate either, reset the chain again.
684	if (ConstantOffset == `0`)
685	UserChain.resize(N: ChainLength);
686
687	return ConstantOffset;
688	}
689
690	APInt ConstantOffsetExtractor::find(Value V, GetElementPtrInst GEP,
691	Value Idx, bool* SignExtended,
692	bool ZeroExtended) {
693	// TODO(jingyue): We could trace into integer/pointer casts, such as
694	// inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
695	// integers because it gives good enough results for our benchmarks.
696	unsigned BitWidth = cast<IntegerType>(Val: V->getType())->getBitWidth();
697
698	// We cannot do much with Values that are not a User, such as an Argument.
699	User *U = dyn_cast<User>(Val: V);
700	if (U == nullptr) return APInt (BitWidth, `0`);
701
702	APInt ConstantOffset(BitWidth, `0`);
703	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: V)) {
704	// Hooray, we found it!
705	ConstantOffset = CI->getValue();
706	} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Val: V)) {
707	// Trace into subexpressions for more hoisting opportunities.
708	if (canTraceInto(SignExtended, ZeroExtended, BO, GEP, Idx))
709	ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
710	} else if (isa<TruncInst>(Val: V)) {
711	ConstantOffset =
712	find(V: U->getOperand(i: `0`), GEP, Idx, SignExtended, ZeroExtended)
713	.trunc(width: BitWidth);
714	} else if (isa<SExtInst>(Val: V)) {
715	ConstantOffset =
716	find(V: U->getOperand(i: `0`), GEP, Idx, / SignExtended / true, ZeroExtended)
717	.sext(width: BitWidth);
718	} else if (isa<ZExtInst>(Val: V)) {
719	// As an optimization, we can clear the SignExtended flag because
720	// sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
721	ConstantOffset = find(V: U->getOperand(i: `0`), GEP, Idx, / SignExtended / false,
722	/ ZeroExtended / true)
723	.zext(width: BitWidth);
724	}
725
726	// If we found a non-zero constant offset, add it to the path for
727	// rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
728	// help this optimization.
729	if (ConstantOffset != `0`)
730	UserChain.push_back(Elt: U);
731	return ConstantOffset;
732	}
733
734	Value ConstantOffsetExtractor::applyCasts(Value V) {
735	Value *Current = V;
736	// CastInsts is built in the use-def order. Therefore, we apply them to V
737	// in the reversed order.
738	for (CastInst *I : llvm::reverse(C&: CastInsts)) {
739	if (Constant *C = dyn_cast<Constant>(Val: Current)) {
740	// Try to constant fold the cast.
741	Current = ConstantFoldCastOperand(Opcode: I->getOpcode(), C, DestTy: I->getType(), DL);
742	if (Current)
743	continue;
744	}
745
746	Instruction *Cast = I->clone();
747	Cast->setOperand(i: `0`, Val: Current);
748	// In ConstantOffsetExtractor::find we do not analyze nuw/nsw for trunc, so
749	// we assume that it is ok to redistribute trunc over add/sub/or. But for
750	// example (add (trunc nuw A), (trunc nuw B)) is more poisonous than (trunc
751	// nuw (add A, B))). To make such redistributions legal we drop all the
752	// poison generating flags from cloned trunc instructions here.
753	if (isa<TruncInst>(Val: Cast))
754	Cast->dropPoisonGeneratingFlags();
755	Cast->insertBefore(BB&: *IP ->getParent(), InsertPos: IP);
756	Current = Cast;
757	}
758	return Current;
759	}
760
761	Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
762	distributeCastsAndCloneChain(ChainIndex: UserChain.size() - `1`);
763	// Remove all nullptrs (used to be sext/zext/trunc) from UserChain.
764	unsigned NewSize = `0`;
765	for (User *I : UserChain) {
766	if (I != nullptr) {
767	UserChain [NewSize] = I;
768	NewSize++;
769	}
770	}
771	UserChain.resize(N: NewSize);
772	return removeConstOffset(ChainIndex: UserChain.size() - `1`);
773	}
774
775	Value *
776	ConstantOffsetExtractor::distributeCastsAndCloneChain(unsigned ChainIndex) {
777	User *U = UserChain [ChainIndex];
778	if (ChainIndex == `0`) {
779	assert(isa<ConstantInt>(U));
780	// If U is a ConstantInt, applyCasts will return a ConstantInt as well.
781	return UserChain [ChainIndex] = cast<ConstantInt>(Val: applyCasts(V: U));
782	}
783
784	if (CastInst *Cast = dyn_cast<CastInst>(Val: U)) {
785	assert(
786	(isa<SExtInst>(Cast) \|\| isa<ZExtInst>(Cast) \|\| isa<TruncInst>(Cast)) &&
787	"Only following instructions can be traced: sext, zext & trunc");
788	CastInsts.push_back(Elt: Cast);
789	UserChain [ChainIndex] = nullptr;
790	return distributeCastsAndCloneChain(ChainIndex: ChainIndex - `1`);
791	}
792
793	// Function find only trace into BinaryOperator and CastInst.
794	BinaryOperator *BO = cast<BinaryOperator>(Val: U);
795	// OpNo = which operand of BO is UserChain[ChainIndex - 1]
796	unsigned OpNo = (BO->getOperand(i_nocapture: `0`) == UserChain [ChainIndex - `1`] ? `0` : `1`);
797	Value *TheOther = applyCasts(V: BO->getOperand(i_nocapture: `1` - OpNo));
798	Value *NextInChain = distributeCastsAndCloneChain(ChainIndex: ChainIndex - `1`);
799
800	BinaryOperator NewBO = nullptr*;
801	if (OpNo == `0`) {
802	NewBO = BinaryOperator::Create(Op: BO->getOpcode(), S1: NextInChain, S2: TheOther,
803	Name: BO->getName(), InsertBefore: IP);
804	} else {
805	NewBO = BinaryOperator::Create(Op: BO->getOpcode(), S1: TheOther, S2: NextInChain,
806	Name: BO->getName(), InsertBefore: IP);
807	}
808	return UserChain [ChainIndex] = NewBO;
809	}
810
811	Value ConstantOffsetExtractor::removeConstOffset(unsigned* ChainIndex) {
812	if (ChainIndex == `0`) {
813	assert(isa<ConstantInt>(UserChain[ChainIndex]));
814	return ConstantInt::getNullValue(Ty: UserChain [ChainIndex]->getType());
815	}
816
817	BinaryOperator *BO = cast<BinaryOperator>(Val: UserChain [ChainIndex]);
818	assert((BO->use_empty() \|\| BO->hasOneUse()) &&
819	"distributeCastsAndCloneChain clones each BinaryOperator in "
820	"UserChain, so no one should be used more than "
821	"once");
822
823	unsigned OpNo = (BO->getOperand(i_nocapture: `0`) == UserChain [ChainIndex - `1`] ? `0` : `1`);
824	assert(BO->getOperand(OpNo) == UserChain[ChainIndex - `1`]);
825	Value *NextInChain = removeConstOffset(ChainIndex: ChainIndex - `1`);
826	Value *TheOther = BO->getOperand(i_nocapture: `1` - OpNo);
827
828	// If NextInChain is 0 and not the LHS of a sub, we can simplify the
829	// sub-expression to be just TheOther.
830	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: NextInChain)) {
831	if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == `0`))
832	return TheOther;
833	}
834
835	BinaryOperator::BinaryOps NewOp = BO->getOpcode();
836	if (BO->getOpcode() == Instruction::Or) {
837	// Rebuild "or" as "add", because "or" may be invalid for the new
838	// expression.
839	//
840	// For instance, given
841	// a \| (b + 5) where a and b + 5 have no common bits,
842	// we can extract 5 as the constant offset.
843	//
844	// However, reusing the "or" in the new index would give us
845	// (a \| b) + 5
846	// which does not equal a \| (b + 5).
847	//
848	// Replacing the "or" with "add" is fine, because
849	// a \| (b + 5) = a + (b + 5) = (a + b) + 5
850	NewOp = Instruction::Add;
851	}
852
853	BinaryOperator *NewBO;
854	if (OpNo == `0`) {
855	NewBO = BinaryOperator::Create(Op: NewOp, S1: NextInChain, S2: TheOther, Name: "", InsertBefore: IP);
856	} else {
857	NewBO = BinaryOperator::Create(Op: NewOp, S1: TheOther, S2: NextInChain, Name: "", InsertBefore: IP);
858	}
859	NewBO->takeName(V: BO);
860	return NewBO;
861	}
862
863	/// A helper function to check if reassociating through an entry in the user
864	/// chain would invalidate the GEP's nuw flag.
865	static bool allowsPreservingNUW(const User *U) {
866	if (const BinaryOperator *BO = dyn_cast<BinaryOperator>(Val: U)) {
867	// Binary operations need to be effectively add nuw.
868	auto Opcode = BO->getOpcode();
869	if (Opcode == BinaryOperator::Or) {
870	// Ors are only considered here if they are disjoint. The addition that
871	// they represent in this case is NUW.
872	assert(cast<PossiblyDisjointInst>(BO)->isDisjoint());
873	return true;
874	}
875	return Opcode == BinaryOperator::Add && BO->hasNoUnsignedWrap();
876	}
877	// UserChain can only contain ConstantInt, CastInst, or BinaryOperator.
878	// Among the possible CastInsts, only trunc without nuw is a problem: If it
879	// is distributed through an add nuw, wrapping may occur:
880	// "add nuw trunc(a), trunc(b)" is more poisonous than "trunc(add nuw a, b)"
881	if (const TruncInst *TI = dyn_cast<TruncInst>(Val: U))
882	return TI->hasNoUnsignedWrap();
883	assert((isa<CastInst>(U) \|\| isa<ConstantInt>(U)) && "Unexpected User.");
884	return true;
885	}
886
887	Value ConstantOffsetExtractor::Extract(Value Idx, GetElementPtrInst *GEP,
888	User *&UserChainTail,
889	bool &PreservesNUW) {
890	ConstantOffsetExtractor Extractor(GEP->getIterator());
891	// Find a non-zero constant offset first.
892	APInt ConstantOffset = Extractor.find(V: Idx, GEP, Idx, / SignExtended / false,
893	/ ZeroExtended / false);
894	if (ConstantOffset == `0`) {
895	UserChainTail = nullptr;
896	PreservesNUW = true;
897	return nullptr;
898	}
899
900	PreservesNUW = all_of(Range&: Extractor.UserChain, P: allowsPreservingNUW);
901
902	// Separates the constant offset from the GEP index.
903	Value *IdxWithoutConstOffset = Extractor.rebuildWithoutConstOffset();
904	UserChainTail = Extractor.UserChain.back();
905	return IdxWithoutConstOffset;
906	}
907
908	APInt ConstantOffsetExtractor::Find(Value Idx, GetElementPtrInst GEP) {
909	return ConstantOffsetExtractor (GEP->getIterator())
910	.find(V: Idx, GEP, Idx, / SignExtended / false, / ZeroExtended / false);
911	}
912
913	bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize(
914	GetElementPtrInst *GEP) {
915	bool Changed = false;
916	Type *PtrIdxTy = DL->getIndexType(PtrTy: GEP->getType());
917	gep_type_iterator GTI = gep_type_begin(GEP: *GEP);
918	for (User::op_iterator I = GEP->op_begin() + `1`, E = GEP->op_end();
919	I != E; ++I, ++GTI) {
920	// Skip struct member indices which must be i32.
921	if (GTI.isSequential()) {
922	if ((*I)->getType() != PtrIdxTy) {
923	I = CastInst::CreateIntegerCast(S: I, Ty: PtrIdxTy, isSigned: true, Name: "idxprom",
924	InsertBefore: GEP->getIterator());
925	Changed = true;
926	}
927	}
928	}
929	return Changed;
930	}
931
932	APInt SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
933	bool &NeedsExtraction) {
934	NeedsExtraction = false;
935	unsigned IdxWidth = DL->getIndexTypeSizeInBits(Ty: GEP->getType());
936	APInt AccumulativeByteOffset(IdxWidth, `0`);
937	gep_type_iterator GTI = gep_type_begin(GEP: *GEP);
938	for (unsigned I = `1`, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
939	if (GTI.isSequential()) {
940	// Constant offsets of scalable types are not really constant.
941	if (GTI.getIndexedType()->isScalableTy())
942	continue;
943
944	// Tries to extract a constant offset from this GEP index.
945	APInt ConstantOffset =
946	ConstantOffsetExtractor::Find(Idx: GEP->getOperand(i_nocapture: I), GEP)
947	.sextOrTrunc(width: IdxWidth);
948	if (ConstantOffset != `0`) {
949	NeedsExtraction = true;
950	// A GEP may have multiple indices. We accumulate the extracted
951	// constant offset to a byte offset, and later offset the remainder of
952	// the original GEP with this byte offset.
953	AccumulativeByteOffset +=
954	ConstantOffset * APInt (IdxWidth,
955	GTI.getSequentialElementStride(DL: *DL),
956	/IsSigned=/true, /ImplicitTrunc=/true);
957	}
958	} else if (LowerGEP) {
959	StructType *StTy = GTI.getStructType();
960	uint64_t Field = cast<ConstantInt>(Val: GEP->getOperand(i_nocapture: I))->getZExtValue();
961	// Skip field 0 as the offset is always 0.
962	if (Field != `0`) {
963	NeedsExtraction = true;
964	AccumulativeByteOffset +=
965	APInt (IdxWidth, DL->getStructLayout(Ty: StTy)->getElementOffset(Idx: Field),
966	/IsSigned=/true, /ImplicitTrunc=/true);
967	}
968	}
969	}
970	return AccumulativeByteOffset;
971	}
972
973	void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
974	GetElementPtrInst Variadic, const* APInt &AccumulativeByteOffset) {
975	IRBuilder<> Builder(Variadic);
976	Type *PtrIndexTy = DL->getIndexType(PtrTy: Variadic->getType());
977
978	Value *ResultPtr = Variadic->getOperand(i_nocapture: `0`);
979	Loop *L = LI->getLoopFor(BB: Variadic->getParent());
980	// Check if the base is not loop invariant or used more than once.
981	bool isSwapCandidate =
982	L && L->isLoopInvariant(V: ResultPtr) &&
983	!hasMoreThanOneUseInLoop(v: ResultPtr, L);
984	Value FirstResult = nullptr*;
985
986	gep_type_iterator GTI = gep_type_begin(GEP: *Variadic);
987	// Create an ugly GEP for each sequential index. We don't create GEPs for
988	// structure indices, as they are accumulated in the constant offset index.
989	for (unsigned I = `1`, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
990	if (GTI.isSequential()) {
991	Value *Idx = Variadic->getOperand(i_nocapture: I);
992	// Skip zero indices.
993	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Idx))
994	if (CI->isZero())
995	continue;
996
997	APInt ElementSize = APInt (PtrIndexTy->getIntegerBitWidth(),
998	GTI.getSequentialElementStride(DL: *DL));
999	// Scale the index by element size.
1000	if (ElementSize != `1`) {
1001	if (ElementSize.isPowerOf2()) {
1002	Idx = Builder.CreateShl(
1003	LHS: Idx, RHS: ConstantInt::get(Ty: PtrIndexTy, V: ElementSize.logBase2()));
1004	} else {
1005	Idx =
1006	Builder.CreateMul(LHS: Idx, RHS: ConstantInt::get(Ty: PtrIndexTy, V: ElementSize));
1007	}
1008	}
1009	// Create an ugly GEP with a single index for each index.
1010	ResultPtr = Builder.CreatePtrAdd(Ptr: ResultPtr, Offset: Idx, Name: "uglygep");
1011	if (FirstResult == nullptr)
1012	FirstResult = ResultPtr;
1013	}
1014	}
1015
1016	// Create a GEP with the constant offset index.
1017	if (AccumulativeByteOffset != `0`) {
1018	Value *Offset = ConstantInt::get(Ty: PtrIndexTy, V: AccumulativeByteOffset);
1019	ResultPtr = Builder.CreatePtrAdd(Ptr: ResultPtr, Offset, Name: "uglygep");
1020	} else
1021	isSwapCandidate = false;
1022
1023	// If we created a GEP with constant index, and the base is loop invariant,
1024	// then we swap the first one with it, so LICM can move constant GEP out
1025	// later.
1026	auto *FirstGEP = dyn_cast_or_null<GetElementPtrInst>(Val: FirstResult);
1027	auto *SecondGEP = dyn_cast<GetElementPtrInst>(Val: ResultPtr);
1028	if (isSwapCandidate && isLegalToSwapOperand(First: FirstGEP, Second: SecondGEP, CurLoop: L))
1029	swapGEPOperand(First: FirstGEP, Second: SecondGEP);
1030
1031	Variadic->replaceAllUsesWith(V: ResultPtr);
1032	Variadic->eraseFromParent();
1033	}
1034
1035	bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
1036	TargetTransformInfo &TTI) {
1037	auto PtrGEP = dyn_cast<GetElementPtrInst>(Val: GEP->getPointerOperand());
1038	if (!PtrGEP)
1039	return false;
1040
1041	bool NestedNeedsExtraction;
1042	APInt NestedByteOffset = accumulateByteOffset(GEP: PtrGEP, NeedsExtraction&: NestedNeedsExtraction);
1043	if (!NestedNeedsExtraction)
1044	return false;
1045
1046	unsigned AddrSpace = PtrGEP->getPointerAddressSpace();
1047	if (!TTI.isLegalAddressingMode(Ty: GEP->getResultElementType(),
1048	/BaseGV=/nullptr,
1049	BaseOffset: NestedByteOffset.getSExtValue(),
1050	/HasBaseReg=/true, /Scale=/`0`, AddrSpace))
1051	return false;
1052
1053	bool GEPInBounds = GEP->isInBounds();
1054	bool PtrGEPInBounds = PtrGEP->isInBounds();
1055	bool IsChainInBounds = GEPInBounds && PtrGEPInBounds;
1056	if (IsChainInBounds) {
1057	auto IsKnownNonNegative = [this](Value *V) {
1058	return isKnownNonNegative(V, SQ: *DL);
1059	};
1060	IsChainInBounds &= all_of(Range: GEP->indices(), P: IsKnownNonNegative);
1061	if (IsChainInBounds)
1062	IsChainInBounds &= all_of(Range: PtrGEP->indices(), P: IsKnownNonNegative);
1063	}
1064
1065	IRBuilder<> Builder(GEP);
1066	// For trivial GEP chains, we can swap the indices.
1067	Value *NewSrc = Builder.CreateGEP(
1068	Ty: GEP->getSourceElementType(), Ptr: PtrGEP->getPointerOperand(),
1069	IdxList: SmallVector<Value *, `4`>(GEP->indices()), Name: "", NW: IsChainInBounds);
1070	Value *NewGEP = Builder.CreateGEP(Ty: PtrGEP->getSourceElementType(), Ptr: NewSrc,
1071	IdxList: SmallVector<Value *, `4`>(PtrGEP->indices()),
1072	Name: "", NW: IsChainInBounds);
1073	GEP->replaceAllUsesWith(V: NewGEP);
1074	RecursivelyDeleteTriviallyDeadInstructions(V: GEP);
1075	return true;
1076	}
1077
1078	bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
1079	// Skip vector GEPs.
1080	if (GEP->getType()->isVectorTy())
1081	return false;
1082
1083	// If the base of this GEP is a ptradd of a constant, lets pass the constant
1084	// along. This ensures that when we have a chain of GEPs the constant
1085	// offset from each is accumulated.
1086	Value *NewBase;
1087	const APInt *BaseOffset;
1088	bool ExtractBase = match(V: GEP->getPointerOperand(),
1089	P: m_PtrAdd(PointerOp: m_Value(V&: NewBase), OffsetOp: m_APInt(Res&: BaseOffset)));
1090
1091	unsigned IdxWidth = DL->getIndexTypeSizeInBits(Ty: GEP->getType());
1092	APInt BaseByteOffset =
1093	ExtractBase ? BaseOffset->sextOrTrunc(width: IdxWidth) : APInt (IdxWidth, `0`);
1094
1095	// The backend can already nicely handle the case where all indices are
1096	// constant.
1097	if (GEP->hasAllConstantIndices() && !ExtractBase)
1098	return false;
1099
1100	bool Changed = canonicalizeArrayIndicesToIndexSize(GEP);
1101
1102	bool NeedsExtraction;
1103	APInt NonBaseByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
1104	APInt AccumulativeByteOffset = BaseByteOffset + NonBaseByteOffset;
1105
1106	TargetTransformInfo &TTI = GetTTI (*GEP->getFunction());
1107
1108	if (!NeedsExtraction && !ExtractBase) {
1109	Changed \|= reorderGEP(GEP, TTI);
1110	return Changed;
1111	}
1112
1113	// If LowerGEP is disabled, before really splitting the GEP, check whether the
1114	// backend supports the addressing mode we are about to produce. If no, this
1115	// splitting probably won't be beneficial.
1116	// If LowerGEP is enabled, even the extracted constant offset can not match
1117	// the addressing mode, we can still do optimizations to other lowered parts
1118	// of variable indices. Therefore, we don't check for addressing modes in that
1119	// case.
1120	if (!LowerGEP) {
1121	unsigned AddrSpace = GEP->getPointerAddressSpace();
1122	if (!TTI.isLegalAddressingMode(
1123	Ty: GEP->getResultElementType(),
1124	/BaseGV=/nullptr, BaseOffset: AccumulativeByteOffset.getSExtValue(),
1125	/HasBaseReg=/true, /Scale=/`0`, AddrSpace)) {
1126	// If the addressing mode was not legal and the base byte offset was not
1127	// 0, it could be a case where the total offset became too large for
1128	// the addressing mode. Try again without extracting the base offset.
1129	if (!ExtractBase)
1130	return Changed;
1131	ExtractBase = false;
1132	BaseByteOffset = APInt (IdxWidth, `0`);
1133	AccumulativeByteOffset = NonBaseByteOffset;
1134	if (!TTI.isLegalAddressingMode(
1135	Ty: GEP->getResultElementType(),
1136	/BaseGV=/nullptr, BaseOffset: AccumulativeByteOffset.getSExtValue(),
1137	/HasBaseReg=/true, /Scale=/`0`, AddrSpace))
1138	return Changed;
1139	// We can proceed with just extracting the other (non-base) offsets.
1140	NeedsExtraction = true;
1141	}
1142	}
1143
1144	// Track information for preserving GEP flags.
1145	bool AllOffsetsNonNegative = AccumulativeByteOffset.isNonNegative();
1146	bool AllNUWPreserved = GEP->hasNoUnsignedWrap();
1147	bool NewGEPInBounds = GEP->isInBounds();
1148	bool NewGEPNUSW = GEP->hasNoUnsignedSignedWrap();
1149
1150	// Remove the constant offset in each sequential index. The resultant GEP
1151	// computes the variadic base.
1152	// Notice that we don't remove struct field indices here. If LowerGEP is
1153	// disabled, a structure index is not accumulated and we still use the old
1154	// one. If LowerGEP is enabled, a structure index is accumulated in the
1155	// constant offset. LowerToSingleIndexGEPs will later handle the constant
1156	// offset and won't need a new structure index.
1157	gep_type_iterator GTI = gep_type_begin(GEP: *GEP);
1158	for (unsigned I = `1`, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
1159	if (GTI.isSequential()) {
1160	// Constant offsets of scalable types are not really constant.
1161	if (GTI.getIndexedType()->isScalableTy())
1162	continue;
1163
1164	// Splits this GEP index into a variadic part and a constant offset, and
1165	// uses the variadic part as the new index.
1166	Value *Idx = GEP->getOperand(i_nocapture: I);
1167	User *UserChainTail;
1168	bool PreservesNUW;
1169	Value *NewIdx = ConstantOffsetExtractor::Extract(Idx, GEP, UserChainTail,
1170	PreservesNUW);
1171	if (NewIdx != nullptr) {
1172	// Switches to the index with the constant offset removed.
1173	GEP->setOperand(i_nocapture: I, Val_nocapture: NewIdx);
1174	// After switching to the new index, we can garbage-collect UserChain
1175	// and the old index if they are not used.
1176	RecursivelyDeleteTriviallyDeadInstructions(V: UserChainTail);
1177	RecursivelyDeleteTriviallyDeadInstructions(V: Idx);
1178	Idx = NewIdx;
1179	AllNUWPreserved &= PreservesNUW;
1180	}
1181	AllOffsetsNonNegative =
1182	AllOffsetsNonNegative && isKnownNonNegative(V: Idx, SQ: *DL);
1183	}
1184	}
1185	if (ExtractBase) {
1186	GEPOperator *Base = cast<GEPOperator>(Val: GEP->getPointerOperand());
1187	AllNUWPreserved &= Base->hasNoUnsignedWrap();
1188	NewGEPInBounds &= Base->isInBounds();
1189	NewGEPNUSW &= Base->hasNoUnsignedSignedWrap();
1190	AllOffsetsNonNegative &= BaseByteOffset.isNonNegative();
1191
1192	GEP->setOperand(i_nocapture: `0`, Val_nocapture: NewBase);
1193	RecursivelyDeleteTriviallyDeadInstructions(V: Base);
1194	}
1195
1196	// Clear the inbounds attribute because the new index may be off-bound.
1197	// e.g.,
1198	//
1199	// b = add i64 a, 5
1200	// addr = gep inbounds float, float p, i64 b*
1201	//
1202	// is transformed to:
1203	//
1204	// addr2 = gep float, float p, i64 a ; inbounds removed*
1205	// addr = gep float, float addr2, i64 5 ; inbounds removed*
1206	//
1207	// If a is -4, although the old index b is in bounds, the new index a is
1208	// off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
1209	// inbounds keyword is not present, the offsets are added to the base
1210	// address with silently-wrapping two's complement arithmetic".
1211	// Therefore, the final code will be a semantically equivalent.
1212	GEPNoWrapFlags NewGEPFlags = GEPNoWrapFlags::none();
1213
1214	// If the initial GEP was inbounds/nusw and all variable indices and the
1215	// accumulated offsets are non-negative, they can be added in any order and
1216	// the intermediate results are in bounds and don't overflow in a nusw sense.
1217	// So, we can preserve the inbounds/nusw flag for both GEPs.
1218	bool CanPreserveInBoundsNUSW = AllOffsetsNonNegative;
1219
1220	// If the initial GEP was NUW and all operations that we reassociate were NUW
1221	// additions, the resulting GEPs are also NUW.
1222	if (AllNUWPreserved) {
1223	NewGEPFlags \|= GEPNoWrapFlags::noUnsignedWrap();
1224	// If the initial GEP additionally had NUSW (or inbounds, which implies
1225	// NUSW), we know that the indices in the initial GEP must all have their
1226	// signbit not set. For indices that are the result of NUW adds, the
1227	// add-operands therefore also don't have their signbit set. Therefore, all
1228	// indices of the resulting GEPs are non-negative -> we can preserve
1229	// the inbounds/nusw flag.
1230	CanPreserveInBoundsNUSW \|= NewGEPNUSW;
1231	}
1232
1233	if (CanPreserveInBoundsNUSW) {
1234	if (NewGEPInBounds)
1235	NewGEPFlags \|= GEPNoWrapFlags::inBounds();
1236	else if (NewGEPNUSW)
1237	NewGEPFlags \|= GEPNoWrapFlags::noUnsignedSignedWrap();
1238	}
1239
1240	GEP->setNoWrapFlags(NewGEPFlags);
1241
1242	// Lowers a GEP to GEPs with a single index.
1243	if (LowerGEP) {
1244	lowerToSingleIndexGEPs(Variadic: GEP, AccumulativeByteOffset);
1245	return true;
1246	}
1247
1248	// No need to create another GEP if the accumulative byte offset is 0.
1249	if (AccumulativeByteOffset == `0`)
1250	return true;
1251
1252	// Offsets the base with the accumulative byte offset.
1253	//
1254	// %gep ; the base
1255	// ... %gep ...
1256	//
1257	// => add the offset
1258	//
1259	// %gep2 ; clone of %gep
1260	// %new.gep = gep i8, %gep2, %offset
1261	// %gep ; will be removed
1262	// ... %gep ...
1263	//
1264	// => replace all uses of %gep with %new.gep and remove %gep
1265	//
1266	// %gep2 ; clone of %gep
1267	// %new.gep = gep i8, %gep2, %offset
1268	// ... %new.gep ...
1269	Instruction *NewGEP = GEP->clone();
1270	NewGEP->insertBefore(InsertPos: GEP->getIterator());
1271
1272	Type *PtrIdxTy = DL->getIndexType(PtrTy: GEP->getType());
1273	IRBuilder<> Builder(GEP);
1274	NewGEP = cast<Instruction>(Val: Builder.CreatePtrAdd(
1275	Ptr: NewGEP, Offset: ConstantInt::get(Ty: PtrIdxTy, V: AccumulativeByteOffset),
1276	Name: GEP->getName(), NW: NewGEPFlags));
1277	NewGEP->copyMetadata(SrcInst: *GEP);
1278
1279	GEP->replaceAllUsesWith(V: NewGEP);
1280	GEP->eraseFromParent();
1281
1282	return true;
1283	}
1284
1285	bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
1286	if (skipFunction(F))
1287	return false;
1288	auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1289	auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1290	auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1291	auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
1292	return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1293	};
1294	SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
1295	return Impl.run(F);
1296	}
1297
1298	bool SeparateConstOffsetFromGEP::run(Function &F) {
1299	if (DisableSeparateConstOffsetFromGEP)
1300	return false;
1301
1302	DL = &F.getDataLayout();
1303	bool Changed = false;
1304
1305	ReversePostOrderTraversal<Function *> RPOT(&F);
1306	for (BasicBlock *B : RPOT) {
1307	if (!DT->isReachableFromEntry(A: B))
1308	continue;
1309
1310	for (Instruction &I : llvm::make_early_inc_range(Range&: *B))
1311	if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: &I))
1312	Changed \|= splitGEP(GEP);
1313	// No need to split GEP ConstantExprs because all its indices are constant
1314	// already.
1315	}
1316
1317	Changed \|= reuniteExts(F);
1318
1319	if (VerifyNoDeadCode)
1320	verifyNoDeadCode(F);
1321
1322	return Changed;
1323	}
1324
1325	Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
1326	ExprKey Key, Instruction *Dominatee,
1327	DenseMap<ExprKey, SmallVector<Instruction *, `2`>> &DominatingExprs) {
1328	auto Pos = DominatingExprs.find(Val: Key);
1329	if (Pos == DominatingExprs.end())
1330	return nullptr;
1331
1332	auto &Candidates = Pos ->second;
1333	// Because we process the basic blocks in pre-order of the dominator tree, a
1334	// candidate that doesn't dominate the current instruction won't dominate any
1335	// future instruction either. Therefore, we pop it out of the stack. This
1336	// optimization makes the algorithm O(n).
1337	while (!Candidates.empty()) {
1338	Instruction *Candidate = Candidates.back();
1339	if (DT->dominates(Def: Candidate, User: Dominatee))
1340	return Candidate;
1341	Candidates.pop_back();
1342	}
1343	return nullptr;
1344	}
1345
1346	bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
1347	if (!I->getType()->isIntOrIntVectorTy())
1348	return false;
1349
1350	// Dom: LHS+RHS
1351	// I: sext(LHS)+sext(RHS)
1352	// If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom).
1353	// TODO: handle zext
1354	Value LHS = nullptr, RHS = nullptr;
1355	if (match(V: I, P: m_Add(L: m_SExt(Op: m_Value(V&: LHS)), R: m_SExt(Op: m_Value(V&: RHS))))) {
1356	if (LHS->getType() == RHS->getType()) {
1357	ExprKey Key = createNormalizedCommutablePair(A: LHS, B: RHS);
1358	if (auto *Dom = findClosestMatchingDominator(Key, Dominatee: I, DominatingExprs&: DominatingAdds)) {
1359	Instruction *NewSExt =
1360	new SExtInst (Dom, I->getType(), "", I->getIterator());
1361	NewSExt->takeName(V: I);
1362	I->replaceAllUsesWith(V: NewSExt);
1363	NewSExt->setDebugLoc(I->getDebugLoc());
1364	RecursivelyDeleteTriviallyDeadInstructions(V: I);
1365	return true;
1366	}
1367	}
1368	} else if (match(V: I, P: m_Sub(L: m_SExt(Op: m_Value(V&: LHS)), R: m_SExt(Op: m_Value(V&: RHS))))) {
1369	if (LHS->getType() == RHS->getType()) {
1370	if (auto *Dom =
1371	findClosestMatchingDominator(Key: {LHS, RHS}, Dominatee: I, DominatingExprs&: DominatingSubs)) {
1372	Instruction *NewSExt =
1373	new SExtInst (Dom, I->getType(), "", I->getIterator());
1374	NewSExt->takeName(V: I);
1375	I->replaceAllUsesWith(V: NewSExt);
1376	NewSExt->setDebugLoc(I->getDebugLoc());
1377	RecursivelyDeleteTriviallyDeadInstructions(V: I);
1378	return true;
1379	}
1380	}
1381	}
1382
1383	// Add I to DominatingExprs if it's an add/sub that can't sign overflow.
1384	if (match(V: I, P: m_NSWAdd(L: m_Value(V&: LHS), R: m_Value(V&: RHS)))) {
1385	if (programUndefinedIfPoison(Inst: I)) {
1386	ExprKey Key = createNormalizedCommutablePair(A: LHS, B: RHS);
1387	DominatingAdds [Key].push_back(Elt: I);
1388	}
1389	} else if (match(V: I, P: m_NSWSub(L: m_Value(V&: LHS), R: m_Value(V&: RHS)))) {
1390	if (programUndefinedIfPoison(Inst: I))
1391	DominatingSubs [{LHS, RHS}].push_back(Elt: I);
1392	}
1393	return false;
1394	}
1395
1396	bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
1397	bool Changed = false;
1398	DominatingAdds.clear();
1399	DominatingSubs.clear();
1400	for (const auto Node : depth_first(G: DT)) {
1401	BasicBlock *BB = Node->getBlock();
1402	for (Instruction &I : llvm::make_early_inc_range(Range&: *BB))
1403	Changed \|= reuniteExts(I: &I);
1404	}
1405	return Changed;
1406	}
1407
1408	void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
1409	for (BasicBlock &B : F) {
1410	for (Instruction &I : B) {
1411	if (isInstructionTriviallyDead(I: &I)) {
1412	std::string ErrMessage;
1413	raw_string_ostream RSO(ErrMessage);
1414	RSO << "Dead instruction detected!\n" << I << "\n";
1415	llvm_unreachable(RSO.str().c_str());
1416	}
1417	}
1418	}
1419	}
1420
1421	bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
1422	GetElementPtrInst FirstGEP, GetElementPtrInst SecondGEP, Loop *CurLoop) {
1423	if (!FirstGEP \|\| !FirstGEP->hasOneUse())
1424	return false;
1425
1426	if (!SecondGEP \|\| FirstGEP->getParent() != SecondGEP->getParent())
1427	return false;
1428
1429	if (FirstGEP == SecondGEP)
1430	return false;
1431
1432	unsigned FirstNum = FirstGEP->getNumOperands();
1433	unsigned SecondNum = SecondGEP->getNumOperands();
1434	// Give up if the number of operands are not 2.
1435	if (FirstNum != SecondNum \|\| FirstNum != `2`)
1436	return false;
1437
1438	Value *FirstBase = FirstGEP->getOperand(i_nocapture: `0`);
1439	Value *SecondBase = SecondGEP->getOperand(i_nocapture: `0`);
1440	Value *FirstOffset = FirstGEP->getOperand(i_nocapture: `1`);
1441	// Give up if the index of the first GEP is loop invariant.
1442	if (CurLoop->isLoopInvariant(V: FirstOffset))
1443	return false;
1444
1445	// Give up if base doesn't have same type.
1446	if (FirstBase->getType() != SecondBase->getType())
1447	return false;
1448
1449	Instruction *FirstOffsetDef = dyn_cast<Instruction>(Val: FirstOffset);
1450
1451	// Check if the second operand of first GEP has constant coefficient.
1452	// For an example, for the following code, we won't gain anything by
1453	// hoisting the second GEP out because the second GEP can be folded away.
1454	// %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
1455	// %67 = shl i64 %scevgep.sum.ur159, 2
1456	// %uglygep160 = getelementptr i8 %65, i64 %67*
1457	// %uglygep161 = getelementptr i8 %uglygep160, i64 -1024*
1458
1459	// Skip constant shift instruction which may be generated by Splitting GEPs.
1460	if (FirstOffsetDef && FirstOffsetDef->isShift() &&
1461	isa<ConstantInt>(Val: FirstOffsetDef->getOperand(i: `1`)))
1462	FirstOffsetDef = dyn_cast<Instruction>(Val: FirstOffsetDef->getOperand(i: `0`));
1463
1464	// Give up if FirstOffsetDef is an Add or Sub with constant.
1465	// Because it may not profitable at all due to constant folding.
1466	if (FirstOffsetDef)
1467	if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Val: FirstOffsetDef)) {
1468	unsigned opc = BO->getOpcode();
1469	if ((opc == Instruction::Add \|\| opc == Instruction::Sub) &&
1470	(isa<ConstantInt>(Val: BO->getOperand(i_nocapture: `0`)) \|\|
1471	isa<ConstantInt>(Val: BO->getOperand(i_nocapture: `1`))))
1472	return false;
1473	}
1474	return true;
1475	}
1476
1477	bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value V, Loop L) {
1478	// TODO: Could look at uses of globals, but we need to make sure we are
1479	// looking at the correct function.
1480	if (isa<Constant>(Val: V))
1481	return false;
1482
1483	int UsesInLoop = `0`;
1484	for (User *U : V->users()) {
1485	if (Instruction *User = dyn_cast<Instruction>(Val: U))
1486	if (L->contains(Inst: User))
1487	if (++UsesInLoop > `1`)
1488	return true;
1489	}
1490	return false;
1491	}
1492
1493	void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
1494	GetElementPtrInst *Second) {
1495	Value *Offset1 = First->getOperand(i_nocapture: `1`);
1496	Value *Offset2 = Second->getOperand(i_nocapture: `1`);
1497	First->setOperand(i_nocapture: `1`, Val_nocapture: Offset2);
1498	Second->setOperand(i_nocapture: `1`, Val_nocapture: Offset1);
1499
1500	// We changed p+o+c to p+c+o, p+c may not be inbound anymore.
1501	const DataLayout &DAL = First->getDataLayout();
1502	APInt Offset(DAL.getIndexSizeInBits(
1503	AS: cast<PointerType>(Val: First->getType())->getAddressSpace()),
1504	`0`);
1505	Value *NewBase =
1506	First->stripAndAccumulateInBoundsConstantOffsets(DL: DAL, Offset);
1507	uint64_t ObjectSize;
1508	if (!getObjectSize(Ptr: NewBase, Size&: ObjectSize, DL: DAL, TLI) \|\|
1509	Offset.ugt(RHS: ObjectSize)) {
1510	// TODO(gep_nowrap): Make flag preservation more precise.
1511	First->setNoWrapFlags(GEPNoWrapFlags::none());
1512	Second->setNoWrapFlags(GEPNoWrapFlags::none());
1513	} else
1514	First->setIsInBounds(true);
1515	}
1516
1517	void SeparateConstOffsetFromGEPPass::printPipeline(
1518	raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1519	static_cast<PassInfoMixin<SeparateConstOffsetFromGEPPass> >(this*)
1520	->printPipeline(OS, MapClassName2PassName);
1521	OS << `'<'`;
1522	if (LowerGEP)
1523	OS << "lower-gep";
1524	OS << `'>'`;
1525	}
1526
1527	PreservedAnalyses
1528	SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
1529	auto *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F);
1530	auto *LI = &AM.getResult<LoopAnalysis>(IR&: F);
1531	auto *TLI = &AM.getResult<TargetLibraryAnalysis>(IR&: F);
1532	auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
1533	return AM.getResult<TargetIRAnalysis>(IR&: F);
1534	};
1535	SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
1536	if (!Impl.run(F))
1537	return PreservedAnalyses::all();
1538	PreservedAnalyses PA;
1539	PA.preserveSet<CFGAnalyses>();
1540	return PA;
1541	}
1542

Browse the source code of llvm_projects/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp