1//===- InferAddressSpace.cpp - --------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// CUDA C/C++ includes memory space designation as variable type qualifers (such
10// as __global__ and __shared__). Knowing the space of a memory access allows
11// CUDA compilers to emit faster PTX loads and stores. For example, a load from
12// shared memory can be translated to `ld.shared` which is roughly 10% faster
13// than a generic `ld` on an NVIDIA Tesla K40c.
14//
15// Unfortunately, type qualifiers only apply to variable declarations, so CUDA
16// compilers must infer the memory space of an address expression from
17// type-qualified variables.
18//
19// LLVM IR uses non-zero (so-called) specific address spaces to represent memory
20// spaces (e.g. addrspace(3) means shared memory). The Clang frontend
21// places only type-qualified variables in specific address spaces, and then
22// conservatively `addrspacecast`s each type-qualified variable to addrspace(0)
23// (so-called the generic address space) for other instructions to use.
24//
25// For example, the Clang translates the following CUDA code
26// __shared__ float a[10];
27// float v = a[i];
28// to
29// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
30// %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
31// %v = load float, float* %1 ; emits ld.f32
32// @a is in addrspace(3) since it's type-qualified, but its use from %1 is
33// redirected to %0 (the generic version of @a).
34//
35// The optimization implemented in this file propagates specific address spaces
36// from type-qualified variable declarations to its users. For example, it
37// optimizes the above IR to
38// %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
39// %v = load float addrspace(3)* %1 ; emits ld.shared.f32
40// propagating the addrspace(3) from @a to %1. As the result, the NVPTX
41// codegen is able to emit ld.shared.f32 for %v.
42//
43// Address space inference works in two steps. First, it uses a data-flow
44// analysis to infer as many generic pointers as possible to point to only one
45// specific address space. In the above example, it can prove that %1 only
46// points to addrspace(3). This algorithm was published in
47// CUDA: Compiling and optimizing for a GPU platform
48// Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
49// ICCS 2012
50//
51// Then, address space inference replaces all refinable generic pointers with
52// equivalent specific pointers.
53//
54// The major challenge of implementing this optimization is handling PHINodes,
55// which may create loops in the data flow graph. This brings two complications.
56//
57// First, the data flow analysis in Step 1 needs to be circular. For example,
58// %generic.input = addrspacecast float addrspace(3)* %input to float*
59// loop:
60// %y = phi [ %generic.input, %y2 ]
61// %y2 = getelementptr %y, 1
62// %v = load %y2
63// br ..., label %loop, ...
64// proving %y specific requires proving both %generic.input and %y2 specific,
65// but proving %y2 specific circles back to %y. To address this complication,
66// the data flow analysis operates on a lattice:
67// uninitialized > specific address spaces > generic.
68// All address expressions (our implementation only considers phi, bitcast,
69// addrspacecast, and getelementptr) start with the uninitialized address space.
70// The monotone transfer function moves the address space of a pointer down a
71// lattice path from uninitialized to specific and then to generic. A join
72// operation of two different specific address spaces pushes the expression down
73// to the generic address space. The analysis completes once it reaches a fixed
74// point.
75//
76// Second, IR rewriting in Step 2 also needs to be circular. For example,
77// converting %y to addrspace(3) requires the compiler to know the converted
78// %y2, but converting %y2 needs the converted %y. To address this complication,
79// we break these cycles using "poison" placeholders. When converting an
80// instruction `I` to a new address space, if its operand `Op` is not converted
81// yet, we let `I` temporarily use `poison` and fix all the uses later.
82// For instance, our algorithm first converts %y to
83// %y' = phi float addrspace(3)* [ %input, poison ]
84// Then, it converts %y2 to
85// %y2' = getelementptr %y', 1
86// Finally, it fixes the poison in %y' so that
87// %y' = phi float addrspace(3)* [ %input, %y2' ]
88//
89//===----------------------------------------------------------------------===//
90
91#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
92#include "llvm/ADT/ArrayRef.h"
93#include "llvm/ADT/DenseMap.h"
94#include "llvm/ADT/DenseSet.h"
95#include "llvm/ADT/SetVector.h"
96#include "llvm/ADT/SmallVector.h"
97#include "llvm/Analysis/AssumptionCache.h"
98#include "llvm/Analysis/TargetTransformInfo.h"
99#include "llvm/Analysis/ValueTracking.h"
100#include "llvm/IR/BasicBlock.h"
101#include "llvm/IR/Constant.h"
102#include "llvm/IR/Constants.h"
103#include "llvm/IR/Dominators.h"
104#include "llvm/IR/Function.h"
105#include "llvm/IR/IRBuilder.h"
106#include "llvm/IR/InstIterator.h"
107#include "llvm/IR/Instruction.h"
108#include "llvm/IR/Instructions.h"
109#include "llvm/IR/IntrinsicInst.h"
110#include "llvm/IR/Intrinsics.h"
111#include "llvm/IR/LLVMContext.h"
112#include "llvm/IR/Operator.h"
113#include "llvm/IR/PassManager.h"
114#include "llvm/IR/Type.h"
115#include "llvm/IR/Use.h"
116#include "llvm/IR/User.h"
117#include "llvm/IR/Value.h"
118#include "llvm/IR/ValueHandle.h"
119#include "llvm/InitializePasses.h"
120#include "llvm/Pass.h"
121#include "llvm/Support/Casting.h"
122#include "llvm/Support/CommandLine.h"
123#include "llvm/Support/Debug.h"
124#include "llvm/Support/ErrorHandling.h"
125#include "llvm/Support/raw_ostream.h"
126#include "llvm/Transforms/Scalar.h"
127#include "llvm/Transforms/Utils/Local.h"
128#include "llvm/Transforms/Utils/ValueMapper.h"
129#include <cassert>
130#include <iterator>
131#include <limits>
132#include <utility>
133#include <vector>
134
135#define DEBUG_TYPE "infer-address-spaces"
136
137using namespace llvm;
138
139static cl::opt<bool> AssumeDefaultIsFlatAddressSpace(
140 "assume-default-is-flat-addrspace", cl::init(Val: false), cl::ReallyHidden,
141 cl::desc("The default address space is assumed as the flat address space. "
142 "This is mainly for test purpose."));
143
144static const unsigned UninitializedAddressSpace =
145 std::numeric_limits<unsigned>::max();
146
147namespace {
148
149using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
150// Different from ValueToAddrSpaceMapTy, where a new addrspace is inferred on
151// the *def* of a value, PredicatedAddrSpaceMapTy is map where a new
152// addrspace is inferred on the *use* of a pointer. This map is introduced to
153// infer addrspace from the addrspace predicate assumption built from assume
154// intrinsic. In that scenario, only specific uses (under valid assumption
155// context) could be inferred with a new addrspace.
156using PredicatedAddrSpaceMapTy =
157 DenseMap<std::pair<const Value *, const Value *>, unsigned>;
158using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>;
159
160class InferAddressSpaces : public FunctionPass {
161 unsigned FlatAddrSpace = 0;
162
163public:
164 static char ID;
165
166 InferAddressSpaces()
167 : FunctionPass(ID), FlatAddrSpace(UninitializedAddressSpace) {
168 initializeInferAddressSpacesPass(*PassRegistry::getPassRegistry());
169 }
170 InferAddressSpaces(unsigned AS) : FunctionPass(ID), FlatAddrSpace(AS) {
171 initializeInferAddressSpacesPass(*PassRegistry::getPassRegistry());
172 }
173
174 void getAnalysisUsage(AnalysisUsage &AU) const override {
175 AU.setPreservesCFG();
176 AU.addPreserved<DominatorTreeWrapperPass>();
177 AU.addRequired<AssumptionCacheTracker>();
178 AU.addRequired<TargetTransformInfoWrapperPass>();
179 }
180
181 bool runOnFunction(Function &F) override;
182};
183
184class InferAddressSpacesImpl {
185 AssumptionCache &AC;
186 Function *F = nullptr;
187 const DominatorTree *DT = nullptr;
188 const TargetTransformInfo *TTI = nullptr;
189 const DataLayout *DL = nullptr;
190
191 /// Target specific address space which uses of should be replaced if
192 /// possible.
193 unsigned FlatAddrSpace = 0;
194
195 // Try to update the address space of V. If V is updated, returns true and
196 // false otherwise.
197 bool updateAddressSpace(const Value &V,
198 ValueToAddrSpaceMapTy &InferredAddrSpace,
199 PredicatedAddrSpaceMapTy &PredicatedAS) const;
200
201 // Tries to infer the specific address space of each address expression in
202 // Postorder.
203 void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
204 ValueToAddrSpaceMapTy &InferredAddrSpace,
205 PredicatedAddrSpaceMapTy &PredicatedAS) const;
206
207 bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
208
209 Value *cloneInstructionWithNewAddressSpace(
210 Instruction *I, unsigned NewAddrSpace,
211 const ValueToValueMapTy &ValueWithNewAddrSpace,
212 const PredicatedAddrSpaceMapTy &PredicatedAS,
213 SmallVectorImpl<const Use *> *PoisonUsesToFix) const;
214
215 void performPointerReplacement(
216 Value *V, Value *NewV, Use &U, ValueToValueMapTy &ValueWithNewAddrSpace,
217 SmallVectorImpl<Instruction *> &DeadInstructions) const;
218
219 // Changes the flat address expressions in function F to point to specific
220 // address spaces if InferredAddrSpace says so. Postorder is the postorder of
221 // all flat expressions in the use-def graph of function F.
222 bool rewriteWithNewAddressSpaces(
223 ArrayRef<WeakTrackingVH> Postorder,
224 const ValueToAddrSpaceMapTy &InferredAddrSpace,
225 const PredicatedAddrSpaceMapTy &PredicatedAS) const;
226
227 void appendsFlatAddressExpressionToPostorderStack(
228 Value *V, PostorderStackTy &PostorderStack,
229 DenseSet<Value *> &Visited) const;
230
231 bool rewriteIntrinsicOperands(IntrinsicInst *II, Value *OldV,
232 Value *NewV) const;
233 void collectRewritableIntrinsicOperands(IntrinsicInst *II,
234 PostorderStackTy &PostorderStack,
235 DenseSet<Value *> &Visited) const;
236
237 std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;
238
239 Value *cloneValueWithNewAddressSpace(
240 Value *V, unsigned NewAddrSpace,
241 const ValueToValueMapTy &ValueWithNewAddrSpace,
242 const PredicatedAddrSpaceMapTy &PredicatedAS,
243 SmallVectorImpl<const Use *> *PoisonUsesToFix) const;
244 unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
245
246 unsigned getPredicatedAddrSpace(const Value &PtrV,
247 const Value *UserCtx) const;
248
249public:
250 InferAddressSpacesImpl(AssumptionCache &AC, const DominatorTree *DT,
251 const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
252 : AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
253 bool run(Function &F);
254};
255
256} // end anonymous namespace
257
258char InferAddressSpaces::ID = 0;
259
260INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
261 false, false)
262INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
263INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
264INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
265 false, false)
266
267static Type *getPtrOrVecOfPtrsWithNewAS(Type *Ty, unsigned NewAddrSpace) {
268 assert(Ty->isPtrOrPtrVectorTy());
269 PointerType *NPT = PointerType::get(C&: Ty->getContext(), AddressSpace: NewAddrSpace);
270 return Ty->getWithNewType(EltTy: NPT);
271}
272
273// Check whether that's no-op pointer bicast using a pair of
274// `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over
275// different address spaces.
276static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
277 const TargetTransformInfo *TTI) {
278 assert(I2P->getOpcode() == Instruction::IntToPtr);
279 auto *P2I = dyn_cast<Operator>(Val: I2P->getOperand(i: 0));
280 if (!P2I || P2I->getOpcode() != Instruction::PtrToInt)
281 return false;
282 // Check it's really safe to treat that pair of `ptrtoint`/`inttoptr` as a
283 // no-op cast. Besides checking both of them are no-op casts, as the
284 // reinterpreted pointer may be used in other pointer arithmetic, we also
285 // need to double-check that through the target-specific hook. That ensures
286 // the underlying target also agrees that's a no-op address space cast and
287 // pointer bits are preserved.
288 // The current IR spec doesn't have clear rules on address space casts,
289 // especially a clear definition for pointer bits in non-default address
290 // spaces. It would be undefined if that pointer is dereferenced after an
291 // invalid reinterpret cast. Also, due to the unclearness for the meaning of
292 // bits in non-default address spaces in the current spec, the pointer
293 // arithmetic may also be undefined after invalid pointer reinterpret cast.
294 // However, as we confirm through the target hooks that it's a no-op
295 // addrspacecast, it doesn't matter since the bits should be the same.
296 unsigned P2IOp0AS = P2I->getOperand(i: 0)->getType()->getPointerAddressSpace();
297 unsigned I2PAS = I2P->getType()->getPointerAddressSpace();
298 return CastInst::isNoopCast(Opcode: Instruction::CastOps(I2P->getOpcode()),
299 SrcTy: I2P->getOperand(i: 0)->getType(), DstTy: I2P->getType(),
300 DL) &&
301 CastInst::isNoopCast(Opcode: Instruction::CastOps(P2I->getOpcode()),
302 SrcTy: P2I->getOperand(i: 0)->getType(), DstTy: P2I->getType(),
303 DL) &&
304 (P2IOp0AS == I2PAS || TTI->isNoopAddrSpaceCast(FromAS: P2IOp0AS, ToAS: I2PAS));
305}
306
307// Returns true if V is an address expression.
308// TODO: Currently, we only consider:
309// - arguments
310// - phi, bitcast, addrspacecast, and getelementptr operators
311static bool isAddressExpression(const Value &V, const DataLayout &DL,
312 const TargetTransformInfo *TTI) {
313
314 if (const Argument *Arg = dyn_cast<Argument>(Val: &V))
315 return Arg->getType()->isPointerTy() &&
316 TTI->getAssumedAddrSpace(V: &V) != UninitializedAddressSpace;
317
318 const Operator *Op = dyn_cast<Operator>(Val: &V);
319 if (!Op)
320 return false;
321
322 switch (Op->getOpcode()) {
323 case Instruction::PHI:
324 assert(Op->getType()->isPtrOrPtrVectorTy());
325 return true;
326 case Instruction::BitCast:
327 case Instruction::AddrSpaceCast:
328 case Instruction::GetElementPtr:
329 return true;
330 case Instruction::Select:
331 return Op->getType()->isPtrOrPtrVectorTy();
332 case Instruction::Call: {
333 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &V);
334 return II && II->getIntrinsicID() == Intrinsic::ptrmask;
335 }
336 case Instruction::IntToPtr:
337 return isNoopPtrIntCastPair(I2P: Op, DL, TTI);
338 default:
339 // That value is an address expression if it has an assumed address space.
340 return TTI->getAssumedAddrSpace(V: &V) != UninitializedAddressSpace;
341 }
342}
343
344// Returns the pointer operands of V.
345//
346// Precondition: V is an address expression.
347static SmallVector<Value *, 2>
348getPointerOperands(const Value &V, const DataLayout &DL,
349 const TargetTransformInfo *TTI) {
350 if (isa<Argument>(Val: &V))
351 return {};
352
353 const Operator &Op = cast<Operator>(Val: V);
354 switch (Op.getOpcode()) {
355 case Instruction::PHI: {
356 auto IncomingValues = cast<PHINode>(Val: Op).incoming_values();
357 return {IncomingValues.begin(), IncomingValues.end()};
358 }
359 case Instruction::BitCast:
360 case Instruction::AddrSpaceCast:
361 case Instruction::GetElementPtr:
362 return {Op.getOperand(i: 0)};
363 case Instruction::Select:
364 return {Op.getOperand(i: 1), Op.getOperand(i: 2)};
365 case Instruction::Call: {
366 const IntrinsicInst &II = cast<IntrinsicInst>(Val: Op);
367 assert(II.getIntrinsicID() == Intrinsic::ptrmask &&
368 "unexpected intrinsic call");
369 return {II.getArgOperand(i: 0)};
370 }
371 case Instruction::IntToPtr: {
372 assert(isNoopPtrIntCastPair(&Op, DL, TTI));
373 auto *P2I = cast<Operator>(Val: Op.getOperand(i: 0));
374 return {P2I->getOperand(i: 0)};
375 }
376 default:
377 llvm_unreachable("Unexpected instruction type.");
378 }
379}
380
381bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
382 Value *OldV,
383 Value *NewV) const {
384 Module *M = II->getParent()->getParent()->getParent();
385 Intrinsic::ID IID = II->getIntrinsicID();
386 switch (IID) {
387 case Intrinsic::objectsize:
388 case Intrinsic::masked_load: {
389 Type *DestTy = II->getType();
390 Type *SrcTy = NewV->getType();
391 Function *NewDecl =
392 Intrinsic::getOrInsertDeclaration(M, id: IID, Tys: {DestTy, SrcTy});
393 II->setArgOperand(i: 0, v: NewV);
394 II->setCalledFunction(NewDecl);
395 return true;
396 }
397 case Intrinsic::ptrmask:
398 // This is handled as an address expression, not as a use memory operation.
399 return false;
400 case Intrinsic::masked_gather: {
401 Type *RetTy = II->getType();
402 Type *NewPtrTy = NewV->getType();
403 Function *NewDecl =
404 Intrinsic::getOrInsertDeclaration(M, id: IID, Tys: {RetTy, NewPtrTy});
405 II->setArgOperand(i: 0, v: NewV);
406 II->setCalledFunction(NewDecl);
407 return true;
408 }
409 case Intrinsic::masked_store:
410 case Intrinsic::masked_scatter: {
411 Type *ValueTy = II->getOperand(i_nocapture: 0)->getType();
412 Type *NewPtrTy = NewV->getType();
413 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
414 M, id: II->getIntrinsicID(), Tys: {ValueTy, NewPtrTy});
415 II->setArgOperand(i: 1, v: NewV);
416 II->setCalledFunction(NewDecl);
417 return true;
418 }
419 case Intrinsic::prefetch:
420 case Intrinsic::is_constant: {
421 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
422 M, id: II->getIntrinsicID(), Tys: {NewV->getType()});
423 II->setArgOperand(i: 0, v: NewV);
424 II->setCalledFunction(NewDecl);
425 return true;
426 }
427 case Intrinsic::fake_use: {
428 II->replaceUsesOfWith(From: OldV, To: NewV);
429 return true;
430 }
431 case Intrinsic::lifetime_start:
432 case Intrinsic::lifetime_end: {
433 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
434 M, id: II->getIntrinsicID(), Tys: {NewV->getType()});
435 II->setArgOperand(i: 1, v: NewV);
436 II->setCalledFunction(NewDecl);
437 return true;
438 }
439 default: {
440 Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
441 if (!Rewrite)
442 return false;
443 if (Rewrite != II)
444 II->replaceAllUsesWith(V: Rewrite);
445 return true;
446 }
447 }
448}
449
450void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
451 IntrinsicInst *II, PostorderStackTy &PostorderStack,
452 DenseSet<Value *> &Visited) const {
453 auto IID = II->getIntrinsicID();
454 switch (IID) {
455 case Intrinsic::ptrmask:
456 case Intrinsic::objectsize:
457 appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 0),
458 PostorderStack, Visited);
459 break;
460 case Intrinsic::is_constant: {
461 Value *Ptr = II->getArgOperand(i: 0);
462 if (Ptr->getType()->isPtrOrPtrVectorTy()) {
463 appendsFlatAddressExpressionToPostorderStack(V: Ptr, PostorderStack,
464 Visited);
465 }
466
467 break;
468 }
469 case Intrinsic::masked_load:
470 case Intrinsic::masked_gather:
471 case Intrinsic::prefetch:
472 appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 0),
473 PostorderStack, Visited);
474 break;
475 case Intrinsic::masked_store:
476 case Intrinsic::masked_scatter:
477 appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 1),
478 PostorderStack, Visited);
479 break;
480 case Intrinsic::fake_use: {
481 for (Value *Op : II->operands()) {
482 if (Op->getType()->isPtrOrPtrVectorTy()) {
483 appendsFlatAddressExpressionToPostorderStack(V: Op, PostorderStack,
484 Visited);
485 }
486 }
487
488 break;
489 }
490 case Intrinsic::lifetime_start:
491 case Intrinsic::lifetime_end: {
492 appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 1),
493 PostorderStack, Visited);
494 break;
495 }
496 default:
497 SmallVector<int, 2> OpIndexes;
498 if (TTI->collectFlatAddressOperands(OpIndexes, IID)) {
499 for (int Idx : OpIndexes) {
500 appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: Idx),
501 PostorderStack, Visited);
502 }
503 }
504 break;
505 }
506}
507
508// Returns all flat address expressions in function F. The elements are
509// If V is an unvisited flat address expression, appends V to PostorderStack
510// and marks it as visited.
511void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
512 Value *V, PostorderStackTy &PostorderStack,
513 DenseSet<Value *> &Visited) const {
514 assert(V->getType()->isPtrOrPtrVectorTy());
515
516 // Generic addressing expressions may be hidden in nested constant
517 // expressions.
518 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V)) {
519 // TODO: Look in non-address parts, like icmp operands.
520 if (isAddressExpression(V: *CE, DL: *DL, TTI) && Visited.insert(V: CE).second)
521 PostorderStack.emplace_back(Args&: CE, Args: false);
522
523 return;
524 }
525
526 if (V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
527 isAddressExpression(V: *V, DL: *DL, TTI)) {
528 if (Visited.insert(V).second) {
529 PostorderStack.emplace_back(Args&: V, Args: false);
530
531 if (auto *Op = dyn_cast<Operator>(Val: V))
532 for (auto &O : Op->operands())
533 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val&: O))
534 if (isAddressExpression(V: *CE, DL: *DL, TTI) && Visited.insert(V: CE).second)
535 PostorderStack.emplace_back(Args&: CE, Args: false);
536 }
537 }
538}
539
540// Returns all flat address expressions in function F. The elements are ordered
541// in postorder.
542std::vector<WeakTrackingVH>
543InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
544 // This function implements a non-recursive postorder traversal of a partial
545 // use-def graph of function F.
546 PostorderStackTy PostorderStack;
547 // The set of visited expressions.
548 DenseSet<Value *> Visited;
549
550 auto PushPtrOperand = [&](Value *Ptr) {
551 appendsFlatAddressExpressionToPostorderStack(V: Ptr, PostorderStack, Visited);
552 };
553
554 // Look at operations that may be interesting accelerate by moving to a known
555 // address space. We aim at generating after loads and stores, but pure
556 // addressing calculations may also be faster.
557 for (Instruction &I : instructions(F)) {
558 if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I)) {
559 PushPtrOperand(GEP->getPointerOperand());
560 } else if (auto *LI = dyn_cast<LoadInst>(Val: &I))
561 PushPtrOperand(LI->getPointerOperand());
562 else if (auto *SI = dyn_cast<StoreInst>(Val: &I))
563 PushPtrOperand(SI->getPointerOperand());
564 else if (auto *RMW = dyn_cast<AtomicRMWInst>(Val: &I))
565 PushPtrOperand(RMW->getPointerOperand());
566 else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: &I))
567 PushPtrOperand(CmpX->getPointerOperand());
568 else if (auto *MI = dyn_cast<MemIntrinsic>(Val: &I)) {
569 // For memset/memcpy/memmove, any pointer operand can be replaced.
570 PushPtrOperand(MI->getRawDest());
571
572 // Handle 2nd operand for memcpy/memmove.
573 if (auto *MTI = dyn_cast<MemTransferInst>(Val: MI))
574 PushPtrOperand(MTI->getRawSource());
575 } else if (auto *II = dyn_cast<IntrinsicInst>(Val: &I))
576 collectRewritableIntrinsicOperands(II, PostorderStack, Visited);
577 else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: &I)) {
578 if (Cmp->getOperand(i_nocapture: 0)->getType()->isPtrOrPtrVectorTy()) {
579 PushPtrOperand(Cmp->getOperand(i_nocapture: 0));
580 PushPtrOperand(Cmp->getOperand(i_nocapture: 1));
581 }
582 } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(Val: &I)) {
583 PushPtrOperand(ASC->getPointerOperand());
584 } else if (auto *I2P = dyn_cast<IntToPtrInst>(Val: &I)) {
585 if (isNoopPtrIntCastPair(I2P: cast<Operator>(Val: I2P), DL: *DL, TTI))
586 PushPtrOperand(cast<Operator>(Val: I2P->getOperand(i_nocapture: 0))->getOperand(i: 0));
587 } else if (auto *RI = dyn_cast<ReturnInst>(Val: &I)) {
588 if (auto *RV = RI->getReturnValue();
589 RV && RV->getType()->isPtrOrPtrVectorTy())
590 PushPtrOperand(RV);
591 }
592 }
593
594 std::vector<WeakTrackingVH> Postorder; // The resultant postorder.
595 while (!PostorderStack.empty()) {
596 Value *TopVal = PostorderStack.back().getPointer();
597 // If the operands of the expression on the top are already explored,
598 // adds that expression to the resultant postorder.
599 if (PostorderStack.back().getInt()) {
600 if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace)
601 Postorder.push_back(x: TopVal);
602 PostorderStack.pop_back();
603 continue;
604 }
605 // Otherwise, adds its operands to the stack and explores them.
606 PostorderStack.back().setInt(true);
607 // Skip values with an assumed address space.
608 if (TTI->getAssumedAddrSpace(V: TopVal) == UninitializedAddressSpace) {
609 for (Value *PtrOperand : getPointerOperands(V: *TopVal, DL: *DL, TTI)) {
610 appendsFlatAddressExpressionToPostorderStack(V: PtrOperand, PostorderStack,
611 Visited);
612 }
613 }
614 }
615 return Postorder;
616}
617
618// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
619// of OperandUse.get() in the new address space. If the clone is not ready yet,
620// returns poison in the new address space as a placeholder.
621static Value *operandWithNewAddressSpaceOrCreatePoison(
622 const Use &OperandUse, unsigned NewAddrSpace,
623 const ValueToValueMapTy &ValueWithNewAddrSpace,
624 const PredicatedAddrSpaceMapTy &PredicatedAS,
625 SmallVectorImpl<const Use *> *PoisonUsesToFix) {
626 Value *Operand = OperandUse.get();
627
628 Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: Operand->getType(), NewAddrSpace);
629
630 if (Constant *C = dyn_cast<Constant>(Val: Operand))
631 return ConstantExpr::getAddrSpaceCast(C, Ty: NewPtrTy);
632
633 if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: Operand))
634 return NewOperand;
635
636 Instruction *Inst = cast<Instruction>(Val: OperandUse.getUser());
637 auto I = PredicatedAS.find(Val: std::make_pair(x&: Inst, y&: Operand));
638 if (I != PredicatedAS.end()) {
639 // Insert an addrspacecast on that operand before the user.
640 unsigned NewAS = I->second;
641 Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: Operand->getType(), NewAddrSpace: NewAS);
642 auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy);
643 NewI->insertBefore(InsertPos: Inst->getIterator());
644 NewI->setDebugLoc(Inst->getDebugLoc());
645 return NewI;
646 }
647
648 PoisonUsesToFix->push_back(Elt: &OperandUse);
649 return PoisonValue::get(T: NewPtrTy);
650}
651
652// Returns a clone of `I` with its operands converted to those specified in
653// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
654// operand whose address space needs to be modified might not exist in
655// ValueWithNewAddrSpace. In that case, uses poison as a placeholder operand and
656// adds that operand use to PoisonUsesToFix so that caller can fix them later.
657//
658// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
659// from a pointer whose type already matches. Therefore, this function returns a
660// Value* instead of an Instruction*.
661//
662// This may also return nullptr in the case the instruction could not be
663// rewritten.
664Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
665 Instruction *I, unsigned NewAddrSpace,
666 const ValueToValueMapTy &ValueWithNewAddrSpace,
667 const PredicatedAddrSpaceMapTy &PredicatedAS,
668 SmallVectorImpl<const Use *> *PoisonUsesToFix) const {
669 Type *NewPtrType = getPtrOrVecOfPtrsWithNewAS(Ty: I->getType(), NewAddrSpace);
670
671 if (I->getOpcode() == Instruction::AddrSpaceCast) {
672 Value *Src = I->getOperand(i: 0);
673 // Because `I` is flat, the source address space must be specific.
674 // Therefore, the inferred address space must be the source space, according
675 // to our algorithm.
676 assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
677 return Src;
678 }
679
680 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
681 // Technically the intrinsic ID is a pointer typed argument, so specially
682 // handle calls early.
683 assert(II->getIntrinsicID() == Intrinsic::ptrmask);
684 Value *NewPtr = operandWithNewAddressSpaceOrCreatePoison(
685 OperandUse: II->getArgOperandUse(i: 0), NewAddrSpace, ValueWithNewAddrSpace,
686 PredicatedAS, PoisonUsesToFix);
687 Value *Rewrite =
688 TTI->rewriteIntrinsicWithAddressSpace(II, OldV: II->getArgOperand(i: 0), NewV: NewPtr);
689 if (Rewrite) {
690 assert(Rewrite != II && "cannot modify this pointer operation in place");
691 return Rewrite;
692 }
693
694 return nullptr;
695 }
696
697 unsigned AS = TTI->getAssumedAddrSpace(V: I);
698 if (AS != UninitializedAddressSpace) {
699 // For the assumed address space, insert an `addrspacecast` to make that
700 // explicit.
701 Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: I->getType(), NewAddrSpace: AS);
702 auto *NewI = new AddrSpaceCastInst(I, NewPtrTy);
703 NewI->insertAfter(InsertPos: I->getIterator());
704 NewI->setDebugLoc(I->getDebugLoc());
705 return NewI;
706 }
707
708 // Computes the converted pointer operands.
709 SmallVector<Value *, 4> NewPointerOperands;
710 for (const Use &OperandUse : I->operands()) {
711 if (!OperandUse.get()->getType()->isPtrOrPtrVectorTy())
712 NewPointerOperands.push_back(Elt: nullptr);
713 else
714 NewPointerOperands.push_back(Elt: operandWithNewAddressSpaceOrCreatePoison(
715 OperandUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
716 PoisonUsesToFix));
717 }
718
719 switch (I->getOpcode()) {
720 case Instruction::BitCast:
721 return new BitCastInst(NewPointerOperands[0], NewPtrType);
722 case Instruction::PHI: {
723 assert(I->getType()->isPtrOrPtrVectorTy());
724 PHINode *PHI = cast<PHINode>(Val: I);
725 PHINode *NewPHI = PHINode::Create(Ty: NewPtrType, NumReservedValues: PHI->getNumIncomingValues());
726 for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
727 unsigned OperandNo = PHINode::getOperandNumForIncomingValue(i: Index);
728 NewPHI->addIncoming(V: NewPointerOperands[OperandNo],
729 BB: PHI->getIncomingBlock(i: Index));
730 }
731 return NewPHI;
732 }
733 case Instruction::GetElementPtr: {
734 GetElementPtrInst *GEP = cast<GetElementPtrInst>(Val: I);
735 GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
736 PointeeType: GEP->getSourceElementType(), Ptr: NewPointerOperands[0],
737 IdxList: SmallVector<Value *, 4>(GEP->indices()));
738 NewGEP->setIsInBounds(GEP->isInBounds());
739 return NewGEP;
740 }
741 case Instruction::Select:
742 assert(I->getType()->isPtrOrPtrVectorTy());
743 return SelectInst::Create(C: I->getOperand(i: 0), S1: NewPointerOperands[1],
744 S2: NewPointerOperands[2], NameStr: "", InsertBefore: nullptr, MDFrom: I);
745 case Instruction::IntToPtr: {
746 assert(isNoopPtrIntCastPair(cast<Operator>(I), *DL, TTI));
747 Value *Src = cast<Operator>(Val: I->getOperand(i: 0))->getOperand(i: 0);
748 if (Src->getType() == NewPtrType)
749 return Src;
750
751 // If we had a no-op inttoptr/ptrtoint pair, we may still have inferred a
752 // source address space from a generic pointer source need to insert a cast
753 // back.
754 return new AddrSpaceCastInst(Src, NewPtrType);
755 }
756 default:
757 llvm_unreachable("Unexpected opcode");
758 }
759}
760
761// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
762// constant expression `CE` with its operands replaced as specified in
763// ValueWithNewAddrSpace.
764static Value *cloneConstantExprWithNewAddressSpace(
765 ConstantExpr *CE, unsigned NewAddrSpace,
766 const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
767 const TargetTransformInfo *TTI) {
768 Type *TargetType =
769 CE->getType()->isPtrOrPtrVectorTy()
770 ? getPtrOrVecOfPtrsWithNewAS(Ty: CE->getType(), NewAddrSpace)
771 : CE->getType();
772
773 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
774 // Because CE is flat, the source address space must be specific.
775 // Therefore, the inferred address space must be the source space according
776 // to our algorithm.
777 assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
778 NewAddrSpace);
779 return CE->getOperand(i_nocapture: 0);
780 }
781
782 if (CE->getOpcode() == Instruction::BitCast) {
783 if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: CE->getOperand(i_nocapture: 0)))
784 return ConstantExpr::getBitCast(C: cast<Constant>(Val: NewOperand), Ty: TargetType);
785 return ConstantExpr::getAddrSpaceCast(C: CE, Ty: TargetType);
786 }
787
788 if (CE->getOpcode() == Instruction::IntToPtr) {
789 assert(isNoopPtrIntCastPair(cast<Operator>(CE), *DL, TTI));
790 Constant *Src = cast<ConstantExpr>(Val: CE->getOperand(i_nocapture: 0))->getOperand(i_nocapture: 0);
791 assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
792 return Src;
793 }
794
795 // Computes the operands of the new constant expression.
796 bool IsNew = false;
797 SmallVector<Constant *, 4> NewOperands;
798 for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
799 Constant *Operand = CE->getOperand(i_nocapture: Index);
800 // If the address space of `Operand` needs to be modified, the new operand
801 // with the new address space should already be in ValueWithNewAddrSpace
802 // because (1) the constant expressions we consider (i.e. addrspacecast,
803 // bitcast, and getelementptr) do not incur cycles in the data flow graph
804 // and (2) this function is called on constant expressions in postorder.
805 if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: Operand)) {
806 IsNew = true;
807 NewOperands.push_back(Elt: cast<Constant>(Val: NewOperand));
808 continue;
809 }
810 if (auto *CExpr = dyn_cast<ConstantExpr>(Val: Operand))
811 if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
812 CE: CExpr, NewAddrSpace, ValueWithNewAddrSpace, DL, TTI)) {
813 IsNew = true;
814 NewOperands.push_back(Elt: cast<Constant>(Val: NewOperand));
815 continue;
816 }
817 // Otherwise, reuses the old operand.
818 NewOperands.push_back(Elt: Operand);
819 }
820
821 // If !IsNew, we will replace the Value with itself. However, replaced values
822 // are assumed to wrapped in an addrspacecast cast later so drop it now.
823 if (!IsNew)
824 return nullptr;
825
826 if (CE->getOpcode() == Instruction::GetElementPtr) {
827 // Needs to specify the source type while constructing a getelementptr
828 // constant expression.
829 return CE->getWithOperands(Ops: NewOperands, Ty: TargetType, /*OnlyIfReduced=*/false,
830 SrcTy: cast<GEPOperator>(Val: CE)->getSourceElementType());
831 }
832
833 return CE->getWithOperands(Ops: NewOperands, Ty: TargetType);
834}
835
836// Returns a clone of the value `V`, with its operands replaced as specified in
837// ValueWithNewAddrSpace. This function is called on every flat address
838// expression whose address space needs to be modified, in postorder.
839//
840// See cloneInstructionWithNewAddressSpace for the meaning of PoisonUsesToFix.
841Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
842 Value *V, unsigned NewAddrSpace,
843 const ValueToValueMapTy &ValueWithNewAddrSpace,
844 const PredicatedAddrSpaceMapTy &PredicatedAS,
845 SmallVectorImpl<const Use *> *PoisonUsesToFix) const {
846 // All values in Postorder are flat address expressions.
847 assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
848 isAddressExpression(*V, *DL, TTI));
849
850 if (auto *Arg = dyn_cast<Argument>(Val: V)) {
851 // Arguments are address space casted in the function body, as we do not
852 // want to change the function signature.
853 Function *F = Arg->getParent();
854 BasicBlock::iterator Insert = F->getEntryBlock().getFirstNonPHIIt();
855
856 Type *NewPtrTy = PointerType::get(C&: Arg->getContext(), AddressSpace: NewAddrSpace);
857 auto *NewI = new AddrSpaceCastInst(Arg, NewPtrTy);
858 NewI->insertBefore(InsertPos: Insert);
859 return NewI;
860 }
861
862 if (Instruction *I = dyn_cast<Instruction>(Val: V)) {
863 Value *NewV = cloneInstructionWithNewAddressSpace(
864 I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
865 if (Instruction *NewI = dyn_cast_or_null<Instruction>(Val: NewV)) {
866 if (NewI->getParent() == nullptr) {
867 NewI->insertBefore(InsertPos: I->getIterator());
868 NewI->takeName(V: I);
869 NewI->setDebugLoc(I->getDebugLoc());
870 }
871 }
872 return NewV;
873 }
874
875 return cloneConstantExprWithNewAddressSpace(
876 CE: cast<ConstantExpr>(Val: V), NewAddrSpace, ValueWithNewAddrSpace, DL, TTI);
877}
878
879// Defines the join operation on the address space lattice (see the file header
880// comments).
881unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1,
882 unsigned AS2) const {
883 if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
884 return FlatAddrSpace;
885
886 if (AS1 == UninitializedAddressSpace)
887 return AS2;
888 if (AS2 == UninitializedAddressSpace)
889 return AS1;
890
891 // The join of two different specific address spaces is flat.
892 return (AS1 == AS2) ? AS1 : FlatAddrSpace;
893}
894
895bool InferAddressSpacesImpl::run(Function &CurFn) {
896 F = &CurFn;
897 DL = &F->getDataLayout();
898
899 if (AssumeDefaultIsFlatAddressSpace)
900 FlatAddrSpace = 0;
901
902 if (FlatAddrSpace == UninitializedAddressSpace) {
903 FlatAddrSpace = TTI->getFlatAddressSpace();
904 if (FlatAddrSpace == UninitializedAddressSpace)
905 return false;
906 }
907
908 // Collects all flat address expressions in postorder.
909 std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F&: *F);
910
911 // Runs a data-flow analysis to refine the address spaces of every expression
912 // in Postorder.
913 ValueToAddrSpaceMapTy InferredAddrSpace;
914 PredicatedAddrSpaceMapTy PredicatedAS;
915 inferAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS);
916
917 // Changes the address spaces of the flat address expressions who are inferred
918 // to point to a specific address space.
919 return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace,
920 PredicatedAS);
921}
922
923// Constants need to be tracked through RAUW to handle cases with nested
924// constant expressions, so wrap values in WeakTrackingVH.
925void InferAddressSpacesImpl::inferAddressSpaces(
926 ArrayRef<WeakTrackingVH> Postorder,
927 ValueToAddrSpaceMapTy &InferredAddrSpace,
928 PredicatedAddrSpaceMapTy &PredicatedAS) const {
929 SetVector<Value *> Worklist(llvm::from_range, Postorder);
930 // Initially, all expressions are in the uninitialized address space.
931 for (Value *V : Postorder)
932 InferredAddrSpace[V] = UninitializedAddressSpace;
933
934 while (!Worklist.empty()) {
935 Value *V = Worklist.pop_back_val();
936
937 // Try to update the address space of the stack top according to the
938 // address spaces of its operands.
939 if (!updateAddressSpace(V: *V, InferredAddrSpace, PredicatedAS))
940 continue;
941
942 for (Value *User : V->users()) {
943 // Skip if User is already in the worklist.
944 if (Worklist.count(key: User))
945 continue;
946
947 auto Pos = InferredAddrSpace.find(Val: User);
948 // Our algorithm only updates the address spaces of flat address
949 // expressions, which are those in InferredAddrSpace.
950 if (Pos == InferredAddrSpace.end())
951 continue;
952
953 // Function updateAddressSpace moves the address space down a lattice
954 // path. Therefore, nothing to do if User is already inferred as flat (the
955 // bottom element in the lattice).
956 if (Pos->second == FlatAddrSpace)
957 continue;
958
959 Worklist.insert(X: User);
960 }
961 }
962}
963
964unsigned
965InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &Ptr,
966 const Value *UserCtx) const {
967 const Instruction *UserCtxI = dyn_cast<Instruction>(Val: UserCtx);
968 if (!UserCtxI)
969 return UninitializedAddressSpace;
970
971 const Value *StrippedPtr = Ptr.stripInBoundsOffsets();
972 for (auto &AssumeVH : AC.assumptionsFor(V: StrippedPtr)) {
973 if (!AssumeVH)
974 continue;
975 CallInst *CI = cast<CallInst>(Val&: AssumeVH);
976 if (!isValidAssumeForContext(I: CI, CxtI: UserCtxI, DT))
977 continue;
978
979 const Value *Ptr;
980 unsigned AS;
981 std::tie(args&: Ptr, args&: AS) = TTI->getPredicatedAddrSpace(V: CI->getArgOperand(i: 0));
982 if (Ptr)
983 return AS;
984 }
985
986 return UninitializedAddressSpace;
987}
988
989bool InferAddressSpacesImpl::updateAddressSpace(
990 const Value &V, ValueToAddrSpaceMapTy &InferredAddrSpace,
991 PredicatedAddrSpaceMapTy &PredicatedAS) const {
992 assert(InferredAddrSpace.count(&V));
993
994 LLVM_DEBUG(dbgs() << "Updating the address space of\n " << V << '\n');
995
996 // The new inferred address space equals the join of the address spaces
997 // of all its pointer operands.
998 unsigned NewAS = UninitializedAddressSpace;
999
1000 // isAddressExpression should guarantee that V is an operator or an argument.
1001 assert(isa<Operator>(V) || isa<Argument>(V));
1002
1003 if (isa<Operator>(Val: V) &&
1004 cast<Operator>(Val: V).getOpcode() == Instruction::Select) {
1005 const Operator &Op = cast<Operator>(Val: V);
1006 Value *Src0 = Op.getOperand(i: 1);
1007 Value *Src1 = Op.getOperand(i: 2);
1008
1009 auto I = InferredAddrSpace.find(Val: Src0);
1010 unsigned Src0AS = (I != InferredAddrSpace.end())
1011 ? I->second
1012 : Src0->getType()->getPointerAddressSpace();
1013
1014 auto J = InferredAddrSpace.find(Val: Src1);
1015 unsigned Src1AS = (J != InferredAddrSpace.end())
1016 ? J->second
1017 : Src1->getType()->getPointerAddressSpace();
1018
1019 auto *C0 = dyn_cast<Constant>(Val: Src0);
1020 auto *C1 = dyn_cast<Constant>(Val: Src1);
1021
1022 // If one of the inputs is a constant, we may be able to do a constant
1023 // addrspacecast of it. Defer inferring the address space until the input
1024 // address space is known.
1025 if ((C1 && Src0AS == UninitializedAddressSpace) ||
1026 (C0 && Src1AS == UninitializedAddressSpace))
1027 return false;
1028
1029 if (C0 && isSafeToCastConstAddrSpace(C: C0, NewAS: Src1AS))
1030 NewAS = Src1AS;
1031 else if (C1 && isSafeToCastConstAddrSpace(C: C1, NewAS: Src0AS))
1032 NewAS = Src0AS;
1033 else
1034 NewAS = joinAddressSpaces(AS1: Src0AS, AS2: Src1AS);
1035 } else {
1036 unsigned AS = TTI->getAssumedAddrSpace(V: &V);
1037 if (AS != UninitializedAddressSpace) {
1038 // Use the assumed address space directly.
1039 NewAS = AS;
1040 } else {
1041 // Otherwise, infer the address space from its pointer operands.
1042 for (Value *PtrOperand : getPointerOperands(V, DL: *DL, TTI)) {
1043 auto I = InferredAddrSpace.find(Val: PtrOperand);
1044 unsigned OperandAS;
1045 if (I == InferredAddrSpace.end()) {
1046 OperandAS = PtrOperand->getType()->getPointerAddressSpace();
1047 if (OperandAS == FlatAddrSpace) {
1048 // Check AC for assumption dominating V.
1049 unsigned AS = getPredicatedAddrSpace(Ptr: *PtrOperand, UserCtx: &V);
1050 if (AS != UninitializedAddressSpace) {
1051 LLVM_DEBUG(dbgs()
1052 << " deduce operand AS from the predicate addrspace "
1053 << AS << '\n');
1054 OperandAS = AS;
1055 // Record this use with the predicated AS.
1056 PredicatedAS[std::make_pair(x: &V, y&: PtrOperand)] = OperandAS;
1057 }
1058 }
1059 } else
1060 OperandAS = I->second;
1061
1062 // join(flat, *) = flat. So we can break if NewAS is already flat.
1063 NewAS = joinAddressSpaces(AS1: NewAS, AS2: OperandAS);
1064 if (NewAS == FlatAddrSpace)
1065 break;
1066 }
1067 }
1068 }
1069
1070 unsigned OldAS = InferredAddrSpace.lookup(Val: &V);
1071 assert(OldAS != FlatAddrSpace);
1072 if (OldAS == NewAS)
1073 return false;
1074
1075 // If any updates are made, grabs its users to the worklist because
1076 // their address spaces can also be possibly updated.
1077 LLVM_DEBUG(dbgs() << " to " << NewAS << '\n');
1078 InferredAddrSpace[&V] = NewAS;
1079 return true;
1080}
1081
1082/// Replace operand \p OpIdx in \p Inst, if the value is the same as \p OldVal
1083/// with \p NewVal.
1084static bool replaceOperandIfSame(Instruction *Inst, unsigned OpIdx,
1085 Value *OldVal, Value *NewVal) {
1086 Use &U = Inst->getOperandUse(i: OpIdx);
1087 if (U.get() == OldVal) {
1088 U.set(NewVal);
1089 return true;
1090 }
1091
1092 return false;
1093}
1094
1095template <typename InstrType>
1096static bool replaceSimplePointerUse(const TargetTransformInfo &TTI,
1097 InstrType *MemInstr, unsigned AddrSpace,
1098 Value *OldV, Value *NewV) {
1099 if (!MemInstr->isVolatile() || TTI.hasVolatileVariant(I: MemInstr, AddrSpace)) {
1100 return replaceOperandIfSame(MemInstr, InstrType::getPointerOperandIndex(),
1101 OldV, NewV);
1102 }
1103
1104 return false;
1105}
1106
1107/// If \p OldV is used as the pointer operand of a compatible memory operation
1108/// \p Inst, replaces the pointer operand with NewV.
1109///
1110/// This covers memory instructions with a single pointer operand that can have
1111/// its address space changed by simply mutating the use to a new value.
1112///
1113/// \p returns true the user replacement was made.
1114static bool replaceIfSimplePointerUse(const TargetTransformInfo &TTI,
1115 User *Inst, unsigned AddrSpace,
1116 Value *OldV, Value *NewV) {
1117 if (auto *LI = dyn_cast<LoadInst>(Val: Inst))
1118 return replaceSimplePointerUse(TTI, MemInstr: LI, AddrSpace, OldV, NewV);
1119
1120 if (auto *SI = dyn_cast<StoreInst>(Val: Inst))
1121 return replaceSimplePointerUse(TTI, MemInstr: SI, AddrSpace, OldV, NewV);
1122
1123 if (auto *RMW = dyn_cast<AtomicRMWInst>(Val: Inst))
1124 return replaceSimplePointerUse(TTI, MemInstr: RMW, AddrSpace, OldV, NewV);
1125
1126 if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: Inst))
1127 return replaceSimplePointerUse(TTI, MemInstr: CmpX, AddrSpace, OldV, NewV);
1128
1129 return false;
1130}
1131
1132/// Update memory intrinsic uses that require more complex processing than
1133/// simple memory instructions. These require re-mangling and may have multiple
1134/// pointer operands.
1135static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
1136 Value *NewV) {
1137 IRBuilder<> B(MI);
1138 if (auto *MSI = dyn_cast<MemSetInst>(Val: MI)) {
1139 B.CreateMemSet(Ptr: NewV, Val: MSI->getValue(), Size: MSI->getLength(), Align: MSI->getDestAlign(),
1140 isVolatile: false, // isVolatile
1141 AAInfo: MI->getAAMetadata());
1142 } else if (auto *MTI = dyn_cast<MemTransferInst>(Val: MI)) {
1143 Value *Src = MTI->getRawSource();
1144 Value *Dest = MTI->getRawDest();
1145
1146 // Be careful in case this is a self-to-self copy.
1147 if (Src == OldV)
1148 Src = NewV;
1149
1150 if (Dest == OldV)
1151 Dest = NewV;
1152
1153 if (auto *MCI = dyn_cast<MemCpyInst>(Val: MTI)) {
1154 if (MCI->isForceInlined())
1155 B.CreateMemCpyInline(Dst: Dest, DstAlign: MTI->getDestAlign(), Src,
1156 SrcAlign: MTI->getSourceAlign(), Size: MTI->getLength(),
1157 isVolatile: false, // isVolatile
1158 AAInfo: MI->getAAMetadata());
1159 else
1160 B.CreateMemCpy(Dst: Dest, DstAlign: MTI->getDestAlign(), Src, SrcAlign: MTI->getSourceAlign(),
1161 Size: MTI->getLength(),
1162 isVolatile: false, // isVolatile
1163 AAInfo: MI->getAAMetadata());
1164 } else {
1165 assert(isa<MemMoveInst>(MTI));
1166 B.CreateMemMove(Dst: Dest, DstAlign: MTI->getDestAlign(), Src, SrcAlign: MTI->getSourceAlign(),
1167 Size: MTI->getLength(),
1168 isVolatile: false, // isVolatile
1169 AAInfo: MI->getAAMetadata());
1170 }
1171 } else
1172 llvm_unreachable("unhandled MemIntrinsic");
1173
1174 MI->eraseFromParent();
1175 return true;
1176}
1177
1178// \p returns true if it is OK to change the address space of constant \p C with
1179// a ConstantExpr addrspacecast.
1180bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C,
1181 unsigned NewAS) const {
1182 assert(NewAS != UninitializedAddressSpace);
1183
1184 unsigned SrcAS = C->getType()->getPointerAddressSpace();
1185 if (SrcAS == NewAS || isa<UndefValue>(Val: C))
1186 return true;
1187
1188 // Prevent illegal casts between different non-flat address spaces.
1189 if (SrcAS != FlatAddrSpace && NewAS != FlatAddrSpace)
1190 return false;
1191
1192 if (isa<ConstantPointerNull>(Val: C))
1193 return true;
1194
1195 if (auto *Op = dyn_cast<Operator>(Val: C)) {
1196 // If we already have a constant addrspacecast, it should be safe to cast it
1197 // off.
1198 if (Op->getOpcode() == Instruction::AddrSpaceCast)
1199 return isSafeToCastConstAddrSpace(C: cast<Constant>(Val: Op->getOperand(i: 0)),
1200 NewAS);
1201
1202 if (Op->getOpcode() == Instruction::IntToPtr &&
1203 Op->getType()->getPointerAddressSpace() == FlatAddrSpace)
1204 return true;
1205 }
1206
1207 return false;
1208}
1209
1210static Value::use_iterator skipToNextUser(Value::use_iterator I,
1211 Value::use_iterator End) {
1212 User *CurUser = I->getUser();
1213 ++I;
1214
1215 while (I != End && I->getUser() == CurUser)
1216 ++I;
1217
1218 return I;
1219}
1220
1221void InferAddressSpacesImpl::performPointerReplacement(
1222 Value *V, Value *NewV, Use &U, ValueToValueMapTy &ValueWithNewAddrSpace,
1223 SmallVectorImpl<Instruction *> &DeadInstructions) const {
1224
1225 User *CurUser = U.getUser();
1226
1227 unsigned AddrSpace = V->getType()->getPointerAddressSpace();
1228 if (replaceIfSimplePointerUse(TTI: *TTI, Inst: CurUser, AddrSpace, OldV: V, NewV))
1229 return;
1230
1231 // Skip if the current user is the new value itself.
1232 if (CurUser == NewV)
1233 return;
1234
1235 auto *CurUserI = dyn_cast<Instruction>(Val: CurUser);
1236 if (!CurUserI || CurUserI->getFunction() != F)
1237 return;
1238
1239 // Handle more complex cases like intrinsic that need to be remangled.
1240 if (auto *MI = dyn_cast<MemIntrinsic>(Val: CurUser)) {
1241 if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, OldV: V, NewV))
1242 return;
1243 }
1244
1245 if (auto *II = dyn_cast<IntrinsicInst>(Val: CurUser)) {
1246 if (rewriteIntrinsicOperands(II, OldV: V, NewV))
1247 return;
1248 }
1249
1250 if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: CurUserI)) {
1251 // If we can infer that both pointers are in the same addrspace,
1252 // transform e.g.
1253 // %cmp = icmp eq float* %p, %q
1254 // into
1255 // %cmp = icmp eq float addrspace(3)* %new_p, %new_q
1256
1257 unsigned NewAS = NewV->getType()->getPointerAddressSpace();
1258 int SrcIdx = U.getOperandNo();
1259 int OtherIdx = (SrcIdx == 0) ? 1 : 0;
1260 Value *OtherSrc = Cmp->getOperand(i_nocapture: OtherIdx);
1261
1262 if (Value *OtherNewV = ValueWithNewAddrSpace.lookup(Val: OtherSrc)) {
1263 if (OtherNewV->getType()->getPointerAddressSpace() == NewAS) {
1264 Cmp->setOperand(i_nocapture: OtherIdx, Val_nocapture: OtherNewV);
1265 Cmp->setOperand(i_nocapture: SrcIdx, Val_nocapture: NewV);
1266 return;
1267 }
1268 }
1269
1270 // Even if the type mismatches, we can cast the constant.
1271 if (auto *KOtherSrc = dyn_cast<Constant>(Val: OtherSrc)) {
1272 if (isSafeToCastConstAddrSpace(C: KOtherSrc, NewAS)) {
1273 Cmp->setOperand(i_nocapture: SrcIdx, Val_nocapture: NewV);
1274 Cmp->setOperand(i_nocapture: OtherIdx, Val_nocapture: ConstantExpr::getAddrSpaceCast(
1275 C: KOtherSrc, Ty: NewV->getType()));
1276 return;
1277 }
1278 }
1279 }
1280
1281 if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(Val: CurUserI)) {
1282 unsigned NewAS = NewV->getType()->getPointerAddressSpace();
1283 if (ASC->getDestAddressSpace() == NewAS) {
1284 ASC->replaceAllUsesWith(V: NewV);
1285 DeadInstructions.push_back(Elt: ASC);
1286 return;
1287 }
1288 }
1289
1290 // Otherwise, replaces the use with flat(NewV).
1291 if (isa<Instruction>(Val: V) || isa<Instruction>(Val: NewV)) {
1292 // Don't create a copy of the original addrspacecast.
1293 if (U == V && isa<AddrSpaceCastInst>(Val: V))
1294 return;
1295
1296 // Insert the addrspacecast after NewV.
1297 BasicBlock::iterator InsertPos;
1298 if (Instruction *NewVInst = dyn_cast<Instruction>(Val: NewV))
1299 InsertPos = std::next(x: NewVInst->getIterator());
1300 else
1301 InsertPos = std::next(x: cast<Instruction>(Val: V)->getIterator());
1302
1303 while (isa<PHINode>(Val: InsertPos))
1304 ++InsertPos;
1305 // This instruction may contain multiple uses of V, update them all.
1306 CurUser->replaceUsesOfWith(
1307 From: V, To: new AddrSpaceCastInst(NewV, V->getType(), "", InsertPos));
1308 } else {
1309 CurUserI->replaceUsesOfWith(
1310 From: V, To: ConstantExpr::getAddrSpaceCast(C: cast<Constant>(Val: NewV), Ty: V->getType()));
1311 }
1312}
1313
1314bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
1315 ArrayRef<WeakTrackingVH> Postorder,
1316 const ValueToAddrSpaceMapTy &InferredAddrSpace,
1317 const PredicatedAddrSpaceMapTy &PredicatedAS) const {
1318 // For each address expression to be modified, creates a clone of it with its
1319 // pointer operands converted to the new address space. Since the pointer
1320 // operands are converted, the clone is naturally in the new address space by
1321 // construction.
1322 ValueToValueMapTy ValueWithNewAddrSpace;
1323 SmallVector<const Use *, 32> PoisonUsesToFix;
1324 for (Value *V : Postorder) {
1325 unsigned NewAddrSpace = InferredAddrSpace.lookup(Val: V);
1326
1327 // In some degenerate cases (e.g. invalid IR in unreachable code), we may
1328 // not even infer the value to have its original address space.
1329 if (NewAddrSpace == UninitializedAddressSpace)
1330 continue;
1331
1332 if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
1333 Value *New =
1334 cloneValueWithNewAddressSpace(V, NewAddrSpace, ValueWithNewAddrSpace,
1335 PredicatedAS, PoisonUsesToFix: &PoisonUsesToFix);
1336 if (New)
1337 ValueWithNewAddrSpace[V] = New;
1338 }
1339 }
1340
1341 if (ValueWithNewAddrSpace.empty())
1342 return false;
1343
1344 // Fixes all the poison uses generated by cloneInstructionWithNewAddressSpace.
1345 for (const Use *PoisonUse : PoisonUsesToFix) {
1346 User *V = PoisonUse->getUser();
1347 User *NewV = cast_or_null<User>(Val: ValueWithNewAddrSpace.lookup(Val: V));
1348 if (!NewV)
1349 continue;
1350
1351 unsigned OperandNo = PoisonUse->getOperandNo();
1352 assert(isa<PoisonValue>(NewV->getOperand(OperandNo)));
1353 NewV->setOperand(i: OperandNo, Val: ValueWithNewAddrSpace.lookup(Val: PoisonUse->get()));
1354 }
1355
1356 SmallVector<Instruction *, 16> DeadInstructions;
1357 ValueToValueMapTy VMap;
1358 ValueMapper VMapper(VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1359
1360 // Replaces the uses of the old address expressions with the new ones.
1361 for (const WeakTrackingVH &WVH : Postorder) {
1362 assert(WVH && "value was unexpectedly deleted");
1363 Value *V = WVH;
1364 Value *NewV = ValueWithNewAddrSpace.lookup(Val: V);
1365 if (NewV == nullptr)
1366 continue;
1367
1368 LLVM_DEBUG(dbgs() << "Replacing the uses of " << *V << "\n with\n "
1369 << *NewV << '\n');
1370
1371 if (Constant *C = dyn_cast<Constant>(Val: V)) {
1372 Constant *Replace =
1373 ConstantExpr::getAddrSpaceCast(C: cast<Constant>(Val: NewV), Ty: C->getType());
1374 if (C != Replace) {
1375 LLVM_DEBUG(dbgs() << "Inserting replacement const cast: " << Replace
1376 << ": " << *Replace << '\n');
1377 SmallVector<User *, 16> WorkList;
1378 for (User *U : make_early_inc_range(Range: C->users())) {
1379 if (auto *I = dyn_cast<Instruction>(Val: U)) {
1380 if (I->getFunction() == F)
1381 I->replaceUsesOfWith(From: C, To: Replace);
1382 } else {
1383 WorkList.append(in_start: U->user_begin(), in_end: U->user_end());
1384 }
1385 }
1386 if (!WorkList.empty()) {
1387 VMap[C] = Replace;
1388 DenseSet<User *> Visited{WorkList.begin(), WorkList.end()};
1389 while (!WorkList.empty()) {
1390 User *U = WorkList.pop_back_val();
1391 if (auto *I = dyn_cast<Instruction>(Val: U)) {
1392 if (I->getFunction() == F)
1393 VMapper.remapInstruction(I&: *I);
1394 continue;
1395 }
1396 for (User *U2 : U->users())
1397 if (Visited.insert(V: U2).second)
1398 WorkList.push_back(Elt: U2);
1399 }
1400 }
1401 V = Replace;
1402 }
1403 }
1404
1405 Value::use_iterator I, E, Next;
1406 for (I = V->use_begin(), E = V->use_end(); I != E;) {
1407 Use &U = *I;
1408
1409 // Some users may see the same pointer operand in multiple operands. Skip
1410 // to the next instruction.
1411 I = skipToNextUser(I, End: E);
1412
1413 performPointerReplacement(V, NewV, U, ValueWithNewAddrSpace,
1414 DeadInstructions);
1415 }
1416
1417 if (V->use_empty()) {
1418 if (Instruction *I = dyn_cast<Instruction>(Val: V))
1419 DeadInstructions.push_back(Elt: I);
1420 }
1421 }
1422
1423 for (Instruction *I : DeadInstructions)
1424 RecursivelyDeleteTriviallyDeadInstructions(V: I);
1425
1426 return true;
1427}
1428
1429bool InferAddressSpaces::runOnFunction(Function &F) {
1430 if (skipFunction(F))
1431 return false;
1432
1433 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
1434 DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
1435 return InferAddressSpacesImpl(
1436 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), DT,
1437 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
1438 FlatAddrSpace)
1439 .run(CurFn&: F);
1440}
1441
1442FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {
1443 return new InferAddressSpaces(AddressSpace);
1444}
1445
1446InferAddressSpacesPass::InferAddressSpacesPass()
1447 : FlatAddrSpace(UninitializedAddressSpace) {}
1448InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
1449 : FlatAddrSpace(AddressSpace) {}
1450
1451PreservedAnalyses InferAddressSpacesPass::run(Function &F,
1452 FunctionAnalysisManager &AM) {
1453 bool Changed =
1454 InferAddressSpacesImpl(AM.getResult<AssumptionAnalysis>(IR&: F),
1455 AM.getCachedResult<DominatorTreeAnalysis>(IR&: F),
1456 &AM.getResult<TargetIRAnalysis>(IR&: F), FlatAddrSpace)
1457 .run(CurFn&: F);
1458 if (Changed) {
1459 PreservedAnalyses PA;
1460 PA.preserveSet<CFGAnalyses>();
1461 PA.preserve<DominatorTreeAnalysis>();
1462 return PA;
1463 }
1464 return PreservedAnalyses::all();
1465}
1466