1//===- InferAddressSpace.cpp - --------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// CUDA C/C++ includes memory space designation as variable type qualifiers
// (such as __global__ and __shared__). Knowing the space of a memory access
// allows CUDA compilers to emit faster PTX loads and stores. For example, a
// load from shared memory can be translated to `ld.shared` which is roughly 10%
// faster than a generic `ld` on an NVIDIA Tesla K40c.
14//
15// Unfortunately, type qualifiers only apply to variable declarations, so CUDA
16// compilers must infer the memory space of an address expression from
17// type-qualified variables.
18//
19// LLVM IR uses non-zero (so-called) specific address spaces to represent memory
20// spaces (e.g. addrspace(3) means shared memory). The Clang frontend
21// places only type-qualified variables in specific address spaces, and then
22// conservatively `addrspacecast`s each type-qualified variable to addrspace(0)
23// (so-called the generic address space) for other instructions to use.
24//
// For example, Clang translates the following CUDA code
//   __shared__ float a[10];
//   float v = a[i];
// to
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
//   %v = load float, float* %1 ; emits ld.f32
// @a is in addrspace(3) since it's type-qualified, but its use from %1 is
// redirected to %0 (the generic version of @a).
34//
// The optimization implemented in this file propagates specific address spaces
// from type-qualified variable declarations to their users. For example, it
// optimizes the above IR to
//   %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %v = load float addrspace(3)* %1 ; emits ld.shared.f32
// propagating the addrspace(3) from @a to %1. As a result, the NVPTX
// codegen is able to emit ld.shared.f32 for %v.
42//
43// Address space inference works in two steps. First, it uses a data-flow
44// analysis to infer as many generic pointers as possible to point to only one
45// specific address space. In the above example, it can prove that %1 only
46// points to addrspace(3). This algorithm was published in
47// CUDA: Compiling and optimizing for a GPU platform
48// Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
49// ICCS 2012
50//
51// Then, address space inference replaces all refinable generic pointers with
52// equivalent specific pointers.
53//
54// The major challenge of implementing this optimization is handling PHINodes,
55// which may create loops in the data flow graph. This brings two complications.
56//
57// First, the data flow analysis in Step 1 needs to be circular. For example,
58// %generic.input = addrspacecast float addrspace(3)* %input to float*
59// loop:
60// %y = phi [ %generic.input, %y2 ]
61// %y2 = getelementptr %y, 1
62// %v = load %y2
63// br ..., label %loop, ...
64// proving %y specific requires proving both %generic.input and %y2 specific,
65// but proving %y2 specific circles back to %y. To address this complication,
66// the data flow analysis operates on a lattice:
67// uninitialized > specific address spaces > generic.
68// All address expressions (our implementation only considers phi, bitcast,
69// addrspacecast, and getelementptr) start with the uninitialized address space.
70// The monotone transfer function moves the address space of a pointer down a
71// lattice path from uninitialized to specific and then to generic. A join
72// operation of two different specific address spaces pushes the expression down
73// to the generic address space. The analysis completes once it reaches a fixed
74// point.
75//
76// Second, IR rewriting in Step 2 also needs to be circular. For example,
77// converting %y to addrspace(3) requires the compiler to know the converted
78// %y2, but converting %y2 needs the converted %y. To address this complication,
79// we break these cycles using "poison" placeholders. When converting an
80// instruction `I` to a new address space, if its operand `Op` is not converted
81// yet, we let `I` temporarily use `poison` and fix all the uses later.
82// For instance, our algorithm first converts %y to
83// %y' = phi float addrspace(3)* [ %input, poison ]
84// Then, it converts %y2 to
85// %y2' = getelementptr %y', 1
86// Finally, it fixes the poison in %y' so that
87// %y' = phi float addrspace(3)* [ %input, %y2' ]
88//
89//===----------------------------------------------------------------------===//
90
91#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
92#include "llvm/ADT/ArrayRef.h"
93#include "llvm/ADT/DenseMap.h"
94#include "llvm/ADT/DenseSet.h"
95#include "llvm/ADT/SetVector.h"
96#include "llvm/ADT/SmallVector.h"
97#include "llvm/Analysis/AssumptionCache.h"
98#include "llvm/Analysis/TargetTransformInfo.h"
99#include "llvm/Analysis/ValueTracking.h"
100#include "llvm/IR/BasicBlock.h"
101#include "llvm/IR/Constant.h"
102#include "llvm/IR/Constants.h"
103#include "llvm/IR/Dominators.h"
104#include "llvm/IR/Function.h"
105#include "llvm/IR/IRBuilder.h"
106#include "llvm/IR/InstIterator.h"
107#include "llvm/IR/Instruction.h"
108#include "llvm/IR/Instructions.h"
109#include "llvm/IR/IntrinsicInst.h"
110#include "llvm/IR/Intrinsics.h"
111#include "llvm/IR/LLVMContext.h"
112#include "llvm/IR/Operator.h"
113#include "llvm/IR/PassManager.h"
114#include "llvm/IR/Type.h"
115#include "llvm/IR/Use.h"
116#include "llvm/IR/User.h"
117#include "llvm/IR/Value.h"
118#include "llvm/IR/ValueHandle.h"
119#include "llvm/InitializePasses.h"
120#include "llvm/Pass.h"
121#include "llvm/Support/Casting.h"
122#include "llvm/Support/CommandLine.h"
123#include "llvm/Support/Debug.h"
124#include "llvm/Support/ErrorHandling.h"
125#include "llvm/Support/raw_ostream.h"
126#include "llvm/Transforms/Scalar.h"
127#include "llvm/Transforms/Utils/Local.h"
128#include "llvm/Transforms/Utils/ValueMapper.h"
129#include <cassert>
130#include <iterator>
131#include <limits>
132#include <utility>
133#include <vector>
134
135#define DEBUG_TYPE "infer-address-spaces"
136
137using namespace llvm;
138
139static cl::opt<bool> AssumeDefaultIsFlatAddressSpace(
140 "assume-default-is-flat-addrspace", cl::init(Val: false), cl::ReallyHidden,
141 cl::desc("The default address space is assumed as the flat address space. "
142 "This is mainly for test purpose."));
143
// Sentinel address space number meaning "not known (yet)". It is the largest
// unsigned value, and the rest of this file compares target-hook results such
// as TTI->getAssumedAddrSpace() against it.
static constexpr unsigned UninitializedAddressSpace =
    std::numeric_limits<unsigned>::max();
146
147namespace {
148
149using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
150// Different from ValueToAddrSpaceMapTy, where a new addrspace is inferred on
151// the *def* of a value, PredicatedAddrSpaceMapTy is map where a new
152// addrspace is inferred on the *use* of a pointer. This map is introduced to
153// infer addrspace from the addrspace predicate assumption built from assume
154// intrinsic. In that scenario, only specific uses (under valid assumption
155// context) could be inferred with a new addrspace.
156using PredicatedAddrSpaceMapTy =
157 DenseMap<std::pair<const Value *, const Value *>, unsigned>;
158using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>;
159
160class InferAddressSpaces : public FunctionPass {
161 unsigned FlatAddrSpace = 0;
162
163public:
164 static char ID;
165
166 InferAddressSpaces()
167 : FunctionPass(ID), FlatAddrSpace(UninitializedAddressSpace) {
168 initializeInferAddressSpacesPass(*PassRegistry::getPassRegistry());
169 }
170 InferAddressSpaces(unsigned AS) : FunctionPass(ID), FlatAddrSpace(AS) {
171 initializeInferAddressSpacesPass(*PassRegistry::getPassRegistry());
172 }
173
174 void getAnalysisUsage(AnalysisUsage &AU) const override {
175 AU.setPreservesCFG();
176 AU.addPreserved<DominatorTreeWrapperPass>();
177 AU.addRequired<AssumptionCacheTracker>();
178 AU.addRequired<TargetTransformInfoWrapperPass>();
179 }
180
181 bool runOnFunction(Function &F) override;
182};
183
184class InferAddressSpacesImpl {
185 AssumptionCache &AC;
186 Function *F = nullptr;
187 const DominatorTree *DT = nullptr;
188 const TargetTransformInfo *TTI = nullptr;
189 const DataLayout *DL = nullptr;
190
191 /// Target specific address space which uses of should be replaced if
192 /// possible.
193 unsigned FlatAddrSpace = 0;
194
195 // Try to update the address space of V. If V is updated, returns true and
196 // false otherwise.
197 bool updateAddressSpace(const Value &V,
198 ValueToAddrSpaceMapTy &InferredAddrSpace,
199 PredicatedAddrSpaceMapTy &PredicatedAS) const;
200
201 // Tries to infer the specific address space of each address expression in
202 // Postorder.
203 void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
204 ValueToAddrSpaceMapTy &InferredAddrSpace,
205 PredicatedAddrSpaceMapTy &PredicatedAS) const;
206
207 bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
208
209 Value *clonePtrMaskWithNewAddressSpace(
210 IntrinsicInst *I, unsigned NewAddrSpace,
211 const ValueToValueMapTy &ValueWithNewAddrSpace,
212 const PredicatedAddrSpaceMapTy &PredicatedAS,
213 SmallVectorImpl<const Use *> *PoisonUsesToFix) const;
214
215 Value *cloneInstructionWithNewAddressSpace(
216 Instruction *I, unsigned NewAddrSpace,
217 const ValueToValueMapTy &ValueWithNewAddrSpace,
218 const PredicatedAddrSpaceMapTy &PredicatedAS,
219 SmallVectorImpl<const Use *> *PoisonUsesToFix) const;
220
221 void performPointerReplacement(
222 Value *V, Value *NewV, Use &U, ValueToValueMapTy &ValueWithNewAddrSpace,
223 SmallVectorImpl<Instruction *> &DeadInstructions) const;
224
225 // Changes the flat address expressions in function F to point to specific
226 // address spaces if InferredAddrSpace says so. Postorder is the postorder of
227 // all flat expressions in the use-def graph of function F.
228 bool rewriteWithNewAddressSpaces(
229 ArrayRef<WeakTrackingVH> Postorder,
230 const ValueToAddrSpaceMapTy &InferredAddrSpace,
231 const PredicatedAddrSpaceMapTy &PredicatedAS) const;
232
233 void appendsFlatAddressExpressionToPostorderStack(
234 Value *V, PostorderStackTy &PostorderStack,
235 DenseSet<Value *> &Visited) const;
236
237 bool rewriteIntrinsicOperands(IntrinsicInst *II, Value *OldV,
238 Value *NewV) const;
239 void collectRewritableIntrinsicOperands(IntrinsicInst *II,
240 PostorderStackTy &PostorderStack,
241 DenseSet<Value *> &Visited) const;
242
243 std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;
244
245 Value *cloneValueWithNewAddressSpace(
246 Value *V, unsigned NewAddrSpace,
247 const ValueToValueMapTy &ValueWithNewAddrSpace,
248 const PredicatedAddrSpaceMapTy &PredicatedAS,
249 SmallVectorImpl<const Use *> *PoisonUsesToFix) const;
250 unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
251
252 unsigned getPredicatedAddrSpace(const Value &PtrV,
253 const Value *UserCtx) const;
254
255public:
256 InferAddressSpacesImpl(AssumptionCache &AC, const DominatorTree *DT,
257 const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
258 : AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
259 bool run(Function &F);
260};
261
262} // end anonymous namespace
263
264char InferAddressSpaces::ID = 0;
265
266INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
267 false, false)
268INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
269INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
270INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
271 false, false)
272
273static Type *getPtrOrVecOfPtrsWithNewAS(Type *Ty, unsigned NewAddrSpace) {
274 assert(Ty->isPtrOrPtrVectorTy());
275 PointerType *NPT = PointerType::get(C&: Ty->getContext(), AddressSpace: NewAddrSpace);
276 return Ty->getWithNewType(EltTy: NPT);
277}
278
279// Check whether that's no-op pointer bicast using a pair of
280// `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over
281// different address spaces.
282static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
283 const TargetTransformInfo *TTI) {
284 assert(I2P->getOpcode() == Instruction::IntToPtr);
285 auto *P2I = dyn_cast<Operator>(Val: I2P->getOperand(i: 0));
286 if (!P2I || P2I->getOpcode() != Instruction::PtrToInt)
287 return false;
288 // Check it's really safe to treat that pair of `ptrtoint`/`inttoptr` as a
289 // no-op cast. Besides checking both of them are no-op casts, as the
290 // reinterpreted pointer may be used in other pointer arithmetic, we also
291 // need to double-check that through the target-specific hook. That ensures
292 // the underlying target also agrees that's a no-op address space cast and
293 // pointer bits are preserved.
294 // The current IR spec doesn't have clear rules on address space casts,
295 // especially a clear definition for pointer bits in non-default address
296 // spaces. It would be undefined if that pointer is dereferenced after an
297 // invalid reinterpret cast. Also, due to the unclearness for the meaning of
298 // bits in non-default address spaces in the current spec, the pointer
299 // arithmetic may also be undefined after invalid pointer reinterpret cast.
300 // However, as we confirm through the target hooks that it's a no-op
301 // addrspacecast, it doesn't matter since the bits should be the same.
302 unsigned P2IOp0AS = P2I->getOperand(i: 0)->getType()->getPointerAddressSpace();
303 unsigned I2PAS = I2P->getType()->getPointerAddressSpace();
304 return CastInst::isNoopCast(Opcode: Instruction::CastOps(I2P->getOpcode()),
305 SrcTy: I2P->getOperand(i: 0)->getType(), DstTy: I2P->getType(),
306 DL) &&
307 CastInst::isNoopCast(Opcode: Instruction::CastOps(P2I->getOpcode()),
308 SrcTy: P2I->getOperand(i: 0)->getType(), DstTy: P2I->getType(),
309 DL) &&
310 (P2IOp0AS == I2PAS || TTI->isNoopAddrSpaceCast(FromAS: P2IOp0AS, ToAS: I2PAS));
311}
312
313// Returns true if V is an address expression.
314// TODO: Currently, we only consider:
315// - arguments
316// - phi, bitcast, addrspacecast, and getelementptr operators
317static bool isAddressExpression(const Value &V, const DataLayout &DL,
318 const TargetTransformInfo *TTI) {
319
320 if (const Argument *Arg = dyn_cast<Argument>(Val: &V))
321 return Arg->getType()->isPointerTy() &&
322 TTI->getAssumedAddrSpace(V: &V) != UninitializedAddressSpace;
323
324 const Operator *Op = dyn_cast<Operator>(Val: &V);
325 if (!Op)
326 return false;
327
328 switch (Op->getOpcode()) {
329 case Instruction::PHI:
330 assert(Op->getType()->isPtrOrPtrVectorTy());
331 return true;
332 case Instruction::BitCast:
333 case Instruction::AddrSpaceCast:
334 case Instruction::GetElementPtr:
335 return true;
336 case Instruction::Select:
337 return Op->getType()->isPtrOrPtrVectorTy();
338 case Instruction::Call: {
339 const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &V);
340 return II && II->getIntrinsicID() == Intrinsic::ptrmask;
341 }
342 case Instruction::IntToPtr:
343 return isNoopPtrIntCastPair(I2P: Op, DL, TTI);
344 default:
345 // That value is an address expression if it has an assumed address space.
346 return TTI->getAssumedAddrSpace(V: &V) != UninitializedAddressSpace;
347 }
348}
349
350// Returns the pointer operands of V.
351//
352// Precondition: V is an address expression.
353static SmallVector<Value *, 2>
354getPointerOperands(const Value &V, const DataLayout &DL,
355 const TargetTransformInfo *TTI) {
356 if (isa<Argument>(Val: &V))
357 return {};
358
359 const Operator &Op = cast<Operator>(Val: V);
360 switch (Op.getOpcode()) {
361 case Instruction::PHI: {
362 auto IncomingValues = cast<PHINode>(Val: Op).incoming_values();
363 return {IncomingValues.begin(), IncomingValues.end()};
364 }
365 case Instruction::BitCast:
366 case Instruction::AddrSpaceCast:
367 case Instruction::GetElementPtr:
368 return {Op.getOperand(i: 0)};
369 case Instruction::Select:
370 return {Op.getOperand(i: 1), Op.getOperand(i: 2)};
371 case Instruction::Call: {
372 const IntrinsicInst &II = cast<IntrinsicInst>(Val: Op);
373 assert(II.getIntrinsicID() == Intrinsic::ptrmask &&
374 "unexpected intrinsic call");
375 return {II.getArgOperand(i: 0)};
376 }
377 case Instruction::IntToPtr: {
378 assert(isNoopPtrIntCastPair(&Op, DL, TTI));
379 auto *P2I = cast<Operator>(Val: Op.getOperand(i: 0));
380 return {P2I->getOperand(i: 0)};
381 }
382 default:
383 llvm_unreachable("Unexpected instruction type.");
384 }
385}
386
387bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
388 Value *OldV,
389 Value *NewV) const {
390 Module *M = II->getParent()->getParent()->getParent();
391 Intrinsic::ID IID = II->getIntrinsicID();
392 switch (IID) {
393 case Intrinsic::objectsize:
394 case Intrinsic::masked_load: {
395 Type *DestTy = II->getType();
396 Type *SrcTy = NewV->getType();
397 Function *NewDecl =
398 Intrinsic::getOrInsertDeclaration(M, id: IID, Tys: {DestTy, SrcTy});
399 II->setArgOperand(i: 0, v: NewV);
400 II->setCalledFunction(NewDecl);
401 return true;
402 }
403 case Intrinsic::ptrmask:
404 // This is handled as an address expression, not as a use memory operation.
405 return false;
406 case Intrinsic::masked_gather: {
407 Type *RetTy = II->getType();
408 Type *NewPtrTy = NewV->getType();
409 Function *NewDecl =
410 Intrinsic::getOrInsertDeclaration(M, id: IID, Tys: {RetTy, NewPtrTy});
411 II->setArgOperand(i: 0, v: NewV);
412 II->setCalledFunction(NewDecl);
413 return true;
414 }
415 case Intrinsic::masked_store:
416 case Intrinsic::masked_scatter: {
417 Type *ValueTy = II->getOperand(i_nocapture: 0)->getType();
418 Type *NewPtrTy = NewV->getType();
419 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
420 M, id: II->getIntrinsicID(), Tys: {ValueTy, NewPtrTy});
421 II->setArgOperand(i: 1, v: NewV);
422 II->setCalledFunction(NewDecl);
423 return true;
424 }
425 case Intrinsic::prefetch:
426 case Intrinsic::is_constant: {
427 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
428 M, id: II->getIntrinsicID(), Tys: {NewV->getType()});
429 II->setArgOperand(i: 0, v: NewV);
430 II->setCalledFunction(NewDecl);
431 return true;
432 }
433 case Intrinsic::fake_use: {
434 II->replaceUsesOfWith(From: OldV, To: NewV);
435 return true;
436 }
437 case Intrinsic::lifetime_start:
438 case Intrinsic::lifetime_end: {
439 // Always force lifetime markers to work directly on the alloca.
440 NewV = NewV->stripPointerCasts();
441 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
442 M, id: II->getIntrinsicID(), Tys: {NewV->getType()});
443 II->setArgOperand(i: 0, v: NewV);
444 II->setCalledFunction(NewDecl);
445 return true;
446 }
447 default: {
448 Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
449 if (!Rewrite)
450 return false;
451 if (Rewrite != II)
452 II->replaceAllUsesWith(V: Rewrite);
453 return true;
454 }
455 }
456}
457
458void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
459 IntrinsicInst *II, PostorderStackTy &PostorderStack,
460 DenseSet<Value *> &Visited) const {
461 auto IID = II->getIntrinsicID();
462 switch (IID) {
463 case Intrinsic::ptrmask:
464 case Intrinsic::objectsize:
465 appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 0),
466 PostorderStack, Visited);
467 break;
468 case Intrinsic::is_constant: {
469 Value *Ptr = II->getArgOperand(i: 0);
470 if (Ptr->getType()->isPtrOrPtrVectorTy()) {
471 appendsFlatAddressExpressionToPostorderStack(V: Ptr, PostorderStack,
472 Visited);
473 }
474
475 break;
476 }
477 case Intrinsic::masked_load:
478 case Intrinsic::masked_gather:
479 case Intrinsic::prefetch:
480 appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 0),
481 PostorderStack, Visited);
482 break;
483 case Intrinsic::masked_store:
484 case Intrinsic::masked_scatter:
485 appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 1),
486 PostorderStack, Visited);
487 break;
488 case Intrinsic::fake_use: {
489 for (Value *Op : II->operands()) {
490 if (Op->getType()->isPtrOrPtrVectorTy()) {
491 appendsFlatAddressExpressionToPostorderStack(V: Op, PostorderStack,
492 Visited);
493 }
494 }
495
496 break;
497 }
498 case Intrinsic::lifetime_start:
499 case Intrinsic::lifetime_end: {
500 appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 0),
501 PostorderStack, Visited);
502 break;
503 }
504 default:
505 SmallVector<int, 2> OpIndexes;
506 if (TTI->collectFlatAddressOperands(OpIndexes, IID)) {
507 for (int Idx : OpIndexes) {
508 appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: Idx),
509 PostorderStack, Visited);
510 }
511 }
512 break;
513 }
514}
515
516// Returns all flat address expressions in function F. The elements are
517// If V is an unvisited flat address expression, appends V to PostorderStack
518// and marks it as visited.
519void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
520 Value *V, PostorderStackTy &PostorderStack,
521 DenseSet<Value *> &Visited) const {
522 assert(V->getType()->isPtrOrPtrVectorTy());
523
524 // Generic addressing expressions may be hidden in nested constant
525 // expressions.
526 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V)) {
527 // TODO: Look in non-address parts, like icmp operands.
528 if (isAddressExpression(V: *CE, DL: *DL, TTI) && Visited.insert(V: CE).second)
529 PostorderStack.emplace_back(Args&: CE, Args: false);
530
531 return;
532 }
533
534 if (V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
535 isAddressExpression(V: *V, DL: *DL, TTI)) {
536 if (Visited.insert(V).second) {
537 PostorderStack.emplace_back(Args&: V, Args: false);
538
539 if (auto *Op = dyn_cast<Operator>(Val: V))
540 for (auto &O : Op->operands())
541 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val&: O))
542 if (isAddressExpression(V: *CE, DL: *DL, TTI) && Visited.insert(V: CE).second)
543 PostorderStack.emplace_back(Args&: CE, Args: false);
544 }
545 }
546}
547
548// Returns all flat address expressions in function F. The elements are ordered
549// in postorder.
550std::vector<WeakTrackingVH>
551InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
552 // This function implements a non-recursive postorder traversal of a partial
553 // use-def graph of function F.
554 PostorderStackTy PostorderStack;
555 // The set of visited expressions.
556 DenseSet<Value *> Visited;
557
558 auto PushPtrOperand = [&](Value *Ptr) {
559 appendsFlatAddressExpressionToPostorderStack(V: Ptr, PostorderStack, Visited);
560 };
561
562 // Look at operations that may be interesting accelerate by moving to a known
563 // address space. We aim at generating after loads and stores, but pure
564 // addressing calculations may also be faster.
565 for (Instruction &I : instructions(F)) {
566 if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I)) {
567 PushPtrOperand(GEP->getPointerOperand());
568 } else if (auto *LI = dyn_cast<LoadInst>(Val: &I))
569 PushPtrOperand(LI->getPointerOperand());
570 else if (auto *SI = dyn_cast<StoreInst>(Val: &I))
571 PushPtrOperand(SI->getPointerOperand());
572 else if (auto *RMW = dyn_cast<AtomicRMWInst>(Val: &I))
573 PushPtrOperand(RMW->getPointerOperand());
574 else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: &I))
575 PushPtrOperand(CmpX->getPointerOperand());
576 else if (auto *MI = dyn_cast<MemIntrinsic>(Val: &I)) {
577 // For memset/memcpy/memmove, any pointer operand can be replaced.
578 PushPtrOperand(MI->getRawDest());
579
580 // Handle 2nd operand for memcpy/memmove.
581 if (auto *MTI = dyn_cast<MemTransferInst>(Val: MI))
582 PushPtrOperand(MTI->getRawSource());
583 } else if (auto *II = dyn_cast<IntrinsicInst>(Val: &I))
584 collectRewritableIntrinsicOperands(II, PostorderStack, Visited);
585 else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: &I)) {
586 if (Cmp->getOperand(i_nocapture: 0)->getType()->isPtrOrPtrVectorTy()) {
587 PushPtrOperand(Cmp->getOperand(i_nocapture: 0));
588 PushPtrOperand(Cmp->getOperand(i_nocapture: 1));
589 }
590 } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(Val: &I)) {
591 PushPtrOperand(ASC->getPointerOperand());
592 } else if (auto *I2P = dyn_cast<IntToPtrInst>(Val: &I)) {
593 if (isNoopPtrIntCastPair(I2P: cast<Operator>(Val: I2P), DL: *DL, TTI))
594 PushPtrOperand(cast<Operator>(Val: I2P->getOperand(i_nocapture: 0))->getOperand(i: 0));
595 } else if (auto *RI = dyn_cast<ReturnInst>(Val: &I)) {
596 if (auto *RV = RI->getReturnValue();
597 RV && RV->getType()->isPtrOrPtrVectorTy())
598 PushPtrOperand(RV);
599 }
600 }
601
602 std::vector<WeakTrackingVH> Postorder; // The resultant postorder.
603 while (!PostorderStack.empty()) {
604 Value *TopVal = PostorderStack.back().getPointer();
605 // If the operands of the expression on the top are already explored,
606 // adds that expression to the resultant postorder.
607 if (PostorderStack.back().getInt()) {
608 if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace)
609 Postorder.push_back(x: TopVal);
610 PostorderStack.pop_back();
611 continue;
612 }
613 // Otherwise, adds its operands to the stack and explores them.
614 PostorderStack.back().setInt(true);
615 // Skip values with an assumed address space.
616 if (TTI->getAssumedAddrSpace(V: TopVal) == UninitializedAddressSpace) {
617 for (Value *PtrOperand : getPointerOperands(V: *TopVal, DL: *DL, TTI)) {
618 appendsFlatAddressExpressionToPostorderStack(V: PtrOperand, PostorderStack,
619 Visited);
620 }
621 }
622 }
623 return Postorder;
624}
625
626// Inserts an addrspacecast for a phi node operand, handling the proper
627// insertion position based on the operand type.
628static Value *phiNodeOperandWithNewAddressSpace(AddrSpaceCastInst *NewI,
629 Value *Operand) {
630 auto InsertBefore = [NewI](auto It) {
631 NewI->insertBefore(It);
632 NewI->setDebugLoc(It->getDebugLoc());
633 return NewI;
634 };
635
636 if (auto *Arg = dyn_cast<Argument>(Val: Operand)) {
637 // For arguments, insert the cast at the beginning of entry block.
638 // Consider inserting at the dominating block for better placement.
639 Function *F = Arg->getParent();
640 auto InsertI = F->getEntryBlock().getFirstNonPHIIt();
641 return InsertBefore(InsertI);
642 }
643
644 // No check for Constant here, as constants are already handled.
645 assert(isa<Instruction>(Operand));
646
647 Instruction *OpInst = cast<Instruction>(Val: Operand);
648 if (LLVM_UNLIKELY(OpInst->getOpcode() == Instruction::PHI)) {
649 // If the operand is defined by another PHI node, insert after the first
650 // non-PHI instruction at the corresponding basic block.
651 auto InsertI = OpInst->getParent()->getFirstNonPHIIt();
652 return InsertBefore(InsertI);
653 }
654
655 // Otherwise, insert immediately after the operand definition.
656 NewI->insertAfter(InsertPos: OpInst->getIterator());
657 NewI->setDebugLoc(OpInst->getDebugLoc());
658 return NewI;
659}
660
661// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
662// of OperandUse.get() in the new address space. If the clone is not ready yet,
663// returns poison in the new address space as a placeholder.
664static Value *operandWithNewAddressSpaceOrCreatePoison(
665 const Use &OperandUse, unsigned NewAddrSpace,
666 const ValueToValueMapTy &ValueWithNewAddrSpace,
667 const PredicatedAddrSpaceMapTy &PredicatedAS,
668 SmallVectorImpl<const Use *> *PoisonUsesToFix) {
669 Value *Operand = OperandUse.get();
670
671 Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: Operand->getType(), NewAddrSpace);
672
673 if (Constant *C = dyn_cast<Constant>(Val: Operand))
674 return ConstantExpr::getAddrSpaceCast(C, Ty: NewPtrTy);
675
676 if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: Operand))
677 return NewOperand;
678
679 Instruction *Inst = cast<Instruction>(Val: OperandUse.getUser());
680 auto I = PredicatedAS.find(Val: std::make_pair(x&: Inst, y&: Operand));
681 if (I != PredicatedAS.end()) {
682 // Insert an addrspacecast on that operand before the user.
683 unsigned NewAS = I->second;
684 Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: Operand->getType(), NewAddrSpace: NewAS);
685 auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy);
686
687 if (LLVM_UNLIKELY(Inst->getOpcode() == Instruction::PHI))
688 return phiNodeOperandWithNewAddressSpace(NewI, Operand);
689
690 NewI->insertBefore(InsertPos: Inst->getIterator());
691 NewI->setDebugLoc(Inst->getDebugLoc());
692 return NewI;
693 }
694
695 PoisonUsesToFix->push_back(Elt: &OperandUse);
696 return PoisonValue::get(T: NewPtrTy);
697}
698
699// A helper function for cloneInstructionWithNewAddressSpace. Handles the
700// conversion of a ptrmask intrinsic instruction.
701Value *InferAddressSpacesImpl::clonePtrMaskWithNewAddressSpace(
702 IntrinsicInst *I, unsigned NewAddrSpace,
703 const ValueToValueMapTy &ValueWithNewAddrSpace,
704 const PredicatedAddrSpaceMapTy &PredicatedAS,
705 SmallVectorImpl<const Use *> *PoisonUsesToFix) const {
706 const Use &PtrOpUse = I->getArgOperandUse(i: 0);
707 unsigned OldAddrSpace = PtrOpUse->getType()->getPointerAddressSpace();
708 Value *MaskOp = I->getArgOperand(i: 1);
709 Type *MaskTy = MaskOp->getType();
710
711 KnownBits OldPtrBits{DL->getPointerSizeInBits(AS: OldAddrSpace)};
712 KnownBits NewPtrBits{DL->getPointerSizeInBits(AS: NewAddrSpace)};
713 if (!TTI->isNoopAddrSpaceCast(FromAS: OldAddrSpace, ToAS: NewAddrSpace)) {
714 std::tie(args&: OldPtrBits, args&: NewPtrBits) =
715 TTI->computeKnownBitsAddrSpaceCast(ToAS: NewAddrSpace, PtrOp: *PtrOpUse.get());
716 }
717
718 // If the pointers in both addrspaces have a bitwise representation and if the
719 // representation of the new pointer is smaller (fewer bits) than the old one,
720 // check if the mask is applicable to the ptr in the new addrspace. Any
721 // masking only clearing the low bits will also apply in the new addrspace
722 // Note: checking if the mask clears high bits is not sufficient as those
723 // might have already been 0 in the old ptr.
724 if (OldPtrBits.getBitWidth() > NewPtrBits.getBitWidth()) {
725 KnownBits MaskBits =
726 computeKnownBits(V: MaskOp, DL: *DL, /*AssumptionCache=*/AC: nullptr, CxtI: I);
727 // Set all unknown bits of the old ptr to 1, so that we are conservative in
728 // checking which bits are cleared by the mask.
729 OldPtrBits.One |= ~OldPtrBits.Zero;
730 // Check which bits are cleared by the mask in the old ptr.
731 KnownBits ClearedBits = KnownBits::sub(LHS: OldPtrBits, RHS: OldPtrBits & MaskBits);
732
733 // If the mask isn't applicable to the new ptr, leave the ptrmask as-is and
734 // insert an addrspacecast after it.
735 if (ClearedBits.countMaxActiveBits() > NewPtrBits.countMaxActiveBits()) {
736 std::optional<BasicBlock::iterator> InsertPoint =
737 I->getInsertionPointAfterDef();
738 assert(InsertPoint && "insertion after ptrmask should be possible");
739 Type *NewPtrType = getPtrOrVecOfPtrsWithNewAS(Ty: I->getType(), NewAddrSpace);
740 Instruction *AddrSpaceCast =
741 new AddrSpaceCastInst(I, NewPtrType, "", *InsertPoint);
742 AddrSpaceCast->setDebugLoc(I->getDebugLoc());
743 return AddrSpaceCast;
744 }
745 }
746
747 IRBuilder<> B(I);
748 if (NewPtrBits.getBitWidth() < MaskTy->getScalarSizeInBits()) {
749 MaskTy = MaskTy->getWithNewBitWidth(NewBitWidth: NewPtrBits.getBitWidth());
750 MaskOp = B.CreateTrunc(V: MaskOp, DestTy: MaskTy);
751 }
752 Value *NewPtr = operandWithNewAddressSpaceOrCreatePoison(
753 OperandUse: PtrOpUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
754 PoisonUsesToFix);
755 return B.CreateIntrinsic(ID: Intrinsic::ptrmask, Types: {NewPtr->getType(), MaskTy},
756 Args: {NewPtr, MaskOp});
757}
758
759// Returns a clone of `I` with its operands converted to those specified in
760// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
761// operand whose address space needs to be modified might not exist in
762// ValueWithNewAddrSpace. In that case, uses poison as a placeholder operand and
763// adds that operand use to PoisonUsesToFix so that caller can fix them later.
764//
765// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
766// from a pointer whose type already matches. Therefore, this function returns a
767// Value* instead of an Instruction*.
768Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
769 Instruction *I, unsigned NewAddrSpace,
770 const ValueToValueMapTy &ValueWithNewAddrSpace,
771 const PredicatedAddrSpaceMapTy &PredicatedAS,
772 SmallVectorImpl<const Use *> *PoisonUsesToFix) const {
773 Type *NewPtrType = getPtrOrVecOfPtrsWithNewAS(Ty: I->getType(), NewAddrSpace);
774
775 if (I->getOpcode() == Instruction::AddrSpaceCast) {
776 Value *Src = I->getOperand(i: 0);
777 // Because `I` is flat, the source address space must be specific.
778 // Therefore, the inferred address space must be the source space, according
779 // to our algorithm.
780 assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
781 return Src;
782 }
783
784 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
785 // Technically the intrinsic ID is a pointer typed argument, so specially
786 // handle calls early.
787 assert(II->getIntrinsicID() == Intrinsic::ptrmask);
788 return clonePtrMaskWithNewAddressSpace(
789 I: II, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
790 }
791
792 unsigned AS = TTI->getAssumedAddrSpace(V: I);
793 if (AS != UninitializedAddressSpace) {
794 // For the assumed address space, insert an `addrspacecast` to make that
795 // explicit.
796 Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: I->getType(), NewAddrSpace: AS);
797 auto *NewI = new AddrSpaceCastInst(I, NewPtrTy);
798 NewI->insertAfter(InsertPos: I->getIterator());
799 NewI->setDebugLoc(I->getDebugLoc());
800 return NewI;
801 }
802
803 // Computes the converted pointer operands.
804 SmallVector<Value *, 4> NewPointerOperands;
805 for (const Use &OperandUse : I->operands()) {
806 if (!OperandUse.get()->getType()->isPtrOrPtrVectorTy())
807 NewPointerOperands.push_back(Elt: nullptr);
808 else
809 NewPointerOperands.push_back(Elt: operandWithNewAddressSpaceOrCreatePoison(
810 OperandUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
811 PoisonUsesToFix));
812 }
813
814 switch (I->getOpcode()) {
815 case Instruction::BitCast:
816 return new BitCastInst(NewPointerOperands[0], NewPtrType);
817 case Instruction::PHI: {
818 assert(I->getType()->isPtrOrPtrVectorTy());
819 PHINode *PHI = cast<PHINode>(Val: I);
820 PHINode *NewPHI = PHINode::Create(Ty: NewPtrType, NumReservedValues: PHI->getNumIncomingValues());
821 for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
822 unsigned OperandNo = PHINode::getOperandNumForIncomingValue(i: Index);
823 NewPHI->addIncoming(V: NewPointerOperands[OperandNo],
824 BB: PHI->getIncomingBlock(i: Index));
825 }
826 return NewPHI;
827 }
828 case Instruction::GetElementPtr: {
829 GetElementPtrInst *GEP = cast<GetElementPtrInst>(Val: I);
830 GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
831 PointeeType: GEP->getSourceElementType(), Ptr: NewPointerOperands[0],
832 IdxList: SmallVector<Value *, 4>(GEP->indices()));
833 NewGEP->setIsInBounds(GEP->isInBounds());
834 return NewGEP;
835 }
836 case Instruction::Select:
837 assert(I->getType()->isPtrOrPtrVectorTy());
838 return SelectInst::Create(C: I->getOperand(i: 0), S1: NewPointerOperands[1],
839 S2: NewPointerOperands[2], NameStr: "", InsertBefore: nullptr, MDFrom: I);
840 case Instruction::IntToPtr: {
841 assert(isNoopPtrIntCastPair(cast<Operator>(I), *DL, TTI));
842 Value *Src = cast<Operator>(Val: I->getOperand(i: 0))->getOperand(i: 0);
843 if (Src->getType() == NewPtrType)
844 return Src;
845
846 // If we had a no-op inttoptr/ptrtoint pair, we may still have inferred a
847 // source address space from a generic pointer source need to insert a cast
848 // back.
849 return new AddrSpaceCastInst(Src, NewPtrType);
850 }
851 default:
852 llvm_unreachable("Unexpected opcode");
853 }
854}
855
856// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
857// constant expression `CE` with its operands replaced as specified in
858// ValueWithNewAddrSpace.
859static Value *cloneConstantExprWithNewAddressSpace(
860 ConstantExpr *CE, unsigned NewAddrSpace,
861 const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
862 const TargetTransformInfo *TTI) {
863 Type *TargetType =
864 CE->getType()->isPtrOrPtrVectorTy()
865 ? getPtrOrVecOfPtrsWithNewAS(Ty: CE->getType(), NewAddrSpace)
866 : CE->getType();
867
868 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
869 // Because CE is flat, the source address space must be specific.
870 // Therefore, the inferred address space must be the source space according
871 // to our algorithm.
872 assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
873 NewAddrSpace);
874 return CE->getOperand(i_nocapture: 0);
875 }
876
877 if (CE->getOpcode() == Instruction::BitCast) {
878 if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: CE->getOperand(i_nocapture: 0)))
879 return ConstantExpr::getBitCast(C: cast<Constant>(Val: NewOperand), Ty: TargetType);
880 return ConstantExpr::getAddrSpaceCast(C: CE, Ty: TargetType);
881 }
882
883 if (CE->getOpcode() == Instruction::IntToPtr) {
884 assert(isNoopPtrIntCastPair(cast<Operator>(CE), *DL, TTI));
885 Constant *Src = cast<ConstantExpr>(Val: CE->getOperand(i_nocapture: 0))->getOperand(i_nocapture: 0);
886 assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
887 return Src;
888 }
889
890 // Computes the operands of the new constant expression.
891 bool IsNew = false;
892 SmallVector<Constant *, 4> NewOperands;
893 for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
894 Constant *Operand = CE->getOperand(i_nocapture: Index);
895 // If the address space of `Operand` needs to be modified, the new operand
896 // with the new address space should already be in ValueWithNewAddrSpace
897 // because (1) the constant expressions we consider (i.e. addrspacecast,
898 // bitcast, and getelementptr) do not incur cycles in the data flow graph
899 // and (2) this function is called on constant expressions in postorder.
900 if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: Operand)) {
901 IsNew = true;
902 NewOperands.push_back(Elt: cast<Constant>(Val: NewOperand));
903 continue;
904 }
905 if (auto *CExpr = dyn_cast<ConstantExpr>(Val: Operand))
906 if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
907 CE: CExpr, NewAddrSpace, ValueWithNewAddrSpace, DL, TTI)) {
908 IsNew = true;
909 NewOperands.push_back(Elt: cast<Constant>(Val: NewOperand));
910 continue;
911 }
912 // Otherwise, reuses the old operand.
913 NewOperands.push_back(Elt: Operand);
914 }
915
916 // If !IsNew, we will replace the Value with itself. However, replaced values
917 // are assumed to wrapped in an addrspacecast cast later so drop it now.
918 if (!IsNew)
919 return nullptr;
920
921 if (CE->getOpcode() == Instruction::GetElementPtr) {
922 // Needs to specify the source type while constructing a getelementptr
923 // constant expression.
924 return CE->getWithOperands(Ops: NewOperands, Ty: TargetType, /*OnlyIfReduced=*/false,
925 SrcTy: cast<GEPOperator>(Val: CE)->getSourceElementType());
926 }
927
928 return CE->getWithOperands(Ops: NewOperands, Ty: TargetType);
929}
930
931// Returns a clone of the value `V`, with its operands replaced as specified in
932// ValueWithNewAddrSpace. This function is called on every flat address
933// expression whose address space needs to be modified, in postorder.
934//
935// See cloneInstructionWithNewAddressSpace for the meaning of PoisonUsesToFix.
936Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
937 Value *V, unsigned NewAddrSpace,
938 const ValueToValueMapTy &ValueWithNewAddrSpace,
939 const PredicatedAddrSpaceMapTy &PredicatedAS,
940 SmallVectorImpl<const Use *> *PoisonUsesToFix) const {
941 // All values in Postorder are flat address expressions.
942 assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
943 isAddressExpression(*V, *DL, TTI));
944
945 if (auto *Arg = dyn_cast<Argument>(Val: V)) {
946 // Arguments are address space casted in the function body, as we do not
947 // want to change the function signature.
948 Function *F = Arg->getParent();
949 BasicBlock::iterator Insert = F->getEntryBlock().getFirstNonPHIIt();
950
951 Type *NewPtrTy = PointerType::get(C&: Arg->getContext(), AddressSpace: NewAddrSpace);
952 auto *NewI = new AddrSpaceCastInst(Arg, NewPtrTy);
953 NewI->insertBefore(InsertPos: Insert);
954 return NewI;
955 }
956
957 if (Instruction *I = dyn_cast<Instruction>(Val: V)) {
958 Value *NewV = cloneInstructionWithNewAddressSpace(
959 I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
960 if (Instruction *NewI = dyn_cast_or_null<Instruction>(Val: NewV)) {
961 if (NewI->getParent() == nullptr) {
962 NewI->insertBefore(InsertPos: I->getIterator());
963 NewI->takeName(V: I);
964 NewI->setDebugLoc(I->getDebugLoc());
965 }
966 }
967 return NewV;
968 }
969
970 return cloneConstantExprWithNewAddressSpace(
971 CE: cast<ConstantExpr>(Val: V), NewAddrSpace, ValueWithNewAddrSpace, DL, TTI);
972}
973
974// Defines the join operation on the address space lattice (see the file header
975// comments).
976unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1,
977 unsigned AS2) const {
978 if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
979 return FlatAddrSpace;
980
981 if (AS1 == UninitializedAddressSpace)
982 return AS2;
983 if (AS2 == UninitializedAddressSpace)
984 return AS1;
985
986 // The join of two different specific address spaces is flat.
987 return (AS1 == AS2) ? AS1 : FlatAddrSpace;
988}
989
990bool InferAddressSpacesImpl::run(Function &CurFn) {
991 F = &CurFn;
992 DL = &F->getDataLayout();
993
994 if (AssumeDefaultIsFlatAddressSpace)
995 FlatAddrSpace = 0;
996
997 if (FlatAddrSpace == UninitializedAddressSpace) {
998 FlatAddrSpace = TTI->getFlatAddressSpace();
999 if (FlatAddrSpace == UninitializedAddressSpace)
1000 return false;
1001 }
1002
1003 // Collects all flat address expressions in postorder.
1004 std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F&: *F);
1005
1006 // Runs a data-flow analysis to refine the address spaces of every expression
1007 // in Postorder.
1008 ValueToAddrSpaceMapTy InferredAddrSpace;
1009 PredicatedAddrSpaceMapTy PredicatedAS;
1010 inferAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS);
1011
1012 // Changes the address spaces of the flat address expressions who are inferred
1013 // to point to a specific address space.
1014 return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace,
1015 PredicatedAS);
1016}
1017
1018// Constants need to be tracked through RAUW to handle cases with nested
1019// constant expressions, so wrap values in WeakTrackingVH.
1020void InferAddressSpacesImpl::inferAddressSpaces(
1021 ArrayRef<WeakTrackingVH> Postorder,
1022 ValueToAddrSpaceMapTy &InferredAddrSpace,
1023 PredicatedAddrSpaceMapTy &PredicatedAS) const {
1024 SetVector<Value *> Worklist(llvm::from_range, Postorder);
1025 // Initially, all expressions are in the uninitialized address space.
1026 for (Value *V : Postorder)
1027 InferredAddrSpace[V] = UninitializedAddressSpace;
1028
1029 while (!Worklist.empty()) {
1030 Value *V = Worklist.pop_back_val();
1031
1032 // Try to update the address space of the stack top according to the
1033 // address spaces of its operands.
1034 if (!updateAddressSpace(V: *V, InferredAddrSpace, PredicatedAS))
1035 continue;
1036
1037 for (Value *User : V->users()) {
1038 // Skip if User is already in the worklist.
1039 if (Worklist.count(key: User))
1040 continue;
1041
1042 auto Pos = InferredAddrSpace.find(Val: User);
1043 // Our algorithm only updates the address spaces of flat address
1044 // expressions, which are those in InferredAddrSpace.
1045 if (Pos == InferredAddrSpace.end())
1046 continue;
1047
1048 // Function updateAddressSpace moves the address space down a lattice
1049 // path. Therefore, nothing to do if User is already inferred as flat (the
1050 // bottom element in the lattice).
1051 if (Pos->second == FlatAddrSpace)
1052 continue;
1053
1054 Worklist.insert(X: User);
1055 }
1056 }
1057}
1058
1059unsigned
1060InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &Ptr,
1061 const Value *UserCtx) const {
1062 const Instruction *UserCtxI = dyn_cast<Instruction>(Val: UserCtx);
1063 if (!UserCtxI)
1064 return UninitializedAddressSpace;
1065
1066 const Value *StrippedPtr = Ptr.stripInBoundsOffsets();
1067 for (auto &AssumeVH : AC.assumptionsFor(V: StrippedPtr)) {
1068 if (!AssumeVH)
1069 continue;
1070 CallInst *CI = cast<CallInst>(Val&: AssumeVH);
1071 if (!isValidAssumeForContext(I: CI, CxtI: UserCtxI, DT))
1072 continue;
1073
1074 const Value *Ptr;
1075 unsigned AS;
1076 std::tie(args&: Ptr, args&: AS) = TTI->getPredicatedAddrSpace(V: CI->getArgOperand(i: 0));
1077 if (Ptr)
1078 return AS;
1079 }
1080
1081 return UninitializedAddressSpace;
1082}
1083
1084bool InferAddressSpacesImpl::updateAddressSpace(
1085 const Value &V, ValueToAddrSpaceMapTy &InferredAddrSpace,
1086 PredicatedAddrSpaceMapTy &PredicatedAS) const {
1087 assert(InferredAddrSpace.count(&V));
1088
1089 LLVM_DEBUG(dbgs() << "Updating the address space of\n " << V << '\n');
1090
1091 // The new inferred address space equals the join of the address spaces
1092 // of all its pointer operands.
1093 unsigned NewAS = UninitializedAddressSpace;
1094
1095 // isAddressExpression should guarantee that V is an operator or an argument.
1096 assert(isa<Operator>(V) || isa<Argument>(V));
1097
1098 unsigned AS = TTI->getAssumedAddrSpace(V: &V);
1099 if (AS != UninitializedAddressSpace) {
1100 // Use the assumed address space directly.
1101 NewAS = AS;
1102 } else {
1103 // Otherwise, infer the address space from its pointer operands.
1104 SmallVector<Constant *, 2> ConstantPtrOps;
1105 for (Value *PtrOperand : getPointerOperands(V, DL: *DL, TTI)) {
1106 auto I = InferredAddrSpace.find(Val: PtrOperand);
1107 unsigned OperandAS;
1108 if (I == InferredAddrSpace.end()) {
1109 OperandAS = PtrOperand->getType()->getPointerAddressSpace();
1110 if (auto *C = dyn_cast<Constant>(Val: PtrOperand);
1111 C && OperandAS == FlatAddrSpace) {
1112 // Defer joining the address space of constant pointer operands.
1113 ConstantPtrOps.push_back(Elt: C);
1114 continue;
1115 }
1116 if (OperandAS == FlatAddrSpace) {
1117 // Check AC for assumption dominating V.
1118 unsigned AS = getPredicatedAddrSpace(Ptr: *PtrOperand, UserCtx: &V);
1119 if (AS != UninitializedAddressSpace) {
1120 LLVM_DEBUG(dbgs()
1121 << " deduce operand AS from the predicate addrspace "
1122 << AS << '\n');
1123 OperandAS = AS;
1124 // Record this use with the predicated AS.
1125 PredicatedAS[std::make_pair(x: &V, y&: PtrOperand)] = OperandAS;
1126 }
1127 }
1128 } else
1129 OperandAS = I->second;
1130
1131 // join(flat, *) = flat. So we can break if NewAS is already flat.
1132 NewAS = joinAddressSpaces(AS1: NewAS, AS2: OperandAS);
1133 if (NewAS == FlatAddrSpace)
1134 break;
1135 }
1136 if (NewAS != FlatAddrSpace && NewAS != UninitializedAddressSpace) {
1137 if (any_of(Range&: ConstantPtrOps, P: [=](Constant *C) {
1138 return !isSafeToCastConstAddrSpace(C, NewAS);
1139 }))
1140 NewAS = FlatAddrSpace;
1141 }
1142 }
1143
1144 unsigned OldAS = InferredAddrSpace.lookup(Val: &V);
1145 assert(OldAS != FlatAddrSpace);
1146 if (OldAS == NewAS)
1147 return false;
1148
1149 // If any updates are made, grabs its users to the worklist because
1150 // their address spaces can also be possibly updated.
1151 LLVM_DEBUG(dbgs() << " to " << NewAS << '\n');
1152 InferredAddrSpace[&V] = NewAS;
1153 return true;
1154}
1155
1156/// Replace operand \p OpIdx in \p Inst, if the value is the same as \p OldVal
1157/// with \p NewVal.
1158static bool replaceOperandIfSame(Instruction *Inst, unsigned OpIdx,
1159 Value *OldVal, Value *NewVal) {
1160 Use &U = Inst->getOperandUse(i: OpIdx);
1161 if (U.get() == OldVal) {
1162 U.set(NewVal);
1163 return true;
1164 }
1165
1166 return false;
1167}
1168
1169template <typename InstrType>
1170static bool replaceSimplePointerUse(const TargetTransformInfo &TTI,
1171 InstrType *MemInstr, unsigned AddrSpace,
1172 Value *OldV, Value *NewV) {
1173 if (!MemInstr->isVolatile() || TTI.hasVolatileVariant(I: MemInstr, AddrSpace)) {
1174 return replaceOperandIfSame(MemInstr, InstrType::getPointerOperandIndex(),
1175 OldV, NewV);
1176 }
1177
1178 return false;
1179}
1180
1181/// If \p OldV is used as the pointer operand of a compatible memory operation
1182/// \p Inst, replaces the pointer operand with NewV.
1183///
1184/// This covers memory instructions with a single pointer operand that can have
1185/// its address space changed by simply mutating the use to a new value.
1186///
1187/// \p returns true the user replacement was made.
1188static bool replaceIfSimplePointerUse(const TargetTransformInfo &TTI,
1189 User *Inst, unsigned AddrSpace,
1190 Value *OldV, Value *NewV) {
1191 if (auto *LI = dyn_cast<LoadInst>(Val: Inst))
1192 return replaceSimplePointerUse(TTI, MemInstr: LI, AddrSpace, OldV, NewV);
1193
1194 if (auto *SI = dyn_cast<StoreInst>(Val: Inst))
1195 return replaceSimplePointerUse(TTI, MemInstr: SI, AddrSpace, OldV, NewV);
1196
1197 if (auto *RMW = dyn_cast<AtomicRMWInst>(Val: Inst))
1198 return replaceSimplePointerUse(TTI, MemInstr: RMW, AddrSpace, OldV, NewV);
1199
1200 if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: Inst))
1201 return replaceSimplePointerUse(TTI, MemInstr: CmpX, AddrSpace, OldV, NewV);
1202
1203 return false;
1204}
1205
1206/// Update memory intrinsic uses that require more complex processing than
1207/// simple memory instructions. These require re-mangling and may have multiple
1208/// pointer operands.
1209static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
1210 Value *NewV) {
1211 IRBuilder<> B(MI);
1212 if (auto *MSI = dyn_cast<MemSetInst>(Val: MI)) {
1213 B.CreateMemSet(Ptr: NewV, Val: MSI->getValue(), Size: MSI->getLength(), Align: MSI->getDestAlign(),
1214 isVolatile: false, // isVolatile
1215 AAInfo: MI->getAAMetadata());
1216 } else if (auto *MTI = dyn_cast<MemTransferInst>(Val: MI)) {
1217 Value *Src = MTI->getRawSource();
1218 Value *Dest = MTI->getRawDest();
1219
1220 // Be careful in case this is a self-to-self copy.
1221 if (Src == OldV)
1222 Src = NewV;
1223
1224 if (Dest == OldV)
1225 Dest = NewV;
1226
1227 if (auto *MCI = dyn_cast<MemCpyInst>(Val: MTI)) {
1228 if (MCI->isForceInlined())
1229 B.CreateMemCpyInline(Dst: Dest, DstAlign: MTI->getDestAlign(), Src,
1230 SrcAlign: MTI->getSourceAlign(), Size: MTI->getLength(),
1231 isVolatile: false, // isVolatile
1232 AAInfo: MI->getAAMetadata());
1233 else
1234 B.CreateMemCpy(Dst: Dest, DstAlign: MTI->getDestAlign(), Src, SrcAlign: MTI->getSourceAlign(),
1235 Size: MTI->getLength(),
1236 isVolatile: false, // isVolatile
1237 AAInfo: MI->getAAMetadata());
1238 } else {
1239 assert(isa<MemMoveInst>(MTI));
1240 B.CreateMemMove(Dst: Dest, DstAlign: MTI->getDestAlign(), Src, SrcAlign: MTI->getSourceAlign(),
1241 Size: MTI->getLength(),
1242 isVolatile: false, // isVolatile
1243 AAInfo: MI->getAAMetadata());
1244 }
1245 } else
1246 llvm_unreachable("unhandled MemIntrinsic");
1247
1248 MI->eraseFromParent();
1249 return true;
1250}
1251
1252// \p returns true if it is OK to change the address space of constant \p C with
1253// a ConstantExpr addrspacecast.
1254bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C,
1255 unsigned NewAS) const {
1256 assert(NewAS != UninitializedAddressSpace);
1257
1258 unsigned SrcAS = C->getType()->getPointerAddressSpace();
1259 if (SrcAS == NewAS || isa<UndefValue>(Val: C))
1260 return true;
1261
1262 // Prevent illegal casts between different non-flat address spaces.
1263 if (SrcAS != FlatAddrSpace && NewAS != FlatAddrSpace)
1264 return false;
1265
1266 if (isa<ConstantPointerNull>(Val: C) || isa<ConstantAggregateZero>(Val: C))
1267 return true;
1268
1269 if (auto *Op = dyn_cast<Operator>(Val: C)) {
1270 // If we already have a constant addrspacecast, it should be safe to cast it
1271 // off.
1272 if (Op->getOpcode() == Instruction::AddrSpaceCast)
1273 return isSafeToCastConstAddrSpace(C: cast<Constant>(Val: Op->getOperand(i: 0)),
1274 NewAS);
1275
1276 if (Op->getOpcode() == Instruction::IntToPtr &&
1277 Op->getType()->getPointerAddressSpace() == FlatAddrSpace)
1278 return true;
1279 }
1280
1281 return false;
1282}
1283
1284static Value::use_iterator skipToNextUser(Value::use_iterator I,
1285 Value::use_iterator End) {
1286 User *CurUser = I->getUser();
1287 ++I;
1288
1289 while (I != End && I->getUser() == CurUser)
1290 ++I;
1291
1292 return I;
1293}
1294
1295void InferAddressSpacesImpl::performPointerReplacement(
1296 Value *V, Value *NewV, Use &U, ValueToValueMapTy &ValueWithNewAddrSpace,
1297 SmallVectorImpl<Instruction *> &DeadInstructions) const {
1298
1299 User *CurUser = U.getUser();
1300
1301 unsigned AddrSpace = V->getType()->getPointerAddressSpace();
1302 if (replaceIfSimplePointerUse(TTI: *TTI, Inst: CurUser, AddrSpace, OldV: V, NewV))
1303 return;
1304
1305 // Skip if the current user is the new value itself.
1306 if (CurUser == NewV)
1307 return;
1308
1309 auto *CurUserI = dyn_cast<Instruction>(Val: CurUser);
1310 if (!CurUserI || CurUserI->getFunction() != F)
1311 return;
1312
1313 // Handle more complex cases like intrinsic that need to be remangled.
1314 if (auto *MI = dyn_cast<MemIntrinsic>(Val: CurUser)) {
1315 if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, OldV: V, NewV))
1316 return;
1317 }
1318
1319 if (auto *II = dyn_cast<IntrinsicInst>(Val: CurUser)) {
1320 if (rewriteIntrinsicOperands(II, OldV: V, NewV))
1321 return;
1322 }
1323
1324 if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: CurUserI)) {
1325 // If we can infer that both pointers are in the same addrspace,
1326 // transform e.g.
1327 // %cmp = icmp eq float* %p, %q
1328 // into
1329 // %cmp = icmp eq float addrspace(3)* %new_p, %new_q
1330
1331 unsigned NewAS = NewV->getType()->getPointerAddressSpace();
1332 int SrcIdx = U.getOperandNo();
1333 int OtherIdx = (SrcIdx == 0) ? 1 : 0;
1334 Value *OtherSrc = Cmp->getOperand(i_nocapture: OtherIdx);
1335
1336 if (Value *OtherNewV = ValueWithNewAddrSpace.lookup(Val: OtherSrc)) {
1337 if (OtherNewV->getType()->getPointerAddressSpace() == NewAS) {
1338 Cmp->setOperand(i_nocapture: OtherIdx, Val_nocapture: OtherNewV);
1339 Cmp->setOperand(i_nocapture: SrcIdx, Val_nocapture: NewV);
1340 return;
1341 }
1342 }
1343
1344 // Even if the type mismatches, we can cast the constant.
1345 if (auto *KOtherSrc = dyn_cast<Constant>(Val: OtherSrc)) {
1346 if (isSafeToCastConstAddrSpace(C: KOtherSrc, NewAS)) {
1347 Cmp->setOperand(i_nocapture: SrcIdx, Val_nocapture: NewV);
1348 Cmp->setOperand(i_nocapture: OtherIdx, Val_nocapture: ConstantExpr::getAddrSpaceCast(
1349 C: KOtherSrc, Ty: NewV->getType()));
1350 return;
1351 }
1352 }
1353 }
1354
1355 if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(Val: CurUserI)) {
1356 unsigned NewAS = NewV->getType()->getPointerAddressSpace();
1357 if (ASC->getDestAddressSpace() == NewAS) {
1358 ASC->replaceAllUsesWith(V: NewV);
1359 DeadInstructions.push_back(Elt: ASC);
1360 return;
1361 }
1362 }
1363
1364 // Otherwise, replaces the use with flat(NewV).
1365 if (isa<Instruction>(Val: V) || isa<Instruction>(Val: NewV)) {
1366 // Don't create a copy of the original addrspacecast.
1367 if (U == V && isa<AddrSpaceCastInst>(Val: V))
1368 return;
1369
1370 // Insert the addrspacecast after NewV.
1371 BasicBlock::iterator InsertPos;
1372 if (Instruction *NewVInst = dyn_cast<Instruction>(Val: NewV))
1373 InsertPos = std::next(x: NewVInst->getIterator());
1374 else
1375 InsertPos = std::next(x: cast<Instruction>(Val: V)->getIterator());
1376
1377 while (isa<PHINode>(Val: InsertPos))
1378 ++InsertPos;
1379 // This instruction may contain multiple uses of V, update them all.
1380 CurUser->replaceUsesOfWith(
1381 From: V, To: new AddrSpaceCastInst(NewV, V->getType(), "", InsertPos));
1382 } else {
1383 CurUserI->replaceUsesOfWith(
1384 From: V, To: ConstantExpr::getAddrSpaceCast(C: cast<Constant>(Val: NewV), Ty: V->getType()));
1385 }
1386}
1387
1388bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
1389 ArrayRef<WeakTrackingVH> Postorder,
1390 const ValueToAddrSpaceMapTy &InferredAddrSpace,
1391 const PredicatedAddrSpaceMapTy &PredicatedAS) const {
1392 // For each address expression to be modified, creates a clone of it with its
1393 // pointer operands converted to the new address space. Since the pointer
1394 // operands are converted, the clone is naturally in the new address space by
1395 // construction.
1396 ValueToValueMapTy ValueWithNewAddrSpace;
1397 SmallVector<const Use *, 32> PoisonUsesToFix;
1398 for (Value *V : Postorder) {
1399 unsigned NewAddrSpace = InferredAddrSpace.lookup(Val: V);
1400
1401 // In some degenerate cases (e.g. invalid IR in unreachable code), we may
1402 // not even infer the value to have its original address space.
1403 if (NewAddrSpace == UninitializedAddressSpace)
1404 continue;
1405
1406 if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
1407 Value *New =
1408 cloneValueWithNewAddressSpace(V, NewAddrSpace, ValueWithNewAddrSpace,
1409 PredicatedAS, PoisonUsesToFix: &PoisonUsesToFix);
1410 if (New)
1411 ValueWithNewAddrSpace[V] = New;
1412 }
1413 }
1414
1415 if (ValueWithNewAddrSpace.empty())
1416 return false;
1417
1418 // Fixes all the poison uses generated by cloneInstructionWithNewAddressSpace.
1419 for (const Use *PoisonUse : PoisonUsesToFix) {
1420 User *V = PoisonUse->getUser();
1421 User *NewV = cast_or_null<User>(Val: ValueWithNewAddrSpace.lookup(Val: V));
1422 if (!NewV)
1423 continue;
1424
1425 unsigned OperandNo = PoisonUse->getOperandNo();
1426 assert(isa<PoisonValue>(NewV->getOperand(OperandNo)));
1427 WeakTrackingVH NewOp = ValueWithNewAddrSpace.lookup(Val: PoisonUse->get());
1428 assert(NewOp &&
1429 "poison replacements in ValueWithNewAddrSpace shouldn't be null");
1430 NewV->setOperand(i: OperandNo, Val: NewOp);
1431 }
1432
1433 SmallVector<Instruction *, 16> DeadInstructions;
1434 ValueToValueMapTy VMap;
1435 ValueMapper VMapper(VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1436
1437 // Replaces the uses of the old address expressions with the new ones.
1438 for (const WeakTrackingVH &WVH : Postorder) {
1439 assert(WVH && "value was unexpectedly deleted");
1440 Value *V = WVH;
1441 Value *NewV = ValueWithNewAddrSpace.lookup(Val: V);
1442 if (NewV == nullptr)
1443 continue;
1444
1445 LLVM_DEBUG(dbgs() << "Replacing the uses of " << *V << "\n with\n "
1446 << *NewV << '\n');
1447
1448 if (Constant *C = dyn_cast<Constant>(Val: V)) {
1449 Constant *Replace =
1450 ConstantExpr::getAddrSpaceCast(C: cast<Constant>(Val: NewV), Ty: C->getType());
1451 if (C != Replace) {
1452 LLVM_DEBUG(dbgs() << "Inserting replacement const cast: " << Replace
1453 << ": " << *Replace << '\n');
1454 SmallVector<User *, 16> WorkList;
1455 for (User *U : make_early_inc_range(Range: C->users())) {
1456 if (auto *I = dyn_cast<Instruction>(Val: U)) {
1457 if (I->getFunction() == F)
1458 I->replaceUsesOfWith(From: C, To: Replace);
1459 } else {
1460 WorkList.append(in_start: U->user_begin(), in_end: U->user_end());
1461 }
1462 }
1463 if (!WorkList.empty()) {
1464 VMap[C] = Replace;
1465 DenseSet<User *> Visited{WorkList.begin(), WorkList.end()};
1466 while (!WorkList.empty()) {
1467 User *U = WorkList.pop_back_val();
1468 if (auto *I = dyn_cast<Instruction>(Val: U)) {
1469 if (I->getFunction() == F)
1470 VMapper.remapInstruction(I&: *I);
1471 continue;
1472 }
1473 for (User *U2 : U->users())
1474 if (Visited.insert(V: U2).second)
1475 WorkList.push_back(Elt: U2);
1476 }
1477 }
1478 V = Replace;
1479 }
1480 }
1481
1482 Value::use_iterator I, E, Next;
1483 for (I = V->use_begin(), E = V->use_end(); I != E;) {
1484 Use &U = *I;
1485
1486 // Some users may see the same pointer operand in multiple operands. Skip
1487 // to the next instruction.
1488 I = skipToNextUser(I, End: E);
1489
1490 performPointerReplacement(V, NewV, U, ValueWithNewAddrSpace,
1491 DeadInstructions);
1492 }
1493
1494 if (V->use_empty()) {
1495 if (Instruction *I = dyn_cast<Instruction>(Val: V))
1496 DeadInstructions.push_back(Elt: I);
1497 }
1498 }
1499
1500 for (Instruction *I : DeadInstructions)
1501 RecursivelyDeleteTriviallyDeadInstructions(V: I);
1502
1503 return true;
1504}
1505
1506bool InferAddressSpaces::runOnFunction(Function &F) {
1507 if (skipFunction(F))
1508 return false;
1509
1510 auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
1511 DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
1512 return InferAddressSpacesImpl(
1513 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), DT,
1514 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
1515 FlatAddrSpace)
1516 .run(CurFn&: F);
1517}
1518
1519FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {
1520 return new InferAddressSpaces(AddressSpace);
1521}
1522
1523InferAddressSpacesPass::InferAddressSpacesPass()
1524 : FlatAddrSpace(UninitializedAddressSpace) {}
1525InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
1526 : FlatAddrSpace(AddressSpace) {}
1527
1528PreservedAnalyses InferAddressSpacesPass::run(Function &F,
1529 FunctionAnalysisManager &AM) {
1530 bool Changed =
1531 InferAddressSpacesImpl(AM.getResult<AssumptionAnalysis>(IR&: F),
1532 AM.getCachedResult<DominatorTreeAnalysis>(IR&: F),
1533 &AM.getResult<TargetIRAnalysis>(IR&: F), FlatAddrSpace)
1534 .run(CurFn&: F);
1535 if (Changed) {
1536 PreservedAnalyses PA;
1537 PA.preserveSet<CFGAnalyses>();
1538 PA.preserve<DominatorTreeAnalysis>();
1539 return PA;
1540 }
1541 return PreservedAnalyses::all();
1542}
1543