//===- InferAddressSpaces.cpp - --------------------------------------------===//
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
// CUDA C/C++ includes memory space designation as variable type qualifiers (such
| 10 | // as __global__ and __shared__). Knowing the space of a memory access allows |
| 11 | // CUDA compilers to emit faster PTX loads and stores. For example, a load from |
| 12 | // shared memory can be translated to `ld.shared` which is roughly 10% faster |
| 13 | // than a generic `ld` on an NVIDIA Tesla K40c. |
| 14 | // |
| 15 | // Unfortunately, type qualifiers only apply to variable declarations, so CUDA |
| 16 | // compilers must infer the memory space of an address expression from |
| 17 | // type-qualified variables. |
| 18 | // |
| 19 | // LLVM IR uses non-zero (so-called) specific address spaces to represent memory |
| 20 | // spaces (e.g. addrspace(3) means shared memory). The Clang frontend |
| 21 | // places only type-qualified variables in specific address spaces, and then |
| 22 | // conservatively `addrspacecast`s each type-qualified variable to addrspace(0) |
// (the so-called generic address space) for other instructions to use.
| 24 | // |
// For example, Clang translates the following CUDA code
| 26 | // __shared__ float a[10]; |
| 27 | // float v = a[i]; |
| 28 | // to |
| 29 | // %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* |
| 30 | // %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i |
| 31 | // %v = load float, float* %1 ; emits ld.f32 |
| 32 | // @a is in addrspace(3) since it's type-qualified, but its use from %1 is |
| 33 | // redirected to %0 (the generic version of @a). |
| 34 | // |
| 35 | // The optimization implemented in this file propagates specific address spaces |
// from type-qualified variable declarations to their users. For example, it
| 37 | // optimizes the above IR to |
| 38 | // %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i |
| 39 | // %v = load float addrspace(3)* %1 ; emits ld.shared.f32 |
// propagating the addrspace(3) from @a to %1. As a result, the NVPTX
| 41 | // codegen is able to emit ld.shared.f32 for %v. |
| 42 | // |
| 43 | // Address space inference works in two steps. First, it uses a data-flow |
// analysis to prove that as many generic pointers as possible point to only
// one specific address space. In the above example, it can prove that %1 only
| 46 | // points to addrspace(3). This algorithm was published in |
| 47 | // CUDA: Compiling and optimizing for a GPU platform |
| 48 | // Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang |
| 49 | // ICCS 2012 |
| 50 | // |
| 51 | // Then, address space inference replaces all refinable generic pointers with |
| 52 | // equivalent specific pointers. |
| 53 | // |
| 54 | // The major challenge of implementing this optimization is handling PHINodes, |
| 55 | // which may create loops in the data flow graph. This brings two complications. |
| 56 | // |
| 57 | // First, the data flow analysis in Step 1 needs to be circular. For example, |
| 58 | // %generic.input = addrspacecast float addrspace(3)* %input to float* |
| 59 | // loop: |
| 60 | // %y = phi [ %generic.input, %y2 ] |
| 61 | // %y2 = getelementptr %y, 1 |
| 62 | // %v = load %y2 |
| 63 | // br ..., label %loop, ... |
| 64 | // proving %y specific requires proving both %generic.input and %y2 specific, |
| 65 | // but proving %y2 specific circles back to %y. To address this complication, |
| 66 | // the data flow analysis operates on a lattice: |
| 67 | // uninitialized > specific address spaces > generic. |
// All address expressions (our implementation considers phi, bitcast,
// addrspacecast, getelementptr, and select, as well as a few intrinsics such
// as ptrmask) start with the uninitialized address space.
| 70 | // The monotone transfer function moves the address space of a pointer down a |
| 71 | // lattice path from uninitialized to specific and then to generic. A join |
| 72 | // operation of two different specific address spaces pushes the expression down |
| 73 | // to the generic address space. The analysis completes once it reaches a fixed |
| 74 | // point. |
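// As an illustrative example (using NVPTX numbering, where addrspace(3) is
// shared and addrspace(1) is global):
//   join(uninitialized, 3) = 3
//   join(3, 3)             = 3
//   join(3, 1)             = 0 (generic)
// so a phi merging a shared pointer with a global pointer remains generic.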
| 75 | // |
| 76 | // Second, IR rewriting in Step 2 also needs to be circular. For example, |
| 77 | // converting %y to addrspace(3) requires the compiler to know the converted |
| 78 | // %y2, but converting %y2 needs the converted %y. To address this complication, |
| 79 | // we break these cycles using "poison" placeholders. When converting an |
| 80 | // instruction `I` to a new address space, if its operand `Op` is not converted |
| 81 | // yet, we let `I` temporarily use `poison` and fix all the uses later. |
| 82 | // For instance, our algorithm first converts %y to |
| 83 | // %y' = phi float addrspace(3)* [ %input, poison ] |
| 84 | // Then, it converts %y2 to |
| 85 | // %y2' = getelementptr %y', 1 |
| 86 | // Finally, it fixes the poison in %y' so that |
| 87 | // %y' = phi float addrspace(3)* [ %input, %y2' ] |
| 88 | // |
| 89 | //===----------------------------------------------------------------------===// |
| 90 | |
| 91 | #include "llvm/Transforms/Scalar/InferAddressSpaces.h" |
| 92 | #include "llvm/ADT/ArrayRef.h" |
| 93 | #include "llvm/ADT/DenseMap.h" |
| 94 | #include "llvm/ADT/DenseSet.h" |
| 95 | #include "llvm/ADT/SetVector.h" |
| 96 | #include "llvm/ADT/SmallVector.h" |
| 97 | #include "llvm/Analysis/AssumptionCache.h" |
| 98 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 99 | #include "llvm/Analysis/ValueTracking.h" |
| 100 | #include "llvm/IR/BasicBlock.h" |
| 101 | #include "llvm/IR/Constant.h" |
| 102 | #include "llvm/IR/Constants.h" |
| 103 | #include "llvm/IR/Dominators.h" |
| 104 | #include "llvm/IR/Function.h" |
| 105 | #include "llvm/IR/IRBuilder.h" |
| 106 | #include "llvm/IR/InstIterator.h" |
| 107 | #include "llvm/IR/Instruction.h" |
| 108 | #include "llvm/IR/Instructions.h" |
| 109 | #include "llvm/IR/IntrinsicInst.h" |
| 110 | #include "llvm/IR/Intrinsics.h" |
| 111 | #include "llvm/IR/LLVMContext.h" |
| 112 | #include "llvm/IR/Operator.h" |
| 113 | #include "llvm/IR/PassManager.h" |
| 114 | #include "llvm/IR/Type.h" |
| 115 | #include "llvm/IR/Use.h" |
| 116 | #include "llvm/IR/User.h" |
| 117 | #include "llvm/IR/Value.h" |
| 118 | #include "llvm/IR/ValueHandle.h" |
| 119 | #include "llvm/InitializePasses.h" |
| 120 | #include "llvm/Pass.h" |
| 121 | #include "llvm/Support/Casting.h" |
| 122 | #include "llvm/Support/CommandLine.h" |
| 123 | #include "llvm/Support/Debug.h" |
| 124 | #include "llvm/Support/ErrorHandling.h" |
| 125 | #include "llvm/Support/raw_ostream.h" |
| 126 | #include "llvm/Transforms/Scalar.h" |
| 127 | #include "llvm/Transforms/Utils/Local.h" |
| 128 | #include "llvm/Transforms/Utils/ValueMapper.h" |
| 129 | #include <cassert> |
| 130 | #include <iterator> |
| 131 | #include <limits> |
| 132 | #include <utility> |
| 133 | #include <vector> |
| 134 | |
| 135 | #define DEBUG_TYPE "infer-address-spaces" |
| 136 | |
| 137 | using namespace llvm; |
| 138 | |
| 139 | static cl::opt<bool> AssumeDefaultIsFlatAddressSpace( |
| 140 | "assume-default-is-flat-addrspace" , cl::init(Val: false), cl::ReallyHidden, |
| 141 | cl::desc("The default address space is assumed as the flat address space. " |
| 142 | "This is mainly for test purpose." )); |
| 143 | |
| 144 | static const unsigned UninitializedAddressSpace = |
| 145 | std::numeric_limits<unsigned>::max(); |
| 146 | |
| 147 | namespace { |
| 148 | |
| 149 | using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>; |
// Different from ValueToAddrSpaceMapTy, where a new addrspace is inferred on
// the *def* of a value, PredicatedAddrSpaceMapTy is a map where a new
// addrspace is inferred on the *use* of a pointer. This map is introduced to
// infer addrspace from the addrspace predicate assumptions built from the
// llvm.assume intrinsic. In that scenario, only specific uses (under a valid
// assumption context) can be inferred to have a new addrspace.
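// As an illustrative sketch (AMDGPU-flavoured; the exact predicate comes from
// the target's TTI hook), an assumption such as
//   %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %p)
//   call void @llvm.assume(i1 %is.shared)
// allows uses of %p dominated by the assume to be rewritten to addrspace(3),
// even though the definition of %p itself remains flat.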
| 156 | using PredicatedAddrSpaceMapTy = |
| 157 | DenseMap<std::pair<const Value *, const Value *>, unsigned>; |
| 158 | using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>; |
| 159 | |
| 160 | class InferAddressSpaces : public FunctionPass { |
| 161 | unsigned FlatAddrSpace = 0; |
| 162 | |
| 163 | public: |
| 164 | static char ID; |
| 165 | |
| 166 | InferAddressSpaces() |
| 167 | : FunctionPass(ID), FlatAddrSpace(UninitializedAddressSpace) { |
| 168 | initializeInferAddressSpacesPass(*PassRegistry::getPassRegistry()); |
| 169 | } |
| 170 | InferAddressSpaces(unsigned AS) : FunctionPass(ID), FlatAddrSpace(AS) { |
| 171 | initializeInferAddressSpacesPass(*PassRegistry::getPassRegistry()); |
| 172 | } |
| 173 | |
| 174 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 175 | AU.setPreservesCFG(); |
| 176 | AU.addPreserved<DominatorTreeWrapperPass>(); |
| 177 | AU.addRequired<AssumptionCacheTracker>(); |
| 178 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
| 179 | } |
| 180 | |
| 181 | bool runOnFunction(Function &F) override; |
| 182 | }; |
| 183 | |
| 184 | class InferAddressSpacesImpl { |
| 185 | AssumptionCache &AC; |
| 186 | Function *F = nullptr; |
| 187 | const DominatorTree *DT = nullptr; |
| 188 | const TargetTransformInfo *TTI = nullptr; |
| 189 | const DataLayout *DL = nullptr; |
| 190 | |
/// Target-specific address space whose uses should be replaced if possible.
| 193 | unsigned FlatAddrSpace = 0; |
| 194 | |
// Try to update the address space of V. Returns true if V is updated and
// false otherwise.
| 197 | bool updateAddressSpace(const Value &V, |
| 198 | ValueToAddrSpaceMapTy &InferredAddrSpace, |
| 199 | PredicatedAddrSpaceMapTy &PredicatedAS) const; |
| 200 | |
| 201 | // Tries to infer the specific address space of each address expression in |
| 202 | // Postorder. |
| 203 | void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder, |
| 204 | ValueToAddrSpaceMapTy &InferredAddrSpace, |
| 205 | PredicatedAddrSpaceMapTy &PredicatedAS) const; |
| 206 | |
| 207 | bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const; |
| 208 | |
| 209 | Value *cloneInstructionWithNewAddressSpace( |
| 210 | Instruction *I, unsigned NewAddrSpace, |
| 211 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
| 212 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
| 213 | SmallVectorImpl<const Use *> *PoisonUsesToFix) const; |
| 214 | |
| 215 | void performPointerReplacement( |
| 216 | Value *V, Value *NewV, Use &U, ValueToValueMapTy &ValueWithNewAddrSpace, |
| 217 | SmallVectorImpl<Instruction *> &DeadInstructions) const; |
| 218 | |
| 219 | // Changes the flat address expressions in function F to point to specific |
| 220 | // address spaces if InferredAddrSpace says so. Postorder is the postorder of |
| 221 | // all flat expressions in the use-def graph of function F. |
| 222 | bool rewriteWithNewAddressSpaces( |
| 223 | ArrayRef<WeakTrackingVH> Postorder, |
| 224 | const ValueToAddrSpaceMapTy &InferredAddrSpace, |
| 225 | const PredicatedAddrSpaceMapTy &PredicatedAS) const; |
| 226 | |
| 227 | void appendsFlatAddressExpressionToPostorderStack( |
| 228 | Value *V, PostorderStackTy &PostorderStack, |
| 229 | DenseSet<Value *> &Visited) const; |
| 230 | |
| 231 | bool rewriteIntrinsicOperands(IntrinsicInst *II, Value *OldV, |
| 232 | Value *NewV) const; |
| 233 | void collectRewritableIntrinsicOperands(IntrinsicInst *II, |
| 234 | PostorderStackTy &PostorderStack, |
| 235 | DenseSet<Value *> &Visited) const; |
| 236 | |
| 237 | std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const; |
| 238 | |
| 239 | Value *cloneValueWithNewAddressSpace( |
| 240 | Value *V, unsigned NewAddrSpace, |
| 241 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
| 242 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
| 243 | SmallVectorImpl<const Use *> *PoisonUsesToFix) const; |
| 244 | unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const; |
| 245 | |
| 246 | unsigned getPredicatedAddrSpace(const Value &PtrV, |
| 247 | const Value *UserCtx) const; |
| 248 | |
| 249 | public: |
| 250 | InferAddressSpacesImpl(AssumptionCache &AC, const DominatorTree *DT, |
| 251 | const TargetTransformInfo *TTI, unsigned FlatAddrSpace) |
| 252 | : AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {} |
| 253 | bool run(Function &F); |
| 254 | }; |
| 255 | |
| 256 | } // end anonymous namespace |
| 257 | |
| 258 | char InferAddressSpaces::ID = 0; |
| 259 | |
| 260 | INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces" , |
| 261 | false, false) |
| 262 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) |
| 263 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
| 264 | INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces" , |
| 265 | false, false) |
| 266 | |
| 267 | static Type *getPtrOrVecOfPtrsWithNewAS(Type *Ty, unsigned NewAddrSpace) { |
| 268 | assert(Ty->isPtrOrPtrVectorTy()); |
| 269 | PointerType *NPT = PointerType::get(C&: Ty->getContext(), AddressSpace: NewAddrSpace); |
| 270 | return Ty->getWithNewType(EltTy: NPT); |
| 271 | } |
| 272 | |
// Check whether this is a no-op pointer bitcast, expressed as a pair of
// `ptrtoint`/`inttoptr`, because LLVM has no no-op pointer bitcast between
// different address spaces.
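// For example (illustrative), the pair
//   %i = ptrtoint ptr addrspace(1) %p to i64
//   %q = inttoptr i64 %i to ptr
// is treated as a no-op cast of %p to the flat address space, provided both
// casts preserve all pointer bits and the target reports the
// addrspace(1)-to-flat cast as a no-op.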
| 276 | static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL, |
| 277 | const TargetTransformInfo *TTI) { |
| 278 | assert(I2P->getOpcode() == Instruction::IntToPtr); |
| 279 | auto *P2I = dyn_cast<Operator>(Val: I2P->getOperand(i: 0)); |
| 280 | if (!P2I || P2I->getOpcode() != Instruction::PtrToInt) |
| 281 | return false; |
// Check that it's really safe to treat this pair of `ptrtoint`/`inttoptr` as
// a no-op cast. Besides checking that both casts are themselves no-ops, we
// also double-check through the target-specific hook, because the
// reinterpreted pointer may be used in other pointer arithmetic. That ensures
// the underlying target agrees this is a no-op address space cast and that
// pointer bits are preserved.
// The current IR spec doesn't have clear rules on address space casts, in
// particular no clear definition of pointer bits in non-default address
// spaces. Dereferencing a pointer after an invalid reinterpret cast would be
// undefined, and because the meaning of bits in non-default address spaces is
// unclear in the current spec, pointer arithmetic after an invalid reinterpret
// cast may be undefined as well. However, since the target hooks confirm this
// is a no-op addrspacecast, that does not matter here: the bits are the same.
| 296 | unsigned P2IOp0AS = P2I->getOperand(i: 0)->getType()->getPointerAddressSpace(); |
| 297 | unsigned I2PAS = I2P->getType()->getPointerAddressSpace(); |
| 298 | return CastInst::isNoopCast(Opcode: Instruction::CastOps(I2P->getOpcode()), |
| 299 | SrcTy: I2P->getOperand(i: 0)->getType(), DstTy: I2P->getType(), |
| 300 | DL) && |
| 301 | CastInst::isNoopCast(Opcode: Instruction::CastOps(P2I->getOpcode()), |
| 302 | SrcTy: P2I->getOperand(i: 0)->getType(), DstTy: P2I->getType(), |
| 303 | DL) && |
| 304 | (P2IOp0AS == I2PAS || TTI->isNoopAddrSpaceCast(FromAS: P2IOp0AS, ToAS: I2PAS)); |
| 305 | } |
| 306 | |
| 307 | // Returns true if V is an address expression. |
// TODO: Currently, we only consider:
// - arguments and other values with a target-assumed address space
// - phi, bitcast, addrspacecast, getelementptr, and select operators
// - llvm.ptrmask calls and no-op ptrtoint/inttoptr pairs
| 311 | static bool isAddressExpression(const Value &V, const DataLayout &DL, |
| 312 | const TargetTransformInfo *TTI) { |
| 313 | |
| 314 | if (const Argument *Arg = dyn_cast<Argument>(Val: &V)) |
| 315 | return Arg->getType()->isPointerTy() && |
| 316 | TTI->getAssumedAddrSpace(V: &V) != UninitializedAddressSpace; |
| 317 | |
| 318 | const Operator *Op = dyn_cast<Operator>(Val: &V); |
| 319 | if (!Op) |
| 320 | return false; |
| 321 | |
| 322 | switch (Op->getOpcode()) { |
| 323 | case Instruction::PHI: |
| 324 | assert(Op->getType()->isPtrOrPtrVectorTy()); |
| 325 | return true; |
| 326 | case Instruction::BitCast: |
| 327 | case Instruction::AddrSpaceCast: |
| 328 | case Instruction::GetElementPtr: |
| 329 | return true; |
| 330 | case Instruction::Select: |
| 331 | return Op->getType()->isPtrOrPtrVectorTy(); |
| 332 | case Instruction::Call: { |
| 333 | const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &V); |
| 334 | return II && II->getIntrinsicID() == Intrinsic::ptrmask; |
| 335 | } |
| 336 | case Instruction::IntToPtr: |
| 337 | return isNoopPtrIntCastPair(I2P: Op, DL, TTI); |
| 338 | default: |
| 339 | // That value is an address expression if it has an assumed address space. |
| 340 | return TTI->getAssumedAddrSpace(V: &V) != UninitializedAddressSpace; |
| 341 | } |
| 342 | } |
| 343 | |
| 344 | // Returns the pointer operands of V. |
| 345 | // |
| 346 | // Precondition: V is an address expression. |
| 347 | static SmallVector<Value *, 2> |
| 348 | getPointerOperands(const Value &V, const DataLayout &DL, |
| 349 | const TargetTransformInfo *TTI) { |
| 350 | if (isa<Argument>(Val: &V)) |
| 351 | return {}; |
| 352 | |
| 353 | const Operator &Op = cast<Operator>(Val: V); |
| 354 | switch (Op.getOpcode()) { |
| 355 | case Instruction::PHI: { |
| 356 | auto IncomingValues = cast<PHINode>(Val: Op).incoming_values(); |
| 357 | return {IncomingValues.begin(), IncomingValues.end()}; |
| 358 | } |
| 359 | case Instruction::BitCast: |
| 360 | case Instruction::AddrSpaceCast: |
| 361 | case Instruction::GetElementPtr: |
| 362 | return {Op.getOperand(i: 0)}; |
| 363 | case Instruction::Select: |
| 364 | return {Op.getOperand(i: 1), Op.getOperand(i: 2)}; |
| 365 | case Instruction::Call: { |
| 366 | const IntrinsicInst &II = cast<IntrinsicInst>(Val: Op); |
| 367 | assert(II.getIntrinsicID() == Intrinsic::ptrmask && |
| 368 | "unexpected intrinsic call" ); |
| 369 | return {II.getArgOperand(i: 0)}; |
| 370 | } |
| 371 | case Instruction::IntToPtr: { |
| 372 | assert(isNoopPtrIntCastPair(&Op, DL, TTI)); |
| 373 | auto *P2I = cast<Operator>(Val: Op.getOperand(i: 0)); |
| 374 | return {P2I->getOperand(i: 0)}; |
| 375 | } |
| 376 | default: |
| 377 | llvm_unreachable("Unexpected instruction type." ); |
| 378 | } |
| 379 | } |
| 380 | |
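// As an illustrative sketch of the remangling performed below, a masked load
// through a flat pointer
//   %v = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> %m, <4 x float> %pt)
// becomes, once %p is inferred to be in addrspace(3),
//   %v = call <4 x float> @llvm.masked.load.v4f32.p3(ptr addrspace(3) %p3, i32 4, <4 x i1> %m, <4 x float> %pt)
// i.e. a declaration with the new pointer type is created and the pointer
// argument is replaced.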
| 381 | bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II, |
| 382 | Value *OldV, |
| 383 | Value *NewV) const { |
| 384 | Module *M = II->getParent()->getParent()->getParent(); |
| 385 | Intrinsic::ID IID = II->getIntrinsicID(); |
| 386 | switch (IID) { |
| 387 | case Intrinsic::objectsize: |
| 388 | case Intrinsic::masked_load: { |
| 389 | Type *DestTy = II->getType(); |
| 390 | Type *SrcTy = NewV->getType(); |
| 391 | Function *NewDecl = |
| 392 | Intrinsic::getOrInsertDeclaration(M, id: IID, Tys: {DestTy, SrcTy}); |
| 393 | II->setArgOperand(i: 0, v: NewV); |
| 394 | II->setCalledFunction(NewDecl); |
| 395 | return true; |
| 396 | } |
| 397 | case Intrinsic::ptrmask: |
// This is handled as an address expression, not as a pointer-using memory operation.
| 399 | return false; |
| 400 | case Intrinsic::masked_gather: { |
| 401 | Type *RetTy = II->getType(); |
| 402 | Type *NewPtrTy = NewV->getType(); |
| 403 | Function *NewDecl = |
| 404 | Intrinsic::getOrInsertDeclaration(M, id: IID, Tys: {RetTy, NewPtrTy}); |
| 405 | II->setArgOperand(i: 0, v: NewV); |
| 406 | II->setCalledFunction(NewDecl); |
| 407 | return true; |
| 408 | } |
| 409 | case Intrinsic::masked_store: |
| 410 | case Intrinsic::masked_scatter: { |
| 411 | Type *ValueTy = II->getOperand(i_nocapture: 0)->getType(); |
| 412 | Type *NewPtrTy = NewV->getType(); |
| 413 | Function *NewDecl = Intrinsic::getOrInsertDeclaration( |
| 414 | M, id: II->getIntrinsicID(), Tys: {ValueTy, NewPtrTy}); |
| 415 | II->setArgOperand(i: 1, v: NewV); |
| 416 | II->setCalledFunction(NewDecl); |
| 417 | return true; |
| 418 | } |
| 419 | case Intrinsic::prefetch: |
| 420 | case Intrinsic::is_constant: { |
| 421 | Function *NewDecl = Intrinsic::getOrInsertDeclaration( |
| 422 | M, id: II->getIntrinsicID(), Tys: {NewV->getType()}); |
| 423 | II->setArgOperand(i: 0, v: NewV); |
| 424 | II->setCalledFunction(NewDecl); |
| 425 | return true; |
| 426 | } |
| 427 | case Intrinsic::fake_use: { |
| 428 | II->replaceUsesOfWith(From: OldV, To: NewV); |
| 429 | return true; |
| 430 | } |
| 431 | case Intrinsic::lifetime_start: |
| 432 | case Intrinsic::lifetime_end: { |
| 433 | Function *NewDecl = Intrinsic::getOrInsertDeclaration( |
| 434 | M, id: II->getIntrinsicID(), Tys: {NewV->getType()}); |
| 435 | II->setArgOperand(i: 1, v: NewV); |
| 436 | II->setCalledFunction(NewDecl); |
| 437 | return true; |
| 438 | } |
| 439 | default: { |
| 440 | Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV); |
| 441 | if (!Rewrite) |
| 442 | return false; |
| 443 | if (Rewrite != II) |
| 444 | II->replaceAllUsesWith(V: Rewrite); |
| 445 | return true; |
| 446 | } |
| 447 | } |
| 448 | } |
| 449 | |
| 450 | void InferAddressSpacesImpl::collectRewritableIntrinsicOperands( |
| 451 | IntrinsicInst *II, PostorderStackTy &PostorderStack, |
| 452 | DenseSet<Value *> &Visited) const { |
| 453 | auto IID = II->getIntrinsicID(); |
| 454 | switch (IID) { |
| 455 | case Intrinsic::ptrmask: |
| 456 | case Intrinsic::objectsize: |
| 457 | appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 0), |
| 458 | PostorderStack, Visited); |
| 459 | break; |
| 460 | case Intrinsic::is_constant: { |
| 461 | Value *Ptr = II->getArgOperand(i: 0); |
| 462 | if (Ptr->getType()->isPtrOrPtrVectorTy()) { |
| 463 | appendsFlatAddressExpressionToPostorderStack(V: Ptr, PostorderStack, |
| 464 | Visited); |
| 465 | } |
| 466 | |
| 467 | break; |
| 468 | } |
| 469 | case Intrinsic::masked_load: |
| 470 | case Intrinsic::masked_gather: |
| 471 | case Intrinsic::prefetch: |
| 472 | appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 0), |
| 473 | PostorderStack, Visited); |
| 474 | break; |
| 475 | case Intrinsic::masked_store: |
| 476 | case Intrinsic::masked_scatter: |
| 477 | appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 1), |
| 478 | PostorderStack, Visited); |
| 479 | break; |
| 480 | case Intrinsic::fake_use: { |
| 481 | for (Value *Op : II->operands()) { |
| 482 | if (Op->getType()->isPtrOrPtrVectorTy()) { |
| 483 | appendsFlatAddressExpressionToPostorderStack(V: Op, PostorderStack, |
| 484 | Visited); |
| 485 | } |
| 486 | } |
| 487 | |
| 488 | break; |
| 489 | } |
| 490 | case Intrinsic::lifetime_start: |
| 491 | case Intrinsic::lifetime_end: { |
| 492 | appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 1), |
| 493 | PostorderStack, Visited); |
| 494 | break; |
| 495 | } |
| 496 | default: |
| 497 | SmallVector<int, 2> OpIndexes; |
| 498 | if (TTI->collectFlatAddressOperands(OpIndexes, IID)) { |
| 499 | for (int Idx : OpIndexes) { |
| 500 | appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: Idx), |
| 501 | PostorderStack, Visited); |
| 502 | } |
| 503 | } |
| 504 | break; |
| 505 | } |
| 506 | } |
| 507 | |
// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
| 511 | void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack( |
| 512 | Value *V, PostorderStackTy &PostorderStack, |
| 513 | DenseSet<Value *> &Visited) const { |
| 514 | assert(V->getType()->isPtrOrPtrVectorTy()); |
| 515 | |
| 516 | // Generic addressing expressions may be hidden in nested constant |
| 517 | // expressions. |
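// For example (illustrative), a load whose pointer operand is
//   getelementptr inbounds (float, ptr addrspacecast (ptr addrspace(3) @a to ptr), i64 4)
// hides the flat addrspacecast of @a inside a constant GEP expression.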
| 518 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V)) { |
| 519 | // TODO: Look in non-address parts, like icmp operands. |
| 520 | if (isAddressExpression(V: *CE, DL: *DL, TTI) && Visited.insert(V: CE).second) |
| 521 | PostorderStack.emplace_back(Args&: CE, Args: false); |
| 522 | |
| 523 | return; |
| 524 | } |
| 525 | |
| 526 | if (V->getType()->getPointerAddressSpace() == FlatAddrSpace && |
| 527 | isAddressExpression(V: *V, DL: *DL, TTI)) { |
| 528 | if (Visited.insert(V).second) { |
| 529 | PostorderStack.emplace_back(Args&: V, Args: false); |
| 530 | |
| 531 | if (auto *Op = dyn_cast<Operator>(Val: V)) |
| 532 | for (auto &O : Op->operands()) |
| 533 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val&: O)) |
| 534 | if (isAddressExpression(V: *CE, DL: *DL, TTI) && Visited.insert(V: CE).second) |
| 535 | PostorderStack.emplace_back(Args&: CE, Args: false); |
| 536 | } |
| 537 | } |
| 538 | } |
| 539 | |
| 540 | // Returns all flat address expressions in function F. The elements are ordered |
| 541 | // in postorder. |
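// For example (illustrative), given
//   %p = addrspacecast ptr addrspace(3) @a to ptr
//   %q = getelementptr float, ptr %p, i64 4
// the returned vector lists %p before %q, so operands always appear before
// their users when the list is processed front to back.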
| 542 | std::vector<WeakTrackingVH> |
| 543 | InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const { |
| 544 | // This function implements a non-recursive postorder traversal of a partial |
| 545 | // use-def graph of function F. |
| 546 | PostorderStackTy PostorderStack; |
| 547 | // The set of visited expressions. |
| 548 | DenseSet<Value *> Visited; |
| 549 | |
| 550 | auto PushPtrOperand = [&](Value *Ptr) { |
| 551 | appendsFlatAddressExpressionToPostorderStack(V: Ptr, PostorderStack, Visited); |
| 552 | }; |
| 553 | |
// Look at operations that may be interesting to accelerate by moving to a
// known address space. We aim at generating faster loads and stores, but pure
// addressing calculations may also be faster.
| 557 | for (Instruction &I : instructions(F)) { |
| 558 | if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I)) { |
| 559 | PushPtrOperand(GEP->getPointerOperand()); |
| 560 | } else if (auto *LI = dyn_cast<LoadInst>(Val: &I)) |
| 561 | PushPtrOperand(LI->getPointerOperand()); |
| 562 | else if (auto *SI = dyn_cast<StoreInst>(Val: &I)) |
| 563 | PushPtrOperand(SI->getPointerOperand()); |
| 564 | else if (auto *RMW = dyn_cast<AtomicRMWInst>(Val: &I)) |
| 565 | PushPtrOperand(RMW->getPointerOperand()); |
| 566 | else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: &I)) |
| 567 | PushPtrOperand(CmpX->getPointerOperand()); |
| 568 | else if (auto *MI = dyn_cast<MemIntrinsic>(Val: &I)) { |
| 569 | // For memset/memcpy/memmove, any pointer operand can be replaced. |
| 570 | PushPtrOperand(MI->getRawDest()); |
| 571 | |
| 572 | // Handle 2nd operand for memcpy/memmove. |
| 573 | if (auto *MTI = dyn_cast<MemTransferInst>(Val: MI)) |
| 574 | PushPtrOperand(MTI->getRawSource()); |
| 575 | } else if (auto *II = dyn_cast<IntrinsicInst>(Val: &I)) |
| 576 | collectRewritableIntrinsicOperands(II, PostorderStack, Visited); |
| 577 | else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: &I)) { |
| 578 | if (Cmp->getOperand(i_nocapture: 0)->getType()->isPtrOrPtrVectorTy()) { |
| 579 | PushPtrOperand(Cmp->getOperand(i_nocapture: 0)); |
| 580 | PushPtrOperand(Cmp->getOperand(i_nocapture: 1)); |
| 581 | } |
| 582 | } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(Val: &I)) { |
| 583 | PushPtrOperand(ASC->getPointerOperand()); |
| 584 | } else if (auto *I2P = dyn_cast<IntToPtrInst>(Val: &I)) { |
| 585 | if (isNoopPtrIntCastPair(I2P: cast<Operator>(Val: I2P), DL: *DL, TTI)) |
| 586 | PushPtrOperand(cast<Operator>(Val: I2P->getOperand(i_nocapture: 0))->getOperand(i: 0)); |
| 587 | } else if (auto *RI = dyn_cast<ReturnInst>(Val: &I)) { |
| 588 | if (auto *RV = RI->getReturnValue(); |
| 589 | RV && RV->getType()->isPtrOrPtrVectorTy()) |
| 590 | PushPtrOperand(RV); |
| 591 | } |
| 592 | } |
| 593 | |
| 594 | std::vector<WeakTrackingVH> Postorder; // The resultant postorder. |
| 595 | while (!PostorderStack.empty()) { |
| 596 | Value *TopVal = PostorderStack.back().getPointer(); |
| 597 | // If the operands of the expression on the top are already explored, |
| 598 | // adds that expression to the resultant postorder. |
| 599 | if (PostorderStack.back().getInt()) { |
| 600 | if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace) |
| 601 | Postorder.push_back(x: TopVal); |
| 602 | PostorderStack.pop_back(); |
| 603 | continue; |
| 604 | } |
| 605 | // Otherwise, adds its operands to the stack and explores them. |
| 606 | PostorderStack.back().setInt(true); |
| 607 | // Skip values with an assumed address space. |
| 608 | if (TTI->getAssumedAddrSpace(V: TopVal) == UninitializedAddressSpace) { |
| 609 | for (Value *PtrOperand : getPointerOperands(V: *TopVal, DL: *DL, TTI)) { |
| 610 | appendsFlatAddressExpressionToPostorderStack(V: PtrOperand, PostorderStack, |
| 611 | Visited); |
| 612 | } |
| 613 | } |
| 614 | } |
| 615 | return Postorder; |
| 616 | } |
| 617 | |
| 618 | // A helper function for cloneInstructionWithNewAddressSpace. Returns the clone |
| 619 | // of OperandUse.get() in the new address space. If the clone is not ready yet, |
| 620 | // returns poison in the new address space as a placeholder. |
| 621 | static Value *operandWithNewAddressSpaceOrCreatePoison( |
| 622 | const Use &OperandUse, unsigned NewAddrSpace, |
| 623 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
| 624 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
| 625 | SmallVectorImpl<const Use *> *PoisonUsesToFix) { |
| 626 | Value *Operand = OperandUse.get(); |
| 627 | |
| 628 | Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: Operand->getType(), NewAddrSpace); |
| 629 | |
| 630 | if (Constant *C = dyn_cast<Constant>(Val: Operand)) |
| 631 | return ConstantExpr::getAddrSpaceCast(C, Ty: NewPtrTy); |
| 632 | |
| 633 | if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: Operand)) |
| 634 | return NewOperand; |
| 635 | |
| 636 | Instruction *Inst = cast<Instruction>(Val: OperandUse.getUser()); |
| 637 | auto I = PredicatedAS.find(Val: std::make_pair(x&: Inst, y&: Operand)); |
| 638 | if (I != PredicatedAS.end()) { |
| 639 | // Insert an addrspacecast on that operand before the user. |
| 640 | unsigned NewAS = I->second; |
| 641 | Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: Operand->getType(), NewAddrSpace: NewAS); |
| 642 | auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy); |
| 643 | NewI->insertBefore(InsertPos: Inst->getIterator()); |
| 644 | NewI->setDebugLoc(Inst->getDebugLoc()); |
| 645 | return NewI; |
| 646 | } |
| 647 | |
| 648 | PoisonUsesToFix->push_back(Elt: &OperandUse); |
| 649 | return PoisonValue::get(T: NewPtrTy); |
| 650 | } |
| 651 | |
| 652 | // Returns a clone of `I` with its operands converted to those specified in |
| 653 | // ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an |
| 654 | // operand whose address space needs to be modified might not exist in |
// ValueWithNewAddrSpace. In that case, uses poison as a placeholder operand
// and adds that operand use to PoisonUsesToFix so that the caller can fix it
// later.
| 657 | // |
| 658 | // Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast |
| 659 | // from a pointer whose type already matches. Therefore, this function returns a |
| 660 | // Value* instead of an Instruction*. |
| 661 | // |
// This may also return nullptr if the instruction could not be rewritten.
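// For example (illustrative), if
//   %flat = addrspacecast ptr addrspace(3) %p to ptr
// is inferred to be in addrspace(3), no new instruction is created at all;
// the original source %p is returned directly.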
| 664 | Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace( |
| 665 | Instruction *I, unsigned NewAddrSpace, |
| 666 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
| 667 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
| 668 | SmallVectorImpl<const Use *> *PoisonUsesToFix) const { |
| 669 | Type *NewPtrType = getPtrOrVecOfPtrsWithNewAS(Ty: I->getType(), NewAddrSpace); |
| 670 | |
| 671 | if (I->getOpcode() == Instruction::AddrSpaceCast) { |
| 672 | Value *Src = I->getOperand(i: 0); |
| 673 | // Because `I` is flat, the source address space must be specific. |
| 674 | // Therefore, the inferred address space must be the source space, according |
| 675 | // to our algorithm. |
| 676 | assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace); |
| 677 | return Src; |
| 678 | } |
| 679 | |
| 680 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
// Technically the called intrinsic function is itself a pointer-typed
// operand, so handle calls specially here, before the generic pointer
// operand rewriting below.
| 683 | assert(II->getIntrinsicID() == Intrinsic::ptrmask); |
| 684 | Value *NewPtr = operandWithNewAddressSpaceOrCreatePoison( |
| 685 | OperandUse: II->getArgOperandUse(i: 0), NewAddrSpace, ValueWithNewAddrSpace, |
| 686 | PredicatedAS, PoisonUsesToFix); |
| 687 | Value *Rewrite = |
| 688 | TTI->rewriteIntrinsicWithAddressSpace(II, OldV: II->getArgOperand(i: 0), NewV: NewPtr); |
| 689 | if (Rewrite) { |
| 690 | assert(Rewrite != II && "cannot modify this pointer operation in place" ); |
| 691 | return Rewrite; |
| 692 | } |
| 693 | |
| 694 | return nullptr; |
| 695 | } |
| 696 | |
| 697 | unsigned AS = TTI->getAssumedAddrSpace(V: I); |
| 698 | if (AS != UninitializedAddressSpace) { |
| 699 | // For the assumed address space, insert an `addrspacecast` to make that |
| 700 | // explicit. |
| 701 | Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: I->getType(), NewAddrSpace: AS); |
| 702 | auto *NewI = new AddrSpaceCastInst(I, NewPtrTy); |
| 703 | NewI->insertAfter(InsertPos: I->getIterator()); |
| 704 | NewI->setDebugLoc(I->getDebugLoc()); |
| 705 | return NewI; |
| 706 | } |
| 707 | |
| 708 | // Computes the converted pointer operands. |
| 709 | SmallVector<Value *, 4> NewPointerOperands; |
| 710 | for (const Use &OperandUse : I->operands()) { |
| 711 | if (!OperandUse.get()->getType()->isPtrOrPtrVectorTy()) |
| 712 | NewPointerOperands.push_back(Elt: nullptr); |
| 713 | else |
| 714 | NewPointerOperands.push_back(Elt: operandWithNewAddressSpaceOrCreatePoison( |
| 715 | OperandUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, |
| 716 | PoisonUsesToFix)); |
| 717 | } |
| 718 | |
| 719 | switch (I->getOpcode()) { |
| 720 | case Instruction::BitCast: |
| 721 | return new BitCastInst(NewPointerOperands[0], NewPtrType); |
| 722 | case Instruction::PHI: { |
| 723 | assert(I->getType()->isPtrOrPtrVectorTy()); |
| 724 | PHINode *PHI = cast<PHINode>(Val: I); |
| 725 | PHINode *NewPHI = PHINode::Create(Ty: NewPtrType, NumReservedValues: PHI->getNumIncomingValues()); |
| 726 | for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) { |
| 727 | unsigned OperandNo = PHINode::getOperandNumForIncomingValue(i: Index); |
| 728 | NewPHI->addIncoming(V: NewPointerOperands[OperandNo], |
| 729 | BB: PHI->getIncomingBlock(i: Index)); |
| 730 | } |
| 731 | return NewPHI; |
| 732 | } |
| 733 | case Instruction::GetElementPtr: { |
| 734 | GetElementPtrInst *GEP = cast<GetElementPtrInst>(Val: I); |
| 735 | GetElementPtrInst *NewGEP = GetElementPtrInst::Create( |
| 736 | PointeeType: GEP->getSourceElementType(), Ptr: NewPointerOperands[0], |
| 737 | IdxList: SmallVector<Value *, 4>(GEP->indices())); |
| 738 | NewGEP->setIsInBounds(GEP->isInBounds()); |
| 739 | return NewGEP; |
| 740 | } |
| 741 | case Instruction::Select: |
| 742 | assert(I->getType()->isPtrOrPtrVectorTy()); |
| 743 | return SelectInst::Create(C: I->getOperand(i: 0), S1: NewPointerOperands[1], |
| 744 | S2: NewPointerOperands[2], NameStr: "" , InsertBefore: nullptr, MDFrom: I); |
| 745 | case Instruction::IntToPtr: { |
| 746 | assert(isNoopPtrIntCastPair(cast<Operator>(I), *DL, TTI)); |
| 747 | Value *Src = cast<Operator>(Val: I->getOperand(i: 0))->getOperand(i: 0); |
| 748 | if (Src->getType() == NewPtrType) |
| 749 | return Src; |
| 750 | |
// If we had a no-op inttoptr/ptrtoint pair, we may still have inferred a
// specific address space from a generic pointer source, so we need to insert
// a cast back.
| 754 | return new AddrSpaceCastInst(Src, NewPtrType); |
| 755 | } |
| 756 | default: |
| 757 | llvm_unreachable("Unexpected opcode" ); |
| 758 | } |
| 759 | } |
| 760 | |
| 761 | // Similar to cloneInstructionWithNewAddressSpace, returns a clone of the |
| 762 | // constant expression `CE` with its operands replaced as specified in |
| 763 | // ValueWithNewAddrSpace. |
| 764 | static Value *cloneConstantExprWithNewAddressSpace( |
| 765 | ConstantExpr *CE, unsigned NewAddrSpace, |
| 766 | const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL, |
| 767 | const TargetTransformInfo *TTI) { |
| 768 | Type *TargetType = |
| 769 | CE->getType()->isPtrOrPtrVectorTy() |
| 770 | ? getPtrOrVecOfPtrsWithNewAS(Ty: CE->getType(), NewAddrSpace) |
| 771 | : CE->getType(); |
| 772 | |
| 773 | if (CE->getOpcode() == Instruction::AddrSpaceCast) { |
| 774 | // Because CE is flat, the source address space must be specific. |
| 775 | // Therefore, the inferred address space must be the source space according |
| 776 | // to our algorithm. |
| 777 | assert(CE->getOperand(0)->getType()->getPointerAddressSpace() == |
| 778 | NewAddrSpace); |
| 779 | return CE->getOperand(i_nocapture: 0); |
| 780 | } |
| 781 | |
| 782 | if (CE->getOpcode() == Instruction::BitCast) { |
| 783 | if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: CE->getOperand(i_nocapture: 0))) |
| 784 | return ConstantExpr::getBitCast(C: cast<Constant>(Val: NewOperand), Ty: TargetType); |
| 785 | return ConstantExpr::getAddrSpaceCast(C: CE, Ty: TargetType); |
| 786 | } |
| 787 | |
| 788 | if (CE->getOpcode() == Instruction::IntToPtr) { |
| 789 | assert(isNoopPtrIntCastPair(cast<Operator>(CE), *DL, TTI)); |
| 790 | Constant *Src = cast<ConstantExpr>(Val: CE->getOperand(i_nocapture: 0))->getOperand(i_nocapture: 0); |
| 791 | assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace); |
| 792 | return Src; |
| 793 | } |
| 794 | |
| 795 | // Computes the operands of the new constant expression. |
| 796 | bool IsNew = false; |
| 797 | SmallVector<Constant *, 4> NewOperands; |
| 798 | for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) { |
| 799 | Constant *Operand = CE->getOperand(i_nocapture: Index); |
| 800 | // If the address space of `Operand` needs to be modified, the new operand |
| 801 | // with the new address space should already be in ValueWithNewAddrSpace |
| 802 | // because (1) the constant expressions we consider (i.e. addrspacecast, |
| 803 | // bitcast, and getelementptr) do not incur cycles in the data flow graph |
| 804 | // and (2) this function is called on constant expressions in postorder. |
| 805 | if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: Operand)) { |
| 806 | IsNew = true; |
| 807 | NewOperands.push_back(Elt: cast<Constant>(Val: NewOperand)); |
| 808 | continue; |
| 809 | } |
| 810 | if (auto *CExpr = dyn_cast<ConstantExpr>(Val: Operand)) |
| 811 | if (Value *NewOperand = cloneConstantExprWithNewAddressSpace( |
| 812 | CE: CExpr, NewAddrSpace, ValueWithNewAddrSpace, DL, TTI)) { |
| 813 | IsNew = true; |
| 814 | NewOperands.push_back(Elt: cast<Constant>(Val: NewOperand)); |
| 815 | continue; |
| 816 | } |
| 817 | // Otherwise, reuses the old operand. |
| 818 | NewOperands.push_back(Elt: Operand); |
| 819 | } |
| 820 | |
// If !IsNew, we would replace the Value with itself. However, replaced values
// are assumed to be wrapped in an addrspacecast later, so drop it now.
| 823 | if (!IsNew) |
| 824 | return nullptr; |
| 825 | |
| 826 | if (CE->getOpcode() == Instruction::GetElementPtr) { |
| 827 | // Needs to specify the source type while constructing a getelementptr |
| 828 | // constant expression. |
| 829 | return CE->getWithOperands(Ops: NewOperands, Ty: TargetType, /*OnlyIfReduced=*/false, |
| 830 | SrcTy: cast<GEPOperator>(Val: CE)->getSourceElementType()); |
| 831 | } |
| 832 | |
| 833 | return CE->getWithOperands(Ops: NewOperands, Ty: TargetType); |
| 834 | } |
| 835 | |
| 836 | // Returns a clone of the value `V`, with its operands replaced as specified in |
| 837 | // ValueWithNewAddrSpace. This function is called on every flat address |
| 838 | // expression whose address space needs to be modified, in postorder. |
| 839 | // |
| 840 | // See cloneInstructionWithNewAddressSpace for the meaning of PoisonUsesToFix. |
| 841 | Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace( |
| 842 | Value *V, unsigned NewAddrSpace, |
| 843 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
| 844 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
| 845 | SmallVectorImpl<const Use *> *PoisonUsesToFix) const { |
| 846 | // All values in Postorder are flat address expressions. |
| 847 | assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace && |
| 848 | isAddressExpression(*V, *DL, TTI)); |
| 849 | |
| 850 | if (auto *Arg = dyn_cast<Argument>(Val: V)) { |
// Arguments are rewritten via an addrspacecast in the function body, as we do
// not want to change the function signature.
| 853 | Function *F = Arg->getParent(); |
| 854 | BasicBlock::iterator Insert = F->getEntryBlock().getFirstNonPHIIt(); |
| 855 | |
| 856 | Type *NewPtrTy = PointerType::get(C&: Arg->getContext(), AddressSpace: NewAddrSpace); |
| 857 | auto *NewI = new AddrSpaceCastInst(Arg, NewPtrTy); |
| 858 | NewI->insertBefore(InsertPos: Insert); |
| 859 | return NewI; |
| 860 | } |
| 861 | |
| 862 | if (Instruction *I = dyn_cast<Instruction>(Val: V)) { |
| 863 | Value *NewV = cloneInstructionWithNewAddressSpace( |
| 864 | I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix); |
| 865 | if (Instruction *NewI = dyn_cast_or_null<Instruction>(Val: NewV)) { |
| 866 | if (NewI->getParent() == nullptr) { |
| 867 | NewI->insertBefore(InsertPos: I->getIterator()); |
| 868 | NewI->takeName(V: I); |
| 869 | NewI->setDebugLoc(I->getDebugLoc()); |
| 870 | } |
| 871 | } |
| 872 | return NewV; |
| 873 | } |
| 874 | |
| 875 | return cloneConstantExprWithNewAddressSpace( |
| 876 | CE: cast<ConstantExpr>(Val: V), NewAddrSpace, ValueWithNewAddrSpace, DL, TTI); |
| 877 | } |
| 878 | |
| 879 | // Defines the join operation on the address space lattice (see the file header |
| 880 | // comments). |
| 881 | unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1, |
| 882 | unsigned AS2) const { |
| 883 | if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace) |
| 884 | return FlatAddrSpace; |
| 885 | |
| 886 | if (AS1 == UninitializedAddressSpace) |
| 887 | return AS2; |
| 888 | if (AS2 == UninitializedAddressSpace) |
| 889 | return AS1; |
| 890 | |
| 891 | // The join of two different specific address spaces is flat. |
| 892 | return (AS1 == AS2) ? AS1 : FlatAddrSpace; |
| 893 | } |
| 894 | |
| 895 | bool InferAddressSpacesImpl::run(Function &CurFn) { |
| 896 | F = &CurFn; |
| 897 | DL = &F->getDataLayout(); |
| 898 | |
| 899 | if (AssumeDefaultIsFlatAddressSpace) |
| 900 | FlatAddrSpace = 0; |
| 901 | |
| 902 | if (FlatAddrSpace == UninitializedAddressSpace) { |
| 903 | FlatAddrSpace = TTI->getFlatAddressSpace(); |
| 904 | if (FlatAddrSpace == UninitializedAddressSpace) |
| 905 | return false; |
| 906 | } |
| 907 | |
| 908 | // Collects all flat address expressions in postorder. |
| 909 | std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F&: *F); |
| 910 | |
| 911 | // Runs a data-flow analysis to refine the address spaces of every expression |
| 912 | // in Postorder. |
| 913 | ValueToAddrSpaceMapTy InferredAddrSpace; |
| 914 | PredicatedAddrSpaceMapTy PredicatedAS; |
| 915 | inferAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS); |
| 916 | |
// Changes the address spaces of the flat address expressions that are inferred
// to point to a specific address space.
| 919 | return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, |
| 920 | PredicatedAS); |
| 921 | } |
| 922 | |
| 923 | // Constants need to be tracked through RAUW to handle cases with nested |
| 924 | // constant expressions, so wrap values in WeakTrackingVH. |
| 925 | void InferAddressSpacesImpl::inferAddressSpaces( |
| 926 | ArrayRef<WeakTrackingVH> Postorder, |
| 927 | ValueToAddrSpaceMapTy &InferredAddrSpace, |
| 928 | PredicatedAddrSpaceMapTy &PredicatedAS) const { |
| 929 | SetVector<Value *> Worklist(llvm::from_range, Postorder); |
| 930 | // Initially, all expressions are in the uninitialized address space. |
| 931 | for (Value *V : Postorder) |
| 932 | InferredAddrSpace[V] = UninitializedAddressSpace; |
| 933 | |
| 934 | while (!Worklist.empty()) { |
| 935 | Value *V = Worklist.pop_back_val(); |
| 936 | |
// Try to update the address space of V according to the address spaces of its
// operands.
| 939 | if (!updateAddressSpace(V: *V, InferredAddrSpace, PredicatedAS)) |
| 940 | continue; |
| 941 | |
| 942 | for (Value *User : V->users()) { |
| 943 | // Skip if User is already in the worklist. |
| 944 | if (Worklist.count(key: User)) |
| 945 | continue; |
| 946 | |
| 947 | auto Pos = InferredAddrSpace.find(Val: User); |
| 948 | // Our algorithm only updates the address spaces of flat address |
| 949 | // expressions, which are those in InferredAddrSpace. |
| 950 | if (Pos == InferredAddrSpace.end()) |
| 951 | continue; |
| 952 | |
| 953 | // Function updateAddressSpace moves the address space down a lattice |
| 954 | // path. Therefore, nothing to do if User is already inferred as flat (the |
| 955 | // bottom element in the lattice). |
| 956 | if (Pos->second == FlatAddrSpace) |
| 957 | continue; |
| 958 | |
| 959 | Worklist.insert(X: User); |
| 960 | } |
| 961 | } |
| 962 | } |
| 963 | |
| 964 | unsigned |
| 965 | InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &Ptr, |
| 966 | const Value *UserCtx) const { |
| 967 | const Instruction *UserCtxI = dyn_cast<Instruction>(Val: UserCtx); |
| 968 | if (!UserCtxI) |
| 969 | return UninitializedAddressSpace; |
| 970 | |
| 971 | const Value *StrippedPtr = Ptr.stripInBoundsOffsets(); |
| 972 | for (auto &AssumeVH : AC.assumptionsFor(V: StrippedPtr)) { |
| 973 | if (!AssumeVH) |
| 974 | continue; |
| 975 | CallInst *CI = cast<CallInst>(Val&: AssumeVH); |
| 976 | if (!isValidAssumeForContext(I: CI, CxtI: UserCtxI, DT)) |
| 977 | continue; |
| 978 | |
| 979 | const Value *Ptr; |
| 980 | unsigned AS; |
| 981 | std::tie(args&: Ptr, args&: AS) = TTI->getPredicatedAddrSpace(V: CI->getArgOperand(i: 0)); |
| 982 | if (Ptr) |
| 983 | return AS; |
| 984 | } |
| 985 | |
| 986 | return UninitializedAddressSpace; |
| 987 | } |
| 988 | |
| 989 | bool InferAddressSpacesImpl::updateAddressSpace( |
| 990 | const Value &V, ValueToAddrSpaceMapTy &InferredAddrSpace, |
| 991 | PredicatedAddrSpaceMapTy &PredicatedAS) const { |
| 992 | assert(InferredAddrSpace.count(&V)); |
| 993 | |
| 994 | LLVM_DEBUG(dbgs() << "Updating the address space of\n " << V << '\n'); |
| 995 | |
| 996 | // The new inferred address space equals the join of the address spaces |
| 997 | // of all its pointer operands. |
| 998 | unsigned NewAS = UninitializedAddressSpace; |
| 999 | |
| 1000 | // isAddressExpression should guarantee that V is an operator or an argument. |
| 1001 | assert(isa<Operator>(V) || isa<Argument>(V)); |
| 1002 | |
| 1003 | if (isa<Operator>(Val: V) && |
| 1004 | cast<Operator>(Val: V).getOpcode() == Instruction::Select) { |
| 1005 | const Operator &Op = cast<Operator>(Val: V); |
| 1006 | Value *Src0 = Op.getOperand(i: 1); |
| 1007 | Value *Src1 = Op.getOperand(i: 2); |
| 1008 | |
| 1009 | auto I = InferredAddrSpace.find(Val: Src0); |
| 1010 | unsigned Src0AS = (I != InferredAddrSpace.end()) |
| 1011 | ? I->second |
| 1012 | : Src0->getType()->getPointerAddressSpace(); |
| 1013 | |
| 1014 | auto J = InferredAddrSpace.find(Val: Src1); |
| 1015 | unsigned Src1AS = (J != InferredAddrSpace.end()) |
| 1016 | ? J->second |
| 1017 | : Src1->getType()->getPointerAddressSpace(); |
| 1018 | |
| 1019 | auto *C0 = dyn_cast<Constant>(Val: Src0); |
| 1020 | auto *C1 = dyn_cast<Constant>(Val: Src1); |
| 1021 | |
| 1022 | // If one of the inputs is a constant, we may be able to do a constant |
| 1023 | // addrspacecast of it. Defer inferring the address space until the input |
| 1024 | // address space is known. |
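// For example (illustrative), in
//   %s = select i1 %c, ptr %p, ptr null
// the decision is deferred while %p's address space is still uninitialized;
// once %p is known to be in addrspace(3), the null operand can simply be
// recast, so %s does not have to stay flat.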
| 1025 | if ((C1 && Src0AS == UninitializedAddressSpace) || |
| 1026 | (C0 && Src1AS == UninitializedAddressSpace)) |
| 1027 | return false; |
| 1028 | |
| 1029 | if (C0 && isSafeToCastConstAddrSpace(C: C0, NewAS: Src1AS)) |
| 1030 | NewAS = Src1AS; |
| 1031 | else if (C1 && isSafeToCastConstAddrSpace(C: C1, NewAS: Src0AS)) |
| 1032 | NewAS = Src0AS; |
| 1033 | else |
| 1034 | NewAS = joinAddressSpaces(AS1: Src0AS, AS2: Src1AS); |
| 1035 | } else { |
| 1036 | unsigned AS = TTI->getAssumedAddrSpace(V: &V); |
| 1037 | if (AS != UninitializedAddressSpace) { |
| 1038 | // Use the assumed address space directly. |
| 1039 | NewAS = AS; |
| 1040 | } else { |
| 1041 | // Otherwise, infer the address space from its pointer operands. |
| 1042 | for (Value *PtrOperand : getPointerOperands(V, DL: *DL, TTI)) { |
| 1043 | auto I = InferredAddrSpace.find(Val: PtrOperand); |
| 1044 | unsigned OperandAS; |
| 1045 | if (I == InferredAddrSpace.end()) { |
| 1046 | OperandAS = PtrOperand->getType()->getPointerAddressSpace(); |
| 1047 | if (OperandAS == FlatAddrSpace) { |
| 1048 | // Check AC for assumption dominating V. |
| 1049 | unsigned AS = getPredicatedAddrSpace(Ptr: *PtrOperand, UserCtx: &V); |
| 1050 | if (AS != UninitializedAddressSpace) { |
| 1051 | LLVM_DEBUG(dbgs() |
| 1052 | << " deduce operand AS from the predicate addrspace " |
| 1053 | << AS << '\n'); |
| 1054 | OperandAS = AS; |
| 1055 | // Record this use with the predicated AS. |
| 1056 | PredicatedAS[std::make_pair(x: &V, y&: PtrOperand)] = OperandAS; |
| 1057 | } |
| 1058 | } |
| 1059 | } else |
| 1060 | OperandAS = I->second; |
| 1061 | |
| 1062 | // join(flat, *) = flat. So we can break if NewAS is already flat. |
| 1063 | NewAS = joinAddressSpaces(AS1: NewAS, AS2: OperandAS); |
| 1064 | if (NewAS == FlatAddrSpace) |
| 1065 | break; |
| 1066 | } |
| 1067 | } |
| 1068 | } |
| 1069 | |
| 1070 | unsigned OldAS = InferredAddrSpace.lookup(Val: &V); |
| 1071 | assert(OldAS != FlatAddrSpace); |
| 1072 | if (OldAS == NewAS) |
| 1073 | return false; |
| 1074 | |
// If any update is made, V's users are added to the worklist because their
// address spaces may also need to be updated.
| 1077 | LLVM_DEBUG(dbgs() << " to " << NewAS << '\n'); |
| 1078 | InferredAddrSpace[&V] = NewAS; |
| 1079 | return true; |
| 1080 | } |
| 1081 | |
/// Replace operand \p OpIdx in \p Inst with \p NewVal if its current value is
/// \p OldVal.
| 1084 | static bool replaceOperandIfSame(Instruction *Inst, unsigned OpIdx, |
| 1085 | Value *OldVal, Value *NewVal) { |
| 1086 | Use &U = Inst->getOperandUse(i: OpIdx); |
| 1087 | if (U.get() == OldVal) { |
| 1088 | U.set(NewVal); |
| 1089 | return true; |
| 1090 | } |
| 1091 | |
| 1092 | return false; |
| 1093 | } |
| 1094 | |
| 1095 | template <typename InstrType> |
| 1096 | static bool replaceSimplePointerUse(const TargetTransformInfo &TTI, |
| 1097 | InstrType *MemInstr, unsigned AddrSpace, |
| 1098 | Value *OldV, Value *NewV) { |
| 1099 | if (!MemInstr->isVolatile() || TTI.hasVolatileVariant(I: MemInstr, AddrSpace)) { |
| 1100 | return replaceOperandIfSame(MemInstr, InstrType::getPointerOperandIndex(), |
| 1101 | OldV, NewV); |
| 1102 | } |
| 1103 | |
| 1104 | return false; |
| 1105 | } |
| 1106 | |
| 1107 | /// If \p OldV is used as the pointer operand of a compatible memory operation |
| 1108 | /// \p Inst, replaces the pointer operand with NewV. |
| 1109 | /// |
| 1110 | /// This covers memory instructions with a single pointer operand that can have |
| 1111 | /// its address space changed by simply mutating the use to a new value. |
| 1112 | /// |
/// Returns true if the replacement was made.
| 1114 | static bool replaceIfSimplePointerUse(const TargetTransformInfo &TTI, |
| 1115 | User *Inst, unsigned AddrSpace, |
| 1116 | Value *OldV, Value *NewV) { |
| 1117 | if (auto *LI = dyn_cast<LoadInst>(Val: Inst)) |
| 1118 | return replaceSimplePointerUse(TTI, MemInstr: LI, AddrSpace, OldV, NewV); |
| 1119 | |
| 1120 | if (auto *SI = dyn_cast<StoreInst>(Val: Inst)) |
| 1121 | return replaceSimplePointerUse(TTI, MemInstr: SI, AddrSpace, OldV, NewV); |
| 1122 | |
| 1123 | if (auto *RMW = dyn_cast<AtomicRMWInst>(Val: Inst)) |
| 1124 | return replaceSimplePointerUse(TTI, MemInstr: RMW, AddrSpace, OldV, NewV); |
| 1125 | |
| 1126 | if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: Inst)) |
| 1127 | return replaceSimplePointerUse(TTI, MemInstr: CmpX, AddrSpace, OldV, NewV); |
| 1128 | |
| 1129 | return false; |
| 1130 | } |
| 1131 | |
| 1132 | /// Update memory intrinsic uses that require more complex processing than |
| 1133 | /// simple memory instructions. These require re-mangling and may have multiple |
| 1134 | /// pointer operands. |
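// As an illustrative sketch, once the destination of
//   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i1 false)
// is inferred to be in addrspace(3), the call is rebuilt via IRBuilder as
//   call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) %d3, ptr %s, i64 %n, i1 false)
// and the original intrinsic is erased.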
| 1135 | static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV, |
| 1136 | Value *NewV) { |
| 1137 | IRBuilder<> B(MI); |
| 1138 | if (auto *MSI = dyn_cast<MemSetInst>(Val: MI)) { |
| 1139 | B.CreateMemSet(Ptr: NewV, Val: MSI->getValue(), Size: MSI->getLength(), Align: MSI->getDestAlign(), |
| 1140 | isVolatile: false, // isVolatile |
| 1141 | AAInfo: MI->getAAMetadata()); |
| 1142 | } else if (auto *MTI = dyn_cast<MemTransferInst>(Val: MI)) { |
| 1143 | Value *Src = MTI->getRawSource(); |
| 1144 | Value *Dest = MTI->getRawDest(); |
| 1145 | |
| 1146 | // Be careful in case this is a self-to-self copy. |
| 1147 | if (Src == OldV) |
| 1148 | Src = NewV; |
| 1149 | |
| 1150 | if (Dest == OldV) |
| 1151 | Dest = NewV; |
| 1152 | |
| 1153 | if (auto *MCI = dyn_cast<MemCpyInst>(Val: MTI)) { |
| 1154 | if (MCI->isForceInlined()) |
| 1155 | B.CreateMemCpyInline(Dst: Dest, DstAlign: MTI->getDestAlign(), Src, |
| 1156 | SrcAlign: MTI->getSourceAlign(), Size: MTI->getLength(), |
| 1157 | isVolatile: false, // isVolatile |
| 1158 | AAInfo: MI->getAAMetadata()); |
| 1159 | else |
| 1160 | B.CreateMemCpy(Dst: Dest, DstAlign: MTI->getDestAlign(), Src, SrcAlign: MTI->getSourceAlign(), |
| 1161 | Size: MTI->getLength(), |
| 1162 | isVolatile: false, // isVolatile |
| 1163 | AAInfo: MI->getAAMetadata()); |
| 1164 | } else { |
| 1165 | assert(isa<MemMoveInst>(MTI)); |
| 1166 | B.CreateMemMove(Dst: Dest, DstAlign: MTI->getDestAlign(), Src, SrcAlign: MTI->getSourceAlign(), |
| 1167 | Size: MTI->getLength(), |
| 1168 | isVolatile: false, // isVolatile |
| 1169 | AAInfo: MI->getAAMetadata()); |
| 1170 | } |
| 1171 | } else |
| 1172 | llvm_unreachable("unhandled MemIntrinsic" ); |
| 1173 | |
| 1174 | MI->eraseFromParent(); |
| 1175 | return true; |
| 1176 | } |
| 1177 | |
// Returns true if it is OK to change the address space of constant \p C with
// a ConstantExpr addrspacecast.
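// For example (illustrative), a flat null pointer, an undef pointer, or a flat
// inttoptr constant may be recast to a specific address space, whereas a
// constant already in one specific address space is never recast directly to a
// different specific address space.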
| 1180 | bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C, |
| 1181 | unsigned NewAS) const { |
| 1182 | assert(NewAS != UninitializedAddressSpace); |
| 1183 | |
| 1184 | unsigned SrcAS = C->getType()->getPointerAddressSpace(); |
| 1185 | if (SrcAS == NewAS || isa<UndefValue>(Val: C)) |
| 1186 | return true; |
| 1187 | |
| 1188 | // Prevent illegal casts between different non-flat address spaces. |
| 1189 | if (SrcAS != FlatAddrSpace && NewAS != FlatAddrSpace) |
| 1190 | return false; |
| 1191 | |
| 1192 | if (isa<ConstantPointerNull>(Val: C)) |
| 1193 | return true; |
| 1194 | |
| 1195 | if (auto *Op = dyn_cast<Operator>(Val: C)) { |
| 1196 | // If we already have a constant addrspacecast, it should be safe to cast it |
| 1197 | // off. |
| 1198 | if (Op->getOpcode() == Instruction::AddrSpaceCast) |
| 1199 | return isSafeToCastConstAddrSpace(C: cast<Constant>(Val: Op->getOperand(i: 0)), |
| 1200 | NewAS); |
| 1201 | |
| 1202 | if (Op->getOpcode() == Instruction::IntToPtr && |
| 1203 | Op->getType()->getPointerAddressSpace() == FlatAddrSpace) |
| 1204 | return true; |
| 1205 | } |
| 1206 | |
| 1207 | return false; |
| 1208 | } |
| 1209 | |
| 1210 | static Value::use_iterator skipToNextUser(Value::use_iterator I, |
| 1211 | Value::use_iterator End) { |
| 1212 | User *CurUser = I->getUser(); |
| 1213 | ++I; |
| 1214 | |
| 1215 | while (I != End && I->getUser() == CurUser) |
| 1216 | ++I; |
| 1217 | |
| 1218 | return I; |
| 1219 | } |
| 1220 | |
void InferAddressSpacesImpl::performPointerReplacement(
    Value *V, Value *NewV, Use &U, ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<Instruction *> &DeadInstructions) const {

  User *CurUser = U.getUser();

  unsigned AddrSpace = V->getType()->getPointerAddressSpace();
  if (replaceIfSimplePointerUse(*TTI, CurUser, AddrSpace, V, NewV))
    return;

  // Skip if the current user is the new value itself.
  if (CurUser == NewV)
    return;

  auto *CurUserI = dyn_cast<Instruction>(CurUser);
  if (!CurUserI || CurUserI->getFunction() != F)
    return;

  // Handle more complex cases, such as intrinsics that need to be remangled.
  if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
    if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
      return;
  }

  if (auto *II = dyn_cast<IntrinsicInst>(CurUser)) {
    if (rewriteIntrinsicOperands(II, V, NewV))
      return;
  }

  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(CurUserI)) {
    // If we can infer that both pointers are in the same addrspace,
    // transform e.g.
    //   %cmp = icmp eq float* %p, %q
    // into
    //   %cmp = icmp eq float addrspace(3)* %new_p, %new_q

    unsigned NewAS = NewV->getType()->getPointerAddressSpace();
    int SrcIdx = U.getOperandNo();
    int OtherIdx = (SrcIdx == 0) ? 1 : 0;
    Value *OtherSrc = Cmp->getOperand(OtherIdx);

    if (Value *OtherNewV = ValueWithNewAddrSpace.lookup(OtherSrc)) {
      if (OtherNewV->getType()->getPointerAddressSpace() == NewAS) {
        Cmp->setOperand(OtherIdx, OtherNewV);
        Cmp->setOperand(SrcIdx, NewV);
        return;
      }
    }

    // Even if the type mismatches, we can cast the constant.
    if (auto *KOtherSrc = dyn_cast<Constant>(OtherSrc)) {
      if (isSafeToCastConstAddrSpace(KOtherSrc, NewAS)) {
        Cmp->setOperand(SrcIdx, NewV);
        Cmp->setOperand(OtherIdx, ConstantExpr::getAddrSpaceCast(
                                      KOtherSrc, NewV->getType()));
        return;
      }
    }
  }

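  // An addrspacecast user that casts V into exactly the inferred address space
  // becomes a no-op once V has been rewritten, so forward its uses to NewV and
  // let it be deleted.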
  if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUserI)) {
    unsigned NewAS = NewV->getType()->getPointerAddressSpace();
    if (ASC->getDestAddressSpace() == NewAS) {
      ASC->replaceAllUsesWith(NewV);
      DeadInstructions.push_back(ASC);
      return;
    }
  }

  // Otherwise, replace the use with an addrspacecast of NewV back to V's
  // original (flat-pointer) type.
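  // Illustrative IR (the callee @use is hypothetical): a user that cannot take
  // the new pointer type directly, such as
  //   call void @use(ptr %p)
  // ends up as
  //   %p.flat = addrspacecast ptr addrspace(3) %p.new to ptr
  //   call void @use(ptr %p.flat)
  // with the cast inserted just after the definition of %p.new.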
  if (isa<Instruction>(V) || isa<Instruction>(NewV)) {
    // Don't create a copy of the original addrspacecast.
    if (U == V && isa<AddrSpaceCastInst>(V))
      return;

    // Insert the addrspacecast after NewV.
    BasicBlock::iterator InsertPos;
    if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
      InsertPos = std::next(NewVInst->getIterator());
    else
      InsertPos = std::next(cast<Instruction>(V)->getIterator());

    while (isa<PHINode>(InsertPos))
      ++InsertPos;
    // This instruction may contain multiple uses of V, update them all.
    CurUser->replaceUsesOfWith(
        V, new AddrSpaceCastInst(NewV, V->getType(), "", InsertPos));
  } else {
    CurUserI->replaceUsesOfWith(
        V, ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV), V->getType()));
  }
}

bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    const ValueToAddrSpaceMapTy &InferredAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS) const {
  // For each address expression to be modified, create a clone of it with its
  // pointer operands converted to the new address space. Since the pointer
  // operands are converted, the clone is naturally in the new address space by
  // construction.
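  // The rewrite therefore proceeds in two phases: first materialize a clone
  // for every rewritable expression (patching placeholder operands once all
  // clones exist), then redirect the uses of each original expression to its
  // clone.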
  ValueToValueMapTy ValueWithNewAddrSpace;
  SmallVector<const Use *, 32> PoisonUsesToFix;
  for (Value *V : Postorder) {
    unsigned NewAddrSpace = InferredAddrSpace.lookup(V);

    // In some degenerate cases (e.g. invalid IR in unreachable code), we may
    // not even infer the value to have its original address space.
    if (NewAddrSpace == UninitializedAddressSpace)
      continue;

    if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
      Value *New =
          cloneValueWithNewAddressSpace(V, NewAddrSpace, ValueWithNewAddrSpace,
                                        PredicatedAS, &PoisonUsesToFix);
      if (New)
        ValueWithNewAddrSpace[V] = New;
    }
  }

  if (ValueWithNewAddrSpace.empty())
    return false;

  // Fixes all the poison uses generated by cloneInstructionWithNewAddressSpace.
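  // (Operands whose own clone did not exist yet at cloning time, e.g. PHI
  // incoming values in a cycle, were left as poison placeholders; patch them
  // now that every clone has been created.)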
  for (const Use *PoisonUse : PoisonUsesToFix) {
    User *V = PoisonUse->getUser();
    User *NewV = cast_or_null<User>(ValueWithNewAddrSpace.lookup(V));
    if (!NewV)
      continue;

    unsigned OperandNo = PoisonUse->getOperandNo();
    assert(isa<PoisonValue>(NewV->getOperand(OperandNo)));
    NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(PoisonUse->get()));
  }

  SmallVector<Instruction *, 16> DeadInstructions;
  ValueToValueMapTy VMap;
  ValueMapper VMapper(VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

  // Replaces the uses of the old address expressions with the new ones.
  for (const WeakTrackingVH &WVH : Postorder) {
    assert(WVH && "value was unexpectedly deleted");
    Value *V = WVH;
    Value *NewV = ValueWithNewAddrSpace.lookup(V);
    if (NewV == nullptr)
      continue;

    LLVM_DEBUG(dbgs() << "Replacing the uses of " << *V << "\n  with\n  "
                      << *NewV << '\n');

    if (Constant *C = dyn_cast<Constant>(V)) {
      Constant *Replace =
          ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV), C->getType());
      if (C != Replace) {
        LLVM_DEBUG(dbgs() << "Inserting replacement const cast: " << Replace
                          << ": " << *Replace << '\n');
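        // C may also be reachable from instructions in F only through nested
        // constant expressions; collect those expression users in a worklist
        // and remap the enclosing instructions below.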
        SmallVector<User *, 16> WorkList;
        for (User *U : make_early_inc_range(C->users())) {
          if (auto *I = dyn_cast<Instruction>(U)) {
            if (I->getFunction() == F)
              I->replaceUsesOfWith(C, Replace);
          } else {
            WorkList.append(U->user_begin(), U->user_end());
          }
        }
        if (!WorkList.empty()) {
          VMap[C] = Replace;
          DenseSet<User *> Visited{WorkList.begin(), WorkList.end()};
          while (!WorkList.empty()) {
            User *U = WorkList.pop_back_val();
            if (auto *I = dyn_cast<Instruction>(U)) {
              if (I->getFunction() == F)
                VMapper.remapInstruction(*I);
              continue;
            }
            for (User *U2 : U->users())
              if (Visited.insert(U2).second)
                WorkList.push_back(U2);
          }
        }
        V = Replace;
      }
    }

    Value::use_iterator I, E, Next;
    for (I = V->use_begin(), E = V->use_end(); I != E;) {
      Use &U = *I;

      // Some users may see the same pointer operand in multiple operands. Skip
      // to the next instruction.
      I = skipToNextUser(I, E);

      performPointerReplacement(V, NewV, U, ValueWithNewAddrSpace,
                                DeadInstructions);
    }

    if (V->use_empty()) {
      if (Instruction *I = dyn_cast<Instruction>(V))
        DeadInstructions.push_back(I);
    }
  }

  for (Instruction *I : DeadInstructions)
    RecursivelyDeleteTriviallyDeadInstructions(I);

  return true;
}

bool InferAddressSpaces::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
  return InferAddressSpacesImpl(
             getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), DT,
             &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
             FlatAddrSpace)
      .run(F);
}

FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {
  return new InferAddressSpaces(AddressSpace);
}

InferAddressSpacesPass::InferAddressSpacesPass()
    : FlatAddrSpace(UninitializedAddressSpace) {}
InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
    : FlatAddrSpace(AddressSpace) {}

PreservedAnalyses InferAddressSpacesPass::run(Function &F,
                                              FunctionAnalysisManager &AM) {
  bool Changed =
      InferAddressSpacesImpl(AM.getResult<AssumptionAnalysis>(F),
                             AM.getCachedResult<DominatorTreeAnalysis>(F),
                             &AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
          .run(F);
  if (Changed) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    PA.preserve<DominatorTreeAnalysis>();
    return PA;
  }
  return PreservedAnalyses::all();
}