| 1 | //===- InferAddressSpace.cpp - --------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // CUDA C/C++ includes memory space designation as variable type qualifers (such |
| 10 | // as __global__ and __shared__). Knowing the space of a memory access allows |
| 11 | // CUDA compilers to emit faster PTX loads and stores. For example, a load from |
| 12 | // shared memory can be translated to `ld.shared` which is roughly 10% faster |
| 13 | // than a generic `ld` on an NVIDIA Tesla K40c. |
| 14 | // |
| 15 | // Unfortunately, type qualifiers only apply to variable declarations, so CUDA |
| 16 | // compilers must infer the memory space of an address expression from |
| 17 | // type-qualified variables. |
| 18 | // |
| 19 | // LLVM IR uses non-zero (so-called) specific address spaces to represent memory |
| 20 | // spaces (e.g. addrspace(3) means shared memory). The Clang frontend |
| 21 | // places only type-qualified variables in specific address spaces, and then |
| 22 | // conservatively `addrspacecast`s each type-qualified variable to addrspace(0) |
| 23 | // (so-called the generic address space) for other instructions to use. |
| 24 | // |
// For example, Clang translates the following CUDA code
| 26 | // __shared__ float a[10]; |
| 27 | // float v = a[i]; |
| 28 | // to |
| 29 | // %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* |
| 30 | // %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i |
| 31 | // %v = load float, float* %1 ; emits ld.f32 |
| 32 | // @a is in addrspace(3) since it's type-qualified, but its use from %1 is |
| 33 | // redirected to %0 (the generic version of @a). |
| 34 | // |
| 35 | // The optimization implemented in this file propagates specific address spaces |
| 36 | // from type-qualified variable declarations to its users. For example, it |
| 37 | // optimizes the above IR to |
| 38 | // %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i |
| 39 | // %v = load float addrspace(3)* %1 ; emits ld.shared.f32 |
// propagating the addrspace(3) from @a to %1. As a result, the NVPTX
| 41 | // codegen is able to emit ld.shared.f32 for %v. |
| 42 | // |
| 43 | // Address space inference works in two steps. First, it uses a data-flow |
| 44 | // analysis to infer as many generic pointers as possible to point to only one |
| 45 | // specific address space. In the above example, it can prove that %1 only |
| 46 | // points to addrspace(3). This algorithm was published in |
| 47 | // CUDA: Compiling and optimizing for a GPU platform |
| 48 | // Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang |
| 49 | // ICCS 2012 |
| 50 | // |
| 51 | // Then, address space inference replaces all refinable generic pointers with |
| 52 | // equivalent specific pointers. |
| 53 | // |
| 54 | // The major challenge of implementing this optimization is handling PHINodes, |
| 55 | // which may create loops in the data flow graph. This brings two complications. |
| 56 | // |
| 57 | // First, the data flow analysis in Step 1 needs to be circular. For example, |
| 58 | // %generic.input = addrspacecast float addrspace(3)* %input to float* |
| 59 | // loop: |
| 60 | // %y = phi [ %generic.input, %y2 ] |
| 61 | // %y2 = getelementptr %y, 1 |
| 62 | // %v = load %y2 |
| 63 | // br ..., label %loop, ... |
| 64 | // proving %y specific requires proving both %generic.input and %y2 specific, |
| 65 | // but proving %y2 specific circles back to %y. To address this complication, |
| 66 | // the data flow analysis operates on a lattice: |
| 67 | // uninitialized > specific address spaces > generic. |
| 68 | // All address expressions (our implementation only considers phi, bitcast, |
| 69 | // addrspacecast, and getelementptr) start with the uninitialized address space. |
| 70 | // The monotone transfer function moves the address space of a pointer down a |
| 71 | // lattice path from uninitialized to specific and then to generic. A join |
| 72 | // operation of two different specific address spaces pushes the expression down |
| 73 | // to the generic address space. The analysis completes once it reaches a fixed |
| 74 | // point. |
| 75 | // |
| 76 | // Second, IR rewriting in Step 2 also needs to be circular. For example, |
| 77 | // converting %y to addrspace(3) requires the compiler to know the converted |
| 78 | // %y2, but converting %y2 needs the converted %y. To address this complication, |
| 79 | // we break these cycles using "poison" placeholders. When converting an |
| 80 | // instruction `I` to a new address space, if its operand `Op` is not converted |
| 81 | // yet, we let `I` temporarily use `poison` and fix all the uses later. |
| 82 | // For instance, our algorithm first converts %y to |
| 83 | // %y' = phi float addrspace(3)* [ %input, poison ] |
| 84 | // Then, it converts %y2 to |
| 85 | // %y2' = getelementptr %y', 1 |
| 86 | // Finally, it fixes the poison in %y' so that |
| 87 | // %y' = phi float addrspace(3)* [ %input, %y2' ] |
| 88 | // |
| 89 | //===----------------------------------------------------------------------===// |
| 90 | |
| 91 | #include "llvm/Transforms/Scalar/InferAddressSpaces.h" |
| 92 | #include "llvm/ADT/ArrayRef.h" |
| 93 | #include "llvm/ADT/DenseMap.h" |
| 94 | #include "llvm/ADT/DenseSet.h" |
| 95 | #include "llvm/ADT/SetVector.h" |
| 96 | #include "llvm/ADT/SmallVector.h" |
| 97 | #include "llvm/Analysis/AssumptionCache.h" |
| 98 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 99 | #include "llvm/Analysis/ValueTracking.h" |
| 100 | #include "llvm/IR/Argument.h" |
| 101 | #include "llvm/IR/BasicBlock.h" |
| 102 | #include "llvm/IR/Constant.h" |
| 103 | #include "llvm/IR/Constants.h" |
| 104 | #include "llvm/IR/Dominators.h" |
| 105 | #include "llvm/IR/Function.h" |
| 106 | #include "llvm/IR/IRBuilder.h" |
| 107 | #include "llvm/IR/InstIterator.h" |
| 108 | #include "llvm/IR/Instruction.h" |
| 109 | #include "llvm/IR/Instructions.h" |
| 110 | #include "llvm/IR/IntrinsicInst.h" |
| 111 | #include "llvm/IR/Intrinsics.h" |
| 112 | #include "llvm/IR/LLVMContext.h" |
| 113 | #include "llvm/IR/Operator.h" |
| 114 | #include "llvm/IR/PassManager.h" |
| 115 | #include "llvm/IR/PatternMatch.h" |
| 116 | #include "llvm/IR/Type.h" |
| 117 | #include "llvm/IR/Use.h" |
| 118 | #include "llvm/IR/User.h" |
| 119 | #include "llvm/IR/Value.h" |
| 120 | #include "llvm/IR/ValueHandle.h" |
| 121 | #include "llvm/InitializePasses.h" |
| 122 | #include "llvm/Pass.h" |
| 123 | #include "llvm/Support/Casting.h" |
| 124 | #include "llvm/Support/CommandLine.h" |
| 125 | #include "llvm/Support/Debug.h" |
| 126 | #include "llvm/Support/ErrorHandling.h" |
| 127 | #include "llvm/Support/KnownBits.h" |
| 128 | #include "llvm/Support/raw_ostream.h" |
| 129 | #include "llvm/Transforms/Scalar.h" |
| 130 | #include "llvm/Transforms/Utils/Local.h" |
| 131 | #include "llvm/Transforms/Utils/ValueMapper.h" |
| 132 | #include <cassert> |
| 133 | #include <iterator> |
| 134 | #include <limits> |
| 135 | #include <optional> |
| 136 | #include <utility> |
| 137 | #include <vector> |
| 138 | |
| 139 | #define DEBUG_TYPE "infer-address-spaces" |
| 140 | |
| 141 | using namespace llvm; |
| 142 | using namespace llvm::PatternMatch; |
| 143 | |
| 144 | static cl::opt<bool> AssumeDefaultIsFlatAddressSpace( |
| 145 | "assume-default-is-flat-addrspace" , cl::init(Val: false), cl::ReallyHidden, |
| 146 | cl::desc("The default address space is assumed as the flat address space. " |
| 147 | "This is mainly for test purpose." )); |
| 148 | |
| 149 | static const unsigned UninitializedAddressSpace = |
| 150 | std::numeric_limits<unsigned>::max(); |
| 151 | |
| 152 | namespace { |
| 153 | |
| 154 | using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>; |
| 155 | // Different from ValueToAddrSpaceMapTy, where a new addrspace is inferred on |
| 156 | // the *def* of a value, PredicatedAddrSpaceMapTy is map where a new |
| 157 | // addrspace is inferred on the *use* of a pointer. This map is introduced to |
| 158 | // infer addrspace from the addrspace predicate assumption built from assume |
| 159 | // intrinsic. In that scenario, only specific uses (under valid assumption |
| 160 | // context) could be inferred with a new addrspace. |
| 161 | using PredicatedAddrSpaceMapTy = |
| 162 | DenseMap<std::pair<const Value *, const Value *>, unsigned>; |
| 163 | using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>; |
| 164 | |
/// Legacy pass manager driver. Holds only the flat address space number and
/// wires up the required analyses (assumption cache and TTI); the dominator
/// tree is preserved but not required.
class InferAddressSpaces : public FunctionPass {
  unsigned FlatAddrSpace = 0;

public:
  static char ID;

  // Default construction leaves the flat address space unknown
  // (UninitializedAddressSpace).
  InferAddressSpaces()
      : FunctionPass(ID), FlatAddrSpace(UninitializedAddressSpace) {
    initializeInferAddressSpacesPass(*PassRegistry::getPassRegistry());
  }
  // Construction with an explicitly pinned flat address space AS.
  InferAddressSpaces(unsigned AS) : FunctionPass(ID), FlatAddrSpace(AS) {
    initializeInferAddressSpacesPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override;
};
| 188 | |
| 189 | class InferAddressSpacesImpl { |
| 190 | AssumptionCache &AC; |
| 191 | Function *F = nullptr; |
| 192 | const DominatorTree *DT = nullptr; |
| 193 | const TargetTransformInfo *TTI = nullptr; |
| 194 | const DataLayout *DL = nullptr; |
| 195 | |
| 196 | /// Target specific address space which uses of should be replaced if |
| 197 | /// possible. |
| 198 | unsigned FlatAddrSpace = 0; |
| 199 | DenseMap<const Value *, Value *> PtrIntCastPairs; |
| 200 | |
| 201 | // Tries to find if the inttoptr instruction is derived from an pointer have |
| 202 | // specific address space, and is safe to propagate the address space to the |
| 203 | // new pointer that inttoptr produces. |
| 204 | Value *getIntToPtrPointerOperand(const Operator *I2P) const; |
| 205 | // Tries to find if the inttoptr instruction is derived from an pointer have |
| 206 | // specific address space, and is safe to propagate the address space to the |
| 207 | // new pointer that inttoptr produces. If the old pointer is found, cache the |
| 208 | // <OldPtr, inttoptr> pairs to a map. |
| 209 | void collectIntToPtrPointerOperand(); |
| 210 | // Check if an old pointer is found ahead of time. The safety has been checked |
| 211 | // when collecting the inttoptr original pointer and the result is cached in |
| 212 | // PtrIntCastPairs. |
| 213 | bool isSafeToCastIntToPtrAddrSpace(const Operator *I2P) const { |
| 214 | return PtrIntCastPairs.contains(Val: I2P); |
| 215 | } |
| 216 | bool isAddressExpression(const Value &V, const DataLayout &DL, |
| 217 | const TargetTransformInfo *TTI) const; |
| 218 | Value *cloneConstantExprWithNewAddressSpace( |
| 219 | ConstantExpr *CE, unsigned NewAddrSpace, |
| 220 | const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL, |
| 221 | const TargetTransformInfo *TTI) const; |
| 222 | |
| 223 | SmallVector<Value *, 2> |
| 224 | getPointerOperands(const Value &V, const DataLayout &DL, |
| 225 | const TargetTransformInfo *TTI) const; |
| 226 | |
| 227 | // Try to update the address space of V. If V is updated, returns true and |
| 228 | // false otherwise. |
| 229 | bool updateAddressSpace(const Value &V, |
| 230 | ValueToAddrSpaceMapTy &InferredAddrSpace, |
| 231 | PredicatedAddrSpaceMapTy &PredicatedAS) const; |
| 232 | |
| 233 | // Tries to infer the specific address space of each address expression in |
| 234 | // Postorder. |
| 235 | void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder, |
| 236 | ValueToAddrSpaceMapTy &InferredAddrSpace, |
| 237 | PredicatedAddrSpaceMapTy &PredicatedAS) const; |
| 238 | |
| 239 | bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const; |
| 240 | |
| 241 | Value *clonePtrMaskWithNewAddressSpace( |
| 242 | IntrinsicInst *I, unsigned NewAddrSpace, |
| 243 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
| 244 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
| 245 | SmallVectorImpl<const Use *> *PoisonUsesToFix) const; |
| 246 | |
| 247 | Value *cloneInstructionWithNewAddressSpace( |
| 248 | Instruction *I, unsigned NewAddrSpace, |
| 249 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
| 250 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
| 251 | SmallVectorImpl<const Use *> *PoisonUsesToFix) const; |
| 252 | |
| 253 | void performPointerReplacement( |
| 254 | Value *V, Value *NewV, Use &U, ValueToValueMapTy &ValueWithNewAddrSpace, |
| 255 | SmallVectorImpl<Instruction *> &DeadInstructions) const; |
| 256 | |
| 257 | // Changes the flat address expressions in function F to point to specific |
| 258 | // address spaces if InferredAddrSpace says so. Postorder is the postorder of |
| 259 | // all flat expressions in the use-def graph of function F. |
| 260 | bool rewriteWithNewAddressSpaces( |
| 261 | ArrayRef<WeakTrackingVH> Postorder, |
| 262 | const ValueToAddrSpaceMapTy &InferredAddrSpace, |
| 263 | const PredicatedAddrSpaceMapTy &PredicatedAS) const; |
| 264 | |
| 265 | void appendsFlatAddressExpressionToPostorderStack( |
| 266 | Value *V, PostorderStackTy &PostorderStack, |
| 267 | DenseSet<Value *> &Visited) const; |
| 268 | |
| 269 | bool rewriteIntrinsicOperands(IntrinsicInst *II, Value *OldV, |
| 270 | Value *NewV) const; |
| 271 | void collectRewritableIntrinsicOperands(IntrinsicInst *II, |
| 272 | PostorderStackTy &PostorderStack, |
| 273 | DenseSet<Value *> &Visited) const; |
| 274 | |
| 275 | std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const; |
| 276 | |
| 277 | Value *cloneValueWithNewAddressSpace( |
| 278 | Value *V, unsigned NewAddrSpace, |
| 279 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
| 280 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
| 281 | SmallVectorImpl<const Use *> *PoisonUsesToFix) const; |
| 282 | unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const; |
| 283 | |
| 284 | unsigned getPredicatedAddrSpace(const Value &PtrV, |
| 285 | const Value *UserCtx) const; |
| 286 | |
| 287 | public: |
| 288 | InferAddressSpacesImpl(AssumptionCache &AC, const DominatorTree *DT, |
| 289 | const TargetTransformInfo *TTI, unsigned FlatAddrSpace) |
| 290 | : AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {} |
| 291 | bool run(Function &F); |
| 292 | }; |
| 293 | |
| 294 | } // end anonymous namespace |
| 295 | |
// Legacy pass manager registration: the pass requires the assumption cache
// and TTI (see getAnalysisUsage above).
char InferAddressSpaces::ID = 0;

INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                    false, false)
| 304 | |
| 305 | static Type *getPtrOrVecOfPtrsWithNewAS(Type *Ty, unsigned NewAddrSpace) { |
| 306 | assert(Ty->isPtrOrPtrVectorTy()); |
| 307 | PointerType *NPT = PointerType::get(C&: Ty->getContext(), AddressSpace: NewAddrSpace); |
| 308 | return Ty->getWithNewType(EltTy: NPT); |
| 309 | } |
| 310 | |
| 311 | // Check whether that's no-op pointer bitcast using a pair of |
| 312 | // `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over |
| 313 | // different address spaces. |
| 314 | static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL, |
| 315 | const TargetTransformInfo *TTI) { |
| 316 | assert(I2P->getOpcode() == Instruction::IntToPtr); |
| 317 | auto *P2I = dyn_cast<Operator>(Val: I2P->getOperand(i: 0)); |
| 318 | if (!P2I || P2I->getOpcode() != Instruction::PtrToInt) |
| 319 | return false; |
| 320 | // Check it's really safe to treat that pair of `ptrtoint`/`inttoptr` as a |
| 321 | // no-op cast. Besides checking both of them are no-op casts, as the |
| 322 | // reinterpreted pointer may be used in other pointer arithmetic, we also |
| 323 | // need to double-check that through the target-specific hook. That ensures |
| 324 | // the underlying target also agrees that's a no-op address space cast and |
| 325 | // pointer bits are preserved. |
| 326 | // The current IR spec doesn't have clear rules on address space casts, |
| 327 | // especially a clear definition for pointer bits in non-default address |
| 328 | // spaces. It would be undefined if that pointer is dereferenced after an |
| 329 | // invalid reinterpret cast. Also, due to the unclearness for the meaning of |
| 330 | // bits in non-default address spaces in the current spec, the pointer |
| 331 | // arithmetic may also be undefined after invalid pointer reinterpret cast. |
| 332 | // However, as we confirm through the target hooks that it's a no-op |
| 333 | // addrspacecast, it doesn't matter since the bits should be the same. |
| 334 | unsigned P2IOp0AS = P2I->getOperand(i: 0)->getType()->getPointerAddressSpace(); |
| 335 | unsigned I2PAS = I2P->getType()->getPointerAddressSpace(); |
| 336 | return CastInst::isNoopCast(Opcode: Instruction::CastOps(I2P->getOpcode()), |
| 337 | SrcTy: I2P->getOperand(i: 0)->getType(), DstTy: I2P->getType(), |
| 338 | DL) && |
| 339 | CastInst::isNoopCast(Opcode: Instruction::CastOps(P2I->getOpcode()), |
| 340 | SrcTy: P2I->getOperand(i: 0)->getType(), DstTy: P2I->getType(), |
| 341 | DL) && |
| 342 | (P2IOp0AS == I2PAS || TTI->isNoopAddrSpaceCast(FromAS: P2IOp0AS, ToAS: I2PAS)); |
| 343 | } |
| 344 | |
| 345 | // Returns true if V is an address expression. |
| 346 | // TODO: Currently, we only consider: |
| 347 | // - arguments |
| 348 | // - phi, bitcast, addrspacecast, and getelementptr operators |
| 349 | bool InferAddressSpacesImpl::isAddressExpression( |
| 350 | const Value &V, const DataLayout &DL, |
| 351 | const TargetTransformInfo *TTI) const { |
| 352 | |
| 353 | if (const Argument *Arg = dyn_cast<Argument>(Val: &V)) |
| 354 | return Arg->getType()->isPointerTy() && |
| 355 | TTI->getAssumedAddrSpace(V: &V) != UninitializedAddressSpace; |
| 356 | |
| 357 | const Operator *Op = dyn_cast<Operator>(Val: &V); |
| 358 | if (!Op) |
| 359 | return false; |
| 360 | |
| 361 | switch (Op->getOpcode()) { |
| 362 | case Instruction::PHI: |
| 363 | assert(Op->getType()->isPtrOrPtrVectorTy()); |
| 364 | return true; |
| 365 | case Instruction::BitCast: |
| 366 | case Instruction::AddrSpaceCast: |
| 367 | case Instruction::GetElementPtr: |
| 368 | return true; |
| 369 | case Instruction::Select: |
| 370 | return Op->getType()->isPtrOrPtrVectorTy(); |
| 371 | case Instruction::Call: { |
| 372 | const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &V); |
| 373 | return II && II->getIntrinsicID() == Intrinsic::ptrmask; |
| 374 | } |
| 375 | case Instruction::IntToPtr: |
| 376 | return isNoopPtrIntCastPair(I2P: Op, DL, TTI) || |
| 377 | isSafeToCastIntToPtrAddrSpace(I2P: Op); |
| 378 | default: |
| 379 | // That value is an address expression if it has an assumed address space. |
| 380 | return TTI->getAssumedAddrSpace(V: &V) != UninitializedAddressSpace; |
| 381 | } |
| 382 | } |
| 383 | |
| 384 | // Returns the pointer operands of V. |
| 385 | // |
| 386 | // Precondition: V is an address expression. |
| 387 | SmallVector<Value *, 2> InferAddressSpacesImpl::getPointerOperands( |
| 388 | const Value &V, const DataLayout &DL, |
| 389 | const TargetTransformInfo *TTI) const { |
| 390 | if (isa<Argument>(Val: &V)) |
| 391 | return {}; |
| 392 | |
| 393 | const Operator &Op = cast<Operator>(Val: V); |
| 394 | switch (Op.getOpcode()) { |
| 395 | case Instruction::PHI: { |
| 396 | auto IncomingValues = cast<PHINode>(Val: Op).incoming_values(); |
| 397 | return {IncomingValues.begin(), IncomingValues.end()}; |
| 398 | } |
| 399 | case Instruction::BitCast: |
| 400 | case Instruction::AddrSpaceCast: |
| 401 | case Instruction::GetElementPtr: |
| 402 | return {Op.getOperand(i: 0)}; |
| 403 | case Instruction::Select: |
| 404 | return {Op.getOperand(i: 1), Op.getOperand(i: 2)}; |
| 405 | case Instruction::Call: { |
| 406 | const IntrinsicInst &II = cast<IntrinsicInst>(Val: Op); |
| 407 | assert(II.getIntrinsicID() == Intrinsic::ptrmask && |
| 408 | "unexpected intrinsic call" ); |
| 409 | return {II.getArgOperand(i: 0)}; |
| 410 | } |
| 411 | case Instruction::IntToPtr: { |
| 412 | if (isNoopPtrIntCastPair(I2P: &Op, DL, TTI)) { |
| 413 | auto *P2I = cast<Operator>(Val: Op.getOperand(i: 0)); |
| 414 | return {P2I->getOperand(i: 0)}; |
| 415 | } |
| 416 | assert(isSafeToCastIntToPtrAddrSpace(&Op)); |
| 417 | return {getIntToPtrPointerOperand(I2P: &Op)}; |
| 418 | } |
| 419 | default: |
| 420 | llvm_unreachable("Unexpected instruction type." ); |
| 421 | } |
| 422 | } |
| 423 | |
| 424 | // Return mask. The 1 in mask indicate the bit is changed. |
| 425 | // This helper function is to compute the max know changed bits for ptr1 and |
| 426 | // ptr2 after the operation `ptr2 = ptr1 Op Mask`. |
| 427 | static APInt computeMaxChangedPtrBits(const Operator *Op, const Value *Mask, |
| 428 | const DataLayout &DL, AssumptionCache *AC, |
| 429 | const DominatorTree *DT) { |
| 430 | KnownBits Known = computeKnownBits(V: Mask, DL, AC, CxtI: nullptr, DT); |
| 431 | switch (Op->getOpcode()) { |
| 432 | case Instruction::Xor: |
| 433 | case Instruction::Or: |
| 434 | return ~Known.Zero; |
| 435 | case Instruction::And: |
| 436 | return ~Known.One; |
| 437 | default: |
| 438 | return APInt::getAllOnes(numBits: Known.getBitWidth()); |
| 439 | } |
| 440 | } |
| 441 | |
| 442 | Value * |
| 443 | InferAddressSpacesImpl::getIntToPtrPointerOperand(const Operator *I2P) const { |
| 444 | assert(I2P->getOpcode() == Instruction::IntToPtr); |
| 445 | if (I2P->getType()->isVectorTy()) |
| 446 | return nullptr; |
| 447 | |
| 448 | // If I2P has been accessed and has the corresponding old pointer value, just |
| 449 | // return true. |
| 450 | if (auto *OldPtr = PtrIntCastPairs.lookup(Val: I2P)) |
| 451 | return OldPtr; |
| 452 | |
| 453 | Value *LogicalOp = I2P->getOperand(i: 0); |
| 454 | Value *OldPtr, *Mask; |
| 455 | if (!match(V: LogicalOp, |
| 456 | P: m_c_BitwiseLogic(L: m_PtrToInt(Op: m_Value(V&: OldPtr)), R: m_Value(V&: Mask)))) |
| 457 | return nullptr; |
| 458 | |
| 459 | Operator *AsCast = dyn_cast<AddrSpaceCastOperator>(Val: OldPtr); |
| 460 | if (!AsCast) |
| 461 | return nullptr; |
| 462 | |
| 463 | unsigned SrcAS = I2P->getType()->getPointerAddressSpace(); |
| 464 | unsigned DstAS = AsCast->getOperand(i: 0)->getType()->getPointerAddressSpace(); |
| 465 | APInt PreservedPtrMask = TTI->getAddrSpaceCastPreservedPtrMask(SrcAS, DstAS); |
| 466 | if (PreservedPtrMask.isZero()) |
| 467 | return nullptr; |
| 468 | APInt ChangedPtrBits = |
| 469 | computeMaxChangedPtrBits(Op: cast<Operator>(Val: LogicalOp), Mask, DL: *DL, AC: &AC, DT); |
| 470 | // Check if the address bits change is within the preserved mask. If the bits |
| 471 | // change is not preserved, it is not safe to perform address space cast. |
| 472 | // The following pattern is not safe to cast address space. |
| 473 | // %1 = ptrtoint ptr addrspace(3) %sp to i32 |
| 474 | // %2 = zext i32 %1 to i64 |
| 475 | // %gp = inttoptr i64 %2 to ptr |
| 476 | assert(ChangedPtrBits.getBitWidth() == PreservedPtrMask.getBitWidth()); |
| 477 | if (ChangedPtrBits.isSubsetOf(RHS: PreservedPtrMask)) |
| 478 | return OldPtr; |
| 479 | |
| 480 | return nullptr; |
| 481 | } |
| 482 | |
| 483 | void InferAddressSpacesImpl::collectIntToPtrPointerOperand() { |
| 484 | // Only collect inttoptr instruction. |
| 485 | // TODO: We need to collect inttoptr constant expression as well. |
| 486 | for (Instruction &I : instructions(F)) { |
| 487 | if (!dyn_cast<IntToPtrInst>(Val: &I)) |
| 488 | continue; |
| 489 | if (auto *OldPtr = getIntToPtrPointerOperand(I2P: cast<Operator>(Val: &I))) |
| 490 | PtrIntCastPairs.insert(KV: {&I, OldPtr}); |
| 491 | } |
| 492 | } |
| 493 | |
| 494 | bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II, |
| 495 | Value *OldV, |
| 496 | Value *NewV) const { |
| 497 | Module *M = II->getParent()->getParent()->getParent(); |
| 498 | Intrinsic::ID IID = II->getIntrinsicID(); |
| 499 | switch (IID) { |
| 500 | case Intrinsic::objectsize: |
| 501 | case Intrinsic::masked_load: { |
| 502 | Type *DestTy = II->getType(); |
| 503 | Type *SrcTy = NewV->getType(); |
| 504 | Function *NewDecl = |
| 505 | Intrinsic::getOrInsertDeclaration(M, id: IID, Tys: {DestTy, SrcTy}); |
| 506 | II->setArgOperand(i: 0, v: NewV); |
| 507 | II->setCalledFunction(NewDecl); |
| 508 | return true; |
| 509 | } |
| 510 | case Intrinsic::ptrmask: |
| 511 | // This is handled as an address expression, not as a use memory operation. |
| 512 | return false; |
| 513 | case Intrinsic::masked_gather: { |
| 514 | Type *RetTy = II->getType(); |
| 515 | Type *NewPtrTy = NewV->getType(); |
| 516 | Function *NewDecl = |
| 517 | Intrinsic::getOrInsertDeclaration(M, id: IID, Tys: {RetTy, NewPtrTy}); |
| 518 | II->setArgOperand(i: 0, v: NewV); |
| 519 | II->setCalledFunction(NewDecl); |
| 520 | return true; |
| 521 | } |
| 522 | case Intrinsic::masked_store: |
| 523 | case Intrinsic::masked_scatter: { |
| 524 | Type *ValueTy = II->getOperand(i_nocapture: 0)->getType(); |
| 525 | Type *NewPtrTy = NewV->getType(); |
| 526 | Function *NewDecl = Intrinsic::getOrInsertDeclaration( |
| 527 | M, id: II->getIntrinsicID(), Tys: {ValueTy, NewPtrTy}); |
| 528 | II->setArgOperand(i: 1, v: NewV); |
| 529 | II->setCalledFunction(NewDecl); |
| 530 | return true; |
| 531 | } |
| 532 | case Intrinsic::prefetch: |
| 533 | case Intrinsic::is_constant: { |
| 534 | Function *NewDecl = Intrinsic::getOrInsertDeclaration( |
| 535 | M, id: II->getIntrinsicID(), Tys: {NewV->getType()}); |
| 536 | II->setArgOperand(i: 0, v: NewV); |
| 537 | II->setCalledFunction(NewDecl); |
| 538 | return true; |
| 539 | } |
| 540 | case Intrinsic::fake_use: { |
| 541 | II->replaceUsesOfWith(From: OldV, To: NewV); |
| 542 | return true; |
| 543 | } |
| 544 | case Intrinsic::lifetime_start: |
| 545 | case Intrinsic::lifetime_end: { |
| 546 | // Always force lifetime markers to work directly on the alloca. |
| 547 | NewV = NewV->stripPointerCasts(); |
| 548 | Function *NewDecl = Intrinsic::getOrInsertDeclaration( |
| 549 | M, id: II->getIntrinsicID(), Tys: {NewV->getType()}); |
| 550 | II->setArgOperand(i: 0, v: NewV); |
| 551 | II->setCalledFunction(NewDecl); |
| 552 | return true; |
| 553 | } |
| 554 | default: { |
| 555 | Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV); |
| 556 | if (!Rewrite) |
| 557 | return false; |
| 558 | if (Rewrite != II) |
| 559 | II->replaceAllUsesWith(V: Rewrite); |
| 560 | return true; |
| 561 | } |
| 562 | } |
| 563 | } |
| 564 | |
| 565 | void InferAddressSpacesImpl::collectRewritableIntrinsicOperands( |
| 566 | IntrinsicInst *II, PostorderStackTy &PostorderStack, |
| 567 | DenseSet<Value *> &Visited) const { |
| 568 | auto IID = II->getIntrinsicID(); |
| 569 | switch (IID) { |
| 570 | case Intrinsic::ptrmask: |
| 571 | case Intrinsic::objectsize: |
| 572 | appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 0), |
| 573 | PostorderStack, Visited); |
| 574 | break; |
| 575 | case Intrinsic::is_constant: { |
| 576 | Value *Ptr = II->getArgOperand(i: 0); |
| 577 | if (Ptr->getType()->isPtrOrPtrVectorTy()) { |
| 578 | appendsFlatAddressExpressionToPostorderStack(V: Ptr, PostorderStack, |
| 579 | Visited); |
| 580 | } |
| 581 | |
| 582 | break; |
| 583 | } |
| 584 | case Intrinsic::masked_load: |
| 585 | case Intrinsic::masked_gather: |
| 586 | case Intrinsic::prefetch: |
| 587 | appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 0), |
| 588 | PostorderStack, Visited); |
| 589 | break; |
| 590 | case Intrinsic::masked_store: |
| 591 | case Intrinsic::masked_scatter: |
| 592 | appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 1), |
| 593 | PostorderStack, Visited); |
| 594 | break; |
| 595 | case Intrinsic::fake_use: { |
| 596 | for (Value *Op : II->operands()) { |
| 597 | if (Op->getType()->isPtrOrPtrVectorTy()) { |
| 598 | appendsFlatAddressExpressionToPostorderStack(V: Op, PostorderStack, |
| 599 | Visited); |
| 600 | } |
| 601 | } |
| 602 | |
| 603 | break; |
| 604 | } |
| 605 | case Intrinsic::lifetime_start: |
| 606 | case Intrinsic::lifetime_end: { |
| 607 | appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 0), |
| 608 | PostorderStack, Visited); |
| 609 | break; |
| 610 | } |
| 611 | default: |
| 612 | SmallVector<int, 2> OpIndexes; |
| 613 | if (TTI->collectFlatAddressOperands(OpIndexes, IID)) { |
| 614 | for (int Idx : OpIndexes) { |
| 615 | appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: Idx), |
| 616 | PostorderStack, Visited); |
| 617 | } |
| 618 | } |
| 619 | break; |
| 620 | } |
| 621 | } |
| 622 | |
| 623 | // Returns all flat address expressions in function F. The elements are |
| 624 | // If V is an unvisited flat address expression, appends V to PostorderStack |
| 625 | // and marks it as visited. |
| 626 | void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack( |
| 627 | Value *V, PostorderStackTy &PostorderStack, |
| 628 | DenseSet<Value *> &Visited) const { |
| 629 | assert(V->getType()->isPtrOrPtrVectorTy()); |
| 630 | |
| 631 | // Generic addressing expressions may be hidden in nested constant |
| 632 | // expressions. |
| 633 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V)) { |
| 634 | // TODO: Look in non-address parts, like icmp operands. |
| 635 | if (isAddressExpression(V: *CE, DL: *DL, TTI) && Visited.insert(V: CE).second) |
| 636 | PostorderStack.emplace_back(Args&: CE, Args: false); |
| 637 | |
| 638 | return; |
| 639 | } |
| 640 | |
| 641 | if (V->getType()->getPointerAddressSpace() == FlatAddrSpace && |
| 642 | isAddressExpression(V: *V, DL: *DL, TTI)) { |
| 643 | if (Visited.insert(V).second) { |
| 644 | PostorderStack.emplace_back(Args&: V, Args: false); |
| 645 | |
| 646 | if (auto *Op = dyn_cast<Operator>(Val: V)) |
| 647 | for (auto &O : Op->operands()) |
| 648 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val&: O)) |
| 649 | if (isAddressExpression(V: *CE, DL: *DL, TTI) && Visited.insert(V: CE).second) |
| 650 | PostorderStack.emplace_back(Args&: CE, Args: false); |
| 651 | } |
| 652 | } |
| 653 | } |
| 654 | |
// Returns all flat address expressions in function F. The elements are ordered
// in postorder.
std::vector<WeakTrackingVH>
InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
  // This function implements a non-recursive postorder traversal of a partial
  // use-def graph of function F.
  PostorderStackTy PostorderStack;
  // The set of visited expressions.
  DenseSet<Value *> Visited;

  auto PushPtrOperand = [&](Value *Ptr) {
    appendsFlatAddressExpressionToPostorderStack(V: Ptr, PostorderStack, Visited);
  };

  // Look at operations that may be interesting to accelerate by moving to a
  // known address space. We primarily aim at loads and stores, but pure
  // addressing calculations may also be faster.
  for (Instruction &I : instructions(F)) {
    if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I)) {
      PushPtrOperand(GEP->getPointerOperand());
    } else if (auto *LI = dyn_cast<LoadInst>(Val: &I))
      PushPtrOperand(LI->getPointerOperand());
    else if (auto *SI = dyn_cast<StoreInst>(Val: &I))
      PushPtrOperand(SI->getPointerOperand());
    else if (auto *RMW = dyn_cast<AtomicRMWInst>(Val: &I))
      PushPtrOperand(RMW->getPointerOperand());
    else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: &I))
      PushPtrOperand(CmpX->getPointerOperand());
    else if (auto *MI = dyn_cast<MemIntrinsic>(Val: &I)) {
      // For memset/memcpy/memmove, any pointer operand can be replaced.
      PushPtrOperand(MI->getRawDest());

      // Handle 2nd operand for memcpy/memmove.
      if (auto *MTI = dyn_cast<MemTransferInst>(Val: MI))
        PushPtrOperand(MTI->getRawSource());
    } else if (auto *II = dyn_cast<IntrinsicInst>(Val: &I))
      collectRewritableIntrinsicOperands(II, PostorderStack, Visited);
    else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: &I)) {
      // Pointer comparisons: both sides are candidates; only push when the
      // compared type is actually pointer-ish.
      if (Cmp->getOperand(i_nocapture: 0)->getType()->isPtrOrPtrVectorTy()) {
        PushPtrOperand(Cmp->getOperand(i_nocapture: 0));
        PushPtrOperand(Cmp->getOperand(i_nocapture: 1));
      }
    } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(Val: &I)) {
      PushPtrOperand(ASC->getPointerOperand());
    } else if (auto *I2P = dyn_cast<IntToPtrInst>(Val: &I)) {
      // Look through no-op inttoptr(ptrtoint P) pairs; otherwise only handle
      // the cast if the target says it is safe to treat as an addrspacecast.
      if (isNoopPtrIntCastPair(I2P: cast<Operator>(Val: I2P), DL: *DL, TTI))
        PushPtrOperand(cast<Operator>(Val: I2P->getOperand(i_nocapture: 0))->getOperand(i: 0));
      else if (isSafeToCastIntToPtrAddrSpace(I2P: cast<Operator>(Val: I2P)))
        PushPtrOperand(getIntToPtrPointerOperand(I2P: cast<Operator>(Val: I2P)));
    } else if (auto *RI = dyn_cast<ReturnInst>(Val: &I)) {
      if (auto *RV = RI->getReturnValue();
          RV && RV->getType()->isPtrOrPtrVectorTy())
        PushPtrOperand(RV);
    }
  }

  std::vector<WeakTrackingVH> Postorder; // The resultant postorder.
  while (!PostorderStack.empty()) {
    Value *TopVal = PostorderStack.back().getPointer();
    // The int bit of a stack entry records whether the entry's operands have
    // already been pushed. If the operands of the expression on the top are
    // already explored, adds that expression to the resultant postorder.
    if (PostorderStack.back().getInt()) {
      if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace)
        Postorder.push_back(x: TopVal);
      PostorderStack.pop_back();
      continue;
    }
    // Otherwise, adds its operands to the stack and explores them.
    PostorderStack.back().setInt(true);
    // Skip values with an assumed address space.
    if (TTI->getAssumedAddrSpace(V: TopVal) == UninitializedAddressSpace) {
      for (Value *PtrOperand : getPointerOperands(V: *TopVal, DL: *DL, TTI)) {
        appendsFlatAddressExpressionToPostorderStack(V: PtrOperand, PostorderStack,
                                                     Visited);
      }
    }
  }
  return Postorder;
}
| 734 | |
| 735 | // Inserts an addrspacecast for a phi node operand, handling the proper |
| 736 | // insertion position based on the operand type. |
| 737 | static Value *phiNodeOperandWithNewAddressSpace(AddrSpaceCastInst *NewI, |
| 738 | Value *Operand) { |
| 739 | auto InsertBefore = [NewI](auto It) { |
| 740 | NewI->insertBefore(It); |
| 741 | NewI->setDebugLoc(It->getDebugLoc()); |
| 742 | return NewI; |
| 743 | }; |
| 744 | |
| 745 | if (auto *Arg = dyn_cast<Argument>(Val: Operand)) { |
| 746 | // For arguments, insert the cast at the beginning of entry block. |
| 747 | // Consider inserting at the dominating block for better placement. |
| 748 | Function *F = Arg->getParent(); |
| 749 | auto InsertI = F->getEntryBlock().getFirstNonPHIIt(); |
| 750 | return InsertBefore(InsertI); |
| 751 | } |
| 752 | |
| 753 | // No check for Constant here, as constants are already handled. |
| 754 | assert(isa<Instruction>(Operand)); |
| 755 | |
| 756 | Instruction *OpInst = cast<Instruction>(Val: Operand); |
| 757 | if (LLVM_UNLIKELY(OpInst->getOpcode() == Instruction::PHI)) { |
| 758 | // If the operand is defined by another PHI node, insert after the first |
| 759 | // non-PHI instruction at the corresponding basic block. |
| 760 | auto InsertI = OpInst->getParent()->getFirstNonPHIIt(); |
| 761 | return InsertBefore(InsertI); |
| 762 | } |
| 763 | |
| 764 | // Otherwise, insert immediately after the operand definition. |
| 765 | NewI->insertAfter(InsertPos: OpInst->getIterator()); |
| 766 | NewI->setDebugLoc(OpInst->getDebugLoc()); |
| 767 | return NewI; |
| 768 | } |
| 769 | |
| 770 | // A helper function for cloneInstructionWithNewAddressSpace. Returns the clone |
| 771 | // of OperandUse.get() in the new address space. If the clone is not ready yet, |
| 772 | // returns poison in the new address space as a placeholder. |
| 773 | static Value *operandWithNewAddressSpaceOrCreatePoison( |
| 774 | const Use &OperandUse, unsigned NewAddrSpace, |
| 775 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
| 776 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
| 777 | SmallVectorImpl<const Use *> *PoisonUsesToFix) { |
| 778 | Value *Operand = OperandUse.get(); |
| 779 | |
| 780 | Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: Operand->getType(), NewAddrSpace); |
| 781 | |
| 782 | if (Constant *C = dyn_cast<Constant>(Val: Operand)) |
| 783 | return ConstantExpr::getAddrSpaceCast(C, Ty: NewPtrTy); |
| 784 | |
| 785 | if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: Operand)) |
| 786 | return NewOperand; |
| 787 | |
| 788 | Instruction *Inst = cast<Instruction>(Val: OperandUse.getUser()); |
| 789 | auto I = PredicatedAS.find(Val: std::make_pair(x&: Inst, y&: Operand)); |
| 790 | if (I != PredicatedAS.end()) { |
| 791 | // Insert an addrspacecast on that operand before the user. |
| 792 | unsigned NewAS = I->second; |
| 793 | Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: Operand->getType(), NewAddrSpace: NewAS); |
| 794 | auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy); |
| 795 | |
| 796 | if (LLVM_UNLIKELY(Inst->getOpcode() == Instruction::PHI)) |
| 797 | return phiNodeOperandWithNewAddressSpace(NewI, Operand); |
| 798 | |
| 799 | NewI->insertBefore(InsertPos: Inst->getIterator()); |
| 800 | NewI->setDebugLoc(Inst->getDebugLoc()); |
| 801 | return NewI; |
| 802 | } |
| 803 | |
| 804 | PoisonUsesToFix->push_back(Elt: &OperandUse); |
| 805 | return PoisonValue::get(T: NewPtrTy); |
| 806 | } |
| 807 | |
// A helper function for cloneInstructionWithNewAddressSpace. Handles the
// conversion of a ptrmask intrinsic instruction.
Value *InferAddressSpacesImpl::clonePtrMaskWithNewAddressSpace(
    IntrinsicInst *I, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS,
    SmallVectorImpl<const Use *> *PoisonUsesToFix) const {
  const Use &PtrOpUse = I->getArgOperandUse(i: 0);
  unsigned OldAddrSpace = PtrOpUse->getType()->getPointerAddressSpace();
  Value *MaskOp = I->getArgOperand(i: 1);
  Type *MaskTy = MaskOp->getType();

  // Start from fully-unknown bits sized for each address space; when the cast
  // between the spaces is not a no-op, let the target refine the known bits.
  KnownBits OldPtrBits{DL->getPointerSizeInBits(AS: OldAddrSpace)};
  KnownBits NewPtrBits{DL->getPointerSizeInBits(AS: NewAddrSpace)};
  if (!TTI->isNoopAddrSpaceCast(FromAS: OldAddrSpace, ToAS: NewAddrSpace)) {
    std::tie(args&: OldPtrBits, args&: NewPtrBits) =
        TTI->computeKnownBitsAddrSpaceCast(ToAS: NewAddrSpace, PtrOp: *PtrOpUse.get());
  }

  // If the pointers in both addrspaces have a bitwise representation and if the
  // representation of the new pointer is smaller (fewer bits) than the old one,
  // check if the mask is applicable to the ptr in the new addrspace. Any
  // masking only clearing the low bits will also apply in the new addrspace
  // Note: checking if the mask clears high bits is not sufficient as those
  // might have already been 0 in the old ptr.
  if (OldPtrBits.getBitWidth() > NewPtrBits.getBitWidth()) {
    KnownBits MaskBits =
        computeKnownBits(V: MaskOp, DL: *DL, /*AssumptionCache=*/AC: nullptr, CxtI: I);
    // Set all unknown bits of the old ptr to 1, so that we are conservative in
    // checking which bits are cleared by the mask.
    OldPtrBits.One |= ~OldPtrBits.Zero;
    // Check which bits are cleared by the mask in the old ptr.
    KnownBits ClearedBits = KnownBits::sub(LHS: OldPtrBits, RHS: OldPtrBits & MaskBits);

    // If the mask isn't applicable to the new ptr, leave the ptrmask as-is and
    // insert an addrspacecast after it.
    if (ClearedBits.countMaxActiveBits() > NewPtrBits.countMaxActiveBits()) {
      std::optional<BasicBlock::iterator> InsertPoint =
          I->getInsertionPointAfterDef();
      assert(InsertPoint && "insertion after ptrmask should be possible" );
      Type *NewPtrType = getPtrOrVecOfPtrsWithNewAS(Ty: I->getType(), NewAddrSpace);
      Instruction *AddrSpaceCast =
          new AddrSpaceCastInst(I, NewPtrType, "" , *InsertPoint);
      AddrSpaceCast->setDebugLoc(I->getDebugLoc());
      return AddrSpaceCast;
    }
  }

  // The mask is safe in the new address space: rebuild the ptrmask on the
  // rewritten pointer, truncating the mask if the new pointer is narrower.
  IRBuilder<> B(I);
  if (NewPtrBits.getBitWidth() < MaskTy->getScalarSizeInBits()) {
    MaskTy = MaskTy->getWithNewBitWidth(NewBitWidth: NewPtrBits.getBitWidth());
    MaskOp = B.CreateTrunc(V: MaskOp, DestTy: MaskTy);
  }
  Value *NewPtr = operandWithNewAddressSpaceOrCreatePoison(
      OperandUse: PtrOpUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
      PoisonUsesToFix);
  return B.CreateIntrinsic(ID: Intrinsic::ptrmask, Types: {NewPtr->getType(), MaskTy},
                           Args: {NewPtr, MaskOp});
}
| 867 | |
| 868 | // Returns a clone of `I` with its operands converted to those specified in |
| 869 | // ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an |
| 870 | // operand whose address space needs to be modified might not exist in |
| 871 | // ValueWithNewAddrSpace. In that case, uses poison as a placeholder operand and |
| 872 | // adds that operand use to PoisonUsesToFix so that caller can fix them later. |
| 873 | // |
| 874 | // Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast |
| 875 | // from a pointer whose type already matches. Therefore, this function returns a |
| 876 | // Value* instead of an Instruction*. |
| 877 | Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace( |
| 878 | Instruction *I, unsigned NewAddrSpace, |
| 879 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
| 880 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
| 881 | SmallVectorImpl<const Use *> *PoisonUsesToFix) const { |
| 882 | Type *NewPtrType = getPtrOrVecOfPtrsWithNewAS(Ty: I->getType(), NewAddrSpace); |
| 883 | |
| 884 | if (I->getOpcode() == Instruction::AddrSpaceCast) { |
| 885 | Value *Src = I->getOperand(i: 0); |
| 886 | // Because `I` is flat, the source address space must be specific. |
| 887 | // Therefore, the inferred address space must be the source space, according |
| 888 | // to our algorithm. |
| 889 | assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace); |
| 890 | return Src; |
| 891 | } |
| 892 | |
| 893 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
| 894 | // Technically the intrinsic ID is a pointer typed argument, so specially |
| 895 | // handle calls early. |
| 896 | assert(II->getIntrinsicID() == Intrinsic::ptrmask); |
| 897 | return clonePtrMaskWithNewAddressSpace( |
| 898 | I: II, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix); |
| 899 | } |
| 900 | |
| 901 | unsigned AS = TTI->getAssumedAddrSpace(V: I); |
| 902 | if (AS != UninitializedAddressSpace) { |
| 903 | // For the assumed address space, insert an `addrspacecast` to make that |
| 904 | // explicit. |
| 905 | Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: I->getType(), NewAddrSpace: AS); |
| 906 | auto *NewI = new AddrSpaceCastInst(I, NewPtrTy); |
| 907 | NewI->insertAfter(InsertPos: I->getIterator()); |
| 908 | NewI->setDebugLoc(I->getDebugLoc()); |
| 909 | return NewI; |
| 910 | } |
| 911 | |
| 912 | // Computes the converted pointer operands. |
| 913 | SmallVector<Value *, 4> NewPointerOperands; |
| 914 | for (const Use &OperandUse : I->operands()) { |
| 915 | if (!OperandUse.get()->getType()->isPtrOrPtrVectorTy()) |
| 916 | NewPointerOperands.push_back(Elt: nullptr); |
| 917 | else |
| 918 | NewPointerOperands.push_back(Elt: operandWithNewAddressSpaceOrCreatePoison( |
| 919 | OperandUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, |
| 920 | PoisonUsesToFix)); |
| 921 | } |
| 922 | |
| 923 | switch (I->getOpcode()) { |
| 924 | case Instruction::BitCast: |
| 925 | return new BitCastInst(NewPointerOperands[0], NewPtrType); |
| 926 | case Instruction::PHI: { |
| 927 | assert(I->getType()->isPtrOrPtrVectorTy()); |
| 928 | PHINode *PHI = cast<PHINode>(Val: I); |
| 929 | PHINode *NewPHI = PHINode::Create(Ty: NewPtrType, NumReservedValues: PHI->getNumIncomingValues()); |
| 930 | for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) { |
| 931 | unsigned OperandNo = PHINode::getOperandNumForIncomingValue(i: Index); |
| 932 | NewPHI->addIncoming(V: NewPointerOperands[OperandNo], |
| 933 | BB: PHI->getIncomingBlock(i: Index)); |
| 934 | } |
| 935 | return NewPHI; |
| 936 | } |
| 937 | case Instruction::GetElementPtr: { |
| 938 | GetElementPtrInst *GEP = cast<GetElementPtrInst>(Val: I); |
| 939 | GetElementPtrInst *NewGEP = GetElementPtrInst::Create( |
| 940 | PointeeType: GEP->getSourceElementType(), Ptr: NewPointerOperands[0], |
| 941 | IdxList: SmallVector<Value *, 4>(GEP->indices())); |
| 942 | NewGEP->setIsInBounds(GEP->isInBounds()); |
| 943 | return NewGEP; |
| 944 | } |
| 945 | case Instruction::Select: |
| 946 | assert(I->getType()->isPtrOrPtrVectorTy()); |
| 947 | return SelectInst::Create(C: I->getOperand(i: 0), S1: NewPointerOperands[1], |
| 948 | S2: NewPointerOperands[2], NameStr: "" , InsertBefore: nullptr, MDFrom: I); |
| 949 | case Instruction::IntToPtr: { |
| 950 | if (isNoopPtrIntCastPair(I2P: cast<Operator>(Val: I), DL: *DL, TTI)) { |
| 951 | Value *Src = cast<Operator>(Val: I->getOperand(i: 0))->getOperand(i: 0); |
| 952 | if (Src->getType() == NewPtrType) |
| 953 | return Src; |
| 954 | |
| 955 | // If we had a no-op inttoptr/ptrtoint pair, we may still have inferred a |
| 956 | // source address space from a generic pointer source need to insert a |
| 957 | // cast back. |
| 958 | return new AddrSpaceCastInst(Src, NewPtrType); |
| 959 | } |
| 960 | assert(isSafeToCastIntToPtrAddrSpace(cast<Operator>(I))); |
| 961 | AddrSpaceCastInst *AsCast = new AddrSpaceCastInst(I, NewPtrType); |
| 962 | AsCast->insertAfter(InsertPos: I); |
| 963 | return AsCast; |
| 964 | } |
| 965 | default: |
| 966 | llvm_unreachable("Unexpected opcode" ); |
| 967 | } |
| 968 | } |
| 969 | |
// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
// constant expression `CE` with its operands replaced as specified in
// ValueWithNewAddrSpace.
Value *InferAddressSpacesImpl::cloneConstantExprWithNewAddressSpace(
    ConstantExpr *CE, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
    const TargetTransformInfo *TTI) const {
  // Pointer-typed results move to the new address space; non-pointer results
  // keep their original type.
  Type *TargetType =
      CE->getType()->isPtrOrPtrVectorTy()
          ? getPtrOrVecOfPtrsWithNewAS(Ty: CE->getType(), NewAddrSpace)
          : CE->getType();

  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    // Because CE is flat, the source address space must be specific.
    // Therefore, the inferred address space must be the source space according
    // to our algorithm.
    assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
           NewAddrSpace);
    return CE->getOperand(i_nocapture: 0);
  }

  if (CE->getOpcode() == Instruction::BitCast) {
    // Prefer bitcasting the already-cloned operand; otherwise cast the whole
    // expression into the new address space.
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: CE->getOperand(i_nocapture: 0)))
      return ConstantExpr::getBitCast(C: cast<Constant>(Val: NewOperand), Ty: TargetType);
    return ConstantExpr::getAddrSpaceCast(C: CE, Ty: TargetType);
  }

  if (CE->getOpcode() == Instruction::IntToPtr) {
    // inttoptr (ptrtoint P) that round-trips losslessly: look through both
    // casts and use the original pointer directly.
    if (isNoopPtrIntCastPair(I2P: cast<Operator>(Val: CE), DL: *DL, TTI)) {
      Constant *Src = cast<ConstantExpr>(Val: CE->getOperand(i_nocapture: 0))->getOperand(i_nocapture: 0);
      assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
      return Src;
    }
    assert(isSafeToCastIntToPtrAddrSpace(cast<Operator>(CE)));
    return ConstantExpr::getAddrSpaceCast(C: CE, Ty: TargetType);
  }

  // Computes the operands of the new constant expression.
  bool IsNew = false;
  SmallVector<Constant *, 4> NewOperands;
  for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
    Constant *Operand = CE->getOperand(i_nocapture: Index);
    // If the address space of `Operand` needs to be modified, the new operand
    // with the new address space should already be in ValueWithNewAddrSpace
    // because (1) the constant expressions we consider (i.e. addrspacecast,
    // bitcast, and getelementptr) do not incur cycles in the data flow graph
    // and (2) this function is called on constant expressions in postorder.
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: Operand)) {
      IsNew = true;
      NewOperands.push_back(Elt: cast<Constant>(Val: NewOperand));
      continue;
    }
    if (auto *CExpr = dyn_cast<ConstantExpr>(Val: Operand))
      if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
              CE: CExpr, NewAddrSpace, ValueWithNewAddrSpace, DL, TTI)) {
        IsNew = true;
        NewOperands.push_back(Elt: cast<Constant>(Val: NewOperand));
        continue;
      }
    // Otherwise, reuses the old operand.
    NewOperands.push_back(Elt: Operand);
  }

  // If !IsNew, we will replace the Value with itself. However, replaced values
  // are assumed to be wrapped in an addrspacecast cast later so drop it now.
  if (!IsNew)
    return nullptr;

  if (CE->getOpcode() == Instruction::GetElementPtr) {
    // Needs to specify the source type while constructing a getelementptr
    // constant expression.
    return CE->getWithOperands(Ops: NewOperands, Ty: TargetType, /*OnlyIfReduced=*/false,
                               SrcTy: cast<GEPOperator>(Val: CE)->getSourceElementType());
  }

  return CE->getWithOperands(Ops: NewOperands, Ty: TargetType);
}
| 1047 | |
// Returns a clone of the value `V`, with its operands replaced as specified in
// ValueWithNewAddrSpace. This function is called on every flat address
// expression whose address space needs to be modified, in postorder.
//
// See cloneInstructionWithNewAddressSpace for the meaning of PoisonUsesToFix.
Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
    Value *V, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS,
    SmallVectorImpl<const Use *> *PoisonUsesToFix) const {
  // All values in Postorder are flat address expressions.
  assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
         isAddressExpression(*V, *DL, TTI));

  if (auto *Arg = dyn_cast<Argument>(Val: V)) {
    // Arguments are address space casted in the function body, as we do not
    // want to change the function signature.
    Function *F = Arg->getParent();
    BasicBlock::iterator Insert = F->getEntryBlock().getFirstNonPHIIt();

    Type *NewPtrTy = PointerType::get(C&: Arg->getContext(), AddressSpace: NewAddrSpace);
    auto *NewI = new AddrSpaceCastInst(Arg, NewPtrTy);
    NewI->insertBefore(InsertPos: Insert);
    return NewI;
  }

  if (Instruction *I = dyn_cast<Instruction>(Val: V)) {
    Value *NewV = cloneInstructionWithNewAddressSpace(
        I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
    if (Instruction *NewI = dyn_cast_or_null<Instruction>(Val: NewV)) {
      // Only place the clone ourselves if the callee did not already insert it
      // (a parent of nullptr means the instruction is still detached).
      if (NewI->getParent() == nullptr) {
        NewI->insertBefore(InsertPos: I->getIterator());
        NewI->takeName(V: I);
        NewI->setDebugLoc(I->getDebugLoc());
      }
    }
    return NewV;
  }

  // Neither an argument nor an instruction: must be a constant expression.
  return cloneConstantExprWithNewAddressSpace(
      CE: cast<ConstantExpr>(Val: V), NewAddrSpace, ValueWithNewAddrSpace, DL, TTI);
}
| 1090 | |
| 1091 | // Defines the join operation on the address space lattice (see the file header |
| 1092 | // comments). |
| 1093 | unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1, |
| 1094 | unsigned AS2) const { |
| 1095 | if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace) |
| 1096 | return FlatAddrSpace; |
| 1097 | |
| 1098 | if (AS1 == UninitializedAddressSpace) |
| 1099 | return AS2; |
| 1100 | if (AS2 == UninitializedAddressSpace) |
| 1101 | return AS1; |
| 1102 | |
| 1103 | // The join of two different specific address spaces is flat. |
| 1104 | return (AS1 == AS2) ? AS1 : FlatAddrSpace; |
| 1105 | } |
| 1106 | |
// Entry point: infers specific address spaces for flat pointer expressions in
// CurFn and rewrites them. Returns true if the function was changed.
bool InferAddressSpacesImpl::run(Function &CurFn) {
  F = &CurFn;
  DL = &F->getDataLayout();
  PtrIntCastPairs.clear();

  // Testing override: treat addrspace(0) as the flat space.
  if (AssumeDefaultIsFlatAddressSpace)
    FlatAddrSpace = 0;

  if (FlatAddrSpace == UninitializedAddressSpace) {
    FlatAddrSpace = TTI->getFlatAddressSpace();
    // Nothing to do on targets without a flat address space.
    if (FlatAddrSpace == UninitializedAddressSpace)
      return false;
  }

  collectIntToPtrPointerOperand();
  // Collects all flat address expressions in postorder.
  std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F&: *F);

  // Runs a data-flow analysis to refine the address spaces of every expression
  // in Postorder.
  ValueToAddrSpaceMapTy InferredAddrSpace;
  PredicatedAddrSpaceMapTy PredicatedAS;
  inferAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS);

  // Changes the address spaces of the flat address expressions who are inferred
  // to point to a specific address space.
  return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace,
                                     PredicatedAS);
}
| 1136 | |
// Constants need to be tracked through RAUW to handle cases with nested
// constant expressions, so wrap values in WeakTrackingVH.
//
// Runs the fixed-point data-flow analysis: repeatedly refines the inferred
// address space of each expression from its operands until nothing changes.
void InferAddressSpacesImpl::inferAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    ValueToAddrSpaceMapTy &InferredAddrSpace,
    PredicatedAddrSpaceMapTy &PredicatedAS) const {
  SetVector<Value *> Worklist(llvm::from_range, Postorder);
  // Initially, all expressions are in the uninitialized address space.
  for (Value *V : Postorder)
    InferredAddrSpace[V] = UninitializedAddressSpace;

  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();

    // Try to update the address space of the stack top according to the
    // address spaces of its operands.
    if (!updateAddressSpace(V: *V, InferredAddrSpace, PredicatedAS))
      continue;

    // V's inferred space changed: reconsider its users.
    for (Value *User : V->users()) {
      // Skip if User is already in the worklist.
      if (Worklist.count(key: User))
        continue;

      auto Pos = InferredAddrSpace.find(Val: User);
      // Our algorithm only updates the address spaces of flat address
      // expressions, which are those in InferredAddrSpace.
      if (Pos == InferredAddrSpace.end())
        continue;

      // Function updateAddressSpace moves the address space down a lattice
      // path. Therefore, nothing to do if User is already inferred as flat (the
      // bottom element in the lattice).
      if (Pos->second == FlatAddrSpace)
        continue;

      Worklist.insert(X: User);
    }
  }
}
| 1177 | |
| 1178 | unsigned |
| 1179 | InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &Ptr, |
| 1180 | const Value *UserCtx) const { |
| 1181 | const Instruction *UserCtxI = dyn_cast<Instruction>(Val: UserCtx); |
| 1182 | if (!UserCtxI) |
| 1183 | return UninitializedAddressSpace; |
| 1184 | |
| 1185 | const Value *StrippedPtr = Ptr.stripInBoundsOffsets(); |
| 1186 | for (auto &AssumeVH : AC.assumptionsFor(V: StrippedPtr)) { |
| 1187 | if (!AssumeVH) |
| 1188 | continue; |
| 1189 | CallInst *CI = cast<CallInst>(Val&: AssumeVH); |
| 1190 | if (!isValidAssumeForContext(I: CI, CxtI: UserCtxI, DT)) |
| 1191 | continue; |
| 1192 | |
| 1193 | const Value *Ptr; |
| 1194 | unsigned AS; |
| 1195 | std::tie(args&: Ptr, args&: AS) = TTI->getPredicatedAddrSpace(V: CI->getArgOperand(i: 0)); |
| 1196 | if (Ptr) |
| 1197 | return AS; |
| 1198 | } |
| 1199 | |
| 1200 | return UninitializedAddressSpace; |
| 1201 | } |
| 1202 | |
// Recomputes V's inferred address space as the join of its operands' spaces.
// Returns true iff the inferred space changed (so users must be revisited).
bool InferAddressSpacesImpl::updateAddressSpace(
    const Value &V, ValueToAddrSpaceMapTy &InferredAddrSpace,
    PredicatedAddrSpaceMapTy &PredicatedAS) const {
  assert(InferredAddrSpace.count(&V));

  LLVM_DEBUG(dbgs() << "Updating the address space of\n  " << V << '\n');

  // The new inferred address space equals the join of the address spaces
  // of all its pointer operands.
  unsigned NewAS = UninitializedAddressSpace;

  // isAddressExpression should guarantee that V is an operator or an argument.
  assert(isa<Operator>(V) || isa<Argument>(V));

  unsigned AS = TTI->getAssumedAddrSpace(V: &V);
  if (AS != UninitializedAddressSpace) {
    // Use the assumed address space directly.
    NewAS = AS;
  } else {
    // Otherwise, infer the address space from its pointer operands.
    SmallVector<Constant *, 2> ConstantPtrOps;
    SmallVector<Value *, 2> PtrOps = getPointerOperands(V, DL: *DL, TTI);
    for (Value *PtrOperand : PtrOps) {
      auto I = InferredAddrSpace.find(Val: PtrOperand);
      unsigned OperandAS;
      if (I == InferredAddrSpace.end()) {
        // Operand is not itself being inferred; use its declared space.
        OperandAS = PtrOperand->getType()->getPointerAddressSpace();
        if (auto *C = dyn_cast<Constant>(Val: PtrOperand);
            C && OperandAS == FlatAddrSpace) {
          // Defer joining the address space of constant pointer operands.
          ConstantPtrOps.push_back(Elt: C);
          continue;
        }
        if (OperandAS == FlatAddrSpace) {
          // Check AC for assumption dominating V.
          unsigned AS = getPredicatedAddrSpace(Ptr: *PtrOperand, UserCtx: &V);
          if (AS != UninitializedAddressSpace) {
            LLVM_DEBUG(dbgs()
                       << "  deduce operand AS from the predicate addrspace "
                       << AS << '\n');
            OperandAS = AS;
            // Record this use with the predicated AS.
            PredicatedAS[std::make_pair(x: &V, y&: PtrOperand)] = OperandAS;
          }
        }
      } else
        OperandAS = I->second;

      // join(flat, *) = flat. So we can break if NewAS is already flat.
      NewAS = joinAddressSpaces(AS1: NewAS, AS2: OperandAS);
      if (NewAS == FlatAddrSpace)
        break;
    }

    // A specific space was inferred: only keep it if every deferred constant
    // operand can be safely addrspacecast into that space.
    if (NewAS != FlatAddrSpace && NewAS != UninitializedAddressSpace) {
      if (any_of(Range&: ConstantPtrOps, P: [=](Constant *C) {
            return !isSafeToCastConstAddrSpace(C, NewAS);
          }))
        NewAS = FlatAddrSpace;
    }

    // operator(flat const, flat const, ...) -> flat
    if (NewAS == UninitializedAddressSpace &&
        PtrOps.size() == ConstantPtrOps.size())
      NewAS = FlatAddrSpace;
  }

  unsigned OldAS = InferredAddrSpace.lookup(Val: &V);
  assert(OldAS != FlatAddrSpace);
  if (OldAS == NewAS)
    return false;

  // If any updates are made, grabs its users to the worklist because
  // their address spaces can also be possibly updated.
  LLVM_DEBUG(dbgs() << "  to " << NewAS << '\n');
  InferredAddrSpace[&V] = NewAS;
  return true;
}
| 1281 | |
| 1282 | /// Replace operand \p OpIdx in \p Inst, if the value is the same as \p OldVal |
| 1283 | /// with \p NewVal. |
| 1284 | static bool replaceOperandIfSame(Instruction *Inst, unsigned OpIdx, |
| 1285 | Value *OldVal, Value *NewVal) { |
| 1286 | Use &U = Inst->getOperandUse(i: OpIdx); |
| 1287 | if (U.get() == OldVal) { |
| 1288 | U.set(NewVal); |
| 1289 | return true; |
| 1290 | } |
| 1291 | |
| 1292 | return false; |
| 1293 | } |
| 1294 | |
| 1295 | template <typename InstrType> |
| 1296 | static bool replaceSimplePointerUse(const TargetTransformInfo &TTI, |
| 1297 | InstrType *MemInstr, unsigned AddrSpace, |
| 1298 | Value *OldV, Value *NewV) { |
| 1299 | if (!MemInstr->isVolatile() || TTI.hasVolatileVariant(I: MemInstr, AddrSpace)) { |
| 1300 | return replaceOperandIfSame(MemInstr, InstrType::getPointerOperandIndex(), |
| 1301 | OldV, NewV); |
| 1302 | } |
| 1303 | |
| 1304 | return false; |
| 1305 | } |
| 1306 | |
/// If \p OldV is used as the pointer operand of a compatible memory operation
/// \p Inst, replaces the pointer operand with NewV.
///
/// This covers memory instructions with a single pointer operand that can have
/// its address space changed by simply mutating the use to a new value.
///
/// \returns true if the user replacement was made.
static bool replaceIfSimplePointerUse(const TargetTransformInfo &TTI,
                                      User *Inst, unsigned AddrSpace,
                                      Value *OldV, Value *NewV) {
  // Dispatch on the concrete memory instruction kind; all four share the
  // "single pointer operand" shape handled by replaceSimplePointerUse.
  if (auto *LI = dyn_cast<LoadInst>(Val: Inst))
    return replaceSimplePointerUse(TTI, MemInstr: LI, AddrSpace, OldV, NewV);

  if (auto *SI = dyn_cast<StoreInst>(Val: Inst))
    return replaceSimplePointerUse(TTI, MemInstr: SI, AddrSpace, OldV, NewV);

  if (auto *RMW = dyn_cast<AtomicRMWInst>(Val: Inst))
    return replaceSimplePointerUse(TTI, MemInstr: RMW, AddrSpace, OldV, NewV);

  if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: Inst))
    return replaceSimplePointerUse(TTI, MemInstr: CmpX, AddrSpace, OldV, NewV);

  return false;
}
| 1331 | |
/// Update memory intrinsic uses that require more complex processing than
/// simple memory instructions. These require re-mangling and may have multiple
/// pointer operands.
///
/// Rebuilds the intrinsic with the rewritten pointer(s) and erases the old
/// one. NOTE(review): the rebuilt call is always non-volatile — presumably
/// callers only pass non-volatile intrinsics here; confirm at the call sites.
static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
                                     Value *NewV) {
  IRBuilder<> B(MI);
  if (auto *MSI = dyn_cast<MemSetInst>(Val: MI)) {
    B.CreateMemSet(Ptr: NewV, Val: MSI->getValue(), Size: MSI->getLength(), Align: MSI->getDestAlign(),
                   isVolatile: false, // isVolatile
                   AAInfo: MI->getAAMetadata());
  } else if (auto *MTI = dyn_cast<MemTransferInst>(Val: MI)) {
    Value *Src = MTI->getRawSource();
    Value *Dest = MTI->getRawDest();

    // Be careful in case this is a self-to-self copy.
    if (Src == OldV)
      Src = NewV;

    if (Dest == OldV)
      Dest = NewV;

    if (auto *MCI = dyn_cast<MemCpyInst>(Val: MTI)) {
      // Preserve the inline-expansion requirement of memcpy.inline.
      if (MCI->isForceInlined())
        B.CreateMemCpyInline(Dst: Dest, DstAlign: MTI->getDestAlign(), Src,
                             SrcAlign: MTI->getSourceAlign(), Size: MTI->getLength(),
                             isVolatile: false, // isVolatile
                             AAInfo: MI->getAAMetadata());
      else
        B.CreateMemCpy(Dst: Dest, DstAlign: MTI->getDestAlign(), Src, SrcAlign: MTI->getSourceAlign(),
                       Size: MTI->getLength(),
                       isVolatile: false, // isVolatile
                       AAInfo: MI->getAAMetadata());
    } else {
      assert(isa<MemMoveInst>(MTI));
      B.CreateMemMove(Dst: Dest, DstAlign: MTI->getDestAlign(), Src, SrcAlign: MTI->getSourceAlign(),
                      Size: MTI->getLength(),
                      isVolatile: false, // isVolatile
                      AAInfo: MI->getAAMetadata());
    }
  } else
    llvm_unreachable("unhandled MemIntrinsic" );

  // The old intrinsic is fully superseded by the rebuilt one.
  MI->eraseFromParent();
  return true;
}
| 1377 | |
| 1378 | // \p returns true if it is OK to change the address space of constant \p C with |
| 1379 | // a ConstantExpr addrspacecast. |
| 1380 | bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C, |
| 1381 | unsigned NewAS) const { |
| 1382 | assert(NewAS != UninitializedAddressSpace); |
| 1383 | |
| 1384 | unsigned SrcAS = C->getType()->getPointerAddressSpace(); |
| 1385 | if (SrcAS == NewAS || isa<UndefValue>(Val: C)) |
| 1386 | return true; |
| 1387 | |
| 1388 | // Prevent illegal casts between different non-flat address spaces. |
| 1389 | if (SrcAS != FlatAddrSpace && NewAS != FlatAddrSpace) |
| 1390 | return false; |
| 1391 | |
| 1392 | if (isa<ConstantPointerNull>(Val: C) || isa<ConstantAggregateZero>(Val: C)) |
| 1393 | return true; |
| 1394 | |
| 1395 | if (auto *Op = dyn_cast<Operator>(Val: C)) { |
| 1396 | // If we already have a constant addrspacecast, it should be safe to cast it |
| 1397 | // off. |
| 1398 | if (Op->getOpcode() == Instruction::AddrSpaceCast) |
| 1399 | return isSafeToCastConstAddrSpace(C: cast<Constant>(Val: Op->getOperand(i: 0)), |
| 1400 | NewAS); |
| 1401 | |
| 1402 | if (Op->getOpcode() == Instruction::IntToPtr && |
| 1403 | Op->getType()->getPointerAddressSpace() == FlatAddrSpace) |
| 1404 | return true; |
| 1405 | } |
| 1406 | |
| 1407 | return false; |
| 1408 | } |
| 1409 | |
| 1410 | static Value::use_iterator skipToNextUser(Value::use_iterator I, |
| 1411 | Value::use_iterator End) { |
| 1412 | User *CurUser = I->getUser(); |
| 1413 | ++I; |
| 1414 | |
| 1415 | while (I != End && I->getUser() == CurUser) |
| 1416 | ++I; |
| 1417 | |
| 1418 | return I; |
| 1419 | } |
| 1420 | |
/// Rewrite the single use \p U of \p V so it refers to \p NewV, the clone of
/// \p V in its inferred address space, choosing the cheapest legal rewrite for
/// the kind of user involved.
///
/// \param V the original (flat) pointer value.
/// \param NewV the equivalent value in the new address space.
/// \param U the particular use of \p V being rewritten; its user may contain
///        further uses of \p V, which are all updated together at the end.
/// \param ValueWithNewAddrSpace map from original values to their clones, used
///        to pair up both operands of an icmp.
/// \param DeadInstructions receives instructions made dead by the rewrite.
void InferAddressSpacesImpl::performPointerReplacement(
    Value *V, Value *NewV, Use &U, ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<Instruction *> &DeadInstructions) const {

  User *CurUser = U.getUser();

  // Fast path: memory instructions with a single pointer operand can be
  // mutated in place when the target accepts the new address space.
  unsigned AddrSpace = V->getType()->getPointerAddressSpace();
  if (replaceIfSimplePointerUse(TTI: *TTI, Inst: CurUser, AddrSpace, OldV: V, NewV))
    return;

  // Skip if the current user is the new value itself.
  if (CurUser == NewV)
    return;

  // Only instructions in the current function are rewritten; other users
  // (constants, other functions) are left alone.
  auto *CurUserI = dyn_cast<Instruction>(Val: CurUser);
  if (!CurUserI || CurUserI->getFunction() != F)
    return;

  // Handle more complex cases like intrinsic that need to be remangled.
  if (auto *MI = dyn_cast<MemIntrinsic>(Val: CurUser)) {
    // Volatile memory intrinsics are intentionally not rewritten.
    if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, OldV: V, NewV))
      return;
  }

  if (auto *II = dyn_cast<IntrinsicInst>(Val: CurUser)) {
    if (rewriteIntrinsicOperands(II, OldV: V, NewV))
      return;
  }

  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: CurUserI)) {
    // If we can infer that both pointers are in the same addrspace,
    // transform e.g.
    //   %cmp = icmp eq float* %p, %q
    // into
    //   %cmp = icmp eq float addrspace(3)* %new_p, %new_q

    unsigned NewAS = NewV->getType()->getPointerAddressSpace();
    int SrcIdx = U.getOperandNo();
    int OtherIdx = (SrcIdx == 0) ? 1 : 0;
    Value *OtherSrc = Cmp->getOperand(i_nocapture: OtherIdx);

    // Prefer using the already-cloned counterpart of the other operand when
    // its address space matches.
    if (Value *OtherNewV = ValueWithNewAddrSpace.lookup(Val: OtherSrc)) {
      if (OtherNewV->getType()->getPointerAddressSpace() == NewAS) {
        Cmp->setOperand(i_nocapture: OtherIdx, Val_nocapture: OtherNewV);
        Cmp->setOperand(i_nocapture: SrcIdx, Val_nocapture: NewV);
        return;
      }
    }

    // Even if the type mismatches, we can cast the constant.
    if (auto *KOtherSrc = dyn_cast<Constant>(Val: OtherSrc)) {
      if (isSafeToCastConstAddrSpace(C: KOtherSrc, NewAS)) {
        Cmp->setOperand(i_nocapture: SrcIdx, Val_nocapture: NewV);
        Cmp->setOperand(i_nocapture: OtherIdx, Val_nocapture: ConstantExpr::getAddrSpaceCast(
                                          C: KOtherSrc, Ty: NewV->getType()));
        return;
      }
    }
  }

  // An addrspacecast whose destination already matches NewV's space becomes
  // redundant: forward its uses to NewV and mark it dead.
  if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(Val: CurUserI)) {
    unsigned NewAS = NewV->getType()->getPointerAddressSpace();
    if (ASC->getDestAddressSpace() == NewAS) {
      ASC->replaceAllUsesWith(V: NewV);
      DeadInstructions.push_back(Elt: ASC);
      return;
    }
  }

  // Otherwise, replaces the use with flat(NewV).
  if (isa<Instruction>(Val: V) || isa<Instruction>(Val: NewV)) {
    // Don't create a copy of the original addrspacecast.
    if (U == V && isa<AddrSpaceCastInst>(Val: V))
      return;

    // Insert the addrspacecast after NewV.
    BasicBlock::iterator InsertPos;
    if (Instruction *NewVInst = dyn_cast<Instruction>(Val: NewV))
      InsertPos = std::next(x: NewVInst->getIterator());
    else
      InsertPos = std::next(x: cast<Instruction>(Val: V)->getIterator());

    // PHI nodes must stay grouped at the top of the block; insert below them.
    while (isa<PHINode>(Val: InsertPos))
      ++InsertPos;
    // This instruction may contain multiple uses of V, update them all.
    CurUser->replaceUsesOfWith(
        From: V, To: new AddrSpaceCastInst(NewV, V->getType(), "" , InsertPos));
  } else {
    // Both values are constants: a constant-expression cast suffices, no
    // instruction needs to be inserted.
    CurUserI->replaceUsesOfWith(
        From: V, To: ConstantExpr::getAddrSpaceCast(C: cast<Constant>(Val: NewV), Ty: V->getType()));
  }
}
| 1513 | |
/// Materialize the inferred address spaces: clone every flat address
/// expression in \p Postorder into its inferred space, patch up the
/// placeholder (poison) operands the cloning produced, then redirect all uses
/// within this function to the clones and delete what becomes dead.
///
/// \param Postorder address expressions in post order, so operands are cloned
///        before their users.
/// \param InferredAddrSpace the address space inferred for each expression.
/// \param PredicatedAS address spaces established by assumptions/predicates.
/// \returns true if any value was rewritten.
bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    const ValueToAddrSpaceMapTy &InferredAddrSpace,
    const PredicatedAddrSpaceMapTy &PredicatedAS) const {
  // For each address expression to be modified, creates a clone of it with its
  // pointer operands converted to the new address space. Since the pointer
  // operands are converted, the clone is naturally in the new address space by
  // construction.
  ValueToValueMapTy ValueWithNewAddrSpace;
  SmallVector<const Use *, 32> PoisonUsesToFix;
  for (Value *V : Postorder) {
    unsigned NewAddrSpace = InferredAddrSpace.lookup(Val: V);

    // In some degenerate cases (e.g. invalid IR in unreachable code), we may
    // not even infer the value to have its original address space.
    if (NewAddrSpace == UninitializedAddressSpace)
      continue;

    if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
      Value *New =
          cloneValueWithNewAddressSpace(V, NewAddrSpace, ValueWithNewAddrSpace,
                                        PredicatedAS, PoisonUsesToFix: &PoisonUsesToFix);
      if (New)
        ValueWithNewAddrSpace[V] = New;
    }
  }

  if (ValueWithNewAddrSpace.empty())
    return false;

  // Fixes all the poison uses generated by cloneInstructionWithNewAddressSpace.
  // (Cloning uses poison as a placeholder for operands whose clone did not
  // exist yet; the post-order pass above has since created them all.)
  for (const Use *PoisonUse : PoisonUsesToFix) {
    User *V = PoisonUse->getUser();
    User *NewV = cast_or_null<User>(Val: ValueWithNewAddrSpace.lookup(Val: V));
    if (!NewV)
      continue;

    unsigned OperandNo = PoisonUse->getOperandNo();
    assert(isa<PoisonValue>(NewV->getOperand(OperandNo)));
    WeakTrackingVH NewOp = ValueWithNewAddrSpace.lookup(Val: PoisonUse->get());
    assert(NewOp &&
           "poison replacements in ValueWithNewAddrSpace shouldn't be null" );
    NewV->setOperand(i: OperandNo, Val: NewOp);
  }

  SmallVector<Instruction *, 16> DeadInstructions;
  ValueToValueMapTy VMap;
  ValueMapper VMapper(VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

  // Replaces the uses of the old address expressions with the new ones.
  for (const WeakTrackingVH &WVH : Postorder) {
    assert(WVH && "value was unexpectedly deleted" );
    Value *V = WVH;
    Value *NewV = ValueWithNewAddrSpace.lookup(Val: V);
    if (NewV == nullptr)
      continue;

    LLVM_DEBUG(dbgs() << "Replacing the uses of " << *V << "\n with\n  "
                      << *NewV << '\n');

    if (Constant *C = dyn_cast<Constant>(Val: V)) {
      // Constants cannot be RAUW'd globally (they may be used outside F), so
      // build a cast expression and patch only uses reachable from this
      // function, remapping through intermediate constant users via VMapper.
      Constant *Replace =
          ConstantExpr::getAddrSpaceCast(C: cast<Constant>(Val: NewV), Ty: C->getType());
      if (C != Replace) {
        LLVM_DEBUG(dbgs() << "Inserting replacement const cast: " << Replace
                          << ": " << *Replace << '\n');
        SmallVector<User *, 16> WorkList;
        for (User *U : make_early_inc_range(Range: C->users())) {
          if (auto *I = dyn_cast<Instruction>(Val: U)) {
            if (I->getFunction() == F)
              I->replaceUsesOfWith(From: C, To: Replace);
          } else {
            // Constant user (e.g. a ConstantExpr): walk through it to find
            // the instructions that ultimately use C.
            WorkList.append(in_start: U->user_begin(), in_end: U->user_end());
          }
        }
        if (!WorkList.empty()) {
          VMap[C] = Replace;
          DenseSet<User *> Visited{WorkList.begin(), WorkList.end()};
          while (!WorkList.empty()) {
            User *U = WorkList.pop_back_val();
            if (auto *I = dyn_cast<Instruction>(Val: U)) {
              if (I->getFunction() == F)
                VMapper.remapInstruction(I&: *I);
              continue;
            }
            for (User *U2 : U->users())
              if (Visited.insert(V: U2).second)
                WorkList.push_back(Elt: U2);
          }
        }
        // Continue rewriting the remaining uses against the cast expression.
        V = Replace;
      }
    }

    Value::use_iterator I, E, Next;
    for (I = V->use_begin(), E = V->use_end(); I != E;) {
      Use &U = *I;

      // Some users may see the same pointer operand in multiple operands. Skip
      // to the next instruction.
      I = skipToNextUser(I, End: E);

      performPointerReplacement(V, NewV, U, ValueWithNewAddrSpace,
                                DeadInstructions);
    }

    // If every use was redirected, the original instruction is dead.
    if (V->use_empty()) {
      if (Instruction *I = dyn_cast<Instruction>(Val: V))
        DeadInstructions.push_back(Elt: I);
    }
  }

  for (Instruction *I : DeadInstructions)
    RecursivelyDeleteTriviallyDeadInstructions(V: I);

  return true;
}
| 1631 | |
| 1632 | bool InferAddressSpaces::runOnFunction(Function &F) { |
| 1633 | if (skipFunction(F)) |
| 1634 | return false; |
| 1635 | |
| 1636 | auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); |
| 1637 | DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; |
| 1638 | return InferAddressSpacesImpl( |
| 1639 | getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), DT, |
| 1640 | &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F), |
| 1641 | FlatAddrSpace) |
| 1642 | .run(CurFn&: F); |
| 1643 | } |
| 1644 | |
// Legacy pass-manager factory; forwards \p AddressSpace (the flat address
// space) to the pass constructor.
FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {
  return new InferAddressSpaces(AddressSpace);
}
| 1648 | |
// Default-constructed pass leaves the flat address space at the
// UninitializedAddressSpace sentinel — presumably resolved later (e.g. from
// TTI) during run(); confirm against InferAddressSpacesImpl.
InferAddressSpacesPass::InferAddressSpacesPass()
    : FlatAddrSpace(UninitializedAddressSpace) {}
// Explicitly pins the flat address space to \p AddressSpace.
InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
    : FlatAddrSpace(AddressSpace) {}
| 1653 | |
| 1654 | PreservedAnalyses InferAddressSpacesPass::run(Function &F, |
| 1655 | FunctionAnalysisManager &AM) { |
| 1656 | bool Changed = |
| 1657 | InferAddressSpacesImpl(AM.getResult<AssumptionAnalysis>(IR&: F), |
| 1658 | AM.getCachedResult<DominatorTreeAnalysis>(IR&: F), |
| 1659 | &AM.getResult<TargetIRAnalysis>(IR&: F), FlatAddrSpace) |
| 1660 | .run(CurFn&: F); |
| 1661 | if (Changed) { |
| 1662 | PreservedAnalyses PA; |
| 1663 | PA.preserveSet<CFGAnalyses>(); |
| 1664 | PA.preserve<DominatorTreeAnalysis>(); |
| 1665 | return PA; |
| 1666 | } |
| 1667 | return PreservedAnalyses::all(); |
| 1668 | } |
| 1669 | |