//===- InferAddressSpaces.cpp - Infer address spaces ---------------------===//
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// CUDA C/C++ includes memory space designation as variable type qualifiers (such
10 | // as __global__ and __shared__). Knowing the space of a memory access allows |
11 | // CUDA compilers to emit faster PTX loads and stores. For example, a load from |
12 | // shared memory can be translated to `ld.shared` which is roughly 10% faster |
13 | // than a generic `ld` on an NVIDIA Tesla K40c. |
14 | // |
15 | // Unfortunately, type qualifiers only apply to variable declarations, so CUDA |
16 | // compilers must infer the memory space of an address expression from |
17 | // type-qualified variables. |
18 | // |
// LLVM IR uses non-zero address spaces, so-called specific address spaces, to
// represent memory spaces (e.g. addrspace(3) means shared memory). The Clang
// frontend places only type-qualified variables in specific address spaces,
// and then conservatively `addrspacecast`s each type-qualified variable to
// addrspace(0) (the so-called generic address space) for other instructions
// to use.
24 | // |
// For example, Clang translates the following CUDA code
26 | // __shared__ float a[10]; |
27 | // float v = a[i]; |
28 | // to |
29 | // %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* |
30 | // %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i |
31 | // %v = load float, float* %1 ; emits ld.f32 |
// @a is in addrspace(3) since it's type-qualified, but its use in %1 is
// redirected through %0 (the generic version of @a).
34 | // |
// The optimization implemented in this file propagates specific address spaces
// from type-qualified variable declarations to their users. For example, it
// optimizes the above IR to
//   %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %v = load float addrspace(3)* %1 ; emits ld.shared.f32
// propagating the addrspace(3) from @a to %1. As a result, the NVPTX codegen
// is able to emit ld.shared.f32 for %v.
42 | // |
// Address space inference works in two steps. First, it uses a data-flow
// analysis to prove that as many generic pointers as possible point into only
// one specific address space. In the above example, it can prove that %1 only
// points to addrspace(3). This algorithm was published in
47 | // CUDA: Compiling and optimizing for a GPU platform |
48 | // Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang |
49 | // ICCS 2012 |
50 | // |
51 | // Then, address space inference replaces all refinable generic pointers with |
52 | // equivalent specific pointers. |
53 | // |
54 | // The major challenge of implementing this optimization is handling PHINodes, |
55 | // which may create loops in the data flow graph. This brings two complications. |
56 | // |
57 | // First, the data flow analysis in Step 1 needs to be circular. For example, |
58 | // %generic.input = addrspacecast float addrspace(3)* %input to float* |
59 | // loop: |
60 | // %y = phi [ %generic.input, %y2 ] |
61 | // %y2 = getelementptr %y, 1 |
62 | // %v = load %y2 |
63 | // br ..., label %loop, ... |
64 | // proving %y specific requires proving both %generic.input and %y2 specific, |
65 | // but proving %y2 specific circles back to %y. To address this complication, |
66 | // the data flow analysis operates on a lattice: |
67 | // uninitialized > specific address spaces > generic. |
// All address expressions (our implementation considers phi, select, bitcast,
// addrspacecast, getelementptr, and a few intrinsics such as ptrmask) start
// with the uninitialized address space.
70 | // The monotone transfer function moves the address space of a pointer down a |
71 | // lattice path from uninitialized to specific and then to generic. A join |
72 | // operation of two different specific address spaces pushes the expression down |
73 | // to the generic address space. The analysis completes once it reaches a fixed |
74 | // point. |
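//
// As a concrete sketch of the join (using NVPTX numbering, where addrspace(1)
// is global and addrspace(3) is shared; the numbering is only illustrative):
//   join(uninitialized, shared(3)) = shared(3)
//   join(shared(3),     shared(3)) = shared(3)
//   join(shared(3),     global(1)) = generic(0)
//   join(generic(0),    any)       = generic(0)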
75 | // |
76 | // Second, IR rewriting in Step 2 also needs to be circular. For example, |
77 | // converting %y to addrspace(3) requires the compiler to know the converted |
78 | // %y2, but converting %y2 needs the converted %y. To address this complication, |
79 | // we break these cycles using "poison" placeholders. When converting an |
80 | // instruction `I` to a new address space, if its operand `Op` is not converted |
81 | // yet, we let `I` temporarily use `poison` and fix all the uses later. |
82 | // For instance, our algorithm first converts %y to |
83 | // %y' = phi float addrspace(3)* [ %input, poison ] |
84 | // Then, it converts %y2 to |
85 | // %y2' = getelementptr %y', 1 |
86 | // Finally, it fixes the poison in %y' so that |
87 | // %y' = phi float addrspace(3)* [ %input, %y2' ] |
88 | // |
89 | //===----------------------------------------------------------------------===// |
90 | |
91 | #include "llvm/Transforms/Scalar/InferAddressSpaces.h" |
92 | #include "llvm/ADT/ArrayRef.h" |
93 | #include "llvm/ADT/DenseMap.h" |
94 | #include "llvm/ADT/DenseSet.h" |
95 | #include "llvm/ADT/SetVector.h" |
96 | #include "llvm/ADT/SmallVector.h" |
97 | #include "llvm/Analysis/AssumptionCache.h" |
98 | #include "llvm/Analysis/TargetTransformInfo.h" |
99 | #include "llvm/Analysis/ValueTracking.h" |
100 | #include "llvm/IR/BasicBlock.h" |
101 | #include "llvm/IR/Constant.h" |
102 | #include "llvm/IR/Constants.h" |
103 | #include "llvm/IR/Dominators.h" |
104 | #include "llvm/IR/Function.h" |
105 | #include "llvm/IR/IRBuilder.h" |
106 | #include "llvm/IR/InstIterator.h" |
107 | #include "llvm/IR/Instruction.h" |
108 | #include "llvm/IR/Instructions.h" |
109 | #include "llvm/IR/IntrinsicInst.h" |
110 | #include "llvm/IR/Intrinsics.h" |
111 | #include "llvm/IR/LLVMContext.h" |
112 | #include "llvm/IR/Operator.h" |
113 | #include "llvm/IR/PassManager.h" |
114 | #include "llvm/IR/Type.h" |
115 | #include "llvm/IR/Use.h" |
116 | #include "llvm/IR/User.h" |
117 | #include "llvm/IR/Value.h" |
118 | #include "llvm/IR/ValueHandle.h" |
119 | #include "llvm/InitializePasses.h" |
120 | #include "llvm/Pass.h" |
121 | #include "llvm/Support/Casting.h" |
122 | #include "llvm/Support/CommandLine.h" |
123 | #include "llvm/Support/Compiler.h" |
124 | #include "llvm/Support/Debug.h" |
125 | #include "llvm/Support/ErrorHandling.h" |
126 | #include "llvm/Support/raw_ostream.h" |
127 | #include "llvm/Transforms/Scalar.h" |
128 | #include "llvm/Transforms/Utils/Local.h" |
129 | #include "llvm/Transforms/Utils/ValueMapper.h" |
130 | #include <cassert> |
131 | #include <iterator> |
132 | #include <limits> |
133 | #include <utility> |
134 | #include <vector> |
135 | |
136 | #define DEBUG_TYPE "infer-address-spaces" |
137 | |
138 | using namespace llvm; |
139 | |
140 | static cl::opt<bool> AssumeDefaultIsFlatAddressSpace( |
141 | "assume-default-is-flat-addrspace" , cl::init(Val: false), cl::ReallyHidden, |
142 | cl::desc("The default address space is assumed as the flat address space. " |
143 | "This is mainly for test purpose." )); |
144 | |
145 | static const unsigned UninitializedAddressSpace = |
146 | std::numeric_limits<unsigned>::max(); |
147 | |
148 | namespace { |
149 | |
150 | using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>; |
// Unlike ValueToAddrSpaceMapTy, where a new addrspace is inferred on the
// *def* of a value, PredicatedAddrSpaceMapTy is a map where a new addrspace is
// inferred on the *use* of a pointer. This map is introduced to infer
// addrspace from the addrspace predicate assumptions built from the assume
// intrinsic. In that scenario, only specific uses (under a valid assumption
// context) can be inferred to have a new addrspace.
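// For example (a sketch; the predicate intrinsic is target-specific, and the
// AMDGPU intrinsic below is only illustrative):
//   %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %p)
//   call void @llvm.assume(i1 %is.shared)
//   %v = load float, ptr %p  ; only this use of %p may be predicated as
//                            ; pointing into the shared address space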
157 | using PredicatedAddrSpaceMapTy = |
158 | DenseMap<std::pair<const Value *, const Value *>, unsigned>; |
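// The bool in each PostorderStackTy entry records whether the operands of the
// expression have already been pushed (see collectFlatAddressExpressions).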
159 | using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>; |
160 | |
161 | class InferAddressSpaces : public FunctionPass { |
162 | unsigned FlatAddrSpace = 0; |
163 | |
164 | public: |
165 | static char ID; |
166 | |
167 | InferAddressSpaces() |
168 | : FunctionPass(ID), FlatAddrSpace(UninitializedAddressSpace) { |
169 | initializeInferAddressSpacesPass(*PassRegistry::getPassRegistry()); |
170 | } |
171 | InferAddressSpaces(unsigned AS) : FunctionPass(ID), FlatAddrSpace(AS) { |
172 | initializeInferAddressSpacesPass(*PassRegistry::getPassRegistry()); |
173 | } |
174 | |
175 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
176 | AU.setPreservesCFG(); |
177 | AU.addPreserved<DominatorTreeWrapperPass>(); |
178 | AU.addRequired<AssumptionCacheTracker>(); |
179 | AU.addRequired<TargetTransformInfoWrapperPass>(); |
180 | } |
181 | |
182 | bool runOnFunction(Function &F) override; |
183 | }; |
184 | |
185 | class InferAddressSpacesImpl { |
186 | AssumptionCache &AC; |
187 | const DominatorTree *DT = nullptr; |
188 | const TargetTransformInfo *TTI = nullptr; |
189 | const DataLayout *DL = nullptr; |
190 | |
  /// Target-specific address space whose uses should be replaced if possible.
193 | unsigned FlatAddrSpace = 0; |
194 | |
  // Tries to update the address space of V. Returns true if V is updated, and
  // false otherwise.
197 | bool updateAddressSpace(const Value &V, |
198 | ValueToAddrSpaceMapTy &InferredAddrSpace, |
199 | PredicatedAddrSpaceMapTy &PredicatedAS) const; |
200 | |
201 | // Tries to infer the specific address space of each address expression in |
202 | // Postorder. |
203 | void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder, |
204 | ValueToAddrSpaceMapTy &InferredAddrSpace, |
205 | PredicatedAddrSpaceMapTy &PredicatedAS) const; |
206 | |
207 | bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const; |
208 | |
209 | Value *cloneInstructionWithNewAddressSpace( |
210 | Instruction *I, unsigned NewAddrSpace, |
211 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
212 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
213 | SmallVectorImpl<const Use *> *PoisonUsesToFix) const; |
214 | |
215 | // Changes the flat address expressions in function F to point to specific |
216 | // address spaces if InferredAddrSpace says so. Postorder is the postorder of |
217 | // all flat expressions in the use-def graph of function F. |
218 | bool |
219 | rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder, |
220 | const ValueToAddrSpaceMapTy &InferredAddrSpace, |
221 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
222 | Function *F) const; |
223 | |
224 | void appendsFlatAddressExpressionToPostorderStack( |
225 | Value *V, PostorderStackTy &PostorderStack, |
226 | DenseSet<Value *> &Visited) const; |
227 | |
228 | bool rewriteIntrinsicOperands(IntrinsicInst *II, Value *OldV, |
229 | Value *NewV) const; |
230 | void collectRewritableIntrinsicOperands(IntrinsicInst *II, |
231 | PostorderStackTy &PostorderStack, |
232 | DenseSet<Value *> &Visited) const; |
233 | |
234 | std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const; |
235 | |
236 | Value *cloneValueWithNewAddressSpace( |
237 | Value *V, unsigned NewAddrSpace, |
238 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
239 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
240 | SmallVectorImpl<const Use *> *PoisonUsesToFix) const; |
241 | unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const; |
242 | |
243 | unsigned getPredicatedAddrSpace(const Value &V, Value *Opnd) const; |
244 | |
245 | public: |
246 | InferAddressSpacesImpl(AssumptionCache &AC, const DominatorTree *DT, |
247 | const TargetTransformInfo *TTI, unsigned FlatAddrSpace) |
248 | : AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {} |
249 | bool run(Function &F); |
250 | }; |
251 | |
252 | } // end anonymous namespace |
253 | |
254 | char InferAddressSpaces::ID = 0; |
255 | |
256 | INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces" , |
257 | false, false) |
258 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) |
259 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) |
260 | INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces" , |
261 | false, false) |
262 | |
263 | static Type *getPtrOrVecOfPtrsWithNewAS(Type *Ty, unsigned NewAddrSpace) { |
264 | assert(Ty->isPtrOrPtrVectorTy()); |
265 | PointerType *NPT = PointerType::get(C&: Ty->getContext(), AddressSpace: NewAddrSpace); |
266 | return Ty->getWithNewType(EltTy: NPT); |
267 | } |
268 | |
// Checks whether this is a no-op pointer bitcast expressed as a pair of
// `ptrtoint`/`inttoptr`, a pattern used because there is no no-op pointer
// bitcast across different address spaces.
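// For example (assuming the target reports the cast below as a no-op and
// pointers are 64 bits wide):
//   %i = ptrtoint float addrspace(1)* %p to i64
//   %q = inttoptr i64 %i to float*
// is treated as if it were `addrspacecast float addrspace(1)* %p to float*`.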
272 | static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL, |
273 | const TargetTransformInfo *TTI) { |
274 | assert(I2P->getOpcode() == Instruction::IntToPtr); |
275 | auto *P2I = dyn_cast<Operator>(Val: I2P->getOperand(i: 0)); |
276 | if (!P2I || P2I->getOpcode() != Instruction::PtrToInt) |
277 | return false; |
  // Check that it is really safe to treat that pair of `ptrtoint`/`inttoptr`
  // as a no-op cast. Besides checking that both casts are no-ops, we also
  // double-check through a target-specific hook, because the reinterpreted
  // pointer may be used in other pointer arithmetic. The hook ensures the
  // underlying target agrees that this is a no-op address space cast and that
  // the pointer bits are preserved.
  // The current IR spec doesn't have clear rules on address space casts,
  // especially on the meaning of pointer bits in non-default address spaces.
  // Dereferencing a pointer after an invalid reinterpret cast would be
  // undefined, and because the meaning of those bits is unclear, pointer
  // arithmetic may also be undefined after an invalid reinterpret cast.
  // However, once the target hook confirms that this is a no-op
  // addrspacecast, none of this matters because the pointer bits are the
  // same.
292 | unsigned P2IOp0AS = P2I->getOperand(i: 0)->getType()->getPointerAddressSpace(); |
293 | unsigned I2PAS = I2P->getType()->getPointerAddressSpace(); |
294 | return CastInst::isNoopCast(Opcode: Instruction::CastOps(I2P->getOpcode()), |
295 | SrcTy: I2P->getOperand(i: 0)->getType(), DstTy: I2P->getType(), |
296 | DL) && |
297 | CastInst::isNoopCast(Opcode: Instruction::CastOps(P2I->getOpcode()), |
298 | SrcTy: P2I->getOperand(i: 0)->getType(), DstTy: P2I->getType(), |
299 | DL) && |
300 | (P2IOp0AS == I2PAS || TTI->isNoopAddrSpaceCast(FromAS: P2IOp0AS, ToAS: I2PAS)); |
301 | } |
302 | |
// Returns true if V is an address expression.
// TODO: Currently, we consider only phi, select, bitcast, addrspacecast,
// getelementptr, and ptrmask operators, as well as no-op ptrtoint/inttoptr
// pairs.
306 | static bool isAddressExpression(const Value &V, const DataLayout &DL, |
307 | const TargetTransformInfo *TTI) { |
308 | const Operator *Op = dyn_cast<Operator>(Val: &V); |
309 | if (!Op) |
310 | return false; |
311 | |
312 | switch (Op->getOpcode()) { |
313 | case Instruction::PHI: |
314 | assert(Op->getType()->isPtrOrPtrVectorTy()); |
315 | return true; |
316 | case Instruction::BitCast: |
317 | case Instruction::AddrSpaceCast: |
318 | case Instruction::GetElementPtr: |
319 | return true; |
320 | case Instruction::Select: |
321 | return Op->getType()->isPtrOrPtrVectorTy(); |
322 | case Instruction::Call: { |
323 | const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &V); |
324 | return II && II->getIntrinsicID() == Intrinsic::ptrmask; |
325 | } |
326 | case Instruction::IntToPtr: |
327 | return isNoopPtrIntCastPair(I2P: Op, DL, TTI); |
328 | default: |
329 | // That value is an address expression if it has an assumed address space. |
330 | return TTI->getAssumedAddrSpace(V: &V) != UninitializedAddressSpace; |
331 | } |
332 | } |
333 | |
334 | // Returns the pointer operands of V. |
335 | // |
336 | // Precondition: V is an address expression. |
337 | static SmallVector<Value *, 2> |
338 | getPointerOperands(const Value &V, const DataLayout &DL, |
339 | const TargetTransformInfo *TTI) { |
340 | const Operator &Op = cast<Operator>(Val: V); |
341 | switch (Op.getOpcode()) { |
342 | case Instruction::PHI: { |
343 | auto IncomingValues = cast<PHINode>(Val: Op).incoming_values(); |
344 | return {IncomingValues.begin(), IncomingValues.end()}; |
345 | } |
346 | case Instruction::BitCast: |
347 | case Instruction::AddrSpaceCast: |
348 | case Instruction::GetElementPtr: |
349 | return {Op.getOperand(i: 0)}; |
350 | case Instruction::Select: |
351 | return {Op.getOperand(i: 1), Op.getOperand(i: 2)}; |
352 | case Instruction::Call: { |
353 | const IntrinsicInst &II = cast<IntrinsicInst>(Val: Op); |
354 | assert(II.getIntrinsicID() == Intrinsic::ptrmask && |
355 | "unexpected intrinsic call" ); |
356 | return {II.getArgOperand(i: 0)}; |
357 | } |
358 | case Instruction::IntToPtr: { |
359 | assert(isNoopPtrIntCastPair(&Op, DL, TTI)); |
360 | auto *P2I = cast<Operator>(Val: Op.getOperand(i: 0)); |
361 | return {P2I->getOperand(i: 0)}; |
362 | } |
363 | default: |
364 | llvm_unreachable("Unexpected instruction type." ); |
365 | } |
366 | } |
367 | |
368 | bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II, |
369 | Value *OldV, |
370 | Value *NewV) const { |
371 | Module *M = II->getParent()->getParent()->getParent(); |
372 | |
373 | switch (II->getIntrinsicID()) { |
374 | case Intrinsic::objectsize: { |
375 | Type *DestTy = II->getType(); |
376 | Type *SrcTy = NewV->getType(); |
377 | Function *NewDecl = |
378 | Intrinsic::getDeclaration(M, id: II->getIntrinsicID(), Tys: {DestTy, SrcTy}); |
379 | II->setArgOperand(i: 0, v: NewV); |
380 | II->setCalledFunction(NewDecl); |
381 | return true; |
382 | } |
383 | case Intrinsic::ptrmask: |
384 | // This is handled as an address expression, not as a use memory operation. |
385 | return false; |
386 | case Intrinsic::masked_gather: { |
387 | Type *RetTy = II->getType(); |
388 | Type *NewPtrTy = NewV->getType(); |
389 | Function *NewDecl = |
390 | Intrinsic::getDeclaration(M, id: II->getIntrinsicID(), Tys: {RetTy, NewPtrTy}); |
391 | II->setArgOperand(i: 0, v: NewV); |
392 | II->setCalledFunction(NewDecl); |
393 | return true; |
394 | } |
395 | case Intrinsic::masked_scatter: { |
396 | Type *ValueTy = II->getOperand(i_nocapture: 0)->getType(); |
397 | Type *NewPtrTy = NewV->getType(); |
398 | Function *NewDecl = |
399 | Intrinsic::getDeclaration(M, id: II->getIntrinsicID(), Tys: {ValueTy, NewPtrTy}); |
400 | II->setArgOperand(i: 1, v: NewV); |
401 | II->setCalledFunction(NewDecl); |
402 | return true; |
403 | } |
404 | default: { |
405 | Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV); |
406 | if (!Rewrite) |
407 | return false; |
408 | if (Rewrite != II) |
409 | II->replaceAllUsesWith(V: Rewrite); |
410 | return true; |
411 | } |
412 | } |
413 | } |
414 | |
415 | void InferAddressSpacesImpl::collectRewritableIntrinsicOperands( |
416 | IntrinsicInst *II, PostorderStackTy &PostorderStack, |
417 | DenseSet<Value *> &Visited) const { |
418 | auto IID = II->getIntrinsicID(); |
419 | switch (IID) { |
420 | case Intrinsic::ptrmask: |
421 | case Intrinsic::objectsize: |
422 | appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 0), |
423 | PostorderStack, Visited); |
424 | break; |
425 | case Intrinsic::masked_gather: |
426 | appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 0), |
427 | PostorderStack, Visited); |
428 | break; |
429 | case Intrinsic::masked_scatter: |
430 | appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: 1), |
431 | PostorderStack, Visited); |
432 | break; |
433 | default: |
434 | SmallVector<int, 2> OpIndexes; |
435 | if (TTI->collectFlatAddressOperands(OpIndexes, IID)) { |
436 | for (int Idx : OpIndexes) { |
437 | appendsFlatAddressExpressionToPostorderStack(V: II->getArgOperand(i: Idx), |
438 | PostorderStack, Visited); |
439 | } |
440 | } |
441 | break; |
442 | } |
443 | } |
444 | |
// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
448 | void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack( |
449 | Value *V, PostorderStackTy &PostorderStack, |
450 | DenseSet<Value *> &Visited) const { |
451 | assert(V->getType()->isPtrOrPtrVectorTy()); |
452 | |
453 | // Generic addressing expressions may be hidden in nested constant |
454 | // expressions. |
455 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V)) { |
456 | // TODO: Look in non-address parts, like icmp operands. |
457 | if (isAddressExpression(V: *CE, DL: *DL, TTI) && Visited.insert(V: CE).second) |
458 | PostorderStack.emplace_back(Args&: CE, Args: false); |
459 | |
460 | return; |
461 | } |
462 | |
463 | if (V->getType()->getPointerAddressSpace() == FlatAddrSpace && |
464 | isAddressExpression(V: *V, DL: *DL, TTI)) { |
465 | if (Visited.insert(V).second) { |
466 | PostorderStack.emplace_back(Args&: V, Args: false); |
467 | |
468 | Operator *Op = cast<Operator>(Val: V); |
469 | for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) { |
470 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: Op->getOperand(i: I))) { |
471 | if (isAddressExpression(V: *CE, DL: *DL, TTI) && Visited.insert(V: CE).second) |
472 | PostorderStack.emplace_back(Args&: CE, Args: false); |
473 | } |
474 | } |
475 | } |
476 | } |
477 | } |
478 | |
479 | // Returns all flat address expressions in function F. The elements are ordered |
480 | // in postorder. |
481 | std::vector<WeakTrackingVH> |
482 | InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const { |
483 | // This function implements a non-recursive postorder traversal of a partial |
484 | // use-def graph of function F. |
485 | PostorderStackTy PostorderStack; |
486 | // The set of visited expressions. |
487 | DenseSet<Value *> Visited; |
488 | |
489 | auto PushPtrOperand = [&](Value *Ptr) { |
490 | appendsFlatAddressExpressionToPostorderStack(V: Ptr, PostorderStack, Visited); |
491 | }; |
492 | |
  // Look at operations that may be interesting to accelerate by moving to a
  // known address space. We aim at generating faster loads and stores, but
  // pure addressing calculations may also be faster.
496 | for (Instruction &I : instructions(F)) { |
497 | if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: &I)) { |
498 | PushPtrOperand(GEP->getPointerOperand()); |
499 | } else if (auto *LI = dyn_cast<LoadInst>(Val: &I)) |
500 | PushPtrOperand(LI->getPointerOperand()); |
501 | else if (auto *SI = dyn_cast<StoreInst>(Val: &I)) |
502 | PushPtrOperand(SI->getPointerOperand()); |
503 | else if (auto *RMW = dyn_cast<AtomicRMWInst>(Val: &I)) |
504 | PushPtrOperand(RMW->getPointerOperand()); |
505 | else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: &I)) |
506 | PushPtrOperand(CmpX->getPointerOperand()); |
507 | else if (auto *MI = dyn_cast<MemIntrinsic>(Val: &I)) { |
508 | // For memset/memcpy/memmove, any pointer operand can be replaced. |
509 | PushPtrOperand(MI->getRawDest()); |
510 | |
511 | // Handle 2nd operand for memcpy/memmove. |
512 | if (auto *MTI = dyn_cast<MemTransferInst>(Val: MI)) |
513 | PushPtrOperand(MTI->getRawSource()); |
514 | } else if (auto *II = dyn_cast<IntrinsicInst>(Val: &I)) |
515 | collectRewritableIntrinsicOperands(II, PostorderStack, Visited); |
516 | else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: &I)) { |
517 | if (Cmp->getOperand(i_nocapture: 0)->getType()->isPtrOrPtrVectorTy()) { |
518 | PushPtrOperand(Cmp->getOperand(i_nocapture: 0)); |
519 | PushPtrOperand(Cmp->getOperand(i_nocapture: 1)); |
520 | } |
521 | } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(Val: &I)) { |
522 | PushPtrOperand(ASC->getPointerOperand()); |
523 | } else if (auto *I2P = dyn_cast<IntToPtrInst>(Val: &I)) { |
524 | if (isNoopPtrIntCastPair(I2P: cast<Operator>(Val: I2P), DL: *DL, TTI)) |
525 | PushPtrOperand(cast<Operator>(Val: I2P->getOperand(i_nocapture: 0))->getOperand(i: 0)); |
526 | } else if (auto *RI = dyn_cast<ReturnInst>(Val: &I)) { |
527 | if (auto *RV = RI->getReturnValue(); |
528 | RV && RV->getType()->isPtrOrPtrVectorTy()) |
529 | PushPtrOperand(RV); |
530 | } |
531 | } |
532 | |
533 | std::vector<WeakTrackingVH> Postorder; // The resultant postorder. |
534 | while (!PostorderStack.empty()) { |
535 | Value *TopVal = PostorderStack.back().getPointer(); |
    // If the operands of the expression on top of the stack have already been
    // explored, adds that expression to the resultant postorder.
538 | if (PostorderStack.back().getInt()) { |
539 | if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace) |
540 | Postorder.push_back(x: TopVal); |
541 | PostorderStack.pop_back(); |
542 | continue; |
543 | } |
544 | // Otherwise, adds its operands to the stack and explores them. |
545 | PostorderStack.back().setInt(true); |
546 | // Skip values with an assumed address space. |
547 | if (TTI->getAssumedAddrSpace(V: TopVal) == UninitializedAddressSpace) { |
548 | for (Value *PtrOperand : getPointerOperands(V: *TopVal, DL: *DL, TTI)) { |
549 | appendsFlatAddressExpressionToPostorderStack(V: PtrOperand, PostorderStack, |
550 | Visited); |
551 | } |
552 | } |
553 | } |
554 | return Postorder; |
555 | } |
556 | |
557 | // A helper function for cloneInstructionWithNewAddressSpace. Returns the clone |
558 | // of OperandUse.get() in the new address space. If the clone is not ready yet, |
559 | // returns poison in the new address space as a placeholder. |
560 | static Value *operandWithNewAddressSpaceOrCreatePoison( |
561 | const Use &OperandUse, unsigned NewAddrSpace, |
562 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
563 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
564 | SmallVectorImpl<const Use *> *PoisonUsesToFix) { |
565 | Value *Operand = OperandUse.get(); |
566 | |
567 | Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: Operand->getType(), NewAddrSpace); |
568 | |
569 | if (Constant *C = dyn_cast<Constant>(Val: Operand)) |
570 | return ConstantExpr::getAddrSpaceCast(C, Ty: NewPtrTy); |
571 | |
572 | if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: Operand)) |
573 | return NewOperand; |
574 | |
575 | Instruction *Inst = cast<Instruction>(Val: OperandUse.getUser()); |
576 | auto I = PredicatedAS.find(Val: std::make_pair(x&: Inst, y&: Operand)); |
577 | if (I != PredicatedAS.end()) { |
578 | // Insert an addrspacecast on that operand before the user. |
579 | unsigned NewAS = I->second; |
580 | Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: Operand->getType(), NewAddrSpace: NewAS); |
581 | auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy); |
582 | NewI->insertBefore(InsertPos: Inst); |
583 | NewI->setDebugLoc(Inst->getDebugLoc()); |
584 | return NewI; |
585 | } |
586 | |
587 | PoisonUsesToFix->push_back(Elt: &OperandUse); |
588 | return PoisonValue::get(T: NewPtrTy); |
589 | } |
590 | |
591 | // Returns a clone of `I` with its operands converted to those specified in |
592 | // ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an |
593 | // operand whose address space needs to be modified might not exist in |
// ValueWithNewAddrSpace. In that case, this function uses poison as a
// placeholder operand and adds that operand use to PoisonUsesToFix so that the
// caller can fix it later.
596 | // |
597 | // Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast |
598 | // from a pointer whose type already matches. Therefore, this function returns a |
599 | // Value* instead of an Instruction*. |
600 | // |
// This may also return nullptr if the instruction could not be rewritten.
603 | Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace( |
604 | Instruction *I, unsigned NewAddrSpace, |
605 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
606 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
607 | SmallVectorImpl<const Use *> *PoisonUsesToFix) const { |
608 | Type *NewPtrType = getPtrOrVecOfPtrsWithNewAS(Ty: I->getType(), NewAddrSpace); |
609 | |
610 | if (I->getOpcode() == Instruction::AddrSpaceCast) { |
611 | Value *Src = I->getOperand(i: 0); |
612 | // Because `I` is flat, the source address space must be specific. |
613 | // Therefore, the inferred address space must be the source space, according |
614 | // to our algorithm. |
615 | assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace); |
616 | if (Src->getType() != NewPtrType) |
617 | return new BitCastInst(Src, NewPtrType); |
618 | return Src; |
619 | } |
620 | |
621 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
622 | // Technically the intrinsic ID is a pointer typed argument, so specially |
623 | // handle calls early. |
624 | assert(II->getIntrinsicID() == Intrinsic::ptrmask); |
625 | Value *NewPtr = operandWithNewAddressSpaceOrCreatePoison( |
626 | OperandUse: II->getArgOperandUse(i: 0), NewAddrSpace, ValueWithNewAddrSpace, |
627 | PredicatedAS, PoisonUsesToFix); |
628 | Value *Rewrite = |
629 | TTI->rewriteIntrinsicWithAddressSpace(II, OldV: II->getArgOperand(i: 0), NewV: NewPtr); |
630 | if (Rewrite) { |
631 | assert(Rewrite != II && "cannot modify this pointer operation in place" ); |
632 | return Rewrite; |
633 | } |
634 | |
635 | return nullptr; |
636 | } |
637 | |
638 | unsigned AS = TTI->getAssumedAddrSpace(V: I); |
639 | if (AS != UninitializedAddressSpace) { |
640 | // For the assumed address space, insert an `addrspacecast` to make that |
641 | // explicit. |
642 | Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Ty: I->getType(), NewAddrSpace: AS); |
643 | auto *NewI = new AddrSpaceCastInst(I, NewPtrTy); |
644 | NewI->insertAfter(InsertPos: I); |
645 | NewI->setDebugLoc(I->getDebugLoc()); |
646 | return NewI; |
647 | } |
648 | |
649 | // Computes the converted pointer operands. |
650 | SmallVector<Value *, 4> NewPointerOperands; |
651 | for (const Use &OperandUse : I->operands()) { |
652 | if (!OperandUse.get()->getType()->isPtrOrPtrVectorTy()) |
653 | NewPointerOperands.push_back(Elt: nullptr); |
654 | else |
655 | NewPointerOperands.push_back(Elt: operandWithNewAddressSpaceOrCreatePoison( |
656 | OperandUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, |
657 | PoisonUsesToFix)); |
658 | } |
659 | |
660 | switch (I->getOpcode()) { |
661 | case Instruction::BitCast: |
662 | return new BitCastInst(NewPointerOperands[0], NewPtrType); |
663 | case Instruction::PHI: { |
664 | assert(I->getType()->isPtrOrPtrVectorTy()); |
665 | PHINode *PHI = cast<PHINode>(Val: I); |
666 | PHINode *NewPHI = PHINode::Create(Ty: NewPtrType, NumReservedValues: PHI->getNumIncomingValues()); |
667 | for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) { |
668 | unsigned OperandNo = PHINode::getOperandNumForIncomingValue(i: Index); |
669 | NewPHI->addIncoming(V: NewPointerOperands[OperandNo], |
670 | BB: PHI->getIncomingBlock(i: Index)); |
671 | } |
672 | return NewPHI; |
673 | } |
674 | case Instruction::GetElementPtr: { |
675 | GetElementPtrInst *GEP = cast<GetElementPtrInst>(Val: I); |
676 | GetElementPtrInst *NewGEP = GetElementPtrInst::Create( |
677 | PointeeType: GEP->getSourceElementType(), Ptr: NewPointerOperands[0], |
678 | IdxList: SmallVector<Value *, 4>(GEP->indices())); |
679 | NewGEP->setIsInBounds(GEP->isInBounds()); |
680 | return NewGEP; |
681 | } |
682 | case Instruction::Select: |
683 | assert(I->getType()->isPtrOrPtrVectorTy()); |
684 | return SelectInst::Create(C: I->getOperand(i: 0), S1: NewPointerOperands[1], |
685 | S2: NewPointerOperands[2], NameStr: "" , InsertBefore: nullptr, MDFrom: I); |
686 | case Instruction::IntToPtr: { |
687 | assert(isNoopPtrIntCastPair(cast<Operator>(I), *DL, TTI)); |
688 | Value *Src = cast<Operator>(Val: I->getOperand(i: 0))->getOperand(i: 0); |
689 | if (Src->getType() == NewPtrType) |
690 | return Src; |
691 | |
    // If we had a no-op inttoptr/ptrtoint pair, we may still have inferred a
    // source address space from a generic pointer source and need to insert a
    // cast back.
695 | return CastInst::CreatePointerBitCastOrAddrSpaceCast(S: Src, Ty: NewPtrType); |
696 | } |
697 | default: |
698 | llvm_unreachable("Unexpected opcode" ); |
699 | } |
700 | } |
701 | |
702 | // Similar to cloneInstructionWithNewAddressSpace, returns a clone of the |
703 | // constant expression `CE` with its operands replaced as specified in |
704 | // ValueWithNewAddrSpace. |
705 | static Value *cloneConstantExprWithNewAddressSpace( |
706 | ConstantExpr *CE, unsigned NewAddrSpace, |
707 | const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL, |
708 | const TargetTransformInfo *TTI) { |
709 | Type *TargetType = |
710 | CE->getType()->isPtrOrPtrVectorTy() |
711 | ? getPtrOrVecOfPtrsWithNewAS(Ty: CE->getType(), NewAddrSpace) |
712 | : CE->getType(); |
713 | |
714 | if (CE->getOpcode() == Instruction::AddrSpaceCast) { |
715 | // Because CE is flat, the source address space must be specific. |
716 | // Therefore, the inferred address space must be the source space according |
717 | // to our algorithm. |
718 | assert(CE->getOperand(0)->getType()->getPointerAddressSpace() == |
719 | NewAddrSpace); |
720 | return ConstantExpr::getBitCast(C: CE->getOperand(i_nocapture: 0), Ty: TargetType); |
721 | } |
722 | |
723 | if (CE->getOpcode() == Instruction::BitCast) { |
724 | if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: CE->getOperand(i_nocapture: 0))) |
725 | return ConstantExpr::getBitCast(C: cast<Constant>(Val: NewOperand), Ty: TargetType); |
726 | return ConstantExpr::getAddrSpaceCast(C: CE, Ty: TargetType); |
727 | } |
728 | |
729 | if (CE->getOpcode() == Instruction::IntToPtr) { |
730 | assert(isNoopPtrIntCastPair(cast<Operator>(CE), *DL, TTI)); |
731 | Constant *Src = cast<ConstantExpr>(Val: CE->getOperand(i_nocapture: 0))->getOperand(i_nocapture: 0); |
732 | assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace); |
733 | return ConstantExpr::getBitCast(C: Src, Ty: TargetType); |
734 | } |
735 | |
736 | // Computes the operands of the new constant expression. |
737 | bool IsNew = false; |
738 | SmallVector<Constant *, 4> NewOperands; |
739 | for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) { |
740 | Constant *Operand = CE->getOperand(i_nocapture: Index); |
741 | // If the address space of `Operand` needs to be modified, the new operand |
742 | // with the new address space should already be in ValueWithNewAddrSpace |
743 | // because (1) the constant expressions we consider (i.e. addrspacecast, |
744 | // bitcast, and getelementptr) do not incur cycles in the data flow graph |
745 | // and (2) this function is called on constant expressions in postorder. |
746 | if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Val: Operand)) { |
747 | IsNew = true; |
748 | NewOperands.push_back(Elt: cast<Constant>(Val: NewOperand)); |
749 | continue; |
750 | } |
751 | if (auto *CExpr = dyn_cast<ConstantExpr>(Val: Operand)) |
752 | if (Value *NewOperand = cloneConstantExprWithNewAddressSpace( |
753 | CE: CExpr, NewAddrSpace, ValueWithNewAddrSpace, DL, TTI)) { |
754 | IsNew = true; |
755 | NewOperands.push_back(Elt: cast<Constant>(Val: NewOperand)); |
756 | continue; |
757 | } |
758 | // Otherwise, reuses the old operand. |
759 | NewOperands.push_back(Elt: Operand); |
760 | } |
761 | |
  // If !IsNew, we will replace the Value with itself. However, replaced values
  // are assumed to be wrapped in an addrspacecast later, so drop it now.
764 | if (!IsNew) |
765 | return nullptr; |
766 | |
767 | if (CE->getOpcode() == Instruction::GetElementPtr) { |
768 | // Needs to specify the source type while constructing a getelementptr |
769 | // constant expression. |
770 | return CE->getWithOperands(Ops: NewOperands, Ty: TargetType, /*OnlyIfReduced=*/false, |
771 | SrcTy: cast<GEPOperator>(Val: CE)->getSourceElementType()); |
772 | } |
773 | |
774 | return CE->getWithOperands(Ops: NewOperands, Ty: TargetType); |
775 | } |
776 | |
777 | // Returns a clone of the value `V`, with its operands replaced as specified in |
778 | // ValueWithNewAddrSpace. This function is called on every flat address |
779 | // expression whose address space needs to be modified, in postorder. |
780 | // |
781 | // See cloneInstructionWithNewAddressSpace for the meaning of PoisonUsesToFix. |
782 | Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace( |
783 | Value *V, unsigned NewAddrSpace, |
784 | const ValueToValueMapTy &ValueWithNewAddrSpace, |
785 | const PredicatedAddrSpaceMapTy &PredicatedAS, |
786 | SmallVectorImpl<const Use *> *PoisonUsesToFix) const { |
787 | // All values in Postorder are flat address expressions. |
788 | assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace && |
789 | isAddressExpression(*V, *DL, TTI)); |
790 | |
791 | if (Instruction *I = dyn_cast<Instruction>(Val: V)) { |
792 | Value *NewV = cloneInstructionWithNewAddressSpace( |
793 | I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix); |
794 | if (Instruction *NewI = dyn_cast_or_null<Instruction>(Val: NewV)) { |
795 | if (NewI->getParent() == nullptr) { |
796 | NewI->insertBefore(InsertPos: I); |
797 | NewI->takeName(V: I); |
798 | NewI->setDebugLoc(I->getDebugLoc()); |
799 | } |
800 | } |
801 | return NewV; |
802 | } |
803 | |
804 | return cloneConstantExprWithNewAddressSpace( |
805 | CE: cast<ConstantExpr>(Val: V), NewAddrSpace, ValueWithNewAddrSpace, DL, TTI); |
806 | } |
807 | |
808 | // Defines the join operation on the address space lattice (see the file header |
809 | // comments). |
810 | unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1, |
811 | unsigned AS2) const { |
812 | if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace) |
813 | return FlatAddrSpace; |
814 | |
815 | if (AS1 == UninitializedAddressSpace) |
816 | return AS2; |
817 | if (AS2 == UninitializedAddressSpace) |
818 | return AS1; |
819 | |
820 | // The join of two different specific address spaces is flat. |
821 | return (AS1 == AS2) ? AS1 : FlatAddrSpace; |
822 | } |
823 | |
824 | bool InferAddressSpacesImpl::run(Function &F) { |
825 | DL = &F.getDataLayout(); |
826 | |
827 | if (AssumeDefaultIsFlatAddressSpace) |
828 | FlatAddrSpace = 0; |
829 | |
830 | if (FlatAddrSpace == UninitializedAddressSpace) { |
831 | FlatAddrSpace = TTI->getFlatAddressSpace(); |
832 | if (FlatAddrSpace == UninitializedAddressSpace) |
833 | return false; |
834 | } |
835 | |
836 | // Collects all flat address expressions in postorder. |
837 | std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F); |
838 | |
839 | // Runs a data-flow analysis to refine the address spaces of every expression |
840 | // in Postorder. |
841 | ValueToAddrSpaceMapTy InferredAddrSpace; |
842 | PredicatedAddrSpaceMapTy PredicatedAS; |
843 | inferAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS); |
844 | |
  // Changes the address spaces of the flat address expressions that are
  // inferred to point to a specific address space.
847 | return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS, |
848 | F: &F); |
849 | } |
850 | |
851 | // Constants need to be tracked through RAUW to handle cases with nested |
852 | // constant expressions, so wrap values in WeakTrackingVH. |
853 | void InferAddressSpacesImpl::inferAddressSpaces( |
854 | ArrayRef<WeakTrackingVH> Postorder, |
855 | ValueToAddrSpaceMapTy &InferredAddrSpace, |
856 | PredicatedAddrSpaceMapTy &PredicatedAS) const { |
857 | SetVector<Value *> Worklist(Postorder.begin(), Postorder.end()); |
858 | // Initially, all expressions are in the uninitialized address space. |
859 | for (Value *V : Postorder) |
860 | InferredAddrSpace[V] = UninitializedAddressSpace; |
861 | |
862 | while (!Worklist.empty()) { |
863 | Value *V = Worklist.pop_back_val(); |
864 | |
865 | // Try to update the address space of the stack top according to the |
866 | // address spaces of its operands. |
867 | if (!updateAddressSpace(V: *V, InferredAddrSpace, PredicatedAS)) |
868 | continue; |
869 | |
870 | for (Value *User : V->users()) { |
871 | // Skip if User is already in the worklist. |
872 | if (Worklist.count(key: User)) |
873 | continue; |
874 | |
875 | auto Pos = InferredAddrSpace.find(Val: User); |
876 | // Our algorithm only updates the address spaces of flat address |
877 | // expressions, which are those in InferredAddrSpace. |
878 | if (Pos == InferredAddrSpace.end()) |
879 | continue; |
880 | |
881 | // Function updateAddressSpace moves the address space down a lattice |
882 | // path. Therefore, nothing to do if User is already inferred as flat (the |
883 | // bottom element in the lattice). |
884 | if (Pos->second == FlatAddrSpace) |
885 | continue; |
886 | |
887 | Worklist.insert(X: User); |
888 | } |
889 | } |
890 | } |
891 | |
892 | unsigned InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &V, |
893 | Value *Opnd) const { |
894 | const Instruction *I = dyn_cast<Instruction>(Val: &V); |
895 | if (!I) |
896 | return UninitializedAddressSpace; |
897 | |
898 | Opnd = Opnd->stripInBoundsOffsets(); |
899 | for (auto &AssumeVH : AC.assumptionsFor(V: Opnd)) { |
900 | if (!AssumeVH) |
901 | continue; |
902 | CallInst *CI = cast<CallInst>(Val&: AssumeVH); |
903 | if (!isValidAssumeForContext(I: CI, CxtI: I, DT)) |
904 | continue; |
905 | |
906 | const Value *Ptr; |
907 | unsigned AS; |
908 | std::tie(args&: Ptr, args&: AS) = TTI->getPredicatedAddrSpace(V: CI->getArgOperand(i: 0)); |
909 | if (Ptr) |
910 | return AS; |
911 | } |
912 | |
913 | return UninitializedAddressSpace; |
914 | } |
915 | |
916 | bool InferAddressSpacesImpl::updateAddressSpace( |
917 | const Value &V, ValueToAddrSpaceMapTy &InferredAddrSpace, |
918 | PredicatedAddrSpaceMapTy &PredicatedAS) const { |
919 | assert(InferredAddrSpace.count(&V)); |
920 | |
921 | LLVM_DEBUG(dbgs() << "Updating the address space of\n " << V << '\n'); |
922 | |
923 | // The new inferred address space equals the join of the address spaces |
924 | // of all its pointer operands. |
925 | unsigned NewAS = UninitializedAddressSpace; |
926 | |
927 | const Operator &Op = cast<Operator>(Val: V); |
928 | if (Op.getOpcode() == Instruction::Select) { |
929 | Value *Src0 = Op.getOperand(i: 1); |
930 | Value *Src1 = Op.getOperand(i: 2); |
931 | |
932 | auto I = InferredAddrSpace.find(Val: Src0); |
933 | unsigned Src0AS = (I != InferredAddrSpace.end()) |
934 | ? I->second |
935 | : Src0->getType()->getPointerAddressSpace(); |
936 | |
937 | auto J = InferredAddrSpace.find(Val: Src1); |
938 | unsigned Src1AS = (J != InferredAddrSpace.end()) |
939 | ? J->second |
940 | : Src1->getType()->getPointerAddressSpace(); |
941 | |
942 | auto *C0 = dyn_cast<Constant>(Val: Src0); |
943 | auto *C1 = dyn_cast<Constant>(Val: Src1); |
944 | |
945 | // If one of the inputs is a constant, we may be able to do a constant |
946 | // addrspacecast of it. Defer inferring the address space until the input |
947 | // address space is known. |
948 | if ((C1 && Src0AS == UninitializedAddressSpace) || |
949 | (C0 && Src1AS == UninitializedAddressSpace)) |
950 | return false; |
951 | |
952 | if (C0 && isSafeToCastConstAddrSpace(C: C0, NewAS: Src1AS)) |
953 | NewAS = Src1AS; |
954 | else if (C1 && isSafeToCastConstAddrSpace(C: C1, NewAS: Src0AS)) |
955 | NewAS = Src0AS; |
956 | else |
957 | NewAS = joinAddressSpaces(AS1: Src0AS, AS2: Src1AS); |
958 | } else { |
959 | unsigned AS = TTI->getAssumedAddrSpace(V: &V); |
960 | if (AS != UninitializedAddressSpace) { |
961 | // Use the assumed address space directly. |
962 | NewAS = AS; |
963 | } else { |
964 | // Otherwise, infer the address space from its pointer operands. |
965 | for (Value *PtrOperand : getPointerOperands(V, DL: *DL, TTI)) { |
966 | auto I = InferredAddrSpace.find(Val: PtrOperand); |
967 | unsigned OperandAS; |
968 | if (I == InferredAddrSpace.end()) { |
969 | OperandAS = PtrOperand->getType()->getPointerAddressSpace(); |
970 | if (OperandAS == FlatAddrSpace) { |
971 | // Check AC for assumption dominating V. |
972 | unsigned AS = getPredicatedAddrSpace(V, Opnd: PtrOperand); |
973 | if (AS != UninitializedAddressSpace) { |
974 | LLVM_DEBUG(dbgs() |
975 | << " deduce operand AS from the predicate addrspace " |
976 | << AS << '\n'); |
977 | OperandAS = AS; |
978 | // Record this use with the predicated AS. |
979 | PredicatedAS[std::make_pair(x: &V, y&: PtrOperand)] = OperandAS; |
980 | } |
981 | } |
982 | } else |
983 | OperandAS = I->second; |
984 | |
985 | // join(flat, *) = flat. So we can break if NewAS is already flat. |
986 | NewAS = joinAddressSpaces(AS1: NewAS, AS2: OperandAS); |
987 | if (NewAS == FlatAddrSpace) |
988 | break; |
989 | } |
990 | } |
991 | } |
992 | |
993 | unsigned OldAS = InferredAddrSpace.lookup(Val: &V); |
994 | assert(OldAS != FlatAddrSpace); |
995 | if (OldAS == NewAS) |
996 | return false; |
997 | |
  // An update was made, so the caller will add the users of V to the worklist
  // because their address spaces may also need to be updated.
1000 | LLVM_DEBUG(dbgs() << " to " << NewAS << '\n'); |
1001 | InferredAddrSpace[&V] = NewAS; |
1002 | return true; |
1003 | } |
1004 | |
/// \returns true if \p U is the pointer operand of a memory instruction with
1006 | /// a single pointer operand that can have its address space changed by simply |
1007 | /// mutating the use to a new value. If the memory instruction is volatile, |
1008 | /// return true only if the target allows the memory instruction to be volatile |
1009 | /// in the new address space. |
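/// For example, the pointer operand of `store float %v, float* %p` can simply
/// be swapped to the new specific-address-space pointer without re-creating
/// the store.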
1010 | static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI, |
1011 | Use &U, unsigned AddrSpace) { |
1012 | User *Inst = U.getUser(); |
1013 | unsigned OpNo = U.getOperandNo(); |
1014 | bool VolatileIsAllowed = false; |
1015 | if (auto *I = dyn_cast<Instruction>(Val: Inst)) |
1016 | VolatileIsAllowed = TTI.hasVolatileVariant(I, AddrSpace); |
1017 | |
1018 | if (auto *LI = dyn_cast<LoadInst>(Val: Inst)) |
1019 | return OpNo == LoadInst::getPointerOperandIndex() && |
1020 | (VolatileIsAllowed || !LI->isVolatile()); |
1021 | |
1022 | if (auto *SI = dyn_cast<StoreInst>(Val: Inst)) |
1023 | return OpNo == StoreInst::getPointerOperandIndex() && |
1024 | (VolatileIsAllowed || !SI->isVolatile()); |
1025 | |
1026 | if (auto *RMW = dyn_cast<AtomicRMWInst>(Val: Inst)) |
1027 | return OpNo == AtomicRMWInst::getPointerOperandIndex() && |
1028 | (VolatileIsAllowed || !RMW->isVolatile()); |
1029 | |
1030 | if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: Inst)) |
1031 | return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() && |
1032 | (VolatileIsAllowed || !CmpX->isVolatile()); |
1033 | |
1034 | return false; |
1035 | } |
1036 | |
1037 | /// Update memory intrinsic uses that require more complex processing than |
1038 | /// simple memory instructions. These require re-mangling and may have multiple |
1039 | /// pointer operands. |
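/// For example (a sketch; the exact mangled name depends on the pointer types
/// involved), a memcpy whose destination was inferred to be in addrspace(3) is
/// rebuilt roughly as follows:
///   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 false)
/// becomes
///   call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) %dst.new, ptr %src,
///                                    i64 %n, i1 false)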
1040 | static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV, |
1041 | Value *NewV) { |
1042 | IRBuilder<> B(MI); |
1043 | MDNode *TBAA = MI->getMetadata(KindID: LLVMContext::MD_tbaa); |
1044 | MDNode *ScopeMD = MI->getMetadata(KindID: LLVMContext::MD_alias_scope); |
1045 | MDNode *NoAliasMD = MI->getMetadata(KindID: LLVMContext::MD_noalias); |
1046 | |
1047 | if (auto *MSI = dyn_cast<MemSetInst>(Val: MI)) { |
1048 | B.CreateMemSet(Ptr: NewV, Val: MSI->getValue(), Size: MSI->getLength(), Align: MSI->getDestAlign(), |
1049 | isVolatile: false, // isVolatile |
1050 | TBAATag: TBAA, ScopeTag: ScopeMD, NoAliasTag: NoAliasMD); |
1051 | } else if (auto *MTI = dyn_cast<MemTransferInst>(Val: MI)) { |
1052 | Value *Src = MTI->getRawSource(); |
1053 | Value *Dest = MTI->getRawDest(); |
1054 | |
1055 | // Be careful in case this is a self-to-self copy. |
1056 | if (Src == OldV) |
1057 | Src = NewV; |
1058 | |
1059 | if (Dest == OldV) |
1060 | Dest = NewV; |
1061 | |
1062 | if (isa<MemCpyInlineInst>(Val: MTI)) { |
1063 | MDNode *TBAAStruct = MTI->getMetadata(KindID: LLVMContext::MD_tbaa_struct); |
1064 | B.CreateMemCpyInline(Dst: Dest, DstAlign: MTI->getDestAlign(), Src, |
1065 | SrcAlign: MTI->getSourceAlign(), Size: MTI->getLength(), |
1066 | isVolatile: false, // isVolatile |
1067 | TBAATag: TBAA, TBAAStructTag: TBAAStruct, ScopeTag: ScopeMD, NoAliasTag: NoAliasMD); |
1068 | } else if (isa<MemCpyInst>(Val: MTI)) { |
1069 | MDNode *TBAAStruct = MTI->getMetadata(KindID: LLVMContext::MD_tbaa_struct); |
1070 | B.CreateMemCpy(Dst: Dest, DstAlign: MTI->getDestAlign(), Src, SrcAlign: MTI->getSourceAlign(), |
1071 | Size: MTI->getLength(), |
1072 | isVolatile: false, // isVolatile |
1073 | TBAATag: TBAA, TBAAStructTag: TBAAStruct, ScopeTag: ScopeMD, NoAliasTag: NoAliasMD); |
1074 | } else { |
1075 | assert(isa<MemMoveInst>(MTI)); |
1076 | B.CreateMemMove(Dst: Dest, DstAlign: MTI->getDestAlign(), Src, SrcAlign: MTI->getSourceAlign(), |
1077 | Size: MTI->getLength(), |
1078 | isVolatile: false, // isVolatile |
1079 | TBAATag: TBAA, ScopeTag: ScopeMD, NoAliasTag: NoAliasMD); |
1080 | } |
1081 | } else |
1082 | llvm_unreachable("unhandled MemIntrinsic" ); |
1083 | |
1084 | MI->eraseFromParent(); |
1085 | return true; |
1086 | } |
1087 | |
// \returns true if it is OK to change the address space of constant \p C with
1089 | // a ConstantExpr addrspacecast. |
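// For example, undef can be cast freely, a null pointer can be cast between
// the flat and a specific address space, and an existing constant
// addrspacecast is checked against its source operand.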
1090 | bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C, |
1091 | unsigned NewAS) const { |
1092 | assert(NewAS != UninitializedAddressSpace); |
1093 | |
1094 | unsigned SrcAS = C->getType()->getPointerAddressSpace(); |
1095 | if (SrcAS == NewAS || isa<UndefValue>(Val: C)) |
1096 | return true; |
1097 | |
1098 | // Prevent illegal casts between different non-flat address spaces. |
1099 | if (SrcAS != FlatAddrSpace && NewAS != FlatAddrSpace) |
1100 | return false; |
1101 | |
1102 | if (isa<ConstantPointerNull>(Val: C)) |
1103 | return true; |
1104 | |
1105 | if (auto *Op = dyn_cast<Operator>(Val: C)) { |
1106 | // If we already have a constant addrspacecast, it should be safe to cast it |
1107 | // off. |
1108 | if (Op->getOpcode() == Instruction::AddrSpaceCast) |
1109 | return isSafeToCastConstAddrSpace(C: cast<Constant>(Val: Op->getOperand(i: 0)), |
1110 | NewAS); |
1111 | |
1112 | if (Op->getOpcode() == Instruction::IntToPtr && |
1113 | Op->getType()->getPointerAddressSpace() == FlatAddrSpace) |
1114 | return true; |
1115 | } |
1116 | |
1117 | return false; |
1118 | } |
1119 | |
1120 | static Value::use_iterator skipToNextUser(Value::use_iterator I, |
1121 | Value::use_iterator End) { |
1122 | User *CurUser = I->getUser(); |
1123 | ++I; |
1124 | |
1125 | while (I != End && I->getUser() == CurUser) |
1126 | ++I; |
1127 | |
1128 | return I; |
1129 | } |
1130 | |
1131 | bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces( |
1132 | ArrayRef<WeakTrackingVH> Postorder, |
1133 | const ValueToAddrSpaceMapTy &InferredAddrSpace, |
1134 | const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const { |
1135 | // For each address expression to be modified, creates a clone of it with its |
1136 | // pointer operands converted to the new address space. Since the pointer |
1137 | // operands are converted, the clone is naturally in the new address space by |
1138 | // construction. |
1139 | ValueToValueMapTy ValueWithNewAddrSpace; |
1140 | SmallVector<const Use *, 32> PoisonUsesToFix; |
1141 | for (Value *V : Postorder) { |
1142 | unsigned NewAddrSpace = InferredAddrSpace.lookup(Val: V); |
1143 | |
1144 | // In some degenerate cases (e.g. invalid IR in unreachable code), we may |
1145 | // not even infer the value to have its original address space. |
1146 | if (NewAddrSpace == UninitializedAddressSpace) |
1147 | continue; |
1148 | |
1149 | if (V->getType()->getPointerAddressSpace() != NewAddrSpace) { |
1150 | Value *New = |
1151 | cloneValueWithNewAddressSpace(V, NewAddrSpace, ValueWithNewAddrSpace, |
1152 | PredicatedAS, PoisonUsesToFix: &PoisonUsesToFix); |
1153 | if (New) |
1154 | ValueWithNewAddrSpace[V] = New; |
1155 | } |
1156 | } |
1157 | |
1158 | if (ValueWithNewAddrSpace.empty()) |
1159 | return false; |
1160 | |
1161 | // Fixes all the poison uses generated by cloneInstructionWithNewAddressSpace. |
1162 | for (const Use *PoisonUse : PoisonUsesToFix) { |
1163 | User *V = PoisonUse->getUser(); |
1164 | User *NewV = cast_or_null<User>(Val: ValueWithNewAddrSpace.lookup(Val: V)); |
1165 | if (!NewV) |
1166 | continue; |
1167 | |
1168 | unsigned OperandNo = PoisonUse->getOperandNo(); |
1169 | assert(isa<PoisonValue>(NewV->getOperand(OperandNo))); |
1170 | NewV->setOperand(i: OperandNo, Val: ValueWithNewAddrSpace.lookup(Val: PoisonUse->get())); |
1171 | } |
1172 | |
1173 | SmallVector<Instruction *, 16> DeadInstructions; |
1174 | ValueToValueMapTy VMap; |
1175 | ValueMapper VMapper(VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); |
1176 | |
1177 | // Replaces the uses of the old address expressions with the new ones. |
1178 | for (const WeakTrackingVH &WVH : Postorder) { |
1179 | assert(WVH && "value was unexpectedly deleted" ); |
1180 | Value *V = WVH; |
1181 | Value *NewV = ValueWithNewAddrSpace.lookup(Val: V); |
1182 | if (NewV == nullptr) |
1183 | continue; |
1184 | |
1185 | LLVM_DEBUG(dbgs() << "Replacing the uses of " << *V << "\n with\n " |
1186 | << *NewV << '\n'); |
1187 | |
1188 | if (Constant *C = dyn_cast<Constant>(Val: V)) { |
1189 | Constant *Replace = |
1190 | ConstantExpr::getAddrSpaceCast(C: cast<Constant>(Val: NewV), Ty: C->getType()); |
1191 | if (C != Replace) { |
1192 | LLVM_DEBUG(dbgs() << "Inserting replacement const cast: " << Replace |
1193 | << ": " << *Replace << '\n'); |
1194 | SmallVector<User *, 16> WorkList; |
1195 | for (User *U : make_early_inc_range(Range: C->users())) { |
1196 | if (auto *I = dyn_cast<Instruction>(Val: U)) { |
1197 | if (I->getFunction() == F) |
1198 | I->replaceUsesOfWith(From: C, To: Replace); |
1199 | } else { |
1200 | WorkList.append(in_start: U->user_begin(), in_end: U->user_end()); |
1201 | } |
1202 | } |
1203 | if (!WorkList.empty()) { |
1204 | VMap[C] = Replace; |
1205 | DenseSet<User *> Visited{WorkList.begin(), WorkList.end()}; |
1206 | while (!WorkList.empty()) { |
1207 | User *U = WorkList.pop_back_val(); |
1208 | if (auto *I = dyn_cast<Instruction>(Val: U)) { |
1209 | if (I->getFunction() == F) |
1210 | VMapper.remapInstruction(I&: *I); |
1211 | continue; |
1212 | } |
1213 | for (User *U2 : U->users()) |
1214 | if (Visited.insert(V: U2).second) |
1215 | WorkList.push_back(Elt: U2); |
1216 | } |
1217 | } |
1218 | V = Replace; |
1219 | } |
1220 | } |
1221 | |
1222 | Value::use_iterator I, E, Next; |
1223 | for (I = V->use_begin(), E = V->use_end(); I != E;) { |
1224 | Use &U = *I; |
1225 | User *CurUser = U.getUser(); |
1226 | |
1227 | // Some users may see the same pointer operand in multiple operands. Skip |
1228 | // to the next instruction. |
1229 | I = skipToNextUser(I, End: E); |
1230 | |
1231 | if (isSimplePointerUseValidToReplace( |
1232 | TTI: *TTI, U, AddrSpace: V->getType()->getPointerAddressSpace())) { |
1233 | // If V is used as the pointer operand of a compatible memory operation, |
1234 | // sets the pointer operand to NewV. This replacement does not change |
1235 | // the element type, so the resultant load/store is still valid. |
1236 | U.set(NewV); |
1237 | continue; |
1238 | } |
1239 | |
1240 | // Skip if the current user is the new value itself. |
1241 | if (CurUser == NewV) |
1242 | continue; |
1243 | |
1244 | if (auto *CurUserI = dyn_cast<Instruction>(Val: CurUser); |
1245 | CurUserI && CurUserI->getFunction() != F) |
1246 | continue; |
1247 | |
      // Handle more complex cases like intrinsics that need to be remangled.
1249 | if (auto *MI = dyn_cast<MemIntrinsic>(Val: CurUser)) { |
1250 | if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, OldV: V, NewV)) |
1251 | continue; |
1252 | } |
1253 | |
1254 | if (auto *II = dyn_cast<IntrinsicInst>(Val: CurUser)) { |
1255 | if (rewriteIntrinsicOperands(II, OldV: V, NewV)) |
1256 | continue; |
1257 | } |
1258 | |
1259 | if (isa<Instruction>(Val: CurUser)) { |
1260 | if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Val: CurUser)) { |
1261 | // If we can infer that both pointers are in the same addrspace, |
1262 | // transform e.g. |
1263 | // %cmp = icmp eq float* %p, %q |
1264 | // into |
1265 | // %cmp = icmp eq float addrspace(3)* %new_p, %new_q |
1266 | |
1267 | unsigned NewAS = NewV->getType()->getPointerAddressSpace(); |
1268 | int SrcIdx = U.getOperandNo(); |
1269 | int OtherIdx = (SrcIdx == 0) ? 1 : 0; |
1270 | Value *OtherSrc = Cmp->getOperand(i_nocapture: OtherIdx); |
1271 | |
1272 | if (Value *OtherNewV = ValueWithNewAddrSpace.lookup(Val: OtherSrc)) { |
1273 | if (OtherNewV->getType()->getPointerAddressSpace() == NewAS) { |
1274 | Cmp->setOperand(i_nocapture: OtherIdx, Val_nocapture: OtherNewV); |
1275 | Cmp->setOperand(i_nocapture: SrcIdx, Val_nocapture: NewV); |
1276 | continue; |
1277 | } |
1278 | } |
1279 | |
1280 | // Even if the type mismatches, we can cast the constant. |
1281 | if (auto *KOtherSrc = dyn_cast<Constant>(Val: OtherSrc)) { |
1282 | if (isSafeToCastConstAddrSpace(C: KOtherSrc, NewAS)) { |
1283 | Cmp->setOperand(i_nocapture: SrcIdx, Val_nocapture: NewV); |
1284 | Cmp->setOperand(i_nocapture: OtherIdx, Val_nocapture: ConstantExpr::getAddrSpaceCast( |
1285 | C: KOtherSrc, Ty: NewV->getType())); |
1286 | continue; |
1287 | } |
1288 | } |
1289 | } |
1290 | |
1291 | if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(Val: CurUser)) { |
1292 | unsigned NewAS = NewV->getType()->getPointerAddressSpace(); |
1293 | if (ASC->getDestAddressSpace() == NewAS) { |
1294 | ASC->replaceAllUsesWith(V: NewV); |
1295 | DeadInstructions.push_back(Elt: ASC); |
1296 | continue; |
1297 | } |
1298 | } |
1299 | |
1300 | // Otherwise, replaces the use with flat(NewV). |
1301 | if (Instruction *VInst = dyn_cast<Instruction>(Val: V)) { |
1302 | // Don't create a copy of the original addrspacecast. |
1303 | if (U == V && isa<AddrSpaceCastInst>(Val: V)) |
1304 | continue; |
1305 | |
1306 | // Insert the addrspacecast after NewV. |
1307 | BasicBlock::iterator InsertPos; |
1308 | if (Instruction *NewVInst = dyn_cast<Instruction>(Val: NewV)) |
1309 | InsertPos = std::next(x: NewVInst->getIterator()); |
1310 | else |
1311 | InsertPos = std::next(x: VInst->getIterator()); |
1312 | |
1313 | while (isa<PHINode>(Val: InsertPos)) |
1314 | ++InsertPos; |
1315 | // This instruction may contain multiple uses of V, update them all. |
1316 | CurUser->replaceUsesOfWith( |
1317 | From: V, To: new AddrSpaceCastInst(NewV, V->getType(), "" , InsertPos)); |
1318 | } else { |
1319 | CurUser->replaceUsesOfWith( |
1320 | From: V, To: ConstantExpr::getAddrSpaceCast(C: cast<Constant>(Val: NewV), |
1321 | Ty: V->getType())); |
1322 | } |
1323 | } |
1324 | } |
1325 | |
1326 | if (V->use_empty()) { |
1327 | if (Instruction *I = dyn_cast<Instruction>(Val: V)) |
1328 | DeadInstructions.push_back(Elt: I); |
1329 | } |
1330 | } |
1331 | |
1332 | for (Instruction *I : DeadInstructions) |
1333 | RecursivelyDeleteTriviallyDeadInstructions(V: I); |
1334 | |
1335 | return true; |
1336 | } |
1337 | |
1338 | bool InferAddressSpaces::runOnFunction(Function &F) { |
1339 | if (skipFunction(F)) |
1340 | return false; |
1341 | |
1342 | auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); |
1343 | DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; |
1344 | return InferAddressSpacesImpl( |
1345 | getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), DT, |
1346 | &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F), |
1347 | FlatAddrSpace) |
1348 | .run(F); |
1349 | } |
1350 | |
1351 | FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) { |
1352 | return new InferAddressSpaces(AddressSpace); |
1353 | } |
1354 | |
1355 | InferAddressSpacesPass::InferAddressSpacesPass() |
1356 | : FlatAddrSpace(UninitializedAddressSpace) {} |
1357 | InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace) |
1358 | : FlatAddrSpace(AddressSpace) {} |
1359 | |
1360 | PreservedAnalyses InferAddressSpacesPass::run(Function &F, |
1361 | FunctionAnalysisManager &AM) { |
1362 | bool Changed = |
1363 | InferAddressSpacesImpl(AM.getResult<AssumptionAnalysis>(IR&: F), |
1364 | AM.getCachedResult<DominatorTreeAnalysis>(IR&: F), |
1365 | &AM.getResult<TargetIRAnalysis>(IR&: F), FlatAddrSpace) |
1366 | .run(F); |
1367 | if (Changed) { |
1368 | PreservedAnalyses PA; |
1369 | PA.preserveSet<CFGAnalyses>(); |
1370 | PA.preserve<DominatorTreeAnalysis>(); |
1371 | return PA; |
1372 | } |
1373 | return PreservedAnalyses::all(); |
1374 | } |
1375 | |