//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>

#define DEBUG_TYPE "lower-mem-intrinsics"

using namespace llvm;

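// Lowers a known-size memcpy into an explicit load/store loop followed by
// straight-line copies for the residual bytes. As an illustrative sketch (the
// loop operand type is chosen by TTI, so the exact split is target-dependent):
// a 17-byte copy lowered with an i32 loop operand runs the loop for
// 17 / 4 = 4 iterations (16 bytes) and then emits residual copies, e.g. a
// single i8, for the remaining byte.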
void llvm::createMemCpyLoopKnownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  // No need to expand zero length copies.
  if (CopyLen->isZero())
    return;

  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB = nullptr;
  Function *ParentFunc = PreLoopBB->getParent();
  LLVMContext &Ctx = PreLoopBB->getContext();
  const DataLayout &DL = ParentFunc->getDataLayout();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *TypeOfCopyLen = CopyLen->getType();
  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");

  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize;

  if (LoopEndCount != 0) {
    // Split
    PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split");
    BasicBlock *LoopBB =
        BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB);
    PreLoopBB->getTerminator()->setSuccessor(0, LoopBB);

    IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

    Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));
    Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));

    IRBuilder<> LoopBuilder(LoopBB);
    PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index");
    LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB);
    // Loop Body
    Value *SrcGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
    LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                   PartSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
    StoreInst *Store = LoopBuilder.CreateAlignedStore(Load, DstGEP,
                                                      PartDstAlign,
                                                      DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *NewIndex =
        LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U));
    LoopIndex->addIncoming(NewIndex, LoopBB);

    // Create the loop branch condition.
    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
                             LoopBB, PostLoopBB);
  }

  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
  if (RemainingBytes) {
    IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
                                    : InsertBefore);

    SmallVector<Type *, 5> RemainingOps;
    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
                                          SrcAS, DstAS, SrcAlign.value(),
                                          DstAlign.value(), AtomicElementSize);

    for (auto *OpTy : RemainingOps) {
      Align PartSrcAlign(commonAlignment(SrcAlign, BytesCopied));
      Align PartDstAlign(commonAlignment(DstAlign, BytesCopied));

      // Calculate the new index
      unsigned OperandSize = DL.getTypeStoreSize(OpTy);
      assert(
          (!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");

      uint64_t GepIndex = BytesCopied / OperandSize;
      assert(GepIndex * OperandSize == BytesCopied &&
             "Division should have no Remainder!");

      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
          OpTy, SrcAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
      LoadInst *Load =
          RBuilder.CreateAlignedLoad(OpTy, SrcGEP, PartSrcAlign, SrcIsVolatile);
      if (!CanOverlap) {
        // Set alias scope for loads.
        Load->setMetadata(LLVMContext::MD_alias_scope,
                          MDNode::get(Ctx, NewScope));
      }
      Value *DstGEP = RBuilder.CreateInBoundsGEP(
          OpTy, DstAddr, ConstantInt::get(TypeOfCopyLen, GepIndex));
      StoreInst *Store = RBuilder.CreateAlignedStore(Load, DstGEP,
                                                     PartDstAlign,
                                                     DstIsVolatile);
      if (!CanOverlap) {
        // Indicate that stores don't overlap loads.
        Store->setMetadata(LLVMContext::MD_noalias,
                           MDNode::get(Ctx, NewScope));
      }
      if (AtomicElementSize) {
        Load->setAtomic(AtomicOrdering::Unordered);
        Store->setAtomic(AtomicOrdering::Unordered);
      }
      BytesCopied += OperandSize;
    }
  }
  assert(BytesCopied == CopyLen->getZExtValue() &&
         "Bytes copied should match size in the call!");
}

// \returns \p Len udiv \p OpSize, checking for optimization opportunities.
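// For example, with \p OpSizeVal == 8 the emitted IR is `lshr %Len, 3` rather
// than a udiv (a sketch; value names depend on the surrounding function).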
static Value *getRuntimeLoopCount(const DataLayout &DL, IRBuilderBase &B,
                                  Value *Len, Value *OpSize,
                                  unsigned OpSizeVal) {
  // For powers of 2, we can lshr by log2 instead of using udiv.
  if (isPowerOf2_32(OpSizeVal))
    return B.CreateLShr(Len, Log2_32(OpSizeVal));
  return B.CreateUDiv(Len, OpSize);
}

// \returns \p Len urem \p OpSize, checking for optimization opportunities.
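// For example, with \p OpSizeVal == 8 the emitted IR is `and %Len, 7` rather
// than a urem (a sketch; value names depend on the surrounding function).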
static Value *getRuntimeLoopRemainder(const DataLayout &DL, IRBuilderBase &B,
                                      Value *Len, Value *OpSize,
                                      unsigned OpSizeVal) {
  // For powers of 2, we can and by (OpSizeVal - 1) instead of using urem.
  if (isPowerOf2_32(OpSizeVal))
    return B.CreateAnd(Len, OpSizeVal - 1);
  return B.CreateURem(Len, OpSize);
}

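// Lowers an unknown-size memcpy into a main load/store loop over a TTI-chosen
// operand type plus, when that type is wider than a byte (and does not already
// match the atomic element size), a residual loop that copies the trailing
// `CopyLen urem LoopOpSize` bytes one element at a time. A CFG sketch of the
// emitted blocks appears next to the branch wiring inside the function.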
void llvm::createMemCpyLoopUnknownSize(
    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
    bool CanOverlap, const TargetTransformInfo &TTI,
    std::optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");

  Function *ParentFunc = PreLoopBB->getParent();
  const DataLayout &DL = ParentFunc->getDataLayout();
  LLVMContext &Ctx = PreLoopBB->getContext();
  MDBuilder MDB(Ctx);
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("MemCopyDomain");
  StringRef Name = "MemCopyAliasScope";
  MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);

  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();

  Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
      Ctx, CopyLen, SrcAS, DstAS, SrcAlign.value(), DstAlign.value(),
      AtomicElementSize);
  assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
         "Atomic memcpy lowering is not supported for vector operand type");
  unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
  assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
         "Atomic memcpy lowering is not supported for selected operand size");

  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());

  // Calculate the loop trip count, and remaining bytes to copy after the loop.
  Type *CopyLenType = CopyLen->getType();
  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
  assert(ILengthType &&
         "expected size argument to memcpy to be an integer type!");
  Type *Int8Type = Type::getInt8Ty(Ctx);
  bool LoopOpIsInt8 = LoopOpType == Int8Type;
  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
  Value *RuntimeLoopCount = LoopOpIsInt8
                                ? CopyLen
                                : getRuntimeLoopCount(DL, PLBuilder, CopyLen,
                                                      CILoopOpSize, LoopOpSize);

  BasicBlock *LoopBB =
      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, PostLoopBB);
  IRBuilder<> LoopBuilder(LoopBB);

  Align PartSrcAlign(commonAlignment(SrcAlign, LoopOpSize));
  Align PartDstAlign(commonAlignment(DstAlign, LoopOpSize));

  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);

  Value *SrcGEP =
      LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
  LoadInst *Load = LoopBuilder.CreateAlignedLoad(LoopOpType, SrcGEP,
                                                 PartSrcAlign, SrcIsVolatile);
  if (!CanOverlap) {
    // Set alias scope for loads.
    Load->setMetadata(LLVMContext::MD_alias_scope, MDNode::get(Ctx, NewScope));
  }
  Value *DstGEP =
      LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
  StoreInst *Store = LoopBuilder.CreateAlignedStore(Load, DstGEP, PartDstAlign,
                                                    DstIsVolatile);
  if (!CanOverlap) {
    // Indicate that stores don't overlap loads.
    Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
  }
  if (AtomicElementSize) {
    Load->setAtomic(AtomicOrdering::Unordered);
    Store->setAtomic(AtomicOrdering::Unordered);
  }
  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  bool requiresResidual =
      !LoopOpIsInt8 && !(AtomicElementSize && LoopOpSize == AtomicElementSize);
  if (requiresResidual) {
    Type *ResLoopOpType = AtomicElementSize
                              ? Type::getIntNTy(Ctx, *AtomicElementSize * 8)
                              : Int8Type;
    unsigned ResLoopOpSize = DL.getTypeStoreSize(ResLoopOpType);
    assert(ResLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
           "Store size is expected to match type size");

    Align ResSrcAlign(commonAlignment(PartSrcAlign, ResLoopOpSize));
    Align ResDstAlign(commonAlignment(PartDstAlign, ResLoopOpSize));

    Value *RuntimeResidual = getRuntimeLoopRemainder(DL, PLBuilder, CopyLen,
                                                     CILoopOpSize, LoopOpSize);
    Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);

    // Loop body for the residual copy.
    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
                                               PreLoopBB->getParent(),
                                               PostLoopBB);
    // Residual loop header.
    BasicBlock *ResHeaderBB = BasicBlock::Create(
        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);

    // Need to update the pre-loop basic block to branch to the correct place:
    // branch to the main loop if the count is non-zero, branch to the residual
    // loop if the copy size is smaller than one iteration of the main loop but
    // non-zero, and finally branch to after the residual loop if the memcpy
    // size is zero.
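    // Resulting control flow (sketch):
    //   PreLoopBB   -> LoopBB      (count != 0)     or ResHeaderBB
    //   LoopBB      -> LoopBB      (not done yet)   or ResHeaderBB
    //   ResHeaderBB -> ResLoopBB   (residual != 0)  or PostLoopBB
    //   ResLoopBB   -> ResLoopBB   (not done yet)   or PostLoopBB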
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, ResHeaderBB);
    PreLoopBB->getTerminator()->eraseFromParent();

    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        ResHeaderBB);

    // Determine if we need to branch to the residual loop or bypass it.
    IRBuilder<> RHBuilder(ResHeaderBB);
    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
                           ResLoopBB, PostLoopBB);

    // Copy the residual one ResLoopOpType element at a time.
    IRBuilder<> ResBuilder(ResLoopBB);
    PHINode *ResidualIndex =
        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
    ResidualIndex->addIncoming(Zero, ResHeaderBB);

    Value *FullOffset =
        ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
    Value *SrcGEP =
        ResBuilder.CreateInBoundsGEP(ResLoopOpType, SrcAddr, FullOffset);
    LoadInst *Load = ResBuilder.CreateAlignedLoad(ResLoopOpType, SrcGEP,
                                                  ResSrcAlign, SrcIsVolatile);
    if (!CanOverlap) {
      // Set alias scope for loads.
      Load->setMetadata(LLVMContext::MD_alias_scope,
                        MDNode::get(Ctx, NewScope));
    }
    Value *DstGEP =
        ResBuilder.CreateInBoundsGEP(ResLoopOpType, DstAddr, FullOffset);
    StoreInst *Store = ResBuilder.CreateAlignedStore(Load, DstGEP, ResDstAlign,
                                                     DstIsVolatile);
    if (!CanOverlap) {
      // Indicate that stores don't overlap loads.
      Store->setMetadata(LLVMContext::MD_noalias, MDNode::get(Ctx, NewScope));
    }
    if (AtomicElementSize) {
      Load->setAtomic(AtomicOrdering::Unordered);
      Store->setAtomic(AtomicOrdering::Unordered);
    }
    Value *ResNewIndex = ResBuilder.CreateAdd(
        ResidualIndex, ConstantInt::get(CopyLenType, ResLoopOpSize));
    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);

    // Create the loop branch condition.
    ResBuilder.CreateCondBr(
        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
        PostLoopBB);
  } else {
    // In this case the loop operand type is a single byte (or, for the atomic
    // variant, exactly the atomic element size), so there is no need for a
    // residual loop to copy the remaining memory after the main loop.
    // We do however need to patch up the control flow by creating the
    // terminators for the preloop block and the memcpy loop.
    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
                           LoopBB, PostLoopBB);
    PreLoopBB->getTerminator()->eraseFromParent();
    LoopBuilder.CreateCondBr(
        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
        PostLoopBB);
  }
}

// Lower memmove to IR. memmove is required to correctly copy overlapping
// memory regions; therefore, it has to check the relative positions of the
// source and destination pointers and choose the copy direction accordingly.
//
// The code below is an IR rendition of this C function:
//
// void* memmove(void* dst, const void* src, size_t n) {
//   unsigned char* d = dst;
//   const unsigned char* s = src;
//   if (s < d) {
//     // copy backwards
//     while (n--) {
//       d[n] = s[n];
//     }
//   } else {
//     // copy forward
//     for (size_t i = 0; i < n; ++i) {
//       d[i] = s[i];
//     }
//   }
//   return dst;
// }
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
                              Value *DstAddr, Value *CopyLen, Align SrcAlign,
                              Align DstAlign, bool SrcIsVolatile,
                              bool DstIsVolatile,
                              const TargetTransformInfo &TTI) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();
  // TODO: Use different element type if possible?
  Type *EltTy = Type::getInt8Ty(F->getContext());

  // Create a comparison of src and dst, based on which we jump to either
  // the forward-copy part of the function (if src >= dst) or the
  // backwards-copy part (if src < dst).
  // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
  // structure. Its block terminators (unconditional branches) are replaced by
  // the appropriate conditional branches when the loop is built.
  ICmpInst *PtrCompare =
      new ICmpInst(InsertBefore->getIterator(), ICmpInst::ICMP_ULT, SrcAddr,
                   DstAddr, "compare_src_dst");
  Instruction *ThenTerm, *ElseTerm;
  SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore->getIterator(),
                                &ThenTerm, &ElseTerm);

  // Each part of the function consists of two blocks:
  //   copy_backwards:      used to skip the loop when n == 0
  //   copy_backwards_loop: the actual backwards loop BB
  //   copy_forward:        used to skip the loop when n == 0
  //   copy_forward_loop:   the actual forward loop BB
  BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
  CopyBackwardsBB->setName("copy_backwards");
  BasicBlock *CopyForwardBB = ElseTerm->getParent();
  CopyForwardBB->setName("copy_forward");
  BasicBlock *ExitBB = InsertBefore->getParent();
  ExitBB->setName("memmove_done");

  unsigned PartSize = DL.getTypeStoreSize(EltTy);
  Align PartSrcAlign(commonAlignment(SrcAlign, PartSize));
  Align PartDstAlign(commonAlignment(DstAlign, PartSize));

  // Initial comparison of n == 0 that lets us skip the loops altogether.
  // Shared between both backwards and forward copy clauses.
  ICmpInst *CompareN =
      new ICmpInst(OrigBB->getTerminator()->getIterator(), ICmpInst::ICMP_EQ,
                   CopyLen, ConstantInt::get(TypeOfCopyLen, 0),
                   "compare_n_to_0");

  // Copying backwards.
  BasicBlock *LoopBB = BasicBlock::Create(F->getContext(),
                                          "copy_backwards_loop", F,
                                          CopyForwardBB);
  IRBuilder<> LoopBuilder(LoopBB);

  PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  Value *IndexPtr = LoopBuilder.CreateSub(
      LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
  Value *Element = LoopBuilder.CreateAlignedLoad(
      EltTy, LoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, IndexPtr),
      PartSrcAlign, SrcIsVolatile, "element");
  LoopBuilder.CreateAlignedStore(
      Element, LoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, IndexPtr),
      PartDstAlign, DstIsVolatile);
  LoopBuilder.CreateCondBr(
      LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
      ExitBB, LoopBB);
  LoopPhi->addIncoming(IndexPtr, LoopBB);
  LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
  BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm->getIterator());
  ThenTerm->eraseFromParent();

  // Copying forward.
  BasicBlock *FwdLoopBB =
      BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
  IRBuilder<> FwdLoopBuilder(FwdLoopBB);
  PHINode *FwdCopyPhi =
      FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
  Value *SrcGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, SrcAddr, FwdCopyPhi);
  Value *FwdElement = FwdLoopBuilder.CreateAlignedLoad(
      EltTy, SrcGEP, PartSrcAlign, SrcIsVolatile, "element");
  Value *DstGEP = FwdLoopBuilder.CreateInBoundsGEP(EltTy, DstAddr, FwdCopyPhi);
  FwdLoopBuilder.CreateAlignedStore(FwdElement, DstGEP, PartDstAlign,
                                    DstIsVolatile);
  Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
      FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
  FwdLoopBuilder.CreateCondBr(
      FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), ExitBB, FwdLoopBB);
  FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
  FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);

  BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm->getIterator());
  ElseTerm->eraseFromParent();
}

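// Lower memset to an explicit store loop. A C-level rendition of the emitted
// structure (a sketch; the loop stores SetValue's type, which is a single
// byte for the memset intrinsic):
//
//   if (n != 0) {
//     size_t i = 0;
//     do {
//       d[i] = value;
//     } while (++i < n);
//   }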
static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
                             Value *CopyLen, Value *SetValue, Align DstAlign,
                             bool IsVolatile) {
  Type *TypeOfCopyLen = CopyLen->getType();
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  const DataLayout &DL = F->getDataLayout();
  BasicBlock *NewBB = OrigBB->splitBasicBlock(InsertBefore, "split");
  BasicBlock *LoopBB =
      BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);

  IRBuilder<> Builder(OrigBB->getTerminator());

  Builder.CreateCondBr(
      Builder.CreateICmpEQ(ConstantInt::get(TypeOfCopyLen, 0), CopyLen), NewBB,
      LoopBB);
  OrigBB->getTerminator()->eraseFromParent();

  unsigned PartSize = DL.getTypeStoreSize(SetValue->getType());
  Align PartAlign(commonAlignment(DstAlign, PartSize));

  IRBuilder<> LoopBuilder(LoopBB);
  PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
  LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);

  LoopBuilder.CreateAlignedStore(
      SetValue,
      LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
      PartAlign, IsVolatile);

  Value *NewIndex =
      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
  LoopIndex->addIncoming(NewIndex, LoopBB);

  LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen),
                           LoopBB, NewBB);
}

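// \returns false only when ScalarEvolution can prove that \p Memcpy's source
// and destination pointers are never equal; since memcpy operands must either
// be exactly equal or fully disjoint, proven inequality implies the regions
// cannot overlap. Otherwise conservatively returns true.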
template <typename T>
static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
  if (SE) {
    auto *SrcSCEV = SE->getSCEV(Memcpy->getRawSource());
    auto *DestSCEV = SE->getSCEV(Memcpy->getRawDest());
    if (SE->isKnownPredicateAt(CmpInst::ICMP_NE, SrcSCEV, DestSCEV, Memcpy))
      return false;
  }
  return true;
}

void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
                              const TargetTransformInfo &TTI,
                              ScalarEvolution *SE) {
  bool CanOverlap = canOverlap(Memcpy, SE);
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ Memcpy,
        /* SrcAddr */ Memcpy->getRawSource(),
        /* DstAddr */ Memcpy->getRawDest(),
        /* CopyLen */ Memcpy->getLength(),
        /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ Memcpy->isVolatile(),
        /* DstIsVolatile */ Memcpy->isVolatile(),
        /* CanOverlap */ CanOverlap,
        /* TargetTransformInfo */ TTI);
  }
}

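// \returns true if the memmove was expanded: either as a true memmove loop or,
// when the source and destination address spaces provably cannot alias, as a
// plain memcpy loop. \returns false when the pointers can neither be compared
// nor addrspacecast, in which case the intrinsic is left untouched.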
bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
                               const TargetTransformInfo &TTI) {
  Value *CopyLen = Memmove->getLength();
  Value *SrcAddr = Memmove->getRawSource();
  Value *DstAddr = Memmove->getRawDest();
  Align SrcAlign = Memmove->getSourceAlign().valueOrOne();
  Align DstAlign = Memmove->getDestAlign().valueOrOne();
  bool SrcIsVolatile = Memmove->isVolatile();
  bool DstIsVolatile = SrcIsVolatile;
  IRBuilder<> CastBuilder(Memmove);

  unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace();
  unsigned DstAS = DstAddr->getType()->getPointerAddressSpace();
  if (SrcAS != DstAS) {
    if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
      // We may not be able to emit a pointer comparison, but we don't have
      // to. Expand as memcpy.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
        createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                  CI, SrcAlign, DstAlign, SrcIsVolatile,
                                  DstIsVolatile,
                                  /*CanOverlap=*/false, TTI);
      } else {
        createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
                                    CopyLen, SrcAlign, DstAlign, SrcIsVolatile,
                                    DstIsVolatile,
                                    /*CanOverlap=*/false, TTI);
      }

      return true;
    }

    if (TTI.isValidAddrSpaceCast(DstAS, SrcAS))
      DstAddr = CastBuilder.CreateAddrSpaceCast(DstAddr, SrcAddr->getType());
    else if (TTI.isValidAddrSpaceCast(SrcAS, DstAS))
      SrcAddr = CastBuilder.CreateAddrSpaceCast(SrcAddr, DstAddr->getType());
    else {
      // We don't know generically if it's legal to introduce an
      // addrspacecast. We need to know either if it's legal to insert an
      // addrspacecast, or if the address spaces cannot alias.
      LLVM_DEBUG(
          dbgs() << "Do not know how to expand memmove between different "
                    "address spaces\n");
      return false;
    }
  }

  createMemMoveLoop(
      /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
      SrcIsVolatile, DstIsVolatile, TTI);
  return true;
}

void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
  createMemSetLoop(/* InsertBefore */ Memset,
                   /* DstAddr */ Memset->getRawDest(),
                   /* CopyLen */ Memset->getLength(),
                   /* SetValue */ Memset->getValue(),
                   /* Alignment */ Memset->getDestAlign().valueOrOne(),
                   Memset->isVolatile());
}

void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
                                    const TargetTransformInfo &TTI,
                                    ScalarEvolution *SE) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(AtomicMemcpy->getLength())) {
    createMemCpyLoopKnownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ CI,
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  } else {
    createMemCpyLoopUnknownSize(
        /* InsertBefore */ AtomicMemcpy,
        /* SrcAddr */ AtomicMemcpy->getRawSource(),
        /* DstAddr */ AtomicMemcpy->getRawDest(),
        /* CopyLen */ AtomicMemcpy->getLength(),
        /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
        /* DestAlign */ AtomicMemcpy->getDestAlign().valueOrOne(),
        /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
        /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
        /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
        /* TargetTransformInfo */ TTI,
        /* AtomicCpySize */ AtomicMemcpy->getElementSizeInBytes());
  }
}