1//===- LowerMemIntrinsics.cpp ----------------------------------*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
10#include "llvm/Analysis/ScalarEvolution.h"
11#include "llvm/Analysis/TargetTransformInfo.h"
12#include "llvm/IR/IRBuilder.h"
13#include "llvm/IR/IntrinsicInst.h"
14#include "llvm/IR/MDBuilder.h"
15#include "llvm/IR/ProfDataUtils.h"
16#include "llvm/ProfileData/InstrProf.h"
17#include "llvm/Support/Debug.h"
18#include "llvm/Support/MathExtras.h"
19#include "llvm/Transforms/Utils/BasicBlockUtils.h"
20#include "llvm/Transforms/Utils/LoopUtils.h"
21#include <limits>
22#include <optional>
23
24#define DEBUG_TYPE "lower-mem-intrinsics"
25
26using namespace llvm;
27
28namespace llvm {
29extern cl::opt<bool> ProfcheckDisableMetadataFixes;
30}
31
32/// \returns \p Len urem \p OpSize, checking for optimization opportunities.
33/// \p OpSizeVal must be the integer value of the \c ConstantInt \p OpSize.
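/// For illustration: with \p OpSizeVal == 16 this emits an \c and with the
/// constant 15, while a non-power-of-two size such as 12 falls back to a
/// \c urem instruction.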
static Value *getRuntimeLoopRemainder(IRBuilderBase &B, Value *Len,
                                      Value *OpSize, unsigned OpSizeVal) {
  // For powers of 2, we can mask with (OpSizeVal - 1) instead of using urem.
  if (isPowerOf2_32(OpSizeVal))
    return B.CreateAnd(Len, OpSizeVal - 1);
  return B.CreateURem(Len, OpSize);
}
41
42/// \returns (\p Len udiv \p OpSize) mul \p OpSize, checking for optimization
43/// opportunities.
44/// If \p RTLoopRemainder is provided, it must be the result of
45/// \c getRuntimeLoopRemainder() with the same arguments.
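/// For example, \p Len == 23 with \p OpSizeVal == 8 yields 23 - (23 urem 8)
/// == 16, i.e. the number of units covered by complete main-loop iterations.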
46static Value *getRuntimeLoopUnits(IRBuilderBase &B, Value *Len, Value *OpSize,
47 unsigned OpSizeVal,
48 Value *RTLoopRemainder = nullptr) {
49 if (!RTLoopRemainder)
50 RTLoopRemainder = getRuntimeLoopRemainder(B, Len, OpSize, OpSizeVal);
51 return B.CreateSub(LHS: Len, RHS: RTLoopRemainder);
52}
53
54namespace {
55/// Container for the return values of insertLoopExpansion.
56struct LoopExpansionInfo {
57 /// The instruction at the end of the main loop body.
58 Instruction *MainLoopIP = nullptr;
59
60 /// The unit index in the main loop body.
61 Value *MainLoopIndex = nullptr;
62
63 /// The instruction at the end of the residual loop body. Can be nullptr if no
64 /// residual is required.
65 Instruction *ResidualLoopIP = nullptr;
66
67 /// The unit index in the residual loop body. Can be nullptr if no residual is
68 /// required.
69 Value *ResidualLoopIndex = nullptr;
70};
71
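/// Return the average length of the memory operation \p I, either from a
/// constant length argument or as the weighted average of its IPVK_MemOPSize
/// value-profile data. Returns std::nullopt if the enclosing function has no
/// non-zero entry count, or if the length is not constant and no size profile
/// is available. Callers use the result to derive expected loop trip counts
/// for branch weights.
///
/// For illustration: value-profile entries {Value=8, Count=90} and
/// {Value=128, Count=10} give round((8*90 + 128*10) / 100) == 20.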
72std::optional<uint64_t> getAverageMemOpLoopTripCount(const MemIntrinsic &I) {
73 if (ProfcheckDisableMetadataFixes)
74 return std::nullopt;
75 if (std::optional<Function::ProfileCount> EC =
76 I.getFunction()->getEntryCount();
77 !EC || !EC->getCount())
78 return std::nullopt;
79 if (const auto Len = I.getLengthInBytes())
80 return Len->getZExtValue();
81 uint64_t Total = 0;
82 SmallVector<InstrProfValueData> ProfData =
83 getValueProfDataFromInst(Inst: I, ValueKind: InstrProfValueKind::IPVK_MemOPSize,
84 MaxNumValueData: std::numeric_limits<uint32_t>::max(), TotalC&: Total);
85 if (!Total)
86 return std::nullopt;
87 uint64_t TripCount = 0;
88 for (const auto &P : ProfData)
89 TripCount += P.Count * P.Value;
90 return std::round(x: 1.0 * TripCount / Total);
91}
92
93} // namespace
94
95/// Insert the control flow and loop counters for a memcpy/memset loop
96/// expansion.
97///
98/// This function inserts IR corresponding to the following C code before
99/// \p InsertBefore:
100/// \code
101/// LoopUnits = (Len / MainLoopStep) * MainLoopStep;
102/// ResidualUnits = Len - LoopUnits;
103/// MainLoopIndex = 0;
104/// if (LoopUnits > 0) {
105/// do {
106/// // MainLoopIP
107/// MainLoopIndex += MainLoopStep;
108/// } while (MainLoopIndex < LoopUnits);
109/// }
110/// for (size_t i = 0; i < ResidualUnits; i += ResidualLoopStep) {
111/// ResidualLoopIndex = LoopUnits + i;
112/// // ResidualLoopIP
113/// }
114/// \endcode
115///
116/// \p MainLoopStep and \p ResidualLoopStep determine by how many "units" the
117/// loop index is increased in each iteration of the main and residual loops,
118/// respectively. In most cases, the "unit" will be bytes, but larger units are
119/// useful for lowering memset.pattern.
120///
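/// For example, \c createMemCpyLoopUnknownSize below requests a \p MainLoopStep
/// of \c LoopOpSize bytes (e.g. 16 for a <4 x i32> loop operand type) and a
/// \p ResidualLoopStep of 1, so the residual loop copies the trailing bytes one
/// at a time.
///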
121/// The computation of \c LoopUnits and \c ResidualUnits is performed at compile
122/// time if \p Len is a \c ConstantInt.
123/// The second (residual) loop is omitted if \p ResidualLoopStep is 0 or equal
124/// to \p MainLoopStep.
125/// The generated \c MainLoopIP, \c MainLoopIndex, \c ResidualLoopIP, and
126/// \c ResidualLoopIndex are returned in a \c LoopExpansionInfo object.
127static LoopExpansionInfo
128insertLoopExpansion(Instruction *InsertBefore, Value *Len,
129 unsigned MainLoopStep, unsigned ResidualLoopStep,
130 StringRef BBNamePrefix,
131 std::optional<uint64_t> AverageTripCount) {
132 assert((ResidualLoopStep == 0 || MainLoopStep % ResidualLoopStep == 0) &&
133 "ResidualLoopStep must divide MainLoopStep if specified");
134 assert(ResidualLoopStep <= MainLoopStep &&
135 "ResidualLoopStep cannot be larger than MainLoopStep");
136 assert(MainLoopStep > 0 && "MainLoopStep must be non-zero");
137 LoopExpansionInfo LEI;
138 BasicBlock *PreLoopBB = InsertBefore->getParent();
139 BasicBlock *PostLoopBB = PreLoopBB->splitBasicBlock(
140 I: InsertBefore, BBName: BBNamePrefix + "-post-expansion");
141 Function *ParentFunc = PreLoopBB->getParent();
142 LLVMContext &Ctx = PreLoopBB->getContext();
143 const DebugLoc &DbgLoc = InsertBefore->getStableDebugLoc();
144 IRBuilder<> PreLoopBuilder(PreLoopBB->getTerminator());
145 PreLoopBuilder.SetCurrentDebugLocation(DbgLoc);
146
147 // Calculate the main loop trip count and remaining units to cover after the
148 // loop.
149 Type *LenType = Len->getType();
150 IntegerType *ILenType = cast<IntegerType>(Val: LenType);
151 ConstantInt *CIMainLoopStep = ConstantInt::get(Ty: ILenType, V: MainLoopStep);
152
153 Value *LoopUnits = Len;
154 Value *ResidualUnits = nullptr;
155 // We can make a conditional branch unconditional if we know that the
156 // MainLoop must be executed at least once.
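  // (For example, a constant Len of 32 with a MainLoopStep of 16 lets us
  // branch into the main loop unconditionally.)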
157 bool MustTakeMainLoop = false;
158 if (MainLoopStep != 1) {
159 if (auto *CLen = dyn_cast<ConstantInt>(Val: Len)) {
160 uint64_t TotalUnits = CLen->getZExtValue();
161 uint64_t LoopEndCount = alignDown(Value: TotalUnits, Align: MainLoopStep);
162 uint64_t ResidualCount = TotalUnits - LoopEndCount;
163 LoopUnits = ConstantInt::get(Ty: LenType, V: LoopEndCount);
164 ResidualUnits = ConstantInt::get(Ty: LenType, V: ResidualCount);
165 MustTakeMainLoop = LoopEndCount > 0;
166 // As an optimization, we could skip generating the residual loop if
167 // ResidualCount is known to be 0. However, current uses of this function
168 // don't request a residual loop if the length is constant (they generate
169 // a (potentially empty) sequence of loads and stores instead), so this
170 // optimization would have no effect here.
171 } else {
172 ResidualUnits = getRuntimeLoopRemainder(B&: PreLoopBuilder, Len,
173 OpSize: CIMainLoopStep, OpSizeVal: MainLoopStep);
174 LoopUnits = getRuntimeLoopUnits(B&: PreLoopBuilder, Len, OpSize: CIMainLoopStep,
175 OpSizeVal: MainLoopStep, RTLoopRemainder: ResidualUnits);
176 }
177 } else if (auto *CLen = dyn_cast<ConstantInt>(Val: Len)) {
178 MustTakeMainLoop = CLen->getZExtValue() > 0;
179 }
180
181 BasicBlock *MainLoopBB = BasicBlock::Create(
182 Context&: Ctx, Name: BBNamePrefix + "-expansion-main-body", Parent: ParentFunc, InsertBefore: PostLoopBB);
183 IRBuilder<> LoopBuilder(MainLoopBB);
184 LoopBuilder.SetCurrentDebugLocation(DbgLoc);
185
186 PHINode *LoopIndex = LoopBuilder.CreatePHI(Ty: LenType, NumReservedValues: 2, Name: "loop-index");
187 LEI.MainLoopIndex = LoopIndex;
188 LoopIndex->addIncoming(V: ConstantInt::get(Ty: LenType, V: 0U), BB: PreLoopBB);
189
190 Value *NewIndex =
191 LoopBuilder.CreateAdd(LHS: LoopIndex, RHS: ConstantInt::get(Ty: LenType, V: MainLoopStep));
192 LoopIndex->addIncoming(V: NewIndex, BB: MainLoopBB);
193
194 // One argument of the addition is a loop-variant PHI, so it must be an
195 // Instruction (i.e., it cannot be a Constant).
196 LEI.MainLoopIP = cast<Instruction>(Val: NewIndex);
197
198 if (ResidualLoopStep > 0 && ResidualLoopStep < MainLoopStep) {
199 // Loop body for the residual accesses.
200 BasicBlock *ResLoopBB =
201 BasicBlock::Create(Context&: Ctx, Name: BBNamePrefix + "-expansion-residual-body",
202 Parent: PreLoopBB->getParent(), InsertBefore: PostLoopBB);
203 // BB to check if the residual loop is needed.
204 BasicBlock *ResidualCondBB =
205 BasicBlock::Create(Context&: Ctx, Name: BBNamePrefix + "-expansion-residual-cond",
206 Parent: PreLoopBB->getParent(), InsertBefore: ResLoopBB);
207
208 // Enter the MainLoop unless no main loop iteration is required.
209 ConstantInt *Zero = ConstantInt::get(Ty: ILenType, V: 0U);
210 if (MustTakeMainLoop)
211 PreLoopBuilder.CreateBr(Dest: MainLoopBB);
212 else {
213 auto *BR = PreLoopBuilder.CreateCondBr(
214 Cond: PreLoopBuilder.CreateICmpNE(LHS: LoopUnits, RHS: Zero), True: MainLoopBB,
215 False: ResidualCondBB);
216 if (AverageTripCount.has_value()) {
217 MDBuilder MDB(ParentFunc->getContext());
218 setFittedBranchWeights(I&: *BR,
219 Weights: {AverageTripCount.value() % MainLoopStep, 1},
220 /*IsExpected=*/false);
221 } else {
222 setExplicitlyUnknownBranchWeightsIfProfiled(I&: *BR, DEBUG_TYPE);
223 }
224 }
225 PreLoopBB->getTerminator()->eraseFromParent();
226
227 // Stay in the MainLoop until we have handled all the LoopUnits. Then go to
228 // the residual condition BB.
229 LoopBuilder.CreateCondBr(Cond: LoopBuilder.CreateICmpULT(LHS: NewIndex, RHS: LoopUnits),
230 True: MainLoopBB, False: ResidualCondBB);
231
232 // Determine if we need to branch to the residual loop or bypass it.
233 IRBuilder<> RCBuilder(ResidualCondBB);
234 RCBuilder.SetCurrentDebugLocation(DbgLoc);
235 RCBuilder.CreateCondBr(Cond: RCBuilder.CreateICmpNE(LHS: ResidualUnits, RHS: Zero),
236 True: ResLoopBB, False: PostLoopBB);
237
238 IRBuilder<> ResBuilder(ResLoopBB);
239 ResBuilder.SetCurrentDebugLocation(DbgLoc);
240 PHINode *ResidualIndex =
241 ResBuilder.CreatePHI(Ty: LenType, NumReservedValues: 2, Name: "residual-loop-index");
242 ResidualIndex->addIncoming(V: Zero, BB: ResidualCondBB);
243
244 // Add the offset at the end of the main loop to the loop counter of the
245 // residual loop to get the proper index.
246 Value *FullOffset = ResBuilder.CreateAdd(LHS: LoopUnits, RHS: ResidualIndex);
247 LEI.ResidualLoopIndex = FullOffset;
248
249 Value *ResNewIndex = ResBuilder.CreateAdd(
250 LHS: ResidualIndex, RHS: ConstantInt::get(Ty: LenType, V: ResidualLoopStep));
251 ResidualIndex->addIncoming(V: ResNewIndex, BB: ResLoopBB);
252
253 // One argument of the addition is a loop-variant PHI, so it must be an
254 // Instruction (i.e., it cannot be a Constant).
255 LEI.ResidualLoopIP = cast<Instruction>(Val: ResNewIndex);
256
257 // Stay in the residual loop until all ResidualUnits are handled.
258 ResBuilder.CreateCondBr(
259 Cond: ResBuilder.CreateICmpULT(LHS: ResNewIndex, RHS: ResidualUnits), True: ResLoopBB,
260 False: PostLoopBB);
261 } else {
    // There is no need for a residual loop after the main loop. We do,
    // however, need to patch up the control flow by creating the terminators
    // for the preloop block and the main loop.
265
266 // Enter the MainLoop unless no main loop iteration is required.
267 if (MustTakeMainLoop) {
268 PreLoopBuilder.CreateBr(Dest: MainLoopBB);
269 } else {
270 ConstantInt *Zero = ConstantInt::get(Ty: ILenType, V: 0U);
271 MDBuilder B(ParentFunc->getContext());
272 PreLoopBuilder.CreateCondBr(Cond: PreLoopBuilder.CreateICmpNE(LHS: LoopUnits, RHS: Zero),
273 True: MainLoopBB, False: PostLoopBB,
274 BranchWeights: B.createLikelyBranchWeights());
275 }
276 PreLoopBB->getTerminator()->eraseFromParent();
277 // Stay in the MainLoop until we have handled all the LoopUnits.
278 auto *Br = LoopBuilder.CreateCondBr(
279 Cond: LoopBuilder.CreateICmpULT(LHS: NewIndex, RHS: LoopUnits), True: MainLoopBB, False: PostLoopBB);
280 if (AverageTripCount.has_value())
281 setFittedBranchWeights(I&: *Br, Weights: {AverageTripCount.value() / MainLoopStep, 1},
282 /*IsExpected=*/false);
283 else
284 setExplicitlyUnknownBranchWeightsIfProfiled(I&: *Br, DEBUG_TYPE);
285 }
286 return LEI;
287}
288
289void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
290 Value *DstAddr, ConstantInt *CopyLen,
291 Align SrcAlign, Align DstAlign,
292 bool SrcIsVolatile, bool DstIsVolatile,
293 bool CanOverlap,
294 const TargetTransformInfo &TTI,
295 std::optional<uint32_t> AtomicElementSize,
296 std::optional<uint64_t> AverageTripCount) {
297 // No need to expand zero length copies.
298 if (CopyLen->isZero())
299 return;
300
301 BasicBlock *PreLoopBB = InsertBefore->getParent();
302 Function *ParentFunc = PreLoopBB->getParent();
303 LLVMContext &Ctx = PreLoopBB->getContext();
304 const DataLayout &DL = ParentFunc->getDataLayout();
305 MDBuilder MDB(Ctx);
306 MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain(Name: "MemCopyDomain");
307 StringRef Name = "MemCopyAliasScope";
308 MDNode *NewScope = MDB.createAnonymousAliasScope(Domain: NewDomain, Name);
309
310 unsigned SrcAS = cast<PointerType>(Val: SrcAddr->getType())->getAddressSpace();
311 unsigned DstAS = cast<PointerType>(Val: DstAddr->getType())->getAddressSpace();
312
313 Type *TypeOfCopyLen = CopyLen->getType();
314 Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
315 Context&: Ctx, Length: CopyLen, SrcAddrSpace: SrcAS, DestAddrSpace: DstAS, SrcAlign, DestAlign: DstAlign, AtomicElementSize);
316 assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
317 "Atomic memcpy lowering is not supported for vector operand type");
318
319 Type *Int8Type = Type::getInt8Ty(C&: Ctx);
320 unsigned LoopOpSize = DL.getTypeStoreSize(Ty: LoopOpType);
321 assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
322 "Atomic memcpy lowering is not supported for selected operand size");
323
324 uint64_t LoopEndCount = alignDown(Value: CopyLen->getZExtValue(), Align: LoopOpSize);
325
326 // Skip the loop expansion entirely if the loop would never be taken.
327 if (LoopEndCount != 0) {
328 LoopExpansionInfo LEI =
329 insertLoopExpansion(InsertBefore, Len: CopyLen, MainLoopStep: LoopOpSize, ResidualLoopStep: 0,
330 BBNamePrefix: "static-memcpy", AverageTripCount);
331
332 // Fill MainLoopBB
333 IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
334 Align PartDstAlign(commonAlignment(A: DstAlign, Offset: LoopOpSize));
335 Align PartSrcAlign(commonAlignment(A: SrcAlign, Offset: LoopOpSize));
336
337 // If we used LoopOpType as GEP element type, we would iterate over the
338 // buffers in TypeStoreSize strides while copying TypeAllocSize bytes, i.e.,
339 // we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore, use
340 // byte offsets computed from the TypeStoreSize.
341 Value *SrcGEP =
342 MainLoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: SrcAddr, IdxList: LEI.MainLoopIndex);
343 LoadInst *Load = MainLoopBuilder.CreateAlignedLoad(
344 Ty: LoopOpType, Ptr: SrcGEP, Align: PartSrcAlign, isVolatile: SrcIsVolatile);
345 if (!CanOverlap) {
346 // Set alias scope for loads.
347 Load->setMetadata(KindID: LLVMContext::MD_alias_scope,
348 Node: MDNode::get(Context&: Ctx, MDs: NewScope));
349 }
350 Value *DstGEP =
351 MainLoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: DstAddr, IdxList: LEI.MainLoopIndex);
352 StoreInst *Store = MainLoopBuilder.CreateAlignedStore(
353 Val: Load, Ptr: DstGEP, Align: PartDstAlign, isVolatile: DstIsVolatile);
354 if (!CanOverlap) {
355 // Indicate that stores don't overlap loads.
356 Store->setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::get(Context&: Ctx, MDs: NewScope));
357 }
358 if (AtomicElementSize) {
359 Load->setAtomic(Ordering: AtomicOrdering::Unordered);
360 Store->setAtomic(Ordering: AtomicOrdering::Unordered);
361 }
362 assert(!LEI.ResidualLoopIP && !LEI.ResidualLoopIndex &&
363 "No residual loop was requested");
364 }
365
366 // Copy the remaining bytes with straight-line code.
367 uint64_t BytesCopied = LoopEndCount;
368 uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
369 if (RemainingBytes == 0)
370 return;
371
372 IRBuilder<> RBuilder(InsertBefore);
373 SmallVector<Type *, 5> RemainingOps;
374 TTI.getMemcpyLoopResidualLoweringType(OpsOut&: RemainingOps, Context&: Ctx, RemainingBytes,
375 SrcAddrSpace: SrcAS, DestAddrSpace: DstAS, SrcAlign, DestAlign: DstAlign,
376 AtomicCpySize: AtomicElementSize);
377
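  // For illustration: with RemainingBytes == 7 the target's residual lowering
  // might return {i32, i16, i8}, yielding three load/store pairs at byte
  // offsets BytesCopied, BytesCopied + 4, and BytesCopied + 6.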
378 for (auto *OpTy : RemainingOps) {
379 Align PartSrcAlign(commonAlignment(A: SrcAlign, Offset: BytesCopied));
380 Align PartDstAlign(commonAlignment(A: DstAlign, Offset: BytesCopied));
381
382 unsigned OperandSize = DL.getTypeStoreSize(Ty: OpTy);
383 assert((!AtomicElementSize || OperandSize % *AtomicElementSize == 0) &&
384 "Atomic memcpy lowering is not supported for selected operand size");
385
386 Value *SrcGEP = RBuilder.CreateInBoundsGEP(
387 Ty: Int8Type, Ptr: SrcAddr, IdxList: ConstantInt::get(Ty: TypeOfCopyLen, V: BytesCopied));
388 LoadInst *Load =
389 RBuilder.CreateAlignedLoad(Ty: OpTy, Ptr: SrcGEP, Align: PartSrcAlign, isVolatile: SrcIsVolatile);
390 if (!CanOverlap) {
391 // Set alias scope for loads.
392 Load->setMetadata(KindID: LLVMContext::MD_alias_scope,
393 Node: MDNode::get(Context&: Ctx, MDs: NewScope));
394 }
395 Value *DstGEP = RBuilder.CreateInBoundsGEP(
396 Ty: Int8Type, Ptr: DstAddr, IdxList: ConstantInt::get(Ty: TypeOfCopyLen, V: BytesCopied));
397 StoreInst *Store =
398 RBuilder.CreateAlignedStore(Val: Load, Ptr: DstGEP, Align: PartDstAlign, isVolatile: DstIsVolatile);
399 if (!CanOverlap) {
400 // Indicate that stores don't overlap loads.
401 Store->setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::get(Context&: Ctx, MDs: NewScope));
402 }
403 if (AtomicElementSize) {
404 Load->setAtomic(Ordering: AtomicOrdering::Unordered);
405 Store->setAtomic(Ordering: AtomicOrdering::Unordered);
406 }
407 BytesCopied += OperandSize;
408 }
409 assert(BytesCopied == CopyLen->getZExtValue() &&
410 "Bytes copied should match size in the call!");
411}
412
413void llvm::createMemCpyLoopUnknownSize(
414 Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
415 Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
416 bool CanOverlap, const TargetTransformInfo &TTI,
417 std::optional<uint32_t> AtomicElementSize,
418 std::optional<uint64_t> AverageTripCount) {
419 BasicBlock *PreLoopBB = InsertBefore->getParent();
420 Function *ParentFunc = PreLoopBB->getParent();
421 const DataLayout &DL = ParentFunc->getDataLayout();
422 LLVMContext &Ctx = PreLoopBB->getContext();
423 MDBuilder MDB(Ctx);
424 MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain(Name: "MemCopyDomain");
425 StringRef Name = "MemCopyAliasScope";
426 MDNode *NewScope = MDB.createAnonymousAliasScope(Domain: NewDomain, Name);
427
428 unsigned SrcAS = cast<PointerType>(Val: SrcAddr->getType())->getAddressSpace();
429 unsigned DstAS = cast<PointerType>(Val: DstAddr->getType())->getAddressSpace();
430
431 Type *LoopOpType = TTI.getMemcpyLoopLoweringType(
432 Context&: Ctx, Length: CopyLen, SrcAddrSpace: SrcAS, DestAddrSpace: DstAS, SrcAlign, DestAlign: DstAlign, AtomicElementSize);
433 assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
434 "Atomic memcpy lowering is not supported for vector operand type");
435 unsigned LoopOpSize = DL.getTypeStoreSize(Ty: LoopOpType);
436 assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
437 "Atomic memcpy lowering is not supported for selected operand size");
438
439 Type *Int8Type = Type::getInt8Ty(C&: Ctx);
440
441 Type *ResidualLoopOpType = AtomicElementSize
442 ? Type::getIntNTy(C&: Ctx, N: *AtomicElementSize * 8)
443 : Int8Type;
444 unsigned ResidualLoopOpSize = DL.getTypeStoreSize(Ty: ResidualLoopOpType);
445 assert(ResidualLoopOpSize == (AtomicElementSize ? *AtomicElementSize : 1) &&
446 "Store size is expected to match type size");
447
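  // Rough shape of the expansion for a non-atomic copy with a 16-byte loop
  // type: a main loop that copies 16 bytes per iteration, CopyLen / 16 times,
  // followed by a residual loop that copies the remaining CopyLen % 16 bytes
  // one at a time.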
448 LoopExpansionInfo LEI =
449 insertLoopExpansion(InsertBefore, Len: CopyLen, MainLoopStep: LoopOpSize, ResidualLoopStep: ResidualLoopOpSize,
450 BBNamePrefix: "dynamic-memcpy", AverageTripCount);
451
452 // Fill MainLoopBB
453 IRBuilder<> MainLoopBuilder(LEI.MainLoopIP);
454 Align PartSrcAlign(commonAlignment(A: SrcAlign, Offset: LoopOpSize));
455 Align PartDstAlign(commonAlignment(A: DstAlign, Offset: LoopOpSize));
456
457 // If we used LoopOpType as GEP element type, we would iterate over the
458 // buffers in TypeStoreSize strides while copying TypeAllocSize bytes, i.e.,
459 // we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore, use byte
460 // offsets computed from the TypeStoreSize.
461 Value *SrcGEP =
462 MainLoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: SrcAddr, IdxList: LEI.MainLoopIndex);
463 LoadInst *Load = MainLoopBuilder.CreateAlignedLoad(
464 Ty: LoopOpType, Ptr: SrcGEP, Align: PartSrcAlign, isVolatile: SrcIsVolatile);
465 if (!CanOverlap) {
466 // Set alias scope for loads.
467 Load->setMetadata(KindID: LLVMContext::MD_alias_scope, Node: MDNode::get(Context&: Ctx, MDs: NewScope));
468 }
469 Value *DstGEP =
470 MainLoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: DstAddr, IdxList: LEI.MainLoopIndex);
471 StoreInst *Store = MainLoopBuilder.CreateAlignedStore(
472 Val: Load, Ptr: DstGEP, Align: PartDstAlign, isVolatile: DstIsVolatile);
473 if (!CanOverlap) {
474 // Indicate that stores don't overlap loads.
475 Store->setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::get(Context&: Ctx, MDs: NewScope));
476 }
477 if (AtomicElementSize) {
478 Load->setAtomic(Ordering: AtomicOrdering::Unordered);
479 Store->setAtomic(Ordering: AtomicOrdering::Unordered);
480 }
481
482 // Fill ResidualLoopBB.
483 if (!LEI.ResidualLoopIP)
484 return;
485
486 Align ResSrcAlign(commonAlignment(A: PartSrcAlign, Offset: ResidualLoopOpSize));
487 Align ResDstAlign(commonAlignment(A: PartDstAlign, Offset: ResidualLoopOpSize));
488
489 IRBuilder<> ResLoopBuilder(LEI.ResidualLoopIP);
490 Value *ResSrcGEP = ResLoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: SrcAddr,
491 IdxList: LEI.ResidualLoopIndex);
492 LoadInst *ResLoad = ResLoopBuilder.CreateAlignedLoad(
493 Ty: ResidualLoopOpType, Ptr: ResSrcGEP, Align: ResSrcAlign, isVolatile: SrcIsVolatile);
494 if (!CanOverlap) {
495 // Set alias scope for loads.
496 ResLoad->setMetadata(KindID: LLVMContext::MD_alias_scope,
497 Node: MDNode::get(Context&: Ctx, MDs: NewScope));
498 }
499 Value *ResDstGEP = ResLoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: DstAddr,
500 IdxList: LEI.ResidualLoopIndex);
501 StoreInst *ResStore = ResLoopBuilder.CreateAlignedStore(
502 Val: ResLoad, Ptr: ResDstGEP, Align: ResDstAlign, isVolatile: DstIsVolatile);
503 if (!CanOverlap) {
504 // Indicate that stores don't overlap loads.
505 ResStore->setMetadata(KindID: LLVMContext::MD_noalias, Node: MDNode::get(Context&: Ctx, MDs: NewScope));
506 }
507 if (AtomicElementSize) {
508 ResLoad->setAtomic(Ordering: AtomicOrdering::Unordered);
509 ResStore->setAtomic(Ordering: AtomicOrdering::Unordered);
510 }
511}
512
// If \p Addr1 and \p Addr2 are pointers to different address spaces, create an
// addrspacecast to obtain a pair of pointers in the same address space. The
// caller needs to ensure that addrspacecasting is possible.
// No-op if the pointers are in the same address space.
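// For example, on targets where casting from a specific address space to a
// generic/flat one is valid in only one direction, the castable pointer is
// converted so that both pointers can be compared in the same address space.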
517static std::pair<Value *, Value *>
518tryInsertCastToCommonAddrSpace(IRBuilderBase &B, Value *Addr1, Value *Addr2,
519 const TargetTransformInfo &TTI) {
520 Value *ResAddr1 = Addr1;
521 Value *ResAddr2 = Addr2;
522
523 unsigned AS1 = cast<PointerType>(Val: Addr1->getType())->getAddressSpace();
524 unsigned AS2 = cast<PointerType>(Val: Addr2->getType())->getAddressSpace();
525 if (AS1 != AS2) {
526 if (TTI.isValidAddrSpaceCast(FromAS: AS2, ToAS: AS1))
527 ResAddr2 = B.CreateAddrSpaceCast(V: Addr2, DestTy: Addr1->getType());
528 else if (TTI.isValidAddrSpaceCast(FromAS: AS1, ToAS: AS2))
529 ResAddr1 = B.CreateAddrSpaceCast(V: Addr1, DestTy: Addr2->getType());
530 else
531 llvm_unreachable("Can only lower memmove between address spaces if they "
532 "support addrspacecast");
533 }
534 return {ResAddr1, ResAddr2};
535}
536
537// Lower memmove to IR. memmove is required to correctly copy overlapping memory
538// regions; therefore, it has to check the relative positions of the source and
539// destination pointers and choose the copy direction accordingly.
540//
541// The code below is an IR rendition of this C function:
542//
543// void* memmove(void* dst, const void* src, size_t n) {
544// unsigned char* d = dst;
545// const unsigned char* s = src;
546// if (s < d) {
547// // copy backwards
548// while (n--) {
549// d[n] = s[n];
550// }
551// } else {
552// // copy forward
553// for (size_t i = 0; i < n; ++i) {
554// d[i] = s[i];
555// }
556// }
557// return dst;
558// }
559//
560// If the TargetTransformInfo specifies a wider MemcpyLoopLoweringType, it is
561// used for the memory accesses in the loops. Then, additional loops with
562// byte-wise accesses are added for the remaining bytes.
563static void createMemMoveLoopUnknownSize(Instruction *InsertBefore,
564 Value *SrcAddr, Value *DstAddr,
565 Value *CopyLen, Align SrcAlign,
566 Align DstAlign, bool SrcIsVolatile,
567 bool DstIsVolatile,
568 const TargetTransformInfo &TTI) {
569 Type *TypeOfCopyLen = CopyLen->getType();
570 BasicBlock *OrigBB = InsertBefore->getParent();
571 Function *F = OrigBB->getParent();
572 const DataLayout &DL = F->getDataLayout();
573 LLVMContext &Ctx = OrigBB->getContext();
574 unsigned SrcAS = cast<PointerType>(Val: SrcAddr->getType())->getAddressSpace();
575 unsigned DstAS = cast<PointerType>(Val: DstAddr->getType())->getAddressSpace();
576
577 Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Context&: Ctx, Length: CopyLen, SrcAddrSpace: SrcAS, DestAddrSpace: DstAS,
578 SrcAlign, DestAlign: DstAlign);
579 unsigned LoopOpSize = DL.getTypeStoreSize(Ty: LoopOpType);
580 Type *Int8Type = Type::getInt8Ty(C&: Ctx);
581 bool LoopOpIsInt8 = LoopOpType == Int8Type;
582
583 // If the memory accesses are wider than one byte, residual loops with
584 // i8-accesses are required to move remaining bytes.
585 bool RequiresResidual = !LoopOpIsInt8;
586
587 Type *ResidualLoopOpType = Int8Type;
588 unsigned ResidualLoopOpSize = DL.getTypeStoreSize(Ty: ResidualLoopOpType);
589
590 // Calculate the loop trip count and remaining bytes to copy after the loop.
591 IntegerType *ILengthType = cast<IntegerType>(Val: TypeOfCopyLen);
592 ConstantInt *CILoopOpSize = ConstantInt::get(Ty: ILengthType, V: LoopOpSize);
593 ConstantInt *CIResidualLoopOpSize =
594 ConstantInt::get(Ty: ILengthType, V: ResidualLoopOpSize);
595 ConstantInt *Zero = ConstantInt::get(Ty: ILengthType, V: 0);
596
597 const DebugLoc &DbgLoc = InsertBefore->getStableDebugLoc();
598 IRBuilder<> PLBuilder(InsertBefore);
599 PLBuilder.SetCurrentDebugLocation(DbgLoc);
600
601 Value *RuntimeLoopBytes = CopyLen;
602 Value *RuntimeLoopRemainder = nullptr;
603 Value *SkipResidualCondition = nullptr;
604 if (RequiresResidual) {
605 RuntimeLoopRemainder =
606 getRuntimeLoopRemainder(B&: PLBuilder, Len: CopyLen, OpSize: CILoopOpSize, OpSizeVal: LoopOpSize);
607 RuntimeLoopBytes = getRuntimeLoopUnits(B&: PLBuilder, Len: CopyLen, OpSize: CILoopOpSize,
608 OpSizeVal: LoopOpSize, RTLoopRemainder: RuntimeLoopRemainder);
609 SkipResidualCondition =
610 PLBuilder.CreateICmpEQ(LHS: RuntimeLoopRemainder, RHS: Zero, Name: "skip_residual");
611 }
612 Value *SkipMainCondition =
613 PLBuilder.CreateICmpEQ(LHS: RuntimeLoopBytes, RHS: Zero, Name: "skip_main");
614
  // Create a comparison of src and dst, based on which we jump to either the
  // forward-copy part of the function (if src >= dst) or the backwards-copy
  // part (if src < dst).
618 // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
619 // structure. Its block terminators (unconditional branches) are replaced by
620 // the appropriate conditional branches when the loop is built.
621 // If the pointers are in different address spaces, they need to be converted
622 // to a compatible one. Cases where memory ranges in the different address
623 // spaces cannot overlap are lowered as memcpy and not handled here.
624 auto [CmpSrcAddr, CmpDstAddr] =
625 tryInsertCastToCommonAddrSpace(B&: PLBuilder, Addr1: SrcAddr, Addr2: DstAddr, TTI);
626 Value *PtrCompare =
627 PLBuilder.CreateICmpULT(LHS: CmpSrcAddr, RHS: CmpDstAddr, Name: "compare_src_dst");
628 Instruction *ThenTerm, *ElseTerm;
629 SplitBlockAndInsertIfThenElse(Cond: PtrCompare, SplitBefore: InsertBefore->getIterator(),
630 ThenTerm: &ThenTerm, ElseTerm: &ElseTerm);
631
632 // If the LoopOpSize is greater than 1, each part of the function consists of
633 // four blocks:
634 // memmove_copy_backwards:
635 // skip the residual loop when 0 iterations are required
636 // memmove_bwd_residual_loop:
637 // copy the last few bytes individually so that the remaining length is
638 // a multiple of the LoopOpSize
639 // memmove_bwd_middle: skip the main loop when 0 iterations are required
640 // memmove_bwd_main_loop: the actual backwards loop BB with wide accesses
641 // memmove_copy_forward: skip the main loop when 0 iterations are required
642 // memmove_fwd_main_loop: the actual forward loop BB with wide accesses
643 // memmove_fwd_middle: skip the residual loop when 0 iterations are required
644 // memmove_fwd_residual_loop: copy the last few bytes individually
645 //
646 // The main and residual loop are switched between copying forward and
647 // backward so that the residual loop always operates on the end of the moved
648 // range. This is based on the assumption that buffers whose start is aligned
649 // with the LoopOpSize are more common than buffers whose end is.
650 //
651 // If the LoopOpSize is 1, each part of the function consists of two blocks:
652 // memmove_copy_backwards: skip the loop when 0 iterations are required
653 // memmove_bwd_main_loop: the actual backwards loop BB
654 // memmove_copy_forward: skip the loop when 0 iterations are required
655 // memmove_fwd_main_loop: the actual forward loop BB
656 BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
657 CopyBackwardsBB->setName("memmove_copy_backwards");
658 BasicBlock *CopyForwardBB = ElseTerm->getParent();
659 CopyForwardBB->setName("memmove_copy_forward");
660 BasicBlock *ExitBB = InsertBefore->getParent();
661 ExitBB->setName("memmove_done");
662
663 Align PartSrcAlign(commonAlignment(A: SrcAlign, Offset: LoopOpSize));
664 Align PartDstAlign(commonAlignment(A: DstAlign, Offset: LoopOpSize));
665
666 // Accesses in the residual loops do not share the same alignment as those in
667 // the main loops.
668 Align ResidualSrcAlign(commonAlignment(A: PartSrcAlign, Offset: ResidualLoopOpSize));
669 Align ResidualDstAlign(commonAlignment(A: PartDstAlign, Offset: ResidualLoopOpSize));
670
671 // Copying backwards.
672 {
673 BasicBlock *MainLoopBB = BasicBlock::Create(
674 Context&: F->getContext(), Name: "memmove_bwd_main_loop", Parent: F, InsertBefore: CopyForwardBB);
675
676 // The predecessor of the memmove_bwd_main_loop. Updated in the
677 // following if a residual loop is emitted first.
678 BasicBlock *PredBB = CopyBackwardsBB;
679
680 if (RequiresResidual) {
681 // backwards residual loop
682 BasicBlock *ResidualLoopBB = BasicBlock::Create(
683 Context&: F->getContext(), Name: "memmove_bwd_residual_loop", Parent: F, InsertBefore: MainLoopBB);
684 IRBuilder<> ResidualLoopBuilder(ResidualLoopBB);
685 ResidualLoopBuilder.SetCurrentDebugLocation(DbgLoc);
686 PHINode *ResidualLoopPhi = ResidualLoopBuilder.CreatePHI(Ty: ILengthType, NumReservedValues: 0);
687 Value *ResidualIndex = ResidualLoopBuilder.CreateSub(
688 LHS: ResidualLoopPhi, RHS: CIResidualLoopOpSize, Name: "bwd_residual_index");
689 // If we used LoopOpType as GEP element type, we would iterate over the
690 // buffers in TypeStoreSize strides while copying TypeAllocSize bytes,
691 // i.e., we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore,
692 // use byte offsets computed from the TypeStoreSize.
693 Value *LoadGEP = ResidualLoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: SrcAddr,
694 IdxList: ResidualIndex);
695 Value *Element = ResidualLoopBuilder.CreateAlignedLoad(
696 Ty: ResidualLoopOpType, Ptr: LoadGEP, Align: ResidualSrcAlign, isVolatile: SrcIsVolatile,
697 Name: "element");
698 Value *StoreGEP = ResidualLoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: DstAddr,
699 IdxList: ResidualIndex);
700 ResidualLoopBuilder.CreateAlignedStore(Val: Element, Ptr: StoreGEP,
701 Align: ResidualDstAlign, isVolatile: DstIsVolatile);
702
703 // After the residual loop, go to an intermediate block.
704 BasicBlock *IntermediateBB = BasicBlock::Create(
705 Context&: F->getContext(), Name: "memmove_bwd_middle", Parent: F, InsertBefore: MainLoopBB);
706 // Later code expects a terminator in the PredBB.
707 IRBuilder<> IntermediateBuilder(IntermediateBB);
708 IntermediateBuilder.SetCurrentDebugLocation(DbgLoc);
709 IntermediateBuilder.CreateUnreachable();
710 ResidualLoopBuilder.CreateCondBr(
711 Cond: ResidualLoopBuilder.CreateICmpEQ(LHS: ResidualIndex, RHS: RuntimeLoopBytes),
712 True: IntermediateBB, False: ResidualLoopBB);
713
714 ResidualLoopPhi->addIncoming(V: ResidualIndex, BB: ResidualLoopBB);
715 ResidualLoopPhi->addIncoming(V: CopyLen, BB: CopyBackwardsBB);
716
717 // How to get to the residual:
718 BranchInst *BrInst =
719 BranchInst::Create(IfTrue: IntermediateBB, IfFalse: ResidualLoopBB,
720 Cond: SkipResidualCondition, InsertBefore: ThenTerm->getIterator());
721 BrInst->setDebugLoc(DbgLoc);
722 ThenTerm->eraseFromParent();
723
724 PredBB = IntermediateBB;
725 }
726
727 // main loop
728 IRBuilder<> MainLoopBuilder(MainLoopBB);
729 MainLoopBuilder.SetCurrentDebugLocation(DbgLoc);
730 PHINode *MainLoopPhi = MainLoopBuilder.CreatePHI(Ty: ILengthType, NumReservedValues: 0);
731 Value *MainIndex =
732 MainLoopBuilder.CreateSub(LHS: MainLoopPhi, RHS: CILoopOpSize, Name: "bwd_main_index");
733 Value *LoadGEP =
734 MainLoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: SrcAddr, IdxList: MainIndex);
735 Value *Element = MainLoopBuilder.CreateAlignedLoad(
736 Ty: LoopOpType, Ptr: LoadGEP, Align: PartSrcAlign, isVolatile: SrcIsVolatile, Name: "element");
737 Value *StoreGEP =
738 MainLoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: DstAddr, IdxList: MainIndex);
739 MainLoopBuilder.CreateAlignedStore(Val: Element, Ptr: StoreGEP, Align: PartDstAlign,
740 isVolatile: DstIsVolatile);
741 MainLoopBuilder.CreateCondBr(Cond: MainLoopBuilder.CreateICmpEQ(LHS: MainIndex, RHS: Zero),
742 True: ExitBB, False: MainLoopBB);
743 MainLoopPhi->addIncoming(V: MainIndex, BB: MainLoopBB);
744 MainLoopPhi->addIncoming(V: RuntimeLoopBytes, BB: PredBB);
745
746 // How to get to the main loop:
747 Instruction *PredBBTerm = PredBB->getTerminator();
748 BranchInst *BrInst = BranchInst::Create(
749 IfTrue: ExitBB, IfFalse: MainLoopBB, Cond: SkipMainCondition, InsertBefore: PredBBTerm->getIterator());
750 BrInst->setDebugLoc(DbgLoc);
751 PredBBTerm->eraseFromParent();
752 }
753
754 // Copying forward.
755 // main loop
756 {
757 BasicBlock *MainLoopBB =
758 BasicBlock::Create(Context&: F->getContext(), Name: "memmove_fwd_main_loop", Parent: F, InsertBefore: ExitBB);
759 IRBuilder<> MainLoopBuilder(MainLoopBB);
760 MainLoopBuilder.SetCurrentDebugLocation(DbgLoc);
761 PHINode *MainLoopPhi =
762 MainLoopBuilder.CreatePHI(Ty: ILengthType, NumReservedValues: 0, Name: "fwd_main_index");
763 Value *LoadGEP =
764 MainLoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: SrcAddr, IdxList: MainLoopPhi);
765 Value *Element = MainLoopBuilder.CreateAlignedLoad(
766 Ty: LoopOpType, Ptr: LoadGEP, Align: PartSrcAlign, isVolatile: SrcIsVolatile, Name: "element");
767 Value *StoreGEP =
768 MainLoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: DstAddr, IdxList: MainLoopPhi);
769 MainLoopBuilder.CreateAlignedStore(Val: Element, Ptr: StoreGEP, Align: PartDstAlign,
770 isVolatile: DstIsVolatile);
771 Value *MainIndex = MainLoopBuilder.CreateAdd(LHS: MainLoopPhi, RHS: CILoopOpSize);
772 MainLoopPhi->addIncoming(V: MainIndex, BB: MainLoopBB);
773 MainLoopPhi->addIncoming(V: Zero, BB: CopyForwardBB);
774
775 Instruction *CopyFwdBBTerm = CopyForwardBB->getTerminator();
776 BasicBlock *SuccessorBB = ExitBB;
777 if (RequiresResidual)
778 SuccessorBB =
779 BasicBlock::Create(Context&: F->getContext(), Name: "memmove_fwd_middle", Parent: F, InsertBefore: ExitBB);
780
781 // leaving or staying in the main loop
782 MainLoopBuilder.CreateCondBr(
783 Cond: MainLoopBuilder.CreateICmpEQ(LHS: MainIndex, RHS: RuntimeLoopBytes), True: SuccessorBB,
784 False: MainLoopBB);
785
786 // getting in or skipping the main loop
787 BranchInst *BrInst =
788 BranchInst::Create(IfTrue: SuccessorBB, IfFalse: MainLoopBB, Cond: SkipMainCondition,
789 InsertBefore: CopyFwdBBTerm->getIterator());
790 BrInst->setDebugLoc(DbgLoc);
791 CopyFwdBBTerm->eraseFromParent();
792
793 if (RequiresResidual) {
794 BasicBlock *IntermediateBB = SuccessorBB;
795 IRBuilder<> IntermediateBuilder(IntermediateBB);
796 IntermediateBuilder.SetCurrentDebugLocation(DbgLoc);
797 BasicBlock *ResidualLoopBB = BasicBlock::Create(
798 Context&: F->getContext(), Name: "memmove_fwd_residual_loop", Parent: F, InsertBefore: ExitBB);
799 IntermediateBuilder.CreateCondBr(Cond: SkipResidualCondition, True: ExitBB,
800 False: ResidualLoopBB);
801
802 // Residual loop
803 IRBuilder<> ResidualLoopBuilder(ResidualLoopBB);
804 ResidualLoopBuilder.SetCurrentDebugLocation(DbgLoc);
805 PHINode *ResidualLoopPhi =
806 ResidualLoopBuilder.CreatePHI(Ty: ILengthType, NumReservedValues: 0, Name: "fwd_residual_index");
807 Value *LoadGEP = ResidualLoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: SrcAddr,
808 IdxList: ResidualLoopPhi);
809 Value *Element = ResidualLoopBuilder.CreateAlignedLoad(
810 Ty: ResidualLoopOpType, Ptr: LoadGEP, Align: ResidualSrcAlign, isVolatile: SrcIsVolatile,
811 Name: "element");
812 Value *StoreGEP = ResidualLoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: DstAddr,
813 IdxList: ResidualLoopPhi);
814 ResidualLoopBuilder.CreateAlignedStore(Val: Element, Ptr: StoreGEP,
815 Align: ResidualDstAlign, isVolatile: DstIsVolatile);
816 Value *ResidualIndex =
817 ResidualLoopBuilder.CreateAdd(LHS: ResidualLoopPhi, RHS: CIResidualLoopOpSize);
818 ResidualLoopBuilder.CreateCondBr(
819 Cond: ResidualLoopBuilder.CreateICmpEQ(LHS: ResidualIndex, RHS: CopyLen), True: ExitBB,
820 False: ResidualLoopBB);
821 ResidualLoopPhi->addIncoming(V: ResidualIndex, BB: ResidualLoopBB);
822 ResidualLoopPhi->addIncoming(V: RuntimeLoopBytes, BB: IntermediateBB);
823 }
824 }
825}
826
// Similar to createMemMoveLoopUnknownSize, except that the trip counts are
// computed at compile time, loops and branches that can never execute are
// omitted, and the residual is emitted as straight-line code instead of a loop.
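// For illustration: with CopyLen == 17 and an 8-byte LoopOpType, the loop part
// moves 16 bytes in two iterations, and the residual (typically a single i8
// load/store pair, depending on the target's residual lowering) moves the last
// byte; the backward branch emits that residual before its loop, the forward
// branch after its loop.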
830static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
831 Value *SrcAddr, Value *DstAddr,
832 ConstantInt *CopyLen, Align SrcAlign,
833 Align DstAlign, bool SrcIsVolatile,
834 bool DstIsVolatile,
835 const TargetTransformInfo &TTI) {
836 // No need to expand zero length moves.
837 if (CopyLen->isZero())
838 return;
839
840 Type *TypeOfCopyLen = CopyLen->getType();
841 BasicBlock *OrigBB = InsertBefore->getParent();
842 Function *F = OrigBB->getParent();
843 const DataLayout &DL = F->getDataLayout();
844 LLVMContext &Ctx = OrigBB->getContext();
845 unsigned SrcAS = cast<PointerType>(Val: SrcAddr->getType())->getAddressSpace();
846 unsigned DstAS = cast<PointerType>(Val: DstAddr->getType())->getAddressSpace();
847
848 Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Context&: Ctx, Length: CopyLen, SrcAddrSpace: SrcAS, DestAddrSpace: DstAS,
849 SrcAlign, DestAlign: DstAlign);
850 unsigned LoopOpSize = DL.getTypeStoreSize(Ty: LoopOpType);
851 Type *Int8Type = Type::getInt8Ty(C&: Ctx);
852
853 // Calculate the loop trip count and remaining bytes to copy after the loop.
854 uint64_t BytesCopiedInLoop = alignDown(Value: CopyLen->getZExtValue(), Align: LoopOpSize);
855 uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopiedInLoop;
856
857 IntegerType *ILengthType = cast<IntegerType>(Val: TypeOfCopyLen);
858 ConstantInt *Zero = ConstantInt::get(Ty: ILengthType, V: 0);
859 ConstantInt *LoopBound = ConstantInt::get(Ty: ILengthType, V: BytesCopiedInLoop);
860 ConstantInt *CILoopOpSize = ConstantInt::get(Ty: ILengthType, V: LoopOpSize);
861
862 const DebugLoc &DbgLoc = InsertBefore->getStableDebugLoc();
863 IRBuilder<> PLBuilder(InsertBefore);
864 PLBuilder.SetCurrentDebugLocation(DbgLoc);
865
866 auto [CmpSrcAddr, CmpDstAddr] =
867 tryInsertCastToCommonAddrSpace(B&: PLBuilder, Addr1: SrcAddr, Addr2: DstAddr, TTI);
868 Value *PtrCompare =
869 PLBuilder.CreateICmpULT(LHS: CmpSrcAddr, RHS: CmpDstAddr, Name: "compare_src_dst");
870 Instruction *ThenTerm, *ElseTerm;
871 SplitBlockAndInsertIfThenElse(Cond: PtrCompare, SplitBefore: InsertBefore->getIterator(),
872 ThenTerm: &ThenTerm, ElseTerm: &ElseTerm);
873
874 BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
875 BasicBlock *CopyForwardBB = ElseTerm->getParent();
876 BasicBlock *ExitBB = InsertBefore->getParent();
877 ExitBB->setName("memmove_done");
878
879 Align PartSrcAlign(commonAlignment(A: SrcAlign, Offset: LoopOpSize));
880 Align PartDstAlign(commonAlignment(A: DstAlign, Offset: LoopOpSize));
881
882 // Helper function to generate a load/store pair of a given type in the
883 // residual. Used in the forward and backward branches.
884 auto GenerateResidualLdStPair = [&](Type *OpTy, IRBuilderBase &Builder,
885 uint64_t &BytesCopied) {
886 Align ResSrcAlign(commonAlignment(A: SrcAlign, Offset: BytesCopied));
887 Align ResDstAlign(commonAlignment(A: DstAlign, Offset: BytesCopied));
888
889 unsigned OperandSize = DL.getTypeStoreSize(Ty: OpTy);
890
891 // If we used LoopOpType as GEP element type, we would iterate over the
892 // buffers in TypeStoreSize strides while copying TypeAllocSize bytes, i.e.,
893 // we would miss bytes if TypeStoreSize != TypeAllocSize. Therefore, use
894 // byte offsets computed from the TypeStoreSize.
895 Value *SrcGEP = Builder.CreateInBoundsGEP(
896 Ty: Int8Type, Ptr: SrcAddr, IdxList: ConstantInt::get(Ty: TypeOfCopyLen, V: BytesCopied));
897 LoadInst *Load =
898 Builder.CreateAlignedLoad(Ty: OpTy, Ptr: SrcGEP, Align: ResSrcAlign, isVolatile: SrcIsVolatile);
899 Value *DstGEP = Builder.CreateInBoundsGEP(
900 Ty: Int8Type, Ptr: DstAddr, IdxList: ConstantInt::get(Ty: TypeOfCopyLen, V: BytesCopied));
901 Builder.CreateAlignedStore(Val: Load, Ptr: DstGEP, Align: ResDstAlign, isVolatile: DstIsVolatile);
902 BytesCopied += OperandSize;
903 };
904
905 // Copying backwards.
906 if (RemainingBytes != 0) {
907 CopyBackwardsBB->setName("memmove_bwd_residual");
908 uint64_t BytesCopied = BytesCopiedInLoop;
909
910 // Residual code is required to move the remaining bytes. We need the same
911 // instructions as in the forward case, only in reverse. So we generate code
912 // the same way, except that we change the IRBuilder insert point for each
913 // load/store pair so that each one is inserted before the previous one
914 // instead of after it.
915 IRBuilder<> BwdResBuilder(CopyBackwardsBB,
916 CopyBackwardsBB->getFirstNonPHIIt());
917 BwdResBuilder.SetCurrentDebugLocation(DbgLoc);
918 SmallVector<Type *, 5> RemainingOps;
919 TTI.getMemcpyLoopResidualLoweringType(OpsOut&: RemainingOps, Context&: Ctx, RemainingBytes,
920 SrcAddrSpace: SrcAS, DestAddrSpace: DstAS, SrcAlign: PartSrcAlign,
921 DestAlign: PartDstAlign);
922 for (auto *OpTy : RemainingOps) {
923 // reverse the order of the emitted operations
924 BwdResBuilder.SetInsertPoint(TheBB: CopyBackwardsBB,
925 IP: CopyBackwardsBB->getFirstNonPHIIt());
926 GenerateResidualLdStPair(OpTy, BwdResBuilder, BytesCopied);
927 }
928 }
929 if (BytesCopiedInLoop != 0) {
930 BasicBlock *LoopBB = CopyBackwardsBB;
931 BasicBlock *PredBB = OrigBB;
932 if (RemainingBytes != 0) {
933 // if we introduce residual code, it needs its separate BB
934 LoopBB = CopyBackwardsBB->splitBasicBlock(
935 I: CopyBackwardsBB->getTerminator(), BBName: "memmove_bwd_loop");
936 PredBB = CopyBackwardsBB;
937 } else {
938 CopyBackwardsBB->setName("memmove_bwd_loop");
939 }
940 IRBuilder<> LoopBuilder(LoopBB->getTerminator());
941 LoopBuilder.SetCurrentDebugLocation(DbgLoc);
942 PHINode *LoopPhi = LoopBuilder.CreatePHI(Ty: ILengthType, NumReservedValues: 0);
943 Value *Index = LoopBuilder.CreateSub(LHS: LoopPhi, RHS: CILoopOpSize, Name: "bwd_index");
944 Value *LoadGEP = LoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: SrcAddr, IdxList: Index);
945 Value *Element = LoopBuilder.CreateAlignedLoad(
946 Ty: LoopOpType, Ptr: LoadGEP, Align: PartSrcAlign, isVolatile: SrcIsVolatile, Name: "element");
947 Value *StoreGEP = LoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: DstAddr, IdxList: Index);
948 LoopBuilder.CreateAlignedStore(Val: Element, Ptr: StoreGEP, Align: PartDstAlign,
949 isVolatile: DstIsVolatile);
950
951 // Replace the unconditional branch introduced by
952 // SplitBlockAndInsertIfThenElse to turn LoopBB into a loop.
953 Instruction *UncondTerm = LoopBB->getTerminator();
954 LoopBuilder.CreateCondBr(Cond: LoopBuilder.CreateICmpEQ(LHS: Index, RHS: Zero), True: ExitBB,
955 False: LoopBB);
956 UncondTerm->eraseFromParent();
957
958 LoopPhi->addIncoming(V: Index, BB: LoopBB);
959 LoopPhi->addIncoming(V: LoopBound, BB: PredBB);
960 }
961
962 // Copying forward.
963 BasicBlock *FwdResidualBB = CopyForwardBB;
964 if (BytesCopiedInLoop != 0) {
965 CopyForwardBB->setName("memmove_fwd_loop");
966 BasicBlock *LoopBB = CopyForwardBB;
967 BasicBlock *SuccBB = ExitBB;
968 if (RemainingBytes != 0) {
969 // if we introduce residual code, it needs its separate BB
970 SuccBB = CopyForwardBB->splitBasicBlock(I: CopyForwardBB->getTerminator(),
971 BBName: "memmove_fwd_residual");
972 FwdResidualBB = SuccBB;
973 }
974 IRBuilder<> LoopBuilder(LoopBB->getTerminator());
975 LoopBuilder.SetCurrentDebugLocation(DbgLoc);
976 PHINode *LoopPhi = LoopBuilder.CreatePHI(Ty: ILengthType, NumReservedValues: 0, Name: "fwd_index");
977 Value *LoadGEP = LoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: SrcAddr, IdxList: LoopPhi);
978 Value *Element = LoopBuilder.CreateAlignedLoad(
979 Ty: LoopOpType, Ptr: LoadGEP, Align: PartSrcAlign, isVolatile: SrcIsVolatile, Name: "element");
980 Value *StoreGEP = LoopBuilder.CreateInBoundsGEP(Ty: Int8Type, Ptr: DstAddr, IdxList: LoopPhi);
981 LoopBuilder.CreateAlignedStore(Val: Element, Ptr: StoreGEP, Align: PartDstAlign,
982 isVolatile: DstIsVolatile);
983 Value *Index = LoopBuilder.CreateAdd(LHS: LoopPhi, RHS: CILoopOpSize);
984 LoopPhi->addIncoming(V: Index, BB: LoopBB);
985 LoopPhi->addIncoming(V: Zero, BB: OrigBB);
986
987 // Replace the unconditional branch to turn LoopBB into a loop.
988 Instruction *UncondTerm = LoopBB->getTerminator();
989 LoopBuilder.CreateCondBr(Cond: LoopBuilder.CreateICmpEQ(LHS: Index, RHS: LoopBound), True: SuccBB,
990 False: LoopBB);
991 UncondTerm->eraseFromParent();
992 }
993
994 if (RemainingBytes != 0) {
995 uint64_t BytesCopied = BytesCopiedInLoop;
996
997 // Residual code is required to move the remaining bytes. In the forward
998 // case, we emit it in the normal order.
999 IRBuilder<> FwdResBuilder(FwdResidualBB->getTerminator());
1000 FwdResBuilder.SetCurrentDebugLocation(DbgLoc);
1001 SmallVector<Type *, 5> RemainingOps;
1002 TTI.getMemcpyLoopResidualLoweringType(OpsOut&: RemainingOps, Context&: Ctx, RemainingBytes,
1003 SrcAddrSpace: SrcAS, DestAddrSpace: DstAS, SrcAlign: PartSrcAlign,
1004 DestAlign: PartDstAlign);
1005 for (auto *OpTy : RemainingOps)
1006 GenerateResidualLdStPair(OpTy, FwdResBuilder, BytesCopied);
1007 }
1008}
1009
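// Expand a memset / memset.pattern into a guarded store loop: a branch skips
// the loop entirely when the length is zero, and each iteration stores one
// element of SetValue's type and advances the index by one.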
1010static void createMemSetLoop(Instruction *InsertBefore, Value *DstAddr,
1011 Value *CopyLen, Value *SetValue, Align DstAlign,
1012 std::optional<uint64_t> AverageTripCount,
1013 bool IsVolatile) {
1014 Type *TypeOfCopyLen = CopyLen->getType();
1015 BasicBlock *OrigBB = InsertBefore->getParent();
1016 Function *F = OrigBB->getParent();
1017 const DataLayout &DL = F->getDataLayout();
1018 BasicBlock *NewBB =
1019 OrigBB->splitBasicBlock(I: InsertBefore, BBName: "split");
1020 BasicBlock *LoopBB
1021 = BasicBlock::Create(Context&: F->getContext(), Name: "loadstoreloop", Parent: F, InsertBefore: NewBB);
1022
1023 const DebugLoc &DbgLoc = InsertBefore->getStableDebugLoc();
1024 IRBuilder<> Builder(OrigBB->getTerminator());
1025 Builder.SetCurrentDebugLocation(DbgLoc);
1026
1027 auto *ToLoopBR = Builder.CreateCondBr(
1028 Cond: Builder.CreateICmpEQ(LHS: ConstantInt::get(Ty: TypeOfCopyLen, V: 0), RHS: CopyLen), True: NewBB,
1029 False: LoopBB);
1030 MDBuilder MDB(F->getContext());
1031 if (AverageTripCount.has_value())
1032 ToLoopBR->setMetadata(KindID: LLVMContext::MD_prof,
1033 Node: MDB.createLikelyBranchWeights());
1034 else
1035 setExplicitlyUnknownBranchWeightsIfProfiled(I&: *ToLoopBR, DEBUG_TYPE);
1036
1037 OrigBB->getTerminator()->eraseFromParent();
1038
1039 unsigned PartSize = DL.getTypeStoreSize(Ty: SetValue->getType());
1040 Align PartAlign(commonAlignment(A: DstAlign, Offset: PartSize));
1041
1042 IRBuilder<> LoopBuilder(LoopBB);
1043 LoopBuilder.SetCurrentDebugLocation(DbgLoc);
1044 PHINode *LoopIndex = LoopBuilder.CreatePHI(Ty: TypeOfCopyLen, NumReservedValues: 0);
1045 LoopIndex->addIncoming(V: ConstantInt::get(Ty: TypeOfCopyLen, V: 0), BB: OrigBB);
1046
1047 LoopBuilder.CreateAlignedStore(
1048 Val: SetValue,
1049 Ptr: LoopBuilder.CreateInBoundsGEP(Ty: SetValue->getType(), Ptr: DstAddr, IdxList: LoopIndex),
1050 Align: PartAlign, isVolatile: IsVolatile);
1051
1052 Value *NewIndex =
1053 LoopBuilder.CreateAdd(LHS: LoopIndex, RHS: ConstantInt::get(Ty: TypeOfCopyLen, V: 1));
1054 LoopIndex->addIncoming(V: NewIndex, BB: LoopBB);
1055
1056 auto *LoopBR = LoopBuilder.CreateCondBr(
1057 Cond: LoopBuilder.CreateICmpULT(LHS: NewIndex, RHS: CopyLen), True: LoopBB, False: NewBB);
1058 if (AverageTripCount.has_value())
1059 setFittedBranchWeights(I&: *LoopBR, Weights: {AverageTripCount.value(), 1},
1060 /*IsExpected=*/false);
1061 else
1062 setExplicitlyUnknownBranchWeightsIfProfiled(I&: *LoopBR, DEBUG_TYPE);
1063}
1064
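// Conservatively assume that the source and destination of \p Memcpy may
// overlap, unless ScalarEvolution proves that the two pointers are distinct at
// the call site.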
1065template <typename T>
1066static bool canOverlap(MemTransferBase<T> *Memcpy, ScalarEvolution *SE) {
1067 if (SE) {
1068 const SCEV *SrcSCEV = SE->getSCEV(V: Memcpy->getRawSource());
1069 const SCEV *DestSCEV = SE->getSCEV(V: Memcpy->getRawDest());
1070 if (SE->isKnownPredicateAt(Pred: CmpInst::ICMP_NE, LHS: SrcSCEV, RHS: DestSCEV, CtxI: Memcpy))
1071 return false;
1072 }
1073 return true;
1074}
1075
1076void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
1077 const TargetTransformInfo &TTI,
1078 ScalarEvolution *SE) {
1079 bool CanOverlap = canOverlap(Memcpy, SE);
1080 auto TripCount = getAverageMemOpLoopTripCount(I: *Memcpy);
1081 if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Memcpy->getLength())) {
1082 createMemCpyLoopKnownSize(
1083 /* InsertBefore */ Memcpy,
1084 /* SrcAddr */ Memcpy->getRawSource(),
1085 /* DstAddr */ Memcpy->getRawDest(),
1086 /* CopyLen */ CI,
1087 /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
1088 /* DestAlign */ DstAlign: Memcpy->getDestAlign().valueOrOne(),
1089 /* SrcIsVolatile */ Memcpy->isVolatile(),
1090 /* DstIsVolatile */ Memcpy->isVolatile(),
1091 /* CanOverlap */ CanOverlap,
1092 /* TargetTransformInfo */ TTI,
1093 /* AtomicElementSize */ std::nullopt,
1094 /* AverageTripCount */ TripCount);
1095 } else {
1096 createMemCpyLoopUnknownSize(
1097 /* InsertBefore */ Memcpy,
1098 /* SrcAddr */ Memcpy->getRawSource(),
1099 /* DstAddr */ Memcpy->getRawDest(),
1100 /* CopyLen */ Memcpy->getLength(),
1101 /* SrcAlign */ Memcpy->getSourceAlign().valueOrOne(),
1102 /* DestAlign */ DstAlign: Memcpy->getDestAlign().valueOrOne(),
1103 /* SrcIsVolatile */ Memcpy->isVolatile(),
1104 /* DstIsVolatile */ Memcpy->isVolatile(),
1105 /* CanOverlap */ CanOverlap,
1106 /* TargetTransformInfo */ TTI,
1107 /* AtomicElementSize */ std::nullopt,
1108 /* AverageTripCount */ TripCount);
1109 }
1110}
1111
1112bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
1113 const TargetTransformInfo &TTI) {
1114 Value *CopyLen = Memmove->getLength();
1115 Value *SrcAddr = Memmove->getRawSource();
1116 Value *DstAddr = Memmove->getRawDest();
1117 Align SrcAlign = Memmove->getSourceAlign().valueOrOne();
1118 Align DstAlign = Memmove->getDestAlign().valueOrOne();
1119 bool SrcIsVolatile = Memmove->isVolatile();
1120 bool DstIsVolatile = SrcIsVolatile;
1121 IRBuilder<> CastBuilder(Memmove);
1122 CastBuilder.SetCurrentDebugLocation(Memmove->getStableDebugLoc());
1123
1124 unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace();
1125 unsigned DstAS = DstAddr->getType()->getPointerAddressSpace();
1126 if (SrcAS != DstAS) {
1127 if (!TTI.addrspacesMayAlias(AS0: SrcAS, AS1: DstAS)) {
1128 // We may not be able to emit a pointer comparison, but we don't have
1129 // to. Expand as memcpy.
1130 auto AverageTripCount = getAverageMemOpLoopTripCount(I: *Memmove);
1131 if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: CopyLen)) {
1132 createMemCpyLoopKnownSize(
1133 /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen: CI, SrcAlign, DstAlign,
1134 SrcIsVolatile, DstIsVolatile,
1135 /*CanOverlap=*/false, TTI, AtomicElementSize: std::nullopt, AverageTripCount);
1136 } else {
1137 createMemCpyLoopUnknownSize(
1138 /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign,
1139 DstAlign, SrcIsVolatile, DstIsVolatile,
1140 /*CanOverlap=*/false, TTI, AtomicElementSize: std::nullopt, AverageTripCount);
1141 }
1142
1143 return true;
1144 }
1145
1146 if (!(TTI.isValidAddrSpaceCast(FromAS: DstAS, ToAS: SrcAS) ||
1147 TTI.isValidAddrSpaceCast(FromAS: SrcAS, ToAS: DstAS))) {
1148 // We don't know generically if it's legal to introduce an
1149 // addrspacecast. We need to know either if it's legal to insert an
1150 // addrspacecast, or if the address spaces cannot alias.
1151 LLVM_DEBUG(
1152 dbgs() << "Do not know how to expand memmove between different "
1153 "address spaces\n");
1154 return false;
1155 }
1156 }
1157
1158 if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: CopyLen)) {
1159 createMemMoveLoopKnownSize(
1160 /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen: CI, SrcAlign, DstAlign,
1161 SrcIsVolatile, DstIsVolatile, TTI);
1162 } else {
1163 createMemMoveLoopUnknownSize(
1164 /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
1165 SrcIsVolatile, DstIsVolatile, TTI);
1166 }
1167 return true;
1168}
1169
1170void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
1171 createMemSetLoop(/* InsertBefore */ Memset,
1172 /* DstAddr */ Memset->getRawDest(),
1173 /* CopyLen */ Memset->getLength(),
1174 /* SetValue */ Memset->getValue(),
1175 /* Alignment */ DstAlign: Memset->getDestAlign().valueOrOne(),
1176 /* AverageTripCount */ getAverageMemOpLoopTripCount(I: *Memset),
1177 /* IsVolatile */ Memset->isVolatile());
1178}
1179
1180void llvm::expandMemSetPatternAsLoop(MemSetPatternInst *Memset) {
1181 createMemSetLoop(/* InsertBefore=*/Memset,
1182 /* DstAddr=*/Memset->getRawDest(),
1183 /* CopyLen=*/Memset->getLength(),
1184 /* SetValue=*/Memset->getValue(),
1185 /* Alignment=*/DstAlign: Memset->getDestAlign().valueOrOne(),
1186 /* AverageTripCount */ getAverageMemOpLoopTripCount(I: *Memset),
1187 /* IsVolatile */ Memset->isVolatile());
1188}
1189
1190void llvm::expandAtomicMemCpyAsLoop(AnyMemCpyInst *AtomicMemcpy,
1191 const TargetTransformInfo &TTI,
1192 ScalarEvolution *SE) {
1193 assert(AtomicMemcpy->isAtomic());
1194 if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: AtomicMemcpy->getLength())) {
1195 createMemCpyLoopKnownSize(
1196 /* InsertBefore */ AtomicMemcpy,
1197 /* SrcAddr */ AtomicMemcpy->getRawSource(),
1198 /* DstAddr */ AtomicMemcpy->getRawDest(),
1199 /* CopyLen */ CI,
1200 /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
1201 /* DestAlign */ DstAlign: AtomicMemcpy->getDestAlign().valueOrOne(),
1202 /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
1203 /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
1204 /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
1205 /* TargetTransformInfo */ TTI,
1206 /* AtomicElementSize */ AtomicMemcpy->getElementSizeInBytes());
1207 } else {
1208 createMemCpyLoopUnknownSize(
1209 /* InsertBefore */ AtomicMemcpy,
1210 /* SrcAddr */ AtomicMemcpy->getRawSource(),
1211 /* DstAddr */ AtomicMemcpy->getRawDest(),
1212 /* CopyLen */ AtomicMemcpy->getLength(),
1213 /* SrcAlign */ AtomicMemcpy->getSourceAlign().valueOrOne(),
1214 /* DestAlign */ DstAlign: AtomicMemcpy->getDestAlign().valueOrOne(),
1215 /* SrcIsVolatile */ AtomicMemcpy->isVolatile(),
1216 /* DstIsVolatile */ AtomicMemcpy->isVolatile(),
1217 /* CanOverlap */ false, // SrcAddr & DstAddr may not overlap by spec.
1218 /* TargetTransformInfo */ TTI,
1219 /* AtomicElementSize */ AtomicMemcpy->getElementSizeInBytes());
1220 }
1221}
1222