//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target-specific instructions which implement
// the same semantics in a way which better fits the target backend. This can
// include the use of (intrinsic-based) load-linked/store-conditional loops,
// AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/STLFunctionalExtras.h"
19#include "llvm/ADT/SmallString.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/Analysis/InstSimplifyFolder.h"
22#include "llvm/Analysis/OptimizationRemarkEmitter.h"
23#include "llvm/CodeGen/AtomicExpand.h"
24#include "llvm/CodeGen/TargetLowering.h"
25#include "llvm/CodeGen/TargetPassConfig.h"
26#include "llvm/CodeGen/TargetSubtargetInfo.h"
27#include "llvm/CodeGen/ValueTypes.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
33#include "llvm/IR/DerivedTypes.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/Instruction.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/MDBuilder.h"
39#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
40#include "llvm/IR/Module.h"
41#include "llvm/IR/ProfDataUtils.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
45#include "llvm/InitializePasses.h"
46#include "llvm/Pass.h"
47#include "llvm/Support/AtomicOrdering.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/Debug.h"
50#include "llvm/Support/ErrorHandling.h"
51#include "llvm/Support/raw_ostream.h"
52#include "llvm/Target/TargetMachine.h"
53#include "llvm/Transforms/Utils/LowerAtomic.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
64class AtomicExpandImpl {
65 const TargetLowering *TLI = nullptr;
66 const LibcallLoweringInfo *LibcallLowering = nullptr;
67 const DataLayout *DL = nullptr;
68
69private:
70 /// Callback type for emitting a cmpxchg instruction during RMW expansion.
71 /// Parameters: (Builder, Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
72 /// SSID, IsVolatile, /* OUT */ Success, /* OUT */ NewLoaded,
73 /// MetadataSrc)
74 using CreateCmpXchgInstFun = function_ref<void(
75 IRBuilderBase &, Value *, Value *, Value *, Align, AtomicOrdering,
76 SyncScope::ID, bool, Value *&, Value *&, Instruction *)>;
77
78 void handleFailure(Instruction &FailedInst, const Twine &Msg,
79 Instruction *DiagnosticInst = nullptr) const {
80 LLVMContext &Ctx = FailedInst.getContext();
81
82 // TODO: Do not use generic error type.
83 Ctx.emitError(I: DiagnosticInst ? DiagnosticInst : &FailedInst, ErrorStr: Msg);
84
85 if (!FailedInst.getType()->isVoidTy())
86 FailedInst.replaceAllUsesWith(V: PoisonValue::get(T: FailedInst.getType()));
87 FailedInst.eraseFromParent();
88 }
89
90 template <typename Inst>
91 void handleUnsupportedAtomicSize(Inst *I, const Twine &AtomicOpName,
92 Instruction *DiagnosticInst = nullptr) const;
93
94 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
95 bool tryInsertTrailingSeqCstFence(Instruction *AtomicI);
96 template <typename AtomicInst>
97 bool tryInsertFencesForAtomic(AtomicInst *AtomicI, bool OrderingRequiresFence,
98 AtomicOrdering NewOrdering);
99 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
100 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
101 bool tryExpandAtomicLoad(LoadInst *LI);
102 bool expandAtomicLoadToLL(LoadInst *LI);
103 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
104 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
105 bool tryExpandAtomicStore(StoreInst *SI);
106 void expandAtomicStoreToXChg(StoreInst *SI);
107 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
108 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
109 Value *
110 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
111 Align AddrAlign, AtomicOrdering MemOpOrder,
112 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
113 void expandAtomicOpToLLSC(
114 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
115 AtomicOrdering MemOpOrder,
116 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
117 void expandPartwordAtomicRMW(
118 AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
119 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
120 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
121 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
122 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
123
124 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
125 Value *insertRMWCmpXchgLoop(
126 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
127 AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile,
128 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
129 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
130 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
131
132 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
133 bool isIdempotentRMW(AtomicRMWInst *RMWI);
134 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
135
136 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
137 Value *PointerOperand, Value *ValueOperand,
138 Value *CASExpected, AtomicOrdering Ordering,
139 AtomicOrdering Ordering2,
140 ArrayRef<RTLIB::Libcall> Libcalls);
141 void expandAtomicLoadToLibcall(LoadInst *LI);
142 void expandAtomicStoreToLibcall(StoreInst *LI);
143 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
144 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I,
145 const Twine &AtomicOpName = "cmpxchg",
146 Instruction *DiagnosticInst = nullptr);
147
148 bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
149 CreateCmpXchgInstFun CreateCmpXchg);
150
151 bool processAtomicInstr(Instruction *I);
152
153public:
154 bool run(Function &F,
155 const LibcallLoweringModuleAnalysisResult &LibcallResult,
156 const TargetMachine *TM);
157};
158
159class AtomicExpandLegacy : public FunctionPass {
160public:
161 static char ID; // Pass identification, replacement for typeid
162
163 AtomicExpandLegacy() : FunctionPass(ID) {}
164
165 void getAnalysisUsage(AnalysisUsage &AU) const override {
166 AU.addRequired<LibcallLoweringInfoWrapper>();
167 FunctionPass::getAnalysisUsage(AU);
168 }
169
170 bool runOnFunction(Function &F) override;
171};
172
173// IRBuilder to be used for replacement atomic instructions.
174struct ReplacementIRBuilder
175 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
176 MDNode *MMRAMD = nullptr;
177 MDNode *PCSectionsMD = nullptr;
178
179 // Preserves the DebugLoc from I, and preserves still valid metadata.
180 // Enable StrictFP builder mode when appropriate.
181 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
182 : IRBuilder(
183 I->getContext(), InstSimplifyFolder(DL),
184 IRBuilderCallbackInserter([this](Instruction *I) { addMD(I); })) {
185 SetInsertPoint(I);
186 if (BB->getParent()->getAttributes().hasFnAttr(Kind: Attribute::StrictFP))
187 this->setIsFPConstrained(true);
188
189 MMRAMD = I->getMetadata(KindID: LLVMContext::MD_mmra);
190 PCSectionsMD = I->getMetadata(KindID: LLVMContext::MD_pcsections);
191 }
192
193 void addMD(Instruction *I) {
194 if (canInstructionHaveMMRAs(I: *I))
195 I->setMetadata(KindID: LLVMContext::MD_mmra, Node: MMRAMD);
196 I->setMetadata(KindID: LLVMContext::MD_pcsections, Node: PCSectionsMD);
197 }
198};
199
200} // end anonymous namespace
201
202char AtomicExpandLegacy::ID = 0;
203
204char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
205
206INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
207 "Expand Atomic instructions", false, false)
208INITIALIZE_PASS_DEPENDENCY(LibcallLoweringInfoWrapper)
209INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
210INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
211 "Expand Atomic instructions", false, false)
212
213// Helper functions to retrieve the size of atomic instructions.
214static unsigned getAtomicOpSize(LoadInst *LI) {
215 const DataLayout &DL = LI->getDataLayout();
216 return DL.getTypeStoreSize(Ty: LI->getType());
217}
218
219static unsigned getAtomicOpSize(StoreInst *SI) {
220 const DataLayout &DL = SI->getDataLayout();
221 return DL.getTypeStoreSize(Ty: SI->getValueOperand()->getType());
222}
223
224static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
225 const DataLayout &DL = RMWI->getDataLayout();
226 return DL.getTypeStoreSize(Ty: RMWI->getValOperand()->getType());
227}
228
229static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
230 const DataLayout &DL = CASI->getDataLayout();
231 return DL.getTypeStoreSize(Ty: CASI->getCompareOperand()->getType());
232}
233
234/// Copy metadata that's safe to preserve when widening atomics.
235static void copyMetadataForAtomic(Instruction &Dest,
236 const Instruction &Source) {
237 SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
238 Source.getAllMetadata(MDs&: MD);
239 LLVMContext &Ctx = Dest.getContext();
240 MDBuilder MDB(Ctx);
241
242 for (auto [ID, N] : MD) {
243 switch (ID) {
244 case LLVMContext::MD_dbg:
245 case LLVMContext::MD_tbaa:
246 case LLVMContext::MD_tbaa_struct:
247 case LLVMContext::MD_alias_scope:
248 case LLVMContext::MD_noalias:
249 case LLVMContext::MD_noalias_addrspace:
250 case LLVMContext::MD_access_group:
251 case LLVMContext::MD_mmra:
252 Dest.setMetadata(KindID: ID, Node: N);
253 break;
254 default:
255 if (ID == Ctx.getMDKindID(Name: "amdgpu.no.remote.memory"))
256 Dest.setMetadata(KindID: ID, Node: N);
257 else if (ID == Ctx.getMDKindID(Name: "amdgpu.no.fine.grained.memory"))
258 Dest.setMetadata(KindID: ID, Node: N);
259
260 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
261 // uses.
262 break;
263 }
264 }
265}
266
267template <typename Inst>
268static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
269 unsigned Size = getAtomicOpSize(I);
270 Align Alignment = I->getAlign();
271 unsigned MaxSize = TLI->getMaxAtomicSizeInBitsSupported() / 8;
272 return Alignment >= Size && Size <= MaxSize;
273}
274
275template <typename Inst>
276static void writeUnsupportedAtomicSizeReason(const TargetLowering *TLI, Inst *I,
277 raw_ostream &OS) {
278 unsigned Size = getAtomicOpSize(I);
279 Align Alignment = I->getAlign();
280 bool NeedSeparator = false;
281
282 if (Alignment < Size) {
283 OS << "instruction alignment " << Alignment.value()
284 << " is smaller than the required " << Size
285 << "-byte alignment for this atomic operation";
286 NeedSeparator = true;
287 }
288
289 unsigned MaxSize = TLI->getMaxAtomicSizeInBitsSupported() / 8;
290 if (Size > MaxSize) {
291 if (NeedSeparator)
292 OS << "; ";
293 OS << "target supports atomics up to " << MaxSize
294 << " bytes, but this atomic accesses " << Size << " bytes";
295 }
296}
297
298template <typename Inst>
299void AtomicExpandImpl::handleUnsupportedAtomicSize(
300 Inst *I, const Twine &AtomicOpName, Instruction *DiagnosticInst) const {
301 assert(!atomicSizeSupported(TLI, I) && "expected unsupported atomic size");
302 SmallString<128> FailureReason;
303 raw_svector_ostream OS(FailureReason);
304 writeUnsupportedAtomicSizeReason(TLI, I, OS);
305 handleFailure(FailedInst&: *I, Msg: Twine("unsupported ") + AtomicOpName + ": " + FailureReason,
306 DiagnosticInst);
307}
308
309bool AtomicExpandImpl::tryInsertTrailingSeqCstFence(Instruction *AtomicI) {
310 if (!TLI->shouldInsertTrailingSeqCstFenceForAtomicStore(I: AtomicI))
311 return false;
312
313 IRBuilder Builder(AtomicI);
314 if (auto *TrailingFence = TLI->emitTrailingFence(
315 Builder, Inst: AtomicI, Ord: AtomicOrdering::SequentiallyConsistent)) {
316 TrailingFence->moveAfter(MovePos: AtomicI);
317 return true;
318 }
319 return false;
320}
321
322template <typename AtomicInst>
323bool AtomicExpandImpl::tryInsertFencesForAtomic(AtomicInst *AtomicI,
324 bool OrderingRequiresFence,
325 AtomicOrdering NewOrdering) {
326 bool ShouldInsertFences = TLI->shouldInsertFencesForAtomic(I: AtomicI);
327 if (OrderingRequiresFence && ShouldInsertFences) {
328 AtomicOrdering FenceOrdering = AtomicI->getOrdering();
329 AtomicI->setOrdering(NewOrdering);
330 return bracketInstWithFences(I: AtomicI, Order: FenceOrdering);
331 }
332 if (!ShouldInsertFences)
333 return tryInsertTrailingSeqCstFence(AtomicI);
334 return false;
335}
336
337bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
338 if (auto *LI = dyn_cast<LoadInst>(Val: I)) {
339 if (!LI->isAtomic())
340 return false;
341
342 if (!atomicSizeSupported(TLI, I: LI)) {
343 expandAtomicLoadToLibcall(LI);
344 return true;
345 }
346
347 bool MadeChange = false;
348 if (TLI->shouldCastAtomicLoadInIR(LI) ==
349 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
350 LI = convertAtomicLoadToIntegerType(LI);
351 MadeChange = true;
352 }
353
354 MadeChange |= tryInsertFencesForAtomic(
355 AtomicI: LI, OrderingRequiresFence: isAcquireOrStronger(AO: LI->getOrdering()), NewOrdering: AtomicOrdering::Monotonic);
356
357 MadeChange |= tryExpandAtomicLoad(LI);
358 return MadeChange;
359 }
360
361 if (auto *SI = dyn_cast<StoreInst>(Val: I)) {
362 if (!SI->isAtomic())
363 return false;
364
365 if (!atomicSizeSupported(TLI, I: SI)) {
366 expandAtomicStoreToLibcall(LI: SI);
367 return true;
368 }
369
370 bool MadeChange = false;
371 if (TLI->shouldCastAtomicStoreInIR(SI) ==
372 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
373 SI = convertAtomicStoreToIntegerType(SI);
374 MadeChange = true;
375 }
376
377 MadeChange |= tryInsertFencesForAtomic(
378 AtomicI: SI, OrderingRequiresFence: isReleaseOrStronger(AO: SI->getOrdering()), NewOrdering: AtomicOrdering::Monotonic);
379
380 MadeChange |= tryExpandAtomicStore(SI);
381 return MadeChange;
382 }
383
384 if (auto *RMWI = dyn_cast<AtomicRMWInst>(Val: I)) {
385 if (!atomicSizeSupported(TLI, I: RMWI)) {
386 expandAtomicRMWToLibcall(I: RMWI);
387 return true;
388 }
389
390 bool MadeChange = false;
391 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
392 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
393 RMWI = convertAtomicXchgToIntegerType(RMWI);
394 MadeChange = true;
395 }
396
397 MadeChange |= tryInsertFencesForAtomic(
398 AtomicI: RMWI,
399 OrderingRequiresFence: isReleaseOrStronger(AO: RMWI->getOrdering()) ||
400 isAcquireOrStronger(AO: RMWI->getOrdering()),
401 NewOrdering: TLI->atomicOperationOrderAfterFenceSplit(I: RMWI));
402
403 // There are two different ways of expanding RMW instructions:
404 // - into a load if it is idempotent
405 // - into a Cmpxchg/LL-SC loop otherwise
406 // we try them in that order.
407 MadeChange |= (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
408 tryExpandAtomicRMW(AI: RMWI);
409 return MadeChange;
410 }
411
412 if (auto *CASI = dyn_cast<AtomicCmpXchgInst>(Val: I)) {
413 if (!atomicSizeSupported(TLI, I: CASI)) {
414 expandAtomicCASToLibcall(I: CASI);
415 return true;
416 }
417
418 // TODO: when we're ready to make the change at the IR level, we can
419 // extend convertCmpXchgToInteger for floating point too.
420 bool MadeChange = false;
421 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
422 // TODO: add a TLI hook to control this so that each target can
423 // convert to lowering the original type one at a time.
424 CASI = convertCmpXchgToIntegerType(CI: CASI);
425 MadeChange = true;
426 }
427
428 auto CmpXchgExpansion = TLI->shouldExpandAtomicCmpXchgInIR(AI: CASI);
429 if (TLI->shouldInsertFencesForAtomic(I: CASI)) {
430 if (CmpXchgExpansion == TargetLoweringBase::AtomicExpansionKind::None &&
431 (isReleaseOrStronger(AO: CASI->getSuccessOrdering()) ||
432 isAcquireOrStronger(AO: CASI->getSuccessOrdering()) ||
433 isAcquireOrStronger(AO: CASI->getFailureOrdering()))) {
434 // If a compare and swap is lowered to LL/SC, we can do smarter fence
435 // insertion, with a stronger one on the success path than on the
436 // failure path. As a result, fence insertion is directly done by
437 // expandAtomicCmpXchg in that case.
438 AtomicOrdering FenceOrdering = CASI->getMergedOrdering();
439 AtomicOrdering CASOrdering =
440 TLI->atomicOperationOrderAfterFenceSplit(I: CASI);
441 CASI->setSuccessOrdering(CASOrdering);
442 CASI->setFailureOrdering(CASOrdering);
443 MadeChange |= bracketInstWithFences(I: CASI, Order: FenceOrdering);
444 }
445 } else if (CmpXchgExpansion !=
446 TargetLoweringBase::AtomicExpansionKind::LLSC) {
447 // CmpXchg LLSC is handled in expandAtomicCmpXchg().
448 MadeChange |= tryInsertTrailingSeqCstFence(AtomicI: CASI);
449 }
450
451 MadeChange |= tryExpandAtomicCmpXchg(CI: CASI);
452 return MadeChange;
453 }
454
455 return false;
456}
457
458bool AtomicExpandImpl::run(
459 Function &F, const LibcallLoweringModuleAnalysisResult &LibcallResult,
460 const TargetMachine *TM) {
461 const auto *Subtarget = TM->getSubtargetImpl(F);
462 if (!Subtarget->enableAtomicExpand())
463 return false;
464 TLI = Subtarget->getTargetLowering();
465 LibcallLowering = &LibcallResult.getLibcallLowering(Subtarget: *Subtarget);
466 DL = &F.getDataLayout();
467
468 bool MadeChange = false;
469
470 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
471 BasicBlock *BB = &*BBI;
472
473 BasicBlock::reverse_iterator Next;
474
475 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
476 I = Next) {
477 Instruction &Inst = *I;
478 Next = std::next(x: I);
479
480 if (processAtomicInstr(I: &Inst)) {
481 MadeChange = true;
482
483 // New blocks may have been inserted.
484 BBE = F.end();
485 }
486 }
487 }
488
489 return MadeChange;
490}
491
492bool AtomicExpandLegacy::runOnFunction(Function &F) {
493
494 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
495 if (!TPC)
496 return false;
497 auto *TM = &TPC->getTM<TargetMachine>();
498
499 const LibcallLoweringModuleAnalysisResult &LibcallResult =
500 getAnalysis<LibcallLoweringInfoWrapper>().getResult(M: *F.getParent());
501 AtomicExpandImpl AE;
502 return AE.run(F, LibcallResult, TM);
503}
504
505FunctionPass *llvm::createAtomicExpandLegacyPass() {
506 return new AtomicExpandLegacy();
507}
508
509PreservedAnalyses AtomicExpandPass::run(Function &F,
510 FunctionAnalysisManager &FAM) {
511 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
512
513 const LibcallLoweringModuleAnalysisResult *LibcallResult =
514 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(IR&: *F.getParent());
515
516 if (!LibcallResult) {
517 F.getContext().emitError(ErrorStr: "'" + LibcallLoweringModuleAnalysis::name() +
518 "' analysis required");
519 return PreservedAnalyses::all();
520 }
521
522 AtomicExpandImpl AE;
523
524 bool Changed = AE.run(F, LibcallResult: *LibcallResult, TM);
525 if (!Changed)
526 return PreservedAnalyses::all();
527
528 return PreservedAnalyses::none();
529}
530
531bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
532 AtomicOrdering Order) {
533 ReplacementIRBuilder Builder(I, *DL);
534
535 auto LeadingFence = TLI->emitLeadingFence(Builder, Inst: I, Ord: Order);
536
537 auto TrailingFence = TLI->emitTrailingFence(Builder, Inst: I, Ord: Order);
538 // We have a guard here because not every atomic operation generates a
539 // trailing fence.
540 if (TrailingFence)
541 TrailingFence->moveAfter(MovePos: I);
542
543 return (LeadingFence || TrailingFence);
544}
545
546/// Get the iX type with the same bitwidth as T.
547IntegerType *
548AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
549 EVT VT = TLI->getMemValueType(DL, Ty: T);
550 unsigned BitWidth = VT.getStoreSizeInBits();
551 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
552 return IntegerType::get(C&: T->getContext(), NumBits: BitWidth);
553}
554
555/// Convert an atomic load of a non-integral type to an integer load of the
556/// equivalent bitwidth. See the function comment on
557/// convertAtomicStoreToIntegerType for background.
558LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
559 auto *M = LI->getModule();
560 Type *NewTy = getCorrespondingIntegerType(T: LI->getType(), DL: M->getDataLayout());
561
562 ReplacementIRBuilder Builder(LI, *DL);
563
564 Value *Addr = LI->getPointerOperand();
565
566 auto *NewLI = Builder.CreateLoad(Ty: NewTy, Ptr: Addr);
567 NewLI->setAlignment(LI->getAlign());
568 NewLI->setVolatile(LI->isVolatile());
569 NewLI->setAtomic(Ordering: LI->getOrdering(), SSID: LI->getSyncScopeID());
570 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
571
572 Value *NewVal = LI->getType()->isPtrOrPtrVectorTy()
573 ? Builder.CreateIntToPtr(V: NewLI, DestTy: LI->getType())
574 : Builder.CreateBitCast(V: NewLI, DestTy: LI->getType());
575 LI->replaceAllUsesWith(V: NewVal);
576 LI->eraseFromParent();
577 return NewLI;
578}
579
580AtomicRMWInst *
581AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
582 assert(RMWI->getOperation() == AtomicRMWInst::Xchg);
583
584 auto *M = RMWI->getModule();
585 Type *NewTy =
586 getCorrespondingIntegerType(T: RMWI->getType(), DL: M->getDataLayout());
587
588 ReplacementIRBuilder Builder(RMWI, *DL);
589
590 Value *Addr = RMWI->getPointerOperand();
591 Value *Val = RMWI->getValOperand();
592 Value *NewVal = Val->getType()->isPointerTy()
593 ? Builder.CreatePtrToInt(V: Val, DestTy: NewTy)
594 : Builder.CreateBitCast(V: Val, DestTy: NewTy);
595
596 auto *NewRMWI = Builder.CreateAtomicRMW(Op: AtomicRMWInst::Xchg, Ptr: Addr, Val: NewVal,
597 Align: RMWI->getAlign(), Ordering: RMWI->getOrdering(),
598 SSID: RMWI->getSyncScopeID());
599 NewRMWI->setVolatile(RMWI->isVolatile());
600 copyMetadataForAtomic(Dest&: *NewRMWI, Source: *RMWI);
601 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
602
603 Value *NewRVal = RMWI->getType()->isPointerTy()
604 ? Builder.CreateIntToPtr(V: NewRMWI, DestTy: RMWI->getType())
605 : Builder.CreateBitCast(V: NewRMWI, DestTy: RMWI->getType());
606 RMWI->replaceAllUsesWith(V: NewRVal);
607 RMWI->eraseFromParent();
608 return NewRMWI;
609}
610
611bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
612 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
613 case TargetLoweringBase::AtomicExpansionKind::None:
614 return false;
615 case TargetLoweringBase::AtomicExpansionKind::LLSC:
616 expandAtomicOpToLLSC(
617 I: LI, ResultTy: LI->getType(), Addr: LI->getPointerOperand(), AddrAlign: LI->getAlign(),
618 MemOpOrder: LI->getOrdering(),
619 PerformOp: [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
620 return true;
621 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
622 return expandAtomicLoadToLL(LI);
623 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
624 return expandAtomicLoadToCmpXchg(LI);
625 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
626 LI->setAtomic(Ordering: AtomicOrdering::NotAtomic);
627 return true;
628 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
629 TLI->emitExpandAtomicLoad(LI);
630 return true;
631 default:
632 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
633 }
634}
635
636bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
637 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
638 case TargetLoweringBase::AtomicExpansionKind::None:
639 return false;
640 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
641 TLI->emitExpandAtomicStore(SI);
642 return true;
643 case TargetLoweringBase::AtomicExpansionKind::Expand:
644 expandAtomicStoreToXChg(SI);
645 return true;
646 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
647 SI->setAtomic(Ordering: AtomicOrdering::NotAtomic);
648 return true;
649 default:
650 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
651 }
652}
653
654bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
655 ReplacementIRBuilder Builder(LI, *DL);
656
657 // On some architectures, load-linked instructions are atomic for larger
658 // sizes than normal loads. For example, the only 64-bit load guaranteed
659 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
660 Value *Val = TLI->emitLoadLinked(Builder, ValueTy: LI->getType(),
661 Addr: LI->getPointerOperand(), Ord: LI->getOrdering());
662 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
663
664 LI->replaceAllUsesWith(V: Val);
665 LI->eraseFromParent();
666
667 return true;
668}
669
670bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
671 ReplacementIRBuilder Builder(LI, *DL);
672 AtomicOrdering Order = LI->getOrdering();
673 if (Order == AtomicOrdering::Unordered)
674 Order = AtomicOrdering::Monotonic;
675
676 Value *Addr = LI->getPointerOperand();
677 Type *Ty = LI->getType();
678
679 // cmpxchg supports only integer and pointer operands. If the load type is
680 // FP or vector, run the cmpxchg on the same-sized integer and bitcast the
681 // result back; mirrors createCmpXchgInstFun.
682 bool NeedBitcast = Ty->isFloatingPointTy() || Ty->isVectorTy();
683 Type *CmpXchgTy = Ty;
684 if (NeedBitcast)
685 CmpXchgTy = Builder.getIntNTy(N: Ty->getPrimitiveSizeInBits());
686 Constant *DummyVal = Constant::getNullValue(Ty: CmpXchgTy);
687
688 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
689 Ptr: Addr, Cmp: DummyVal, New: DummyVal, Align: LI->getAlign(), SuccessOrdering: Order,
690 FailureOrdering: AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: Order),
691 SSID: LI->getSyncScopeID());
692 Pair->setVolatile(LI->isVolatile());
693 Value *Loaded = Builder.CreateExtractValue(Agg: Pair, Idxs: 0, Name: "loaded");
694 if (NeedBitcast)
695 Loaded = Builder.CreateBitCast(V: Loaded, DestTy: Ty);
696
697 LI->replaceAllUsesWith(V: Loaded);
698 LI->eraseFromParent();
699
700 return true;
701}
702
703/// Convert an atomic store of a non-integral type to an integer store of the
704/// equivalent bitwidth. We used to not support floating point or vector
705/// atomics in the IR at all. The backends learned to deal with the bitcast
706/// idiom because that was the only way of expressing the notion of a atomic
707/// float or vector store. The long term plan is to teach each backend to
708/// instruction select from the original atomic store, but as a migration
709/// mechanism, we convert back to the old format which the backends understand.
710/// Each backend will need individual work to recognize the new format.
711StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
712 ReplacementIRBuilder Builder(SI, *DL);
713 auto *M = SI->getModule();
714 Type *NewTy = getCorrespondingIntegerType(T: SI->getValueOperand()->getType(),
715 DL: M->getDataLayout());
716 Value *NewVal = SI->getValueOperand()->getType()->isPtrOrPtrVectorTy()
717 ? Builder.CreatePtrToInt(V: SI->getValueOperand(), DestTy: NewTy)
718 : Builder.CreateBitCast(V: SI->getValueOperand(), DestTy: NewTy);
719
720 Value *Addr = SI->getPointerOperand();
721
722 StoreInst *NewSI = Builder.CreateStore(Val: NewVal, Ptr: Addr);
723 NewSI->setAlignment(SI->getAlign());
724 NewSI->setVolatile(SI->isVolatile());
725 NewSI->setAtomic(Ordering: SI->getOrdering(), SSID: SI->getSyncScopeID());
726 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
727 SI->eraseFromParent();
728 return NewSI;
729}
730
731void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
732 // This function is only called on atomic stores that are too large to be
733 // atomic if implemented as a native store. So we replace them by an
734 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
735 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
736 // It is the responsibility of the target to only signal expansion via
737 // shouldExpandAtomicRMW in cases where this is required and possible.
738 ReplacementIRBuilder Builder(SI, *DL);
739 AtomicOrdering Ordering = SI->getOrdering();
740 assert(Ordering != AtomicOrdering::NotAtomic);
741 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
742 ? AtomicOrdering::Monotonic
743 : Ordering;
744 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
745 Op: AtomicRMWInst::Xchg, Ptr: SI->getPointerOperand(), Val: SI->getValueOperand(),
746 Align: SI->getAlign(), Ordering: RMWOrdering, SSID: SI->getSyncScopeID());
747 AI->setVolatile(SI->isVolatile());
748 SI->eraseFromParent();
749
750 // Now we have an appropriate swap instruction, lower it as usual.
751 tryExpandAtomicRMW(AI);
752}
753
754static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
755 Value *Loaded, Value *NewVal, Align AddrAlign,
756 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
757 bool IsVolatile, Value *&Success,
758 Value *&NewLoaded, Instruction *MetadataSrc) {
759 Type *OrigTy = NewVal->getType();
760
761 // This code can go away when cmpxchg supports FP and vector types.
762 assert(!OrigTy->isPointerTy());
763 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
764 if (NeedBitcast) {
765 IntegerType *IntTy = Builder.getIntNTy(N: OrigTy->getPrimitiveSizeInBits());
766 NewVal = Builder.CreateBitCast(V: NewVal, DestTy: IntTy);
767 Loaded = Builder.CreateBitCast(V: Loaded, DestTy: IntTy);
768 }
769
770 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
771 Ptr: Addr, Cmp: Loaded, New: NewVal, Align: AddrAlign, SuccessOrdering: MemOpOrder,
772 FailureOrdering: AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: MemOpOrder), SSID);
773 Pair->setVolatile(IsVolatile);
774 if (MetadataSrc)
775 copyMetadataForAtomic(Dest&: *Pair, Source: *MetadataSrc);
776
777 Success = Builder.CreateExtractValue(Agg: Pair, Idxs: 1, Name: "success");
778 NewLoaded = Builder.CreateExtractValue(Agg: Pair, Idxs: 0, Name: "newloaded");
779
780 if (NeedBitcast)
781 NewLoaded = Builder.CreateBitCast(V: NewLoaded, DestTy: OrigTy);
782}
783
784bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
785 LLVMContext &Ctx = AI->getModule()->getContext();
786 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(RMW: AI);
787 switch (Kind) {
788 case TargetLoweringBase::AtomicExpansionKind::None:
789 return false;
790 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
791 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
792 unsigned ValueSize = getAtomicOpSize(RMWI: AI);
793 if (ValueSize < MinCASSize) {
794 expandPartwordAtomicRMW(I: AI,
795 ExpansionKind: TargetLoweringBase::AtomicExpansionKind::LLSC);
796 } else {
797 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
798 return buildAtomicRMWValue(Op: AI->getOperation(), Builder, Loaded,
799 Val: AI->getValOperand());
800 };
801 expandAtomicOpToLLSC(I: AI, ResultTy: AI->getType(), Addr: AI->getPointerOperand(),
802 AddrAlign: AI->getAlign(), MemOpOrder: AI->getOrdering(), PerformOp);
803 }
804 return true;
805 }
806 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
807 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
808 unsigned ValueSize = getAtomicOpSize(RMWI: AI);
809 if (ValueSize < MinCASSize) {
810 expandPartwordAtomicRMW(I: AI,
811 ExpansionKind: TargetLoweringBase::AtomicExpansionKind::CmpXChg);
812 } else {
813 SmallVector<StringRef> SSNs;
814 Ctx.getSyncScopeNames(SSNs);
815 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
816 ? "system"
817 : SSNs[AI->getSyncScopeID()];
818 OptimizationRemarkEmitter ORE(AI->getFunction());
819 ORE.emit(RemarkBuilder: [&]() {
820 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
821 << "A compare and swap loop was generated for an atomic "
822 << AI->getOperationName(Op: AI->getOperation()) << " operation at "
823 << MemScope << " memory scope";
824 });
825 expandAtomicRMWToCmpXchg(AI, CreateCmpXchg: createCmpXchgInstFun);
826 }
827 return true;
828 }
829 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
830 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
831 unsigned ValueSize = getAtomicOpSize(RMWI: AI);
832 if (ValueSize < MinCASSize) {
833 AtomicRMWInst::BinOp Op = AI->getOperation();
834 // Widen And/Or/Xor and give the target another chance at expanding it.
835 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
836 Op == AtomicRMWInst::And) {
837 tryExpandAtomicRMW(AI: widenPartwordAtomicRMW(AI));
838 return true;
839 }
840 }
841 expandAtomicRMWToMaskedIntrinsic(AI);
842 return true;
843 }
844 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
845 TLI->emitBitTestAtomicRMWIntrinsic(AI);
846 return true;
847 }
848 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
849 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
850 return true;
851 }
852 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
853 return lowerAtomicRMWInst(RMWI: AI);
854 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
855 TLI->emitExpandAtomicRMW(AI);
856 return true;
857 default:
858 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
859 }
860}
861
862namespace {
863
864struct PartwordMaskValues {
865 // These three fields are guaranteed to be set by createMaskInstrs.
866 Type *WordType = nullptr;
867 Type *ValueType = nullptr;
868 Type *IntValueType = nullptr;
869 Value *AlignedAddr = nullptr;
870 Align AlignedAddrAlignment;
871 // The remaining fields can be null.
872 Value *ShiftAmt = nullptr;
873 Value *Mask = nullptr;
874 Value *Inv_Mask = nullptr;
875};
876
877[[maybe_unused]]
878raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
879 auto PrintObj = [&O](auto *V) {
880 if (V)
881 O << *V;
882 else
883 O << "nullptr";
884 O << '\n';
885 };
886 O << "PartwordMaskValues {\n";
887 O << " WordType: ";
888 PrintObj(PMV.WordType);
889 O << " ValueType: ";
890 PrintObj(PMV.ValueType);
891 O << " AlignedAddr: ";
892 PrintObj(PMV.AlignedAddr);
893 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
894 O << " ShiftAmt: ";
895 PrintObj(PMV.ShiftAmt);
896 O << " Mask: ";
897 PrintObj(PMV.Mask);
898 O << " Inv_Mask: ";
899 PrintObj(PMV.Inv_Mask);
900 O << "}\n";
901 return O;
902}
903
904} // end anonymous namespace
905
906/// This is a helper function which builds instructions to provide
907/// values necessary for partword atomic operations. It takes an
908/// incoming address, Addr, and ValueType, and constructs the address,
909/// shift-amounts and masks needed to work with a larger value of size
910/// WordSize.
911///
912/// AlignedAddr: Addr rounded down to a multiple of WordSize
913///
914/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
915/// from AlignAddr for it to have the same value as if
916/// ValueType was loaded from Addr.
917///
918/// Mask: Value to mask with the value loaded from AlignAddr to
919/// include only the part that would've been loaded from Addr.
920///
921/// Inv_Mask: The inverse of Mask.
922static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
923 Instruction *I, Type *ValueType,
924 Value *Addr, Align AddrAlign,
925 unsigned MinWordSize) {
926 PartwordMaskValues PMV;
927
928 Module *M = I->getModule();
929 LLVMContext &Ctx = M->getContext();
930 const DataLayout &DL = M->getDataLayout();
931 unsigned ValueSize = DL.getTypeStoreSize(Ty: ValueType);
932
933 PMV.ValueType = PMV.IntValueType = ValueType;
934 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
935 PMV.IntValueType =
936 Type::getIntNTy(C&: Ctx, N: ValueType->getPrimitiveSizeInBits());
937
938 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(C&: Ctx, N: MinWordSize * 8)
939 : ValueType;
940 if (PMV.ValueType == PMV.WordType) {
941 PMV.AlignedAddr = Addr;
942 PMV.AlignedAddrAlignment = AddrAlign;
943 PMV.ShiftAmt = ConstantInt::get(Ty: PMV.ValueType, V: 0);
944 PMV.Mask = ConstantInt::get(Ty: PMV.ValueType, V: ~0, /*isSigned*/ IsSigned: true);
945 return PMV;
946 }
947
948 PMV.AlignedAddrAlignment = Align(MinWordSize);
949
950 assert(ValueSize < MinWordSize);
951
952 PointerType *PtrTy = cast<PointerType>(Val: Addr->getType());
953 IntegerType *IntTy = DL.getIndexType(C&: Ctx, AddressSpace: PtrTy->getAddressSpace());
954 Value *PtrLSB;
955
956 if (AddrAlign < MinWordSize) {
957 PMV.AlignedAddr = Builder.CreateIntrinsic(
958 ID: Intrinsic::ptrmask, OverloadTypes: {PtrTy, IntTy},
959 Args: {Addr, ConstantInt::getSigned(Ty: IntTy, V: ~(uint64_t)(MinWordSize - 1))},
960 FMFSource: nullptr, Name: "AlignedAddr");
961
962 Value *AddrInt = Builder.CreatePtrToInt(V: Addr, DestTy: IntTy);
963 PtrLSB = Builder.CreateAnd(LHS: AddrInt, RHS: MinWordSize - 1, Name: "PtrLSB");
964 } else {
965 // If the alignment is high enough, the LSB are known 0.
966 PMV.AlignedAddr = Addr;
967 PtrLSB = ConstantInt::getNullValue(Ty: IntTy);
968 }
969
970 if (DL.isLittleEndian()) {
971 // turn bytes into bits
972 PMV.ShiftAmt = Builder.CreateShl(LHS: PtrLSB, RHS: 3);
973 } else {
974 // turn bytes into bits, and count from the other side.
975 PMV.ShiftAmt = Builder.CreateShl(
976 LHS: Builder.CreateXor(LHS: PtrLSB, RHS: MinWordSize - ValueSize), RHS: 3);
977 }
978
979 PMV.ShiftAmt = Builder.CreateTrunc(V: PMV.ShiftAmt, DestTy: PMV.WordType, Name: "ShiftAmt");
980 PMV.Mask = Builder.CreateShl(
981 LHS: ConstantInt::get(Ty: PMV.WordType, V: (1 << (ValueSize * 8)) - 1), RHS: PMV.ShiftAmt,
982 Name: "Mask");
983
984 PMV.Inv_Mask = Builder.CreateNot(V: PMV.Mask, Name: "Inv_Mask");
985
986 return PMV;
987}
988
989static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
990 const PartwordMaskValues &PMV) {
991 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
992 if (PMV.WordType == PMV.ValueType)
993 return WideWord;
994
995 Value *Shift = Builder.CreateLShr(LHS: WideWord, RHS: PMV.ShiftAmt, Name: "shifted");
996 Value *Trunc = Builder.CreateTrunc(V: Shift, DestTy: PMV.IntValueType, Name: "extracted");
997 return Builder.CreateBitCast(V: Trunc, DestTy: PMV.ValueType);
998}
999
1000static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
1001 Value *Updated, const PartwordMaskValues &PMV) {
1002 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
1003 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
1004 if (PMV.WordType == PMV.ValueType)
1005 return Updated;
1006
1007 Updated = Builder.CreateBitCast(V: Updated, DestTy: PMV.IntValueType);
1008
1009 Value *ZExt = Builder.CreateZExt(V: Updated, DestTy: PMV.WordType, Name: "extended");
1010 Value *Shift =
1011 Builder.CreateShl(LHS: ZExt, RHS: PMV.ShiftAmt, Name: "shifted", /*HasNUW*/ true);
1012 Value *And = Builder.CreateAnd(LHS: WideWord, RHS: PMV.Inv_Mask, Name: "unmasked");
1013 Value *Or = Builder.CreateOr(LHS: And, RHS: Shift, Name: "inserted");
1014 return Or;
1015}
1016
1017/// Emit IR to implement a masked version of a given atomicrmw
1018/// operation. (That is, only the bits under the Mask should be
1019/// affected by the operation)
1020static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
1021 IRBuilderBase &Builder, Value *Loaded,
1022 Value *Shifted_Inc, Value *Inc,
1023 const PartwordMaskValues &PMV) {
1024 // TODO: update to use
1025 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
1026 // to merge bits from two values without requiring PMV.Inv_Mask.
1027 switch (Op) {
1028 case AtomicRMWInst::Xchg: {
1029 Value *Loaded_MaskOut = Builder.CreateAnd(LHS: Loaded, RHS: PMV.Inv_Mask);
1030 Value *FinalVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: Shifted_Inc);
1031 return FinalVal;
1032 }
1033 case AtomicRMWInst::Or:
1034 case AtomicRMWInst::Xor:
1035 case AtomicRMWInst::And:
1036 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
1037 case AtomicRMWInst::Add:
1038 case AtomicRMWInst::Sub:
1039 case AtomicRMWInst::Nand: {
1040 // The other arithmetic ops need to be masked into place.
1041 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Val: Shifted_Inc);
1042 Value *NewVal_Masked = Builder.CreateAnd(LHS: NewVal, RHS: PMV.Mask);
1043 Value *Loaded_MaskOut = Builder.CreateAnd(LHS: Loaded, RHS: PMV.Inv_Mask);
1044 Value *FinalVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: NewVal_Masked);
1045 return FinalVal;
1046 }
1047 case AtomicRMWInst::Max:
1048 case AtomicRMWInst::Min:
1049 case AtomicRMWInst::UMax:
1050 case AtomicRMWInst::UMin:
1051 case AtomicRMWInst::FAdd:
1052 case AtomicRMWInst::FSub:
1053 case AtomicRMWInst::FMin:
1054 case AtomicRMWInst::FMax:
1055 case AtomicRMWInst::FMaximum:
1056 case AtomicRMWInst::FMinimum:
1057 case AtomicRMWInst::FMaximumNum:
1058 case AtomicRMWInst::FMinimumNum:
1059 case AtomicRMWInst::UIncWrap:
1060 case AtomicRMWInst::UDecWrap:
1061 case AtomicRMWInst::USubCond:
1062 case AtomicRMWInst::USubSat: {
1063 // Finally, other ops will operate on the full value, so truncate down to
1064 // the original size, and expand out again after doing the
1065 // operation. Bitcasts will be inserted for FP values.
1066 Value *Loaded_Extract = extractMaskedValue(Builder, WideWord: Loaded, PMV);
1067 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded: Loaded_Extract, Val: Inc);
1068 Value *FinalVal = insertMaskedValue(Builder, WideWord: Loaded, Updated: NewVal, PMV);
1069 return FinalVal;
1070 }
1071 default:
1072 llvm_unreachable("Unknown atomic op");
1073 }
1074}
1075
1076/// Expand a sub-word atomicrmw operation into an appropriate
1077/// word-sized operation.
1078///
1079/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
1080/// way as a typical atomicrmw expansion. The only difference here is
1081/// that the operation inside of the loop may operate upon only a
1082/// part of the value.
1083void AtomicExpandImpl::expandPartwordAtomicRMW(
1084 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
1085 // Widen And/Or/Xor and give the target another chance at expanding it.
1086 AtomicRMWInst::BinOp Op = AI->getOperation();
1087 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
1088 Op == AtomicRMWInst::And) {
1089 tryExpandAtomicRMW(AI: widenPartwordAtomicRMW(AI));
1090 return;
1091 }
1092 AtomicOrdering MemOpOrder = AI->getOrdering();
1093 SyncScope::ID SSID = AI->getSyncScopeID();
1094
1095 ReplacementIRBuilder Builder(AI, *DL);
1096
1097 PartwordMaskValues PMV =
1098 createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
1099 AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1100
1101 Value *ValOperand_Shifted = nullptr;
1102 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
1103 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
1104 Value *ValOp = Builder.CreateBitCast(V: AI->getValOperand(), DestTy: PMV.IntValueType);
1105 ValOperand_Shifted =
1106 Builder.CreateShl(LHS: Builder.CreateZExt(V: ValOp, DestTy: PMV.WordType), RHS: PMV.ShiftAmt,
1107 Name: "ValOperand_Shifted");
1108 }
1109
1110 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
1111 return performMaskedAtomicOp(Op, Builder, Loaded, Shifted_Inc: ValOperand_Shifted,
1112 Inc: AI->getValOperand(), PMV);
1113 };
1114
1115 Value *OldResult;
1116 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
1117 OldResult = insertRMWCmpXchgLoop(Builder, ResultType: PMV.WordType, Addr: PMV.AlignedAddr,
1118 AddrAlign: PMV.AlignedAddrAlignment, MemOpOrder, SSID,
1119 IsVolatile: AI->isVolatile(), PerformOp: PerformPartwordOp,
1120 CreateCmpXchg: createCmpXchgInstFun, MetadataSrc: AI);
1121 } else {
1122 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
1123 OldResult = insertRMWLLSCLoop(Builder, ResultTy: PMV.WordType, Addr: PMV.AlignedAddr,
1124 AddrAlign: PMV.AlignedAddrAlignment, MemOpOrder,
1125 PerformOp: PerformPartwordOp);
1126 }
1127
1128 Value *FinalOldResult = extractMaskedValue(Builder, WideWord: OldResult, PMV);
1129 AI->replaceAllUsesWith(V: FinalOldResult);
1130 AI->eraseFromParent();
1131}
1132
1133// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
1134AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
1135 ReplacementIRBuilder Builder(AI, *DL);
1136 AtomicRMWInst::BinOp Op = AI->getOperation();
1137
1138 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
1139 Op == AtomicRMWInst::And) &&
1140 "Unable to widen operation");
1141
1142 PartwordMaskValues PMV =
1143 createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
1144 AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1145
1146 Value *ValOperand_Shifted =
1147 Builder.CreateShl(LHS: Builder.CreateZExt(V: AI->getValOperand(), DestTy: PMV.WordType),
1148 RHS: PMV.ShiftAmt, Name: "ValOperand_Shifted");
1149
1150 Value *NewOperand;
1151
1152 if (Op == AtomicRMWInst::And)
1153 NewOperand =
1154 Builder.CreateOr(LHS: ValOperand_Shifted, RHS: PMV.Inv_Mask, Name: "AndOperand");
1155 else
1156 NewOperand = ValOperand_Shifted;
1157
1158 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
1159 Op, Ptr: PMV.AlignedAddr, Val: NewOperand, Align: PMV.AlignedAddrAlignment,
1160 Ordering: AI->getOrdering(), SSID: AI->getSyncScopeID());
1161
1162 NewAI->setVolatile(AI->isVolatile());
1163 copyMetadataForAtomic(Dest&: *NewAI, Source: *AI);
1164
1165 Value *FinalOldResult = extractMaskedValue(Builder, WideWord: NewAI, PMV);
1166 AI->replaceAllUsesWith(V: FinalOldResult);
1167 AI->eraseFromParent();
1168 return NewAI;
1169}
1170
/// Expand the sub-word cmpxchg \p CI into a word-sized cmpxchg on the
/// enclosing aligned word, replacing all uses of \p CI and erasing it.
/// Always returns true.
bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %PMV.AlignedAddr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //         label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //    %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //    %FinalOldVal = trunc i32 %tmp1 to i8
  //    %tmp2 = insertvalue { i8, i1 } poison, i8 %FinalOldVal, 0
  //    %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  ReplacementIRBuilder Builder(CI, *DL);
  LLVMContext &Ctx = Builder.getContext();

  // Split right before CI so that CI becomes the first instruction of the end
  // block, then create the loop and failure blocks in front of it.
  BasicBlock *EndBB =
      BB->splitBasicBlock(I: CI->getIterator(), BBName: "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Context&: Ctx, Name: "partword.cmpxchg.failure", Parent: F, InsertBefore: EndBB);
  auto LoopBB = BasicBlock::Create(Context&: Ctx, Name: "partword.cmpxchg.loop", Parent: F, InsertBefore: FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(x: BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, I: CI, ValueType: CI->getCompareOperand()->getType(), Addr,
                       AddrAlign: CI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(LHS: Builder.CreateZExt(V: NewVal, DestTy: PMV.WordType), RHS: PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(LHS: Builder.CreateZExt(V: Cmp, DestTy: PMV.WordType), RHS: PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values
  LoadInst *InitLoaded = Builder.CreateLoad(Ty: PMV.WordType, Ptr: PMV.AlignedAddr);
  Value *InitLoaded_MaskOut = Builder.CreateAnd(LHS: InitLoaded, RHS: PMV.Inv_Mask);
  Builder.CreateBr(Dest: LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(Ty: PMV.WordType, NumReservedValues: 2);
  Loaded_MaskOut->addIncoming(V: InitLoaded_MaskOut, BB);

  // The initial load must be atomic with the same synchronization scope
  // to avoid a data race with concurrent stores. If the instruction being
  // emulated is volatile, issue a volatile load.
  // addIncoming is done first so that any replaceAllUsesWith calls during
  // normalization correctly update the PHI incoming value.
  InitLoaded->setVolatile(CI->isVolatile());
  if (TLI->shouldIssueAtomicLoadForAtomicEmulationLoop()) {
    InitLoaded->setAtomic(Ordering: AtomicOrdering::Monotonic, SSID: CI->getSyncScopeID());
    // The newly created load might need to be lowered further. Because it is
    // created in the same block as the cmpxchg, the AtomicExpand loop will
    // not process it again.
    processAtomicInstr(I: InitLoaded);
  }

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      Ptr: PMV.AlignedAddr, Cmp: FullWord_Cmp, New: FullWord_NewVal, Align: PMV.AlignedAddrAlignment,
      SuccessOrdering: CI->getSuccessOrdering(), FailureOrdering: CI->getFailureOrdering(), SSID: CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(Agg: NewCI, Idxs: 0);
  Value *Success = Builder.CreateExtractValue(Agg: NewCI, Idxs: 1);

  // A weak cmpxchg never retries: it goes straight to the end block whatever
  // the outcome. A strong one only enters the failure block when the
  // word-sized cmpxchg did not succeed.
  if (CI->isWeak())
    Builder.CreateBr(Dest: EndBB);
  else
    Builder.CreateCondBr(Cond: Success, True: EndBB, False: FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, verify that the masked-out part of the loaded value
  // has been modified. If it didn't, abort the cmpxchg, since the
  // masked-in part must've.
  Value *OldVal_MaskOut = Builder.CreateAnd(LHS: OldVal, RHS: PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(LHS: Loaded_MaskOut, RHS: OldVal_MaskOut);
  Builder.CreateCondBr(Cond: ShouldContinue, True: LoopBB, False: EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(V: OldVal_MaskOut, BB: FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  // Rebuild the { value, success } pair at the original width.
  Value *FinalOldVal = extractMaskedValue(Builder, WideWord: OldVal, PMV);
  Value *Res = PoisonValue::get(T: CI->getType());
  Res = Builder.CreateInsertValue(Agg: Res, Val: FinalOldVal, Idxs: 0);
  Res = Builder.CreateInsertValue(Agg: Res, Val: Success, Idxs: 1);

  CI->replaceAllUsesWith(V: Res);
  CI->eraseFromParent();
  return true;
}
1308
1309void AtomicExpandImpl::expandAtomicOpToLLSC(
1310 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1311 AtomicOrdering MemOpOrder,
1312 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1313 ReplacementIRBuilder Builder(I, *DL);
1314 Value *Loaded = insertRMWLLSCLoop(Builder, ResultTy: ResultType, Addr, AddrAlign,
1315 MemOpOrder, PerformOp);
1316
1317 I->replaceAllUsesWith(V: Loaded);
1318 I->eraseFromParent();
1319}
1320
1321void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1322 ReplacementIRBuilder Builder(AI, *DL);
1323
1324 PartwordMaskValues PMV =
1325 createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
1326 AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1327
1328 // The value operand must be sign-extended for signed min/max so that the
1329 // target's signed comparison instructions can be used. Otherwise, just
1330 // zero-ext.
1331 Instruction::CastOps CastOp = Instruction::ZExt;
1332 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1333 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1334 CastOp = Instruction::SExt;
1335
1336 Value *ValOperand_Shifted = Builder.CreateShl(
1337 LHS: Builder.CreateCast(Op: CastOp, V: AI->getValOperand(), DestTy: PMV.WordType),
1338 RHS: PMV.ShiftAmt, Name: "ValOperand_Shifted");
1339 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1340 Builder, AI, AlignedAddr: PMV.AlignedAddr, Incr: ValOperand_Shifted, Mask: PMV.Mask, ShiftAmt: PMV.ShiftAmt,
1341 Ord: AI->getOrdering());
1342 Value *FinalOldResult = extractMaskedValue(Builder, WideWord: OldResult, PMV);
1343 AI->replaceAllUsesWith(V: FinalOldResult);
1344 AI->eraseFromParent();
1345}
1346
1347void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1348 AtomicCmpXchgInst *CI) {
1349 ReplacementIRBuilder Builder(CI, *DL);
1350
1351 PartwordMaskValues PMV = createMaskInstrs(
1352 Builder, I: CI, ValueType: CI->getCompareOperand()->getType(), Addr: CI->getPointerOperand(),
1353 AddrAlign: CI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1354
1355 Value *CmpVal_Shifted = Builder.CreateShl(
1356 LHS: Builder.CreateZExt(V: CI->getCompareOperand(), DestTy: PMV.WordType), RHS: PMV.ShiftAmt,
1357 Name: "CmpVal_Shifted");
1358 Value *NewVal_Shifted = Builder.CreateShl(
1359 LHS: Builder.CreateZExt(V: CI->getNewValOperand(), DestTy: PMV.WordType), RHS: PMV.ShiftAmt,
1360 Name: "NewVal_Shifted");
1361 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1362 Builder, CI, AlignedAddr: PMV.AlignedAddr, CmpVal: CmpVal_Shifted, NewVal: NewVal_Shifted, Mask: PMV.Mask,
1363 Ord: CI->getMergedOrdering());
1364 Value *FinalOldVal = extractMaskedValue(Builder, WideWord: OldVal, PMV);
1365 Value *Res = PoisonValue::get(T: CI->getType());
1366 Res = Builder.CreateInsertValue(Agg: Res, Val: FinalOldVal, Idxs: 0);
1367 Value *Success = Builder.CreateICmpEQ(
1368 LHS: CmpVal_Shifted, RHS: Builder.CreateAnd(LHS: OldVal, RHS: PMV.Mask), Name: "Success");
1369 Res = Builder.CreateInsertValue(Agg: Res, Val: Success, Idxs: 1);
1370
1371 CI->replaceAllUsesWith(V: Res);
1372 CI->eraseFromParent();
1373}
1374
1375Value *AtomicExpandImpl::insertRMWLLSCLoop(
1376 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1377 AtomicOrdering MemOpOrder,
1378 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1379 LLVMContext &Ctx = Builder.getContext();
1380 BasicBlock *BB = Builder.GetInsertBlock();
1381 Function *F = BB->getParent();
1382
1383 assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
1384 "Expected at least natural alignment at this point.");
1385
1386 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1387 //
1388 // The standard expansion we produce is:
1389 // [...]
1390 // atomicrmw.start:
1391 // %loaded = @load.linked(%addr)
1392 // %new = some_op iN %loaded, %incr
1393 // %stored = @store_conditional(%new, %addr)
1394 // %try_again = icmp i32 ne %stored, 0
1395 // br i1 %try_again, label %loop, label %atomicrmw.end
1396 // atomicrmw.end:
1397 // [...]
1398 BasicBlock *ExitBB =
1399 BB->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "atomicrmw.end");
1400 BasicBlock *LoopBB = BasicBlock::Create(Context&: Ctx, Name: "atomicrmw.start", Parent: F, InsertBefore: ExitBB);
1401
1402 // The split call above "helpfully" added a branch at the end of BB (to the
1403 // wrong place).
1404 std::prev(x: BB->end())->eraseFromParent();
1405 Builder.SetInsertPoint(BB);
1406 Builder.CreateBr(Dest: LoopBB);
1407
1408 // Start the main loop block now that we've taken care of the preliminaries.
1409 Builder.SetInsertPoint(LoopBB);
1410 Value *Loaded = TLI->emitLoadLinked(Builder, ValueTy: ResultTy, Addr, Ord: MemOpOrder);
1411
1412 Value *NewVal = PerformOp(Builder, Loaded);
1413
1414 Value *StoreSuccess =
1415 TLI->emitStoreConditional(Builder, Val: NewVal, Addr, Ord: MemOpOrder);
1416 Value *TryAgain = Builder.CreateICmpNE(
1417 LHS: StoreSuccess, RHS: ConstantInt::get(Ty: IntegerType::get(C&: Ctx, NumBits: 32), V: 0), Name: "tryagain");
1418
1419 Instruction *CondBr = Builder.CreateCondBr(Cond: TryAgain, True: LoopBB, False: ExitBB);
1420
1421 // Atomic RMW expands to a Load-linked / Store-Conditional loop, because it is
1422 // hard to predict precise branch weigths we mark the branch as "unknown"
1423 // (50/50) to prevent misleading optimizations.
1424 setExplicitlyUnknownBranchWeightsIfProfiled(I&: *CondBr, DEBUG_TYPE);
1425
1426 Builder.SetInsertPoint(TheBB: ExitBB, IP: ExitBB->begin());
1427 return Loaded;
1428}
1429
1430/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1431/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1432/// IR. As a migration step, we convert back to what use to be the standard
1433/// way to represent a pointer cmpxchg so that we can update backends one by
1434/// one.
1435AtomicCmpXchgInst *
1436AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1437 auto *M = CI->getModule();
1438 Type *NewTy = getCorrespondingIntegerType(T: CI->getCompareOperand()->getType(),
1439 DL: M->getDataLayout());
1440
1441 ReplacementIRBuilder Builder(CI, *DL);
1442
1443 Value *Addr = CI->getPointerOperand();
1444
1445 Value *NewCmp = Builder.CreatePtrToInt(V: CI->getCompareOperand(), DestTy: NewTy);
1446 Value *NewNewVal = Builder.CreatePtrToInt(V: CI->getNewValOperand(), DestTy: NewTy);
1447
1448 auto *NewCI = Builder.CreateAtomicCmpXchg(
1449 Ptr: Addr, Cmp: NewCmp, New: NewNewVal, Align: CI->getAlign(), SuccessOrdering: CI->getSuccessOrdering(),
1450 FailureOrdering: CI->getFailureOrdering(), SSID: CI->getSyncScopeID());
1451 NewCI->setVolatile(CI->isVolatile());
1452 NewCI->setWeak(CI->isWeak());
1453 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1454
1455 Value *OldVal = Builder.CreateExtractValue(Agg: NewCI, Idxs: 0);
1456 Value *Succ = Builder.CreateExtractValue(Agg: NewCI, Idxs: 1);
1457
1458 OldVal = Builder.CreateIntToPtr(V: OldVal, DestTy: CI->getCompareOperand()->getType());
1459
1460 Value *Res = PoisonValue::get(T: CI->getType());
1461 Res = Builder.CreateInsertValue(Agg: Res, Val: OldVal, Idxs: 0);
1462 Res = Builder.CreateInsertValue(Agg: Res, Val: Succ, Idxs: 1);
1463
1464 CI->replaceAllUsesWith(V: Res);
1465 CI->eraseFromParent();
1466 return NewCI;
1467}
1468
1469bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1470 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1471 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1472 Value *Addr = CI->getPointerOperand();
1473 BasicBlock *BB = CI->getParent();
1474 Function *F = BB->getParent();
1475 LLVMContext &Ctx = F->getContext();
1476 // If shouldInsertFencesForAtomic() returns true, then the target does not
1477 // want to deal with memory orders, and emitLeading/TrailingFence should take
1478 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1479 // should preserve the ordering.
1480 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(I: CI);
1481 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1482 ? AtomicOrdering::Monotonic
1483 : CI->getMergedOrdering();
1484
1485 // In implementations which use a barrier to achieve release semantics, we can
1486 // delay emitting this barrier until we know a store is actually going to be
1487 // attempted. The cost of this delay is that we need 2 copies of the block
1488 // emitting the load-linked, affecting code size.
1489 //
1490 // Ideally, this logic would be unconditional except for the minsize check
1491 // since in other cases the extra blocks naturally collapse down to the
1492 // minimal loop. Unfortunately, this puts too much stress on later
1493 // optimisations so we avoid emitting the extra logic in those cases too.
1494 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1495 SuccessOrder != AtomicOrdering::Monotonic &&
1496 SuccessOrder != AtomicOrdering::Acquire &&
1497 !F->hasMinSize();
1498
1499 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1500 // do it even on minsize.
1501 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1502
1503 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1504 //
1505 // The full expansion we produce is:
1506 // [...]
1507 // %aligned.addr = ...
1508 // cmpxchg.start:
1509 // %unreleasedload = @load.linked(%aligned.addr)
1510 // %unreleasedload.extract = extract value from %unreleasedload
1511 // %should_store = icmp eq %unreleasedload.extract, %desired
1512 // br i1 %should_store, label %cmpxchg.releasingstore,
1513 // label %cmpxchg.nostore
1514 // cmpxchg.releasingstore:
1515 // fence?
1516 // br label cmpxchg.trystore
1517 // cmpxchg.trystore:
1518 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1519 // [%releasedload, %cmpxchg.releasedload]
1520 // %updated.new = insert %new into %loaded.trystore
1521 // %stored = @store_conditional(%updated.new, %aligned.addr)
1522 // %success = icmp eq i32 %stored, 0
1523 // br i1 %success, label %cmpxchg.success,
1524 // label %cmpxchg.releasedload/%cmpxchg.failure
1525 // cmpxchg.releasedload:
1526 // %releasedload = @load.linked(%aligned.addr)
1527 // %releasedload.extract = extract value from %releasedload
1528 // %should_store = icmp eq %releasedload.extract, %desired
1529 // br i1 %should_store, label %cmpxchg.trystore,
1530 // label %cmpxchg.failure
1531 // cmpxchg.success:
1532 // fence?
1533 // br label %cmpxchg.end
1534 // cmpxchg.nostore:
1535 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1536 // [%releasedload,
1537 // %cmpxchg.releasedload/%cmpxchg.trystore]
1538 // @load_linked_fail_balance()?
1539 // br label %cmpxchg.failure
1540 // cmpxchg.failure:
1541 // fence?
1542 // br label %cmpxchg.end
1543 // cmpxchg.end:
1544 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1545 // [%loaded.trystore, %cmpxchg.trystore]
1546 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1547 // %loaded = extract value from %loaded.exit
1548 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1549 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1550 // [...]
1551 BasicBlock *ExitBB = BB->splitBasicBlock(I: CI->getIterator(), BBName: "cmpxchg.end");
1552 auto FailureBB = BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.failure", Parent: F, InsertBefore: ExitBB);
1553 auto NoStoreBB = BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.nostore", Parent: F, InsertBefore: FailureBB);
1554 auto SuccessBB = BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.success", Parent: F, InsertBefore: NoStoreBB);
1555 auto ReleasedLoadBB =
1556 BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.releasedload", Parent: F, InsertBefore: SuccessBB);
1557 auto TryStoreBB =
1558 BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.trystore", Parent: F, InsertBefore: ReleasedLoadBB);
1559 auto ReleasingStoreBB =
1560 BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.fencedstore", Parent: F, InsertBefore: TryStoreBB);
1561 auto StartBB = BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.start", Parent: F, InsertBefore: ReleasingStoreBB);
1562
1563 ReplacementIRBuilder Builder(CI, *DL);
1564
1565 // The split call above "helpfully" added a branch at the end of BB (to the
1566 // wrong place), but we might want a fence too. It's easiest to just remove
1567 // the branch entirely.
1568 std::prev(x: BB->end())->eraseFromParent();
1569 Builder.SetInsertPoint(BB);
1570 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1571 TLI->emitLeadingFence(Builder, Inst: CI, Ord: SuccessOrder);
1572
1573 PartwordMaskValues PMV =
1574 createMaskInstrs(Builder, I: CI, ValueType: CI->getCompareOperand()->getType(), Addr,
1575 AddrAlign: CI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1576 Builder.CreateBr(Dest: StartBB);
1577
1578 // Start the main loop block now that we've taken care of the preliminaries.
1579 Builder.SetInsertPoint(StartBB);
1580 Value *UnreleasedLoad =
1581 TLI->emitLoadLinked(Builder, ValueTy: PMV.WordType, Addr: PMV.AlignedAddr, Ord: MemOpOrder);
1582 Value *UnreleasedLoadExtract =
1583 extractMaskedValue(Builder, WideWord: UnreleasedLoad, PMV);
1584 Value *ShouldStore = Builder.CreateICmpEQ(
1585 LHS: UnreleasedLoadExtract, RHS: CI->getCompareOperand(), Name: "should_store");
1586
1587 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1588 // jump straight past that fence instruction (if it exists).
1589 Builder.CreateCondBr(Cond: ShouldStore, True: ReleasingStoreBB, False: NoStoreBB,
1590 BranchWeights: MDBuilder(F->getContext()).createLikelyBranchWeights());
1591
1592 Builder.SetInsertPoint(ReleasingStoreBB);
1593 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1594 TLI->emitLeadingFence(Builder, Inst: CI, Ord: SuccessOrder);
1595 Builder.CreateBr(Dest: TryStoreBB);
1596
1597 Builder.SetInsertPoint(TryStoreBB);
1598 PHINode *LoadedTryStore =
1599 Builder.CreatePHI(Ty: PMV.WordType, NumReservedValues: 2, Name: "loaded.trystore");
1600 LoadedTryStore->addIncoming(V: UnreleasedLoad, BB: ReleasingStoreBB);
1601 Value *NewValueInsert =
1602 insertMaskedValue(Builder, WideWord: LoadedTryStore, Updated: CI->getNewValOperand(), PMV);
1603 Value *StoreSuccess = TLI->emitStoreConditional(Builder, Val: NewValueInsert,
1604 Addr: PMV.AlignedAddr, Ord: MemOpOrder);
1605 StoreSuccess = Builder.CreateICmpEQ(
1606 LHS: StoreSuccess, RHS: ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx), V: 0), Name: "success");
1607 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1608 Builder.CreateCondBr(Cond: StoreSuccess, True: SuccessBB,
1609 False: CI->isWeak() ? FailureBB : RetryBB,
1610 BranchWeights: MDBuilder(F->getContext()).createLikelyBranchWeights());
1611
1612 Builder.SetInsertPoint(ReleasedLoadBB);
1613 Value *SecondLoad;
1614 if (HasReleasedLoadBB) {
1615 SecondLoad =
1616 TLI->emitLoadLinked(Builder, ValueTy: PMV.WordType, Addr: PMV.AlignedAddr, Ord: MemOpOrder);
1617 Value *SecondLoadExtract = extractMaskedValue(Builder, WideWord: SecondLoad, PMV);
1618 ShouldStore = Builder.CreateICmpEQ(LHS: SecondLoadExtract,
1619 RHS: CI->getCompareOperand(), Name: "should_store");
1620
1621 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1622 // jump straight past that fence instruction (if it exists).
1623 Builder.CreateCondBr(
1624 Cond: ShouldStore, True: TryStoreBB, False: NoStoreBB,
1625 BranchWeights: MDBuilder(F->getContext()).createLikelyBranchWeights());
1626 // Update PHI node in TryStoreBB.
1627 LoadedTryStore->addIncoming(V: SecondLoad, BB: ReleasedLoadBB);
1628 } else
1629 Builder.CreateUnreachable();
1630
1631 // Make sure later instructions don't get reordered with a fence if
1632 // necessary.
1633 Builder.SetInsertPoint(SuccessBB);
1634 if (ShouldInsertFencesForAtomic ||
1635 TLI->shouldInsertTrailingSeqCstFenceForAtomicStore(I: CI))
1636 TLI->emitTrailingFence(Builder, Inst: CI, Ord: SuccessOrder);
1637 Builder.CreateBr(Dest: ExitBB);
1638
1639 Builder.SetInsertPoint(NoStoreBB);
1640 PHINode *LoadedNoStore =
1641 Builder.CreatePHI(Ty: UnreleasedLoad->getType(), NumReservedValues: 2, Name: "loaded.nostore");
1642 LoadedNoStore->addIncoming(V: UnreleasedLoad, BB: StartBB);
1643 if (HasReleasedLoadBB)
1644 LoadedNoStore->addIncoming(V: SecondLoad, BB: ReleasedLoadBB);
1645
1646 // In the failing case, where we don't execute the store-conditional, the
1647 // target might want to balance out the load-linked with a dedicated
1648 // instruction (e.g., on ARM, clearing the exclusive monitor).
1649 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1650 Builder.CreateBr(Dest: FailureBB);
1651
1652 Builder.SetInsertPoint(FailureBB);
1653 PHINode *LoadedFailure =
1654 Builder.CreatePHI(Ty: UnreleasedLoad->getType(), NumReservedValues: 2, Name: "loaded.failure");
1655 LoadedFailure->addIncoming(V: LoadedNoStore, BB: NoStoreBB);
1656 if (CI->isWeak())
1657 LoadedFailure->addIncoming(V: LoadedTryStore, BB: TryStoreBB);
1658 if (ShouldInsertFencesForAtomic)
1659 TLI->emitTrailingFence(Builder, Inst: CI, Ord: FailureOrder);
1660 Builder.CreateBr(Dest: ExitBB);
1661
1662 // Finally, we have control-flow based knowledge of whether the cmpxchg
1663 // succeeded or not. We expose this to later passes by converting any
1664 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1665 // PHI.
1666 Builder.SetInsertPoint(TheBB: ExitBB, IP: ExitBB->begin());
1667 PHINode *LoadedExit =
1668 Builder.CreatePHI(Ty: UnreleasedLoad->getType(), NumReservedValues: 2, Name: "loaded.exit");
1669 LoadedExit->addIncoming(V: LoadedTryStore, BB: SuccessBB);
1670 LoadedExit->addIncoming(V: LoadedFailure, BB: FailureBB);
1671 PHINode *Success = Builder.CreatePHI(Ty: Type::getInt1Ty(C&: Ctx), NumReservedValues: 2, Name: "success");
1672 Success->addIncoming(V: ConstantInt::getTrue(Context&: Ctx), BB: SuccessBB);
1673 Success->addIncoming(V: ConstantInt::getFalse(Context&: Ctx), BB: FailureBB);
1674
1675 // This is the "exit value" from the cmpxchg expansion. It may be of
1676 // a type wider than the one in the cmpxchg instruction.
1677 Value *LoadedFull = LoadedExit;
1678
1679 Builder.SetInsertPoint(TheBB: ExitBB, IP: std::next(x: Success->getIterator()));
1680 Value *Loaded = extractMaskedValue(Builder, WideWord: LoadedFull, PMV);
1681
1682 // Look for any users of the cmpxchg that are just comparing the loaded value
1683 // against the desired one, and replace them with the CFG-derived version.
1684 SmallVector<ExtractValueInst *, 2> PrunedInsts;
1685 for (auto *User : CI->users()) {
1686 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Val: User);
1687 if (!EV)
1688 continue;
1689
1690 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1691 "weird extraction from { iN, i1 }");
1692
1693 if (EV->getIndices()[0] == 0)
1694 EV->replaceAllUsesWith(V: Loaded);
1695 else
1696 EV->replaceAllUsesWith(V: Success);
1697
1698 PrunedInsts.push_back(Elt: EV);
1699 }
1700
1701 // We can remove the instructions now we're no longer iterating through them.
1702 for (auto *EV : PrunedInsts)
1703 EV->eraseFromParent();
1704
1705 if (!CI->use_empty()) {
1706 // Some use of the full struct return that we don't understand has happened,
1707 // so we've got to reconstruct it properly.
1708 Value *Res;
1709 Res = Builder.CreateInsertValue(Agg: PoisonValue::get(T: CI->getType()), Val: Loaded, Idxs: 0);
1710 Res = Builder.CreateInsertValue(Agg: Res, Val: Success, Idxs: 1);
1711
1712 CI->replaceAllUsesWith(V: Res);
1713 }
1714
1715 CI->eraseFromParent();
1716 return true;
1717}
1718
1719bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1720 if (RMWI->isVolatile())
1721 return false;
1722 // TODO: Add floating point support.
1723 auto C = dyn_cast<ConstantInt>(Val: RMWI->getValOperand());
1724 if (!C)
1725 return false;
1726
1727 switch (RMWI->getOperation()) {
1728 case AtomicRMWInst::Add:
1729 case AtomicRMWInst::Sub:
1730 case AtomicRMWInst::Or:
1731 case AtomicRMWInst::Xor:
1732 return C->isZero();
1733 case AtomicRMWInst::And:
1734 return C->isMinusOne();
1735 case AtomicRMWInst::Min:
1736 return C->isMaxValue(IsSigned: true);
1737 case AtomicRMWInst::Max:
1738 return C->isMinValue(IsSigned: true);
1739 case AtomicRMWInst::UMin:
1740 return C->isMaxValue(IsSigned: false);
1741 case AtomicRMWInst::UMax:
1742 return C->isMinValue(IsSigned: false);
1743 default:
1744 return false;
1745 }
1746}
1747
1748bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1749 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1750 tryExpandAtomicLoad(LI: ResultingLoad);
1751 return true;
1752 }
1753 return false;
1754}
1755
1756Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1757 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1758 AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile,
1759 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1760 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1761 LLVMContext &Ctx = Builder.getContext();
1762 BasicBlock *BB = Builder.GetInsertBlock();
1763 Function *F = BB->getParent();
1764
1765 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1766 //
1767 // The standard expansion we produce is:
1768 // [...]
1769 // %init_loaded = load atomic iN* %addr
1770 // br label %loop
1771 // loop:
1772 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1773 // %new = some_op iN %loaded, %incr
1774 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1775 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1776 // %success = extractvalue { iN, i1 } %pair, 1
1777 // br i1 %success, label %atomicrmw.end, label %loop
1778 // atomicrmw.end:
1779 // [...]
1780 BasicBlock *ExitBB =
1781 BB->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "atomicrmw.end");
1782 BasicBlock *LoopBB = BasicBlock::Create(Context&: Ctx, Name: "atomicrmw.start", Parent: F, InsertBefore: ExitBB);
1783
1784 // The split call above "helpfully" added a branch at the end of BB (to the
1785 // wrong place), but we want a load. It's easiest to just remove
1786 // the branch entirely.
1787 std::prev(x: BB->end())->eraseFromParent();
1788 Builder.SetInsertPoint(BB);
1789 LoadInst *InitLoaded = Builder.CreateAlignedLoad(Ty: ResultTy, Ptr: Addr, Align: AddrAlign);
1790 Builder.CreateBr(Dest: LoopBB);
1791
1792 // Start the main loop block now that we've taken care of the preliminaries.
1793 Builder.SetInsertPoint(LoopBB);
1794 PHINode *Loaded = Builder.CreatePHI(Ty: ResultTy, NumReservedValues: 2, Name: "loaded");
1795 Loaded->addIncoming(V: InitLoaded, BB);
1796
1797 // The initial load must be atomic with the same synchronization scope
1798 // to avoid a data race with concurrent stores. If the instruction being
1799 // emulated is volatile, issue a volatile load.
1800 // addIncoming is done first so that any replaceAllUsesWith calls during
1801 // normalization correctly update the PHI incoming value.
1802 InitLoaded->setVolatile(IsVolatile);
1803 if (TLI->shouldIssueAtomicLoadForAtomicEmulationLoop()) {
1804 InitLoaded->setAtomic(Ordering: AtomicOrdering::Monotonic, SSID);
1805 // The newly created load might need to be lowered further. Because it is
1806 // created in the same block as the atomicrmw, the AtomicExpand loop will
1807 // not process it again.
1808 processAtomicInstr(I: InitLoaded);
1809 }
1810
1811 Value *NewVal = PerformOp(Builder, Loaded);
1812
1813 Value *NewLoaded = nullptr;
1814 Value *Success = nullptr;
1815
1816 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1817 MemOpOrder == AtomicOrdering::Unordered
1818 ? AtomicOrdering::Monotonic
1819 : MemOpOrder,
1820 SSID, IsVolatile, Success, NewLoaded, MetadataSrc);
1821 assert(Success && NewLoaded);
1822
1823 Loaded->addIncoming(V: NewLoaded, BB: LoopBB);
1824
1825 Instruction *CondBr = Builder.CreateCondBr(Cond: Success, True: ExitBB, False: LoopBB);
1826
1827 // Atomic RMW expands to a cmpxchg loop, Since precise branch weights
1828 // cannot be easily determined here, we mark the branch as "unknown" (50/50)
1829 // to prevent misleading optimizations.
1830 setExplicitlyUnknownBranchWeightsIfProfiled(I&: *CondBr, DEBUG_TYPE);
1831
1832 Builder.SetInsertPoint(TheBB: ExitBB, IP: ExitBB->begin());
1833 return NewLoaded;
1834}
1835
1836bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1837 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1838 unsigned ValueSize = getAtomicOpSize(CASI: CI);
1839
1840 switch (TLI->shouldExpandAtomicCmpXchgInIR(AI: CI)) {
1841 default:
1842 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1843 case TargetLoweringBase::AtomicExpansionKind::None:
1844 if (ValueSize < MinCASSize)
1845 return expandPartwordCmpXchg(CI);
1846 return false;
1847 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1848 return expandAtomicCmpXchg(CI);
1849 }
1850 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1851 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1852 return true;
1853 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1854 return lowerAtomicCmpXchgInst(CXI: CI);
1855 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1856 TLI->emitExpandAtomicCmpXchg(CI);
1857 return true;
1858 }
1859 }
1860}
1861
1862bool AtomicExpandImpl::expandAtomicRMWToCmpXchg(
1863 AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) {
1864 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1865 Builder.setIsFPConstrained(
1866 AI->getFunction()->hasFnAttribute(Kind: Attribute::StrictFP));
1867
1868 // FIXME: If FP exceptions are observable, we should force them off for the
1869 // loop for the FP atomics.
1870 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1871 Builder, ResultTy: AI->getType(), Addr: AI->getPointerOperand(), AddrAlign: AI->getAlign(),
1872 MemOpOrder: AI->getOrdering(), SSID: AI->getSyncScopeID(), IsVolatile: AI->isVolatile(),
1873 PerformOp: [&](IRBuilderBase &Builder, Value *Loaded) {
1874 return buildAtomicRMWValue(Op: AI->getOperation(), Builder, Loaded,
1875 Val: AI->getValOperand());
1876 },
1877 CreateCmpXchg, /*MetadataSrc=*/AI);
1878
1879 AI->replaceAllUsesWith(V: Loaded);
1880 AI->eraseFromParent();
1881 return true;
1882}
1883
1884// In order to use one of the sized library calls such as
1885// __atomic_fetch_add_4, the alignment must be sufficient, the size
1886// must be one of the potentially-specialized sizes, and the value
1887// type must actually exist in C on the target (otherwise, the
1888// function wouldn't actually be defined.)
1889static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1890 const DataLayout &DL) {
1891 // TODO: "LargestSize" is an approximation for "largest type that
1892 // you can express in C". It seems to be the case that int128 is
1893 // supported on all 64-bit platforms, otherwise only up to 64-bit
1894 // integers are supported. If we get this wrong, then we'll try to
1895 // call a sized libcall that doesn't actually exist. There should
1896 // really be some more reliable way in LLVM of determining integer
1897 // sizes which are valid in the target's C ABI...
1898 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1899 return Alignment >= Size &&
1900 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1901 Size <= LargestSize;
1902}
1903
1904void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1905 static const RTLIB::Libcall Libcalls[6] = {
1906 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1907 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1908 unsigned Size = getAtomicOpSize(LI: I);
1909
1910 bool Expanded = expandAtomicOpToLibcall(
1911 I, Size, Alignment: I->getAlign(), PointerOperand: I->getPointerOperand(), ValueOperand: nullptr, CASExpected: nullptr,
1912 Ordering: I->getOrdering(), Ordering2: AtomicOrdering::NotAtomic, Libcalls);
1913 if (!Expanded)
1914 handleUnsupportedAtomicSize(I, AtomicOpName: "atomic load");
1915}
1916
1917void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1918 static const RTLIB::Libcall Libcalls[6] = {
1919 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1920 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1921 unsigned Size = getAtomicOpSize(SI: I);
1922
1923 bool Expanded = expandAtomicOpToLibcall(
1924 I, Size, Alignment: I->getAlign(), PointerOperand: I->getPointerOperand(), ValueOperand: I->getValueOperand(),
1925 CASExpected: nullptr, Ordering: I->getOrdering(), Ordering2: AtomicOrdering::NotAtomic, Libcalls);
1926 if (!Expanded)
1927 handleUnsupportedAtomicSize(I, AtomicOpName: "atomic store");
1928}
1929
1930void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I,
1931 const Twine &AtomicOpName,
1932 Instruction *DiagnosticInst) {
1933 static const RTLIB::Libcall Libcalls[6] = {
1934 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1935 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1936 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1937 unsigned Size = getAtomicOpSize(CASI: I);
1938
1939 bool Expanded = expandAtomicOpToLibcall(
1940 I, Size, Alignment: I->getAlign(), PointerOperand: I->getPointerOperand(), ValueOperand: I->getNewValOperand(),
1941 CASExpected: I->getCompareOperand(), Ordering: I->getSuccessOrdering(), Ordering2: I->getFailureOrdering(),
1942 Libcalls);
1943 if (!Expanded)
1944 handleUnsupportedAtomicSize(I, AtomicOpName, DiagnosticInst);
1945}
1946
1947static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1948 static const RTLIB::Libcall LibcallsXchg[6] = {
1949 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1950 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1951 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1952 static const RTLIB::Libcall LibcallsAdd[6] = {
1953 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1954 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1955 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1956 static const RTLIB::Libcall LibcallsSub[6] = {
1957 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1958 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1959 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1960 static const RTLIB::Libcall LibcallsAnd[6] = {
1961 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1962 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1963 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1964 static const RTLIB::Libcall LibcallsOr[6] = {
1965 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1966 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1967 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1968 static const RTLIB::Libcall LibcallsXor[6] = {
1969 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1970 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1971 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1972 static const RTLIB::Libcall LibcallsNand[6] = {
1973 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1974 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1975 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1976
1977 switch (Op) {
1978 case AtomicRMWInst::BAD_BINOP:
1979 llvm_unreachable("Should not have BAD_BINOP.");
1980 case AtomicRMWInst::Xchg:
1981 return ArrayRef(LibcallsXchg);
1982 case AtomicRMWInst::Add:
1983 return ArrayRef(LibcallsAdd);
1984 case AtomicRMWInst::Sub:
1985 return ArrayRef(LibcallsSub);
1986 case AtomicRMWInst::And:
1987 return ArrayRef(LibcallsAnd);
1988 case AtomicRMWInst::Or:
1989 return ArrayRef(LibcallsOr);
1990 case AtomicRMWInst::Xor:
1991 return ArrayRef(LibcallsXor);
1992 case AtomicRMWInst::Nand:
1993 return ArrayRef(LibcallsNand);
1994 case AtomicRMWInst::Max:
1995 case AtomicRMWInst::Min:
1996 case AtomicRMWInst::UMax:
1997 case AtomicRMWInst::UMin:
1998 case AtomicRMWInst::FMax:
1999 case AtomicRMWInst::FMin:
2000 case AtomicRMWInst::FMaximum:
2001 case AtomicRMWInst::FMinimum:
2002 case AtomicRMWInst::FMaximumNum:
2003 case AtomicRMWInst::FMinimumNum:
2004 case AtomicRMWInst::FAdd:
2005 case AtomicRMWInst::FSub:
2006 case AtomicRMWInst::UIncWrap:
2007 case AtomicRMWInst::UDecWrap:
2008 case AtomicRMWInst::USubCond:
2009 case AtomicRMWInst::USubSat:
2010 // No atomic libcalls are available for these.
2011 return {};
2012 }
2013 llvm_unreachable("Unexpected AtomicRMW operation.");
2014}
2015
2016void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
2017 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(Op: I->getOperation());
2018
2019 unsigned Size = getAtomicOpSize(RMWI: I);
2020
2021 bool Success = false;
2022 if (!Libcalls.empty())
2023 Success = expandAtomicOpToLibcall(
2024 I, Size, Alignment: I->getAlign(), PointerOperand: I->getPointerOperand(), ValueOperand: I->getValOperand(),
2025 CASExpected: nullptr, Ordering: I->getOrdering(), Ordering2: AtomicOrdering::NotAtomic, Libcalls);
2026
2027 // The expansion failed: either there were no libcalls at all for
2028 // the operation (min/max), or there were only size-specialized
2029 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
2030 // CAS libcall, via a CAS loop, instead.
2031 if (!Success) {
2032 expandAtomicRMWToCmpXchg(
2033 AI: I, CreateCmpXchg: [this, I](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
2034 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
2035 SyncScope::ID SSID, bool IsVolatile, Value *&Success,
2036 Value *&NewLoaded, Instruction *MetadataSrc) {
2037 // Create the CAS instruction normally...
2038 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
2039 Ptr: Addr, Cmp: Loaded, New: NewVal, Align: Alignment, SuccessOrdering: MemOpOrder,
2040 FailureOrdering: AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: MemOpOrder), SSID);
2041 Pair->setVolatile(IsVolatile);
2042 if (MetadataSrc)
2043 copyMetadataForAtomic(Dest&: *Pair, Source: *MetadataSrc);
2044
2045 Success = Builder.CreateExtractValue(Agg: Pair, Idxs: 1, Name: "success");
2046 NewLoaded = Builder.CreateExtractValue(Agg: Pair, Idxs: 0, Name: "newloaded");
2047
2048 // ...and then expand the CAS into a libcall.
2049 expandAtomicCASToLibcall(
2050 I: Pair,
2051 AtomicOpName: "atomicrmw " + AtomicRMWInst::getOperationName(Op: I->getOperation()),
2052 DiagnosticInst: MetadataSrc);
2053 });
2054 }
2055}
2056
2057// A helper routine for the above expandAtomic*ToLibcall functions.
2058//
2059// 'Libcalls' contains an array of enum values for the particular
2060// ATOMIC libcalls to be emitted. All of the other arguments besides
2061// 'I' are extracted from the Instruction subclass by the
2062// caller. Depending on the particular call, some will be null.
2063bool AtomicExpandImpl::expandAtomicOpToLibcall(
2064 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
2065 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
2066 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
2067 assert(Libcalls.size() == 6);
2068
2069 LLVMContext &Ctx = I->getContext();
2070 Module *M = I->getModule();
2071 const DataLayout &DL = M->getDataLayout();
2072 IRBuilder<> Builder(I);
2073 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
2074
2075 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
2076 Type *SizedIntTy = Type::getIntNTy(C&: Ctx, N: Size * 8);
2077
2078 if (M->getTargetTriple().isOSWindows() && M->getTargetTriple().isX86_64() &&
2079 Size == 16) {
2080 // x86_64 Windows passes i128 as an XMM vector; on return, it is in
2081 // XMM0, and as a parameter, it is passed indirectly. The generic lowering
2082 // rules handles this correctly if we pass it as a v2i64 rather than
2083 // i128. This is what Clang does in the frontend for such types as well
2084 // (see WinX86_64ABIInfo::classify in Clang).
2085 SizedIntTy = FixedVectorType::get(ElementType: Type::getInt64Ty(C&: Ctx), NumElts: 2);
2086 }
2087
2088 const Align AllocaAlignment = DL.getPrefTypeAlign(Ty: SizedIntTy);
2089
2090 // TODO: the "order" argument type is "int", not int32. So
2091 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
2092 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
2093 Constant *OrderingVal =
2094 ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx), V: (int)toCABI(AO: Ordering));
2095 Constant *Ordering2Val = nullptr;
2096 if (CASExpected) {
2097 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
2098 Ordering2Val =
2099 ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx), V: (int)toCABI(AO: Ordering2));
2100 }
2101 bool HasResult = I->getType() != Type::getVoidTy(C&: Ctx);
2102
2103 RTLIB::Libcall RTLibType;
2104 if (UseSizedLibcall) {
2105 switch (Size) {
2106 case 1:
2107 RTLibType = Libcalls[1];
2108 break;
2109 case 2:
2110 RTLibType = Libcalls[2];
2111 break;
2112 case 4:
2113 RTLibType = Libcalls[3];
2114 break;
2115 case 8:
2116 RTLibType = Libcalls[4];
2117 break;
2118 case 16:
2119 RTLibType = Libcalls[5];
2120 break;
2121 }
2122 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
2123 RTLibType = Libcalls[0];
2124 } else {
2125 // Can't use sized function, and there's no generic for this
2126 // operation, so give up.
2127 return false;
2128 }
2129
2130 RTLIB::LibcallImpl LibcallImpl = LibcallLowering->getLibcallImpl(Call: RTLibType);
2131 if (LibcallImpl == RTLIB::Unsupported) {
2132 // This target does not implement the requested atomic libcall so give up.
2133 return false;
2134 }
2135
2136 // Build up the function call. There's two kinds. First, the sized
2137 // variants. These calls are going to be one of the following (with
2138 // N=1,2,4,8,16):
2139 // iN __atomic_load_N(iN *ptr, int ordering)
2140 // void __atomic_store_N(iN *ptr, iN val, int ordering)
2141 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
2142 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
2143 // int success_order, int failure_order)
2144 //
2145 // Note that these functions can be used for non-integer atomic
2146 // operations, the values just need to be bitcast to integers on the
2147 // way in and out.
2148 //
2149 // And, then, the generic variants. They look like the following:
2150 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
2151 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
2152 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
2153 // int ordering)
2154 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
2155 // void *desired, int success_order,
2156 // int failure_order)
2157 //
2158 // The different signatures are built up depending on the
2159 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
2160 // variables.
2161
2162 AllocaInst *AllocaCASExpected = nullptr;
2163 AllocaInst *AllocaValue = nullptr;
2164 AllocaInst *AllocaResult = nullptr;
2165
2166 Type *ResultTy;
2167 SmallVector<Value *, 6> Args;
2168 AttributeList Attr;
2169
2170 // 'size' argument.
2171 if (!UseSizedLibcall) {
2172 // Note, getIntPtrType is assumed equivalent to size_t.
2173 Args.push_back(Elt: ConstantInt::get(Ty: DL.getIntPtrType(C&: Ctx), V: Size));
2174 }
2175
2176 // 'ptr' argument.
2177 // note: This assumes all address spaces share a common libfunc
2178 // implementation and that addresses are convertable. For systems without
2179 // that property, we'd need to extend this mechanism to support AS-specific
2180 // families of atomic intrinsics.
2181 Value *PtrVal = PointerOperand;
2182 PtrVal = Builder.CreateAddrSpaceCast(V: PtrVal, DestTy: PointerType::getUnqual(C&: Ctx));
2183 Args.push_back(Elt: PtrVal);
2184
2185 // 'expected' argument, if present.
2186 if (CASExpected) {
2187 AllocaCASExpected = AllocaBuilder.CreateAlloca(Ty: CASExpected->getType());
2188 AllocaCASExpected->setAlignment(AllocaAlignment);
2189 Builder.CreateLifetimeStart(Ptr: AllocaCASExpected);
2190 Builder.CreateAlignedStore(Val: CASExpected, Ptr: AllocaCASExpected, Align: AllocaAlignment);
2191 Args.push_back(Elt: AllocaCASExpected);
2192 }
2193
2194 // 'val' argument ('desired' for cas), if present.
2195 if (ValueOperand) {
2196 if (UseSizedLibcall) {
2197 Value *IntValue =
2198 Builder.CreateBitPreservingCastChain(DL, V: ValueOperand, NewTy: SizedIntTy);
2199 Args.push_back(Elt: IntValue);
2200 } else {
2201 AllocaValue = AllocaBuilder.CreateAlloca(Ty: ValueOperand->getType());
2202 AllocaValue->setAlignment(AllocaAlignment);
2203 Builder.CreateLifetimeStart(Ptr: AllocaValue);
2204 Builder.CreateAlignedStore(Val: ValueOperand, Ptr: AllocaValue, Align: AllocaAlignment);
2205 Args.push_back(Elt: AllocaValue);
2206 }
2207 }
2208
2209 // 'ret' argument.
2210 if (!CASExpected && HasResult && !UseSizedLibcall) {
2211 AllocaResult = AllocaBuilder.CreateAlloca(Ty: I->getType());
2212 AllocaResult->setAlignment(AllocaAlignment);
2213 Builder.CreateLifetimeStart(Ptr: AllocaResult);
2214 Args.push_back(Elt: AllocaResult);
2215 }
2216
2217 // 'ordering' ('success_order' for cas) argument.
2218 Args.push_back(Elt: OrderingVal);
2219
2220 // 'failure_order' argument, if present.
2221 if (Ordering2Val)
2222 Args.push_back(Elt: Ordering2Val);
2223
2224 // Now, the return type.
2225 if (CASExpected) {
2226 ResultTy = Type::getInt1Ty(C&: Ctx);
2227 Attr = Attr.addRetAttribute(C&: Ctx, Kind: Attribute::ZExt);
2228 } else if (HasResult && UseSizedLibcall)
2229 ResultTy = SizedIntTy;
2230 else
2231 ResultTy = Type::getVoidTy(C&: Ctx);
2232
2233 // Done with setting up arguments and return types, create the call:
2234 SmallVector<Type *, 6> ArgTys;
2235 for (Value *Arg : Args)
2236 ArgTys.push_back(Elt: Arg->getType());
2237 FunctionType *FnType = FunctionType::get(Result: ResultTy, Params: ArgTys, isVarArg: false);
2238 FunctionCallee LibcallFn = M->getOrInsertFunction(
2239 Name: RTLIB::RuntimeLibcallsInfo::getLibcallImplName(CallImpl: LibcallImpl), T: FnType,
2240 AttributeList: Attr);
2241 CallInst *Call = Builder.CreateCall(Callee: LibcallFn, Args);
2242 Call->setAttributes(Attr);
2243 Value *Result = Call;
2244
2245 // And then, extract the results...
2246 if (ValueOperand && !UseSizedLibcall)
2247 Builder.CreateLifetimeEnd(Ptr: AllocaValue);
2248
2249 if (CASExpected) {
2250 // The final result from the CAS is {load of 'expected' alloca, bool result
2251 // from call}
2252 Type *FinalResultTy = I->getType();
2253 Value *V = PoisonValue::get(T: FinalResultTy);
2254 Value *ExpectedOut = Builder.CreateAlignedLoad(
2255 Ty: CASExpected->getType(), Ptr: AllocaCASExpected, Align: AllocaAlignment);
2256 Builder.CreateLifetimeEnd(Ptr: AllocaCASExpected);
2257 V = Builder.CreateInsertValue(Agg: V, Val: ExpectedOut, Idxs: 0);
2258 V = Builder.CreateInsertValue(Agg: V, Val: Result, Idxs: 1);
2259 I->replaceAllUsesWith(V);
2260 } else if (HasResult) {
2261 Value *V;
2262 if (UseSizedLibcall) {
2263 // Add bitcasts from Result's scalar type to I's <n x ptr> vector type
2264 auto *PtrTy = dyn_cast<PointerType>(Val: I->getType()->getScalarType());
2265 auto *VTy = dyn_cast<VectorType>(Val: I->getType());
2266 if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
2267 unsigned AS = PtrTy->getAddressSpace();
2268 Value *BC = Builder.CreateBitCast(
2269 V: Result, DestTy: VTy->getWithNewType(EltTy: DL.getIntPtrType(C&: Ctx, AddressSpace: AS)));
2270 V = Builder.CreateIntToPtr(V: BC, DestTy: I->getType());
2271 } else
2272 V = Builder.CreateBitOrPointerCast(V: Result, DestTy: I->getType());
2273 } else {
2274 V = Builder.CreateAlignedLoad(Ty: I->getType(), Ptr: AllocaResult,
2275 Align: AllocaAlignment);
2276 Builder.CreateLifetimeEnd(Ptr: AllocaResult);
2277 }
2278 I->replaceAllUsesWith(V);
2279 }
2280 I->eraseFromParent();
2281 return true;
2282}
2283