//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target-specific instructions that implement the
// same semantics in a way that better fits the target backend. This can
// include the use of (intrinsic-based) load-linked/store-conditional loops,
// AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/STLFunctionalExtras.h"
19#include "llvm/ADT/SmallVector.h"
20#include "llvm/Analysis/InstSimplifyFolder.h"
21#include "llvm/Analysis/OptimizationRemarkEmitter.h"
22#include "llvm/CodeGen/AtomicExpand.h"
23#include "llvm/CodeGen/AtomicExpandUtils.h"
24#include "llvm/CodeGen/TargetLowering.h"
25#include "llvm/CodeGen/TargetPassConfig.h"
26#include "llvm/CodeGen/TargetSubtargetInfo.h"
27#include "llvm/CodeGen/ValueTypes.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
33#include "llvm/IR/DerivedTypes.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/Instruction.h"
37#include "llvm/IR/Instructions.h"
38#include "llvm/IR/MDBuilder.h"
39#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
40#include "llvm/IR/Module.h"
41#include "llvm/IR/ProfDataUtils.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
45#include "llvm/InitializePasses.h"
46#include "llvm/Pass.h"
47#include "llvm/Support/AtomicOrdering.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/Debug.h"
50#include "llvm/Support/ErrorHandling.h"
51#include "llvm/Support/raw_ostream.h"
52#include "llvm/Target/TargetMachine.h"
53#include "llvm/Transforms/Utils/LowerAtomic.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
64class AtomicExpandImpl {
65 const TargetLowering *TLI = nullptr;
66 const LibcallLoweringInfo *LibcallLowering = nullptr;
67 const DataLayout *DL = nullptr;
68
69private:
  void handleFailure(Instruction &FailedInst, const Twine &Msg) const {
    LLVMContext &Ctx = FailedInst.getContext();

    // TODO: Do not use generic error type.
    Ctx.emitError(&FailedInst, Msg);

    if (!FailedInst.getType()->isVoidTy())
      FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
    FailedInst.eraseFromParent();
  }
80
81 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
82 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
83 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
84 bool tryExpandAtomicLoad(LoadInst *LI);
85 bool expandAtomicLoadToLL(LoadInst *LI);
86 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
87 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
88 bool tryExpandAtomicStore(StoreInst *SI);
89 void expandAtomicStoreToXChg(StoreInst *SI);
90 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
91 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
92 Value *
93 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
94 Align AddrAlign, AtomicOrdering MemOpOrder,
95 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
96 void expandAtomicOpToLLSC(
97 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
98 AtomicOrdering MemOpOrder,
99 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
100 void expandPartwordAtomicRMW(
101 AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
102 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
103 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
104 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
105 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
106
107 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
108 static Value *insertRMWCmpXchgLoop(
109 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
110 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
111 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
112 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
113 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
114
115 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
116 bool isIdempotentRMW(AtomicRMWInst *RMWI);
117 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
118
119 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
120 Value *PointerOperand, Value *ValueOperand,
121 Value *CASExpected, AtomicOrdering Ordering,
122 AtomicOrdering Ordering2,
123 ArrayRef<RTLIB::Libcall> Libcalls);
124 void expandAtomicLoadToLibcall(LoadInst *LI);
125 void expandAtomicStoreToLibcall(StoreInst *LI);
126 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
127 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
128
129 friend bool
130 llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
131 CreateCmpXchgInstFun CreateCmpXchg);
132
133 bool processAtomicInstr(Instruction *I);
134
135public:
136 bool run(Function &F,
137 const LibcallLoweringModuleAnalysisResult &LibcallResult,
138 const TargetMachine *TM);
139};
140
141class AtomicExpandLegacy : public FunctionPass {
142public:
143 static char ID; // Pass identification, replacement for typeid
144
145 AtomicExpandLegacy() : FunctionPass(ID) {}
146
147 void getAnalysisUsage(AnalysisUsage &AU) const override {
148 AU.addRequired<LibcallLoweringInfoWrapper>();
149 FunctionPass::getAnalysisUsage(AU);
150 }
151
152 bool runOnFunction(Function &F) override;
153};
154
155// IRBuilder to be used for replacement atomic instructions.
156struct ReplacementIRBuilder
157 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
158 MDNode *MMRAMD = nullptr;
159
  // Preserves the DebugLoc from I, and preserves still-valid metadata.
  // Enables StrictFP builder mode when appropriate.
162 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
163 : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
164 IRBuilderCallbackInserter(
165 [this](Instruction *I) { addMMRAMD(I); })) {
166 SetInsertPoint(I);
167 this->CollectMetadataToCopy(Src: I, MetadataKinds: {LLVMContext::MD_pcsections});
168 if (BB->getParent()->getAttributes().hasFnAttr(Kind: Attribute::StrictFP))
169 this->setIsFPConstrained(true);
170
171 MMRAMD = I->getMetadata(KindID: LLVMContext::MD_mmra);
172 }
173
174 void addMMRAMD(Instruction *I) {
175 if (canInstructionHaveMMRAs(I: *I))
176 I->setMetadata(KindID: LLVMContext::MD_mmra, Node: MMRAMD);
177 }
178};
179
180} // end anonymous namespace
181
182char AtomicExpandLegacy::ID = 0;
183
184char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
185
186INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
187 "Expand Atomic instructions", false, false)
188INITIALIZE_PASS_DEPENDENCY(LibcallLoweringInfoWrapper)
189INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
190INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
191 "Expand Atomic instructions", false, false)
192
// Helper functions to retrieve the size of atomic instructions.
static unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

static unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
  const DataLayout &DL = RMWI->getDataLayout();
  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
  const DataLayout &DL = CASI->getDataLayout();
  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}
213
214/// Copy metadata that's safe to preserve when widening atomics.
215static void copyMetadataForAtomic(Instruction &Dest,
216 const Instruction &Source) {
217 SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
218 Source.getAllMetadata(MDs&: MD);
219 LLVMContext &Ctx = Dest.getContext();
220 MDBuilder MDB(Ctx);
221
222 for (auto [ID, N] : MD) {
223 switch (ID) {
224 case LLVMContext::MD_dbg:
225 case LLVMContext::MD_tbaa:
226 case LLVMContext::MD_tbaa_struct:
227 case LLVMContext::MD_alias_scope:
228 case LLVMContext::MD_noalias:
229 case LLVMContext::MD_noalias_addrspace:
230 case LLVMContext::MD_access_group:
231 case LLVMContext::MD_mmra:
232 Dest.setMetadata(KindID: ID, Node: N);
233 break;
234 default:
235 if (ID == Ctx.getMDKindID(Name: "amdgpu.no.remote.memory"))
236 Dest.setMetadata(KindID: ID, Node: N);
237 else if (ID == Ctx.getMDKindID(Name: "amdgpu.no.fine.grained.memory"))
238 Dest.setMetadata(KindID: ID, Node: N);
239
240 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
241 // uses.
242 break;
243 }
244 }
245}
246
// Determine whether a particular atomic operation has a supported size, and is
// of appropriate alignment, to be passed through to target lowering (versus
// being turned into an __atomic libcall).
250template <typename Inst>
251static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
252 unsigned Size = getAtomicOpSize(I);
253 Align Alignment = I->getAlign();
254 return Alignment >= Size &&
255 Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
256}
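
// An illustrative sketch of the gate above (not part of the pass, and assuming
// a hypothetical target whose getMaxAtomicSizeInBitsSupported() is 64).
[[maybe_unused]] static bool exampleAtomicSizeGate(unsigned SizeInBytes,
                                                   unsigned AlignInBytes) {
  const unsigned MaxAtomicSizeInBits = 64; // hypothetical target limit
  return AlignInBytes >= SizeInBytes &&
         SizeInBytes <= MaxAtomicSizeInBits / 8;
}
// e.g. exampleAtomicSizeGate(4, 4) is true (handled by the target), while
// exampleAtomicSizeGate(4, 2) and exampleAtomicSizeGate(16, 16) are false
// (those operations are lowered to __atomic_* libcalls).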
257
258bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
259 auto *LI = dyn_cast<LoadInst>(Val: I);
260 auto *SI = dyn_cast<StoreInst>(Val: I);
261 auto *RMWI = dyn_cast<AtomicRMWInst>(Val: I);
262 auto *CASI = dyn_cast<AtomicCmpXchgInst>(Val: I);
263
264 bool MadeChange = false;
265
266 // If the Size/Alignment is not supported, replace with a libcall.
267 if (LI) {
268 if (!LI->isAtomic())
269 return false;
270
271 if (!atomicSizeSupported(TLI, I: LI)) {
272 expandAtomicLoadToLibcall(LI);
273 return true;
274 }
275
276 if (TLI->shouldCastAtomicLoadInIR(LI) ==
277 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
278 I = LI = convertAtomicLoadToIntegerType(LI);
279 MadeChange = true;
280 }
281 } else if (SI) {
282 if (!SI->isAtomic())
283 return false;
284
285 if (!atomicSizeSupported(TLI, I: SI)) {
286 expandAtomicStoreToLibcall(LI: SI);
287 return true;
288 }
289
290 if (TLI->shouldCastAtomicStoreInIR(SI) ==
291 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
292 I = SI = convertAtomicStoreToIntegerType(SI);
293 MadeChange = true;
294 }
295 } else if (RMWI) {
296 if (!atomicSizeSupported(TLI, I: RMWI)) {
297 expandAtomicRMWToLibcall(I: RMWI);
298 return true;
299 }
300
301 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
302 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
303 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
304 MadeChange = true;
305 }
306 } else if (CASI) {
307 if (!atomicSizeSupported(TLI, I: CASI)) {
308 expandAtomicCASToLibcall(I: CASI);
309 return true;
310 }
311
    // TODO: when we're ready to make the change at the IR level, we can
    // extend convertCmpXchgToIntegerType for floating point too.
314 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
315 // TODO: add a TLI hook to control this so that each target can
316 // convert to lowering the original type one at a time.
317 I = CASI = convertCmpXchgToIntegerType(CI: CASI);
318 MadeChange = true;
319 }
320 } else
321 return false;
322
323 if (TLI->shouldInsertFencesForAtomic(I)) {
324 auto FenceOrdering = AtomicOrdering::Monotonic;
325 if (LI && isAcquireOrStronger(AO: LI->getOrdering())) {
326 FenceOrdering = LI->getOrdering();
327 LI->setOrdering(AtomicOrdering::Monotonic);
328 } else if (SI && isReleaseOrStronger(AO: SI->getOrdering())) {
329 FenceOrdering = SI->getOrdering();
330 SI->setOrdering(AtomicOrdering::Monotonic);
331 } else if (RMWI && (isReleaseOrStronger(AO: RMWI->getOrdering()) ||
332 isAcquireOrStronger(AO: RMWI->getOrdering()))) {
333 FenceOrdering = RMWI->getOrdering();
334 RMWI->setOrdering(AtomicOrdering::Monotonic);
335 } else if (CASI &&
336 TLI->shouldExpandAtomicCmpXchgInIR(AI: CASI) ==
337 TargetLoweringBase::AtomicExpansionKind::None &&
338 (isReleaseOrStronger(AO: CASI->getSuccessOrdering()) ||
339 isAcquireOrStronger(AO: CASI->getSuccessOrdering()) ||
340 isAcquireOrStronger(AO: CASI->getFailureOrdering()))) {
341 // If a compare and swap is lowered to LL/SC, we can do smarter fence
342 // insertion, with a stronger one on the success path than on the
343 // failure path. As a result, fence insertion is directly done by
344 // expandAtomicCmpXchg in that case.
345 FenceOrdering = CASI->getMergedOrdering();
346 auto CASOrdering = TLI->atomicOperationOrderAfterFenceSplit(I: CASI);
347
348 CASI->setSuccessOrdering(CASOrdering);
349 CASI->setFailureOrdering(CASOrdering);
350 }
351
352 if (FenceOrdering != AtomicOrdering::Monotonic) {
353 MadeChange |= bracketInstWithFences(I, Order: FenceOrdering);
354 }
355 } else if (TLI->shouldInsertTrailingSeqCstFenceForAtomicStore(I) &&
356 !(CASI && TLI->shouldExpandAtomicCmpXchgInIR(AI: CASI) ==
357 TargetLoweringBase::AtomicExpansionKind::LLSC)) {
358 // CmpXchg LLSC is handled in expandAtomicCmpXchg().
359 IRBuilder Builder(I);
360 if (auto TrailingFence = TLI->emitTrailingFence(
361 Builder, Inst: I, Ord: AtomicOrdering::SequentiallyConsistent)) {
362 TrailingFence->moveAfter(MovePos: I);
363 MadeChange = true;
364 }
365 }
366
367 if (LI)
368 MadeChange |= tryExpandAtomicLoad(LI);
369 else if (SI)
370 MadeChange |= tryExpandAtomicStore(SI);
371 else if (RMWI) {
372 // There are two different ways of expanding RMW instructions:
373 // - into a load if it is idempotent
374 // - into a Cmpxchg/LL-SC loop otherwise
375 // we try them in that order.
376
377 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
378 MadeChange = true;
379
380 } else {
381 MadeChange |= tryExpandAtomicRMW(AI: RMWI);
382 }
383 } else if (CASI)
384 MadeChange |= tryExpandAtomicCmpXchg(CI: CASI);
385
386 return MadeChange;
387}
388
389bool AtomicExpandImpl::run(
390 Function &F, const LibcallLoweringModuleAnalysisResult &LibcallResult,
391 const TargetMachine *TM) {
392 const auto *Subtarget = TM->getSubtargetImpl(F);
393 if (!Subtarget->enableAtomicExpand())
394 return false;
395 TLI = Subtarget->getTargetLowering();
396 LibcallLowering = &LibcallResult.getLibcallLowering(Subtarget: *Subtarget);
397 DL = &F.getDataLayout();
398
399 bool MadeChange = false;
400
401 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
402 BasicBlock *BB = &*BBI;
403
404 BasicBlock::reverse_iterator Next;
405
406 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
407 I = Next) {
408 Instruction &Inst = *I;
409 Next = std::next(x: I);
410
411 if (processAtomicInstr(I: &Inst)) {
412 MadeChange = true;
413
414 // New blocks may have been inserted.
415 BBE = F.end();
416 }
417 }
418 }
419
420 return MadeChange;
421}
422
423bool AtomicExpandLegacy::runOnFunction(Function &F) {
424
425 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
426 if (!TPC)
427 return false;
428 auto *TM = &TPC->getTM<TargetMachine>();
429
430 const LibcallLoweringModuleAnalysisResult &LibcallResult =
431 getAnalysis<LibcallLoweringInfoWrapper>().getResult(M: *F.getParent());
432 AtomicExpandImpl AE;
433 return AE.run(F, LibcallResult, TM);
434}
435
436FunctionPass *llvm::createAtomicExpandLegacyPass() {
437 return new AtomicExpandLegacy();
438}
439
440PreservedAnalyses AtomicExpandPass::run(Function &F,
441 FunctionAnalysisManager &FAM) {
442 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(IR&: F);
443
444 const LibcallLoweringModuleAnalysisResult *LibcallResult =
445 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(IR&: *F.getParent());
446
447 if (!LibcallResult) {
448 F.getContext().emitError(ErrorStr: "'" + LibcallLoweringModuleAnalysis::name() +
449 "' analysis required");
450 return PreservedAnalyses::all();
451 }
452
453 AtomicExpandImpl AE;
454
455 bool Changed = AE.run(F, LibcallResult: *LibcallResult, TM);
456 if (!Changed)
457 return PreservedAnalyses::all();
458
459 return PreservedAnalyses::none();
460}
461
462bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
463 AtomicOrdering Order) {
464 ReplacementIRBuilder Builder(I, *DL);
465
466 auto LeadingFence = TLI->emitLeadingFence(Builder, Inst: I, Ord: Order);
467
468 auto TrailingFence = TLI->emitTrailingFence(Builder, Inst: I, Ord: Order);
469 // We have a guard here because not every atomic operation generates a
470 // trailing fence.
471 if (TrailingFence)
472 TrailingFence->moveAfter(MovePos: I);
473
474 return (LeadingFence || TrailingFence);
475}
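
// For example (an illustrative sketch; the exact fences are whatever the
// target's emitLeadingFence/emitTrailingFence hooks produce), an acquire load
// whose ordering processAtomicInstr has already relaxed to monotonic ends up
// bracketed roughly as:
//   fence acquire                                    ; leading fence, if any
//   %v = load atomic i32, ptr %p monotonic, align 4
//   fence acquire                                    ; trailing fence, if any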
476
477/// Get the iX type with the same bitwidth as T.
478IntegerType *
479AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
480 EVT VT = TLI->getMemValueType(DL, Ty: T);
481 unsigned BitWidth = VT.getStoreSizeInBits();
482 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
483 return IntegerType::get(C&: T->getContext(), NumBits: BitWidth);
484}
485
486/// Convert an atomic load of a non-integral type to an integer load of the
487/// equivalent bitwidth. See the function comment on
488/// convertAtomicStoreToIntegerType for background.
489LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
490 auto *M = LI->getModule();
491 Type *NewTy = getCorrespondingIntegerType(T: LI->getType(), DL: M->getDataLayout());
492
493 ReplacementIRBuilder Builder(LI, *DL);
494
495 Value *Addr = LI->getPointerOperand();
496
497 auto *NewLI = Builder.CreateLoad(Ty: NewTy, Ptr: Addr);
498 NewLI->setAlignment(LI->getAlign());
499 NewLI->setVolatile(LI->isVolatile());
500 NewLI->setAtomic(Ordering: LI->getOrdering(), SSID: LI->getSyncScopeID());
501 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
502
503 Value *NewVal = Builder.CreateBitCast(V: NewLI, DestTy: LI->getType());
504 LI->replaceAllUsesWith(V: NewVal);
505 LI->eraseFromParent();
506 return NewLI;
507}
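
// For example (illustrative):
//   %v = load atomic float, ptr %p seq_cst, align 4
// becomes
//   %v.int = load atomic i32, ptr %p seq_cst, align 4
//   %v = bitcast i32 %v.int to float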
508
509AtomicRMWInst *
510AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
511 assert(RMWI->getOperation() == AtomicRMWInst::Xchg);
512
513 auto *M = RMWI->getModule();
514 Type *NewTy =
515 getCorrespondingIntegerType(T: RMWI->getType(), DL: M->getDataLayout());
516
517 ReplacementIRBuilder Builder(RMWI, *DL);
518
519 Value *Addr = RMWI->getPointerOperand();
520 Value *Val = RMWI->getValOperand();
521 Value *NewVal = Val->getType()->isPointerTy()
522 ? Builder.CreatePtrToInt(V: Val, DestTy: NewTy)
523 : Builder.CreateBitCast(V: Val, DestTy: NewTy);
524
525 auto *NewRMWI = Builder.CreateAtomicRMW(Op: AtomicRMWInst::Xchg, Ptr: Addr, Val: NewVal,
526 Align: RMWI->getAlign(), Ordering: RMWI->getOrdering(),
527 SSID: RMWI->getSyncScopeID());
528 NewRMWI->setVolatile(RMWI->isVolatile());
529 copyMetadataForAtomic(Dest&: *NewRMWI, Source: *RMWI);
530 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
531
532 Value *NewRVal = RMWI->getType()->isPointerTy()
533 ? Builder.CreateIntToPtr(V: NewRMWI, DestTy: RMWI->getType())
534 : Builder.CreateBitCast(V: NewRMWI, DestTy: RMWI->getType());
535 RMWI->replaceAllUsesWith(V: NewRVal);
536 RMWI->eraseFromParent();
537 return NewRMWI;
538}
539
540bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
541 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
542 case TargetLoweringBase::AtomicExpansionKind::None:
543 return false;
544 case TargetLoweringBase::AtomicExpansionKind::LLSC:
545 expandAtomicOpToLLSC(
546 I: LI, ResultTy: LI->getType(), Addr: LI->getPointerOperand(), AddrAlign: LI->getAlign(),
547 MemOpOrder: LI->getOrdering(),
548 PerformOp: [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
549 return true;
550 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
551 return expandAtomicLoadToLL(LI);
552 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
553 return expandAtomicLoadToCmpXchg(LI);
554 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
555 LI->setAtomic(Ordering: AtomicOrdering::NotAtomic);
556 return true;
557 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
558 TLI->emitExpandAtomicLoad(LI);
559 return true;
560 default:
561 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
562 }
563}
564
565bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
566 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
567 case TargetLoweringBase::AtomicExpansionKind::None:
568 return false;
569 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
570 TLI->emitExpandAtomicStore(SI);
571 return true;
572 case TargetLoweringBase::AtomicExpansionKind::Expand:
573 expandAtomicStoreToXChg(SI);
574 return true;
575 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
576 SI->setAtomic(Ordering: AtomicOrdering::NotAtomic);
577 return true;
578 default:
579 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
580 }
581}
582
583bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
584 ReplacementIRBuilder Builder(LI, *DL);
585
586 // On some architectures, load-linked instructions are atomic for larger
587 // sizes than normal loads. For example, the only 64-bit load guaranteed
588 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
589 Value *Val = TLI->emitLoadLinked(Builder, ValueTy: LI->getType(),
590 Addr: LI->getPointerOperand(), Ord: LI->getOrdering());
591 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
592
593 LI->replaceAllUsesWith(V: Val);
594 LI->eraseFromParent();
595
596 return true;
597}
598
599bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
600 ReplacementIRBuilder Builder(LI, *DL);
601 AtomicOrdering Order = LI->getOrdering();
602 if (Order == AtomicOrdering::Unordered)
603 Order = AtomicOrdering::Monotonic;
604
605 Value *Addr = LI->getPointerOperand();
606 Type *Ty = LI->getType();
607 Constant *DummyVal = Constant::getNullValue(Ty);
608
609 Value *Pair = Builder.CreateAtomicCmpXchg(
610 Ptr: Addr, Cmp: DummyVal, New: DummyVal, Align: LI->getAlign(), SuccessOrdering: Order,
611 FailureOrdering: AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: Order));
612 Value *Loaded = Builder.CreateExtractValue(Agg: Pair, Idxs: 0, Name: "loaded");
613
614 LI->replaceAllUsesWith(V: Loaded);
615 LI->eraseFromParent();
616
617 return true;
618}
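
// For example (illustrative), an acquire i32 load becomes a compare-exchange
// whose expected and new values are both zero, so memory is never modified and
// element 0 of the result pair carries the loaded value:
//   %pair = cmpxchg ptr %p, i32 0, i32 0 acquire acquire
//   %loaded = extractvalue { i32, i1 } %pair, 0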
619
/// Convert an atomic store of a non-integral type to an integer store of the
/// equivalent bitwidth. We used to not support floating point or vector
/// atomics in the IR at all. The backends learned to deal with the bitcast
/// idiom because that was the only way of expressing the notion of an atomic
/// float or vector store. The long term plan is to teach each backend to
/// instruction select from the original atomic store, but as a migration
/// mechanism, we convert back to the old format which the backends understand.
/// Each backend will need individual work to recognize the new format.
628StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
629 ReplacementIRBuilder Builder(SI, *DL);
630 auto *M = SI->getModule();
631 Type *NewTy = getCorrespondingIntegerType(T: SI->getValueOperand()->getType(),
632 DL: M->getDataLayout());
633 Value *NewVal = Builder.CreateBitCast(V: SI->getValueOperand(), DestTy: NewTy);
634
635 Value *Addr = SI->getPointerOperand();
636
637 StoreInst *NewSI = Builder.CreateStore(Val: NewVal, Ptr: Addr);
638 NewSI->setAlignment(SI->getAlign());
639 NewSI->setVolatile(SI->isVolatile());
640 NewSI->setAtomic(Ordering: SI->getOrdering(), SSID: SI->getSyncScopeID());
641 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
642 SI->eraseFromParent();
643 return NewSI;
644}
645
646void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
  // This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store. So we replace them with an
  // atomic swap, which can be implemented for example as a ldrex/strex on ARM
  // or lock cmpxchg8b/16b on X86, as these are atomic for larger sizes.
  // It is the responsibility of the target to only signal expansion via
  // shouldExpandAtomicRMW in cases where this is required and possible.
653 ReplacementIRBuilder Builder(SI, *DL);
654 AtomicOrdering Ordering = SI->getOrdering();
655 assert(Ordering != AtomicOrdering::NotAtomic);
656 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
657 ? AtomicOrdering::Monotonic
658 : Ordering;
659 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
660 Op: AtomicRMWInst::Xchg, Ptr: SI->getPointerOperand(), Val: SI->getValueOperand(),
661 Align: SI->getAlign(), Ordering: RMWOrdering);
662 SI->eraseFromParent();
663
664 // Now we have an appropriate swap instruction, lower it as usual.
665 tryExpandAtomicRMW(AI);
666}
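
// For example (illustrative):
//   store atomic i64 %v, ptr %p seq_cst, align 8
// becomes an exchange whose result is simply unused, and the xchg is then
// lowered further by tryExpandAtomicRMW if the target asks for it:
//   %old = atomicrmw xchg ptr %p, i64 %v seq_cst, align 8   ; %old is unused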
667
668static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
669 Value *Loaded, Value *NewVal, Align AddrAlign,
670 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
671 Value *&Success, Value *&NewLoaded,
672 Instruction *MetadataSrc) {
673 Type *OrigTy = NewVal->getType();
674
675 // This code can go away when cmpxchg supports FP and vector types.
676 assert(!OrigTy->isPointerTy());
677 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
678 if (NeedBitcast) {
679 IntegerType *IntTy = Builder.getIntNTy(N: OrigTy->getPrimitiveSizeInBits());
680 NewVal = Builder.CreateBitCast(V: NewVal, DestTy: IntTy);
681 Loaded = Builder.CreateBitCast(V: Loaded, DestTy: IntTy);
682 }
683
684 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
685 Ptr: Addr, Cmp: Loaded, New: NewVal, Align: AddrAlign, SuccessOrdering: MemOpOrder,
686 FailureOrdering: AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: MemOpOrder), SSID);
687 if (MetadataSrc)
688 copyMetadataForAtomic(Dest&: *Pair, Source: *MetadataSrc);
689
690 Success = Builder.CreateExtractValue(Agg: Pair, Idxs: 1, Name: "success");
691 NewLoaded = Builder.CreateExtractValue(Agg: Pair, Idxs: 0, Name: "newloaded");
692
693 if (NeedBitcast)
694 NewLoaded = Builder.CreateBitCast(V: NewLoaded, DestTy: OrigTy);
695}
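
// For a floating-point atomicrmw expanded through this helper (an illustrative
// sketch; orderings are arbitrary), the compare and new values are bitcast to
// integers around an integer cmpxchg and the loaded value is bitcast back:
//   %new.i = bitcast float %new to i32
//   %loaded.i = bitcast float %loaded to i32
//   %pair = cmpxchg ptr %p, i32 %loaded.i, i32 %new.i seq_cst seq_cst
//   %success = extractvalue { i32, i1 } %pair, 1
//   %newloaded.i = extractvalue { i32, i1 } %pair, 0
//   %newloaded = bitcast i32 %newloaded.i to float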
696
697bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
698 LLVMContext &Ctx = AI->getModule()->getContext();
699 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(RMW: AI);
700 switch (Kind) {
701 case TargetLoweringBase::AtomicExpansionKind::None:
702 return false;
703 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
704 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
705 unsigned ValueSize = getAtomicOpSize(RMWI: AI);
706 if (ValueSize < MinCASSize) {
707 expandPartwordAtomicRMW(I: AI,
708 ExpansionKind: TargetLoweringBase::AtomicExpansionKind::LLSC);
709 } else {
710 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
711 return buildAtomicRMWValue(Op: AI->getOperation(), Builder, Loaded,
712 Val: AI->getValOperand());
713 };
714 expandAtomicOpToLLSC(I: AI, ResultTy: AI->getType(), Addr: AI->getPointerOperand(),
715 AddrAlign: AI->getAlign(), MemOpOrder: AI->getOrdering(), PerformOp);
716 }
717 return true;
718 }
719 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
720 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
721 unsigned ValueSize = getAtomicOpSize(RMWI: AI);
722 if (ValueSize < MinCASSize) {
723 expandPartwordAtomicRMW(I: AI,
724 ExpansionKind: TargetLoweringBase::AtomicExpansionKind::CmpXChg);
725 } else {
726 SmallVector<StringRef> SSNs;
727 Ctx.getSyncScopeNames(SSNs);
728 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
729 ? "system"
730 : SSNs[AI->getSyncScopeID()];
731 OptimizationRemarkEmitter ORE(AI->getFunction());
732 ORE.emit(RemarkBuilder: [&]() {
733 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
734 << "A compare and swap loop was generated for an atomic "
735 << AI->getOperationName(Op: AI->getOperation()) << " operation at "
736 << MemScope << " memory scope";
737 });
738 expandAtomicRMWToCmpXchg(AI, CreateCmpXchg: createCmpXchgInstFun);
739 }
740 return true;
741 }
742 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
743 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
744 unsigned ValueSize = getAtomicOpSize(RMWI: AI);
745 if (ValueSize < MinCASSize) {
746 AtomicRMWInst::BinOp Op = AI->getOperation();
747 // Widen And/Or/Xor and give the target another chance at expanding it.
748 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
749 Op == AtomicRMWInst::And) {
750 tryExpandAtomicRMW(AI: widenPartwordAtomicRMW(AI));
751 return true;
752 }
753 }
754 expandAtomicRMWToMaskedIntrinsic(AI);
755 return true;
756 }
757 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
758 TLI->emitBitTestAtomicRMWIntrinsic(AI);
759 return true;
760 }
761 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
762 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
763 return true;
764 }
765 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
766 return lowerAtomicRMWInst(RMWI: AI);
767 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
768 TLI->emitExpandAtomicRMW(AI);
769 return true;
770 default:
771 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
772 }
773}
774
775namespace {
776
777struct PartwordMaskValues {
  // These fields are guaranteed to be set by createMaskInstrs.
779 Type *WordType = nullptr;
780 Type *ValueType = nullptr;
781 Type *IntValueType = nullptr;
782 Value *AlignedAddr = nullptr;
783 Align AlignedAddrAlignment;
784 // The remaining fields can be null.
785 Value *ShiftAmt = nullptr;
786 Value *Mask = nullptr;
787 Value *Inv_Mask = nullptr;
788};
789
790[[maybe_unused]]
791raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
792 auto PrintObj = [&O](auto *V) {
793 if (V)
794 O << *V;
795 else
796 O << "nullptr";
797 O << '\n';
798 };
799 O << "PartwordMaskValues {\n";
800 O << " WordType: ";
801 PrintObj(PMV.WordType);
802 O << " ValueType: ";
803 PrintObj(PMV.ValueType);
804 O << " AlignedAddr: ";
805 PrintObj(PMV.AlignedAddr);
806 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
807 O << " ShiftAmt: ";
808 PrintObj(PMV.ShiftAmt);
809 O << " Mask: ";
810 PrintObj(PMV.Mask);
811 O << " Inv_Mask: ";
812 PrintObj(PMV.Inv_Mask);
813 O << "}\n";
814 return O;
815}
816
817} // end anonymous namespace
818
819/// This is a helper function which builds instructions to provide
820/// values necessary for partword atomic operations. It takes an
821/// incoming address, Addr, and ValueType, and constructs the address,
822/// shift-amounts and masks needed to work with a larger value of size
823/// WordSize.
824///
825/// AlignedAddr: Addr rounded down to a multiple of WordSize
826///
/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
///           from AlignedAddr for it to have the same value as if
///           ValueType was loaded from Addr.
///
/// Mask: Value to mask with the value loaded from AlignedAddr to
///       include only the part that would've been loaded from Addr.
833///
834/// Inv_Mask: The inverse of Mask.
835static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
836 Instruction *I, Type *ValueType,
837 Value *Addr, Align AddrAlign,
838 unsigned MinWordSize) {
839 PartwordMaskValues PMV;
840
841 Module *M = I->getModule();
842 LLVMContext &Ctx = M->getContext();
843 const DataLayout &DL = M->getDataLayout();
844 unsigned ValueSize = DL.getTypeStoreSize(Ty: ValueType);
845
846 PMV.ValueType = PMV.IntValueType = ValueType;
847 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
848 PMV.IntValueType =
849 Type::getIntNTy(C&: Ctx, N: ValueType->getPrimitiveSizeInBits());
850
851 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(C&: Ctx, N: MinWordSize * 8)
852 : ValueType;
853 if (PMV.ValueType == PMV.WordType) {
854 PMV.AlignedAddr = Addr;
855 PMV.AlignedAddrAlignment = AddrAlign;
856 PMV.ShiftAmt = ConstantInt::get(Ty: PMV.ValueType, V: 0);
857 PMV.Mask = ConstantInt::get(Ty: PMV.ValueType, V: ~0, /*isSigned*/ IsSigned: true);
858 return PMV;
859 }
860
861 PMV.AlignedAddrAlignment = Align(MinWordSize);
862
863 assert(ValueSize < MinWordSize);
864
865 PointerType *PtrTy = cast<PointerType>(Val: Addr->getType());
866 IntegerType *IntTy = DL.getIndexType(C&: Ctx, AddressSpace: PtrTy->getAddressSpace());
867 Value *PtrLSB;
868
869 if (AddrAlign < MinWordSize) {
870 PMV.AlignedAddr = Builder.CreateIntrinsic(
871 ID: Intrinsic::ptrmask, Types: {PtrTy, IntTy},
872 Args: {Addr, ConstantInt::getSigned(Ty: IntTy, V: ~(uint64_t)(MinWordSize - 1))},
873 FMFSource: nullptr, Name: "AlignedAddr");
874
875 Value *AddrInt = Builder.CreatePtrToInt(V: Addr, DestTy: IntTy);
876 PtrLSB = Builder.CreateAnd(LHS: AddrInt, RHS: MinWordSize - 1, Name: "PtrLSB");
877 } else {
    // If the alignment is high enough, the low bits of the address are known
    // to be zero.
879 PMV.AlignedAddr = Addr;
880 PtrLSB = ConstantInt::getNullValue(Ty: IntTy);
881 }
882
883 if (DL.isLittleEndian()) {
884 // turn bytes into bits
885 PMV.ShiftAmt = Builder.CreateShl(LHS: PtrLSB, RHS: 3);
886 } else {
887 // turn bytes into bits, and count from the other side.
888 PMV.ShiftAmt = Builder.CreateShl(
889 LHS: Builder.CreateXor(LHS: PtrLSB, RHS: MinWordSize - ValueSize), RHS: 3);
890 }
891
892 PMV.ShiftAmt = Builder.CreateTrunc(V: PMV.ShiftAmt, DestTy: PMV.WordType, Name: "ShiftAmt");
893 PMV.Mask = Builder.CreateShl(
894 LHS: ConstantInt::get(Ty: PMV.WordType, V: (1 << (ValueSize * 8)) - 1), RHS: PMV.ShiftAmt,
895 Name: "Mask");
896
897 PMV.Inv_Mask = Builder.CreateNot(V: PMV.Mask, Name: "Inv_Mask");
898
899 return PMV;
900}
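
// An illustrative sketch of the arithmetic above (not used by the pass and not
// tied to any particular target): concrete AlignedAddr/ShiftAmt/Mask values
// for a 1-byte access at byte offset 2 of a 4-byte word on a little-endian
// target, assuming MinWordSize == 4.
[[maybe_unused]] static uint64_t examplePartwordMask() {
  const uint64_t Addr = 0x1006; // hypothetical byte address of an i8 access
  const unsigned MinWordSize = 4, ValueSize = 1;
  const uint64_t AlignedAddr = Addr & ~uint64_t(MinWordSize - 1); // 0x1004
  const uint64_t PtrLSB = Addr & (MinWordSize - 1);               // 2
  // Little-endian: shift by the byte offset in bits; a big-endian target would
  // use (PtrLSB ^ (MinWordSize - ValueSize)) * 8 == 8 instead.
  const uint64_t ShiftAmt = PtrLSB * 8; // 16
  const uint64_t Mask = ((uint64_t(1) << (ValueSize * 8)) - 1) << ShiftAmt;
  (void)AlignedAddr;
  // Mask == 0x00FF0000 and Inv_Mask == ~Mask. Extracting the byte from a
  // loaded word W is (W >> ShiftAmt) & 0xFF, and inserting a new byte B is
  // (W & ~Mask) | (uint64_t(B) << ShiftAmt).
  return Mask;
}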
901
902static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
903 const PartwordMaskValues &PMV) {
904 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
905 if (PMV.WordType == PMV.ValueType)
906 return WideWord;
907
908 Value *Shift = Builder.CreateLShr(LHS: WideWord, RHS: PMV.ShiftAmt, Name: "shifted");
909 Value *Trunc = Builder.CreateTrunc(V: Shift, DestTy: PMV.IntValueType, Name: "extracted");
910 return Builder.CreateBitCast(V: Trunc, DestTy: PMV.ValueType);
911}
912
913static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
914 Value *Updated, const PartwordMaskValues &PMV) {
915 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
916 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
917 if (PMV.WordType == PMV.ValueType)
918 return Updated;
919
920 Updated = Builder.CreateBitCast(V: Updated, DestTy: PMV.IntValueType);
921
922 Value *ZExt = Builder.CreateZExt(V: Updated, DestTy: PMV.WordType, Name: "extended");
923 Value *Shift =
924 Builder.CreateShl(LHS: ZExt, RHS: PMV.ShiftAmt, Name: "shifted", /*HasNUW*/ true);
925 Value *And = Builder.CreateAnd(LHS: WideWord, RHS: PMV.Inv_Mask, Name: "unmasked");
926 Value *Or = Builder.CreateOr(LHS: And, RHS: Shift, Name: "inserted");
927 return Or;
928}
929
930/// Emit IR to implement a masked version of a given atomicrmw
931/// operation. (That is, only the bits under the Mask should be
932/// affected by the operation)
933static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
934 IRBuilderBase &Builder, Value *Loaded,
935 Value *Shifted_Inc, Value *Inc,
936 const PartwordMaskValues &PMV) {
937 // TODO: update to use
938 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
939 // to merge bits from two values without requiring PMV.Inv_Mask.
940 switch (Op) {
941 case AtomicRMWInst::Xchg: {
942 Value *Loaded_MaskOut = Builder.CreateAnd(LHS: Loaded, RHS: PMV.Inv_Mask);
943 Value *FinalVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: Shifted_Inc);
944 return FinalVal;
945 }
946 case AtomicRMWInst::Or:
947 case AtomicRMWInst::Xor:
948 case AtomicRMWInst::And:
949 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
950 case AtomicRMWInst::Add:
951 case AtomicRMWInst::Sub:
952 case AtomicRMWInst::Nand: {
953 // The other arithmetic ops need to be masked into place.
954 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Val: Shifted_Inc);
955 Value *NewVal_Masked = Builder.CreateAnd(LHS: NewVal, RHS: PMV.Mask);
956 Value *Loaded_MaskOut = Builder.CreateAnd(LHS: Loaded, RHS: PMV.Inv_Mask);
957 Value *FinalVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: NewVal_Masked);
958 return FinalVal;
959 }
960 case AtomicRMWInst::Max:
961 case AtomicRMWInst::Min:
962 case AtomicRMWInst::UMax:
963 case AtomicRMWInst::UMin:
964 case AtomicRMWInst::FAdd:
965 case AtomicRMWInst::FSub:
966 case AtomicRMWInst::FMin:
967 case AtomicRMWInst::FMax:
968 case AtomicRMWInst::FMaximum:
969 case AtomicRMWInst::FMinimum:
970 case AtomicRMWInst::UIncWrap:
971 case AtomicRMWInst::UDecWrap:
972 case AtomicRMWInst::USubCond:
973 case AtomicRMWInst::USubSat: {
974 // Finally, other ops will operate on the full value, so truncate down to
975 // the original size, and expand out again after doing the
976 // operation. Bitcasts will be inserted for FP values.
977 Value *Loaded_Extract = extractMaskedValue(Builder, WideWord: Loaded, PMV);
978 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded: Loaded_Extract, Val: Inc);
979 Value *FinalVal = insertMaskedValue(Builder, WideWord: Loaded, Updated: NewVal, PMV);
980 return FinalVal;
981 }
982 default:
983 llvm_unreachable("Unknown atomic op");
984 }
985}
986
987/// Expand a sub-word atomicrmw operation into an appropriate
988/// word-sized operation.
989///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside the loop may operate on only part of the
/// value.
994void AtomicExpandImpl::expandPartwordAtomicRMW(
995 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
996 // Widen And/Or/Xor and give the target another chance at expanding it.
997 AtomicRMWInst::BinOp Op = AI->getOperation();
998 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
999 Op == AtomicRMWInst::And) {
1000 tryExpandAtomicRMW(AI: widenPartwordAtomicRMW(AI));
1001 return;
1002 }
1003 AtomicOrdering MemOpOrder = AI->getOrdering();
1004 SyncScope::ID SSID = AI->getSyncScopeID();
1005
1006 ReplacementIRBuilder Builder(AI, *DL);
1007
1008 PartwordMaskValues PMV =
1009 createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
1010 AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1011
1012 Value *ValOperand_Shifted = nullptr;
1013 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
1014 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
1015 Value *ValOp = Builder.CreateBitCast(V: AI->getValOperand(), DestTy: PMV.IntValueType);
1016 ValOperand_Shifted =
1017 Builder.CreateShl(LHS: Builder.CreateZExt(V: ValOp, DestTy: PMV.WordType), RHS: PMV.ShiftAmt,
1018 Name: "ValOperand_Shifted");
1019 }
1020
1021 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
1022 return performMaskedAtomicOp(Op, Builder, Loaded, Shifted_Inc: ValOperand_Shifted,
1023 Inc: AI->getValOperand(), PMV);
1024 };
1025
1026 Value *OldResult;
1027 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
1028 OldResult = insertRMWCmpXchgLoop(
1029 Builder, ResultType: PMV.WordType, Addr: PMV.AlignedAddr, AddrAlign: PMV.AlignedAddrAlignment,
1030 MemOpOrder, SSID, PerformOp: PerformPartwordOp, CreateCmpXchg: createCmpXchgInstFun, MetadataSrc: AI);
1031 } else {
1032 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
1033 OldResult = insertRMWLLSCLoop(Builder, ResultTy: PMV.WordType, Addr: PMV.AlignedAddr,
1034 AddrAlign: PMV.AlignedAddrAlignment, MemOpOrder,
1035 PerformOp: PerformPartwordOp);
1036 }
1037
1038 Value *FinalOldResult = extractMaskedValue(Builder, WideWord: OldResult, PMV);
1039 AI->replaceAllUsesWith(V: FinalOldResult);
1040 AI->eraseFromParent();
1041}
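
// For example (an illustrative sketch, not any particular target), expanding
//   %old = atomicrmw add i8* %p, i8 %v monotonic
// with a 32-bit minimum cmpxchg width roughly produces:
//   [[Setup mask values PMV.*]]
//   %ValOperand_Shifted = shl i32 zext(%v), %PMV.ShiftAmt
//   [[cmpxchg or LL/SC loop over the i32 at %PMV.AlignedAddr, computing each
//     new word with performMaskedAtomicOp]]
//   %shifted = lshr i32 %OldWord, %PMV.ShiftAmt
//   %old = trunc i32 %shifted to i8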
1042
1043// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
1044AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
1045 ReplacementIRBuilder Builder(AI, *DL);
1046 AtomicRMWInst::BinOp Op = AI->getOperation();
1047
1048 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
1049 Op == AtomicRMWInst::And) &&
1050 "Unable to widen operation");
1051
1052 PartwordMaskValues PMV =
1053 createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
1054 AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1055
1056 Value *ValOperand_Shifted =
1057 Builder.CreateShl(LHS: Builder.CreateZExt(V: AI->getValOperand(), DestTy: PMV.WordType),
1058 RHS: PMV.ShiftAmt, Name: "ValOperand_Shifted");
1059
1060 Value *NewOperand;
1061
1062 if (Op == AtomicRMWInst::And)
1063 NewOperand =
1064 Builder.CreateOr(LHS: ValOperand_Shifted, RHS: PMV.Inv_Mask, Name: "AndOperand");
1065 else
1066 NewOperand = ValOperand_Shifted;
1067
1068 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
1069 Op, Ptr: PMV.AlignedAddr, Val: NewOperand, Align: PMV.AlignedAddrAlignment,
1070 Ordering: AI->getOrdering(), SSID: AI->getSyncScopeID());
1071
1072 copyMetadataForAtomic(Dest&: *NewAI, Source: *AI);
1073
1074 Value *FinalOldResult = extractMaskedValue(Builder, WideWord: NewAI, PMV);
1075 AI->replaceAllUsesWith(V: FinalOldResult);
1076 AI->eraseFromParent();
1077 return NewAI;
1078}
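
// For example (illustrative), for an i8 at byte offset 2 of its 32-bit word,
// %Mask == 0x00FF0000 and %Inv_Mask == 0xFF00FFFF. An
//   atomicrmw and i8* %p, i8 %v
// becomes an i32 'and' on %AlignedAddr whose operand is the shifted value
// OR'ed with %Inv_Mask, so the bytes outside the mask are ANDed with all-ones
// and preserved. For 'or' and 'xor' the shifted operand is used directly,
// since the zero bits outside the mask are already the identity for those ops.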
1079
1080bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
1081 // The basic idea here is that we're expanding a cmpxchg of a
1082 // smaller memory size up to a word-sized cmpxchg. To do this, we
1083 // need to add a retry-loop for strong cmpxchg, so that
1084 // modifications to other parts of the word don't cause a spurious
1085 // failure.
1086
1087 // This generates code like the following:
1088 // [[Setup mask values PMV.*]]
1089 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
1090 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
1091 // %InitLoaded = load i32* %addr
1092 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
1093 // br partword.cmpxchg.loop
1094 // partword.cmpxchg.loop:
1095 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
1096 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
1097 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
1098 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
1099 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
1100 // i32 %FullWord_NewVal success_ordering failure_ordering
1101 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
1102 // %Success = extractvalue { i32, i1 } %NewCI, 1
1103 // br i1 %Success, label %partword.cmpxchg.end,
1104 // label %partword.cmpxchg.failure
1105 // partword.cmpxchg.failure:
1106 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1107 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1108 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1109 // label %partword.cmpxchg.end
1110 // partword.cmpxchg.end:
1111 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1112 // %FinalOldVal = trunc i32 %tmp1 to i8
  //     %tmp2 = insertvalue { i8, i1 } poison, i8 %FinalOldVal, 0
  //     %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
1115
1116 Value *Addr = CI->getPointerOperand();
1117 Value *Cmp = CI->getCompareOperand();
1118 Value *NewVal = CI->getNewValOperand();
1119
1120 BasicBlock *BB = CI->getParent();
1121 Function *F = BB->getParent();
1122 ReplacementIRBuilder Builder(CI, *DL);
1123 LLVMContext &Ctx = Builder.getContext();
1124
1125 BasicBlock *EndBB =
1126 BB->splitBasicBlock(I: CI->getIterator(), BBName: "partword.cmpxchg.end");
1127 auto FailureBB =
1128 BasicBlock::Create(Context&: Ctx, Name: "partword.cmpxchg.failure", Parent: F, InsertBefore: EndBB);
1129 auto LoopBB = BasicBlock::Create(Context&: Ctx, Name: "partword.cmpxchg.loop", Parent: F, InsertBefore: FailureBB);
1130
1131 // The split call above "helpfully" added a branch at the end of BB
1132 // (to the wrong place).
1133 std::prev(x: BB->end())->eraseFromParent();
1134 Builder.SetInsertPoint(BB);
1135
1136 PartwordMaskValues PMV =
1137 createMaskInstrs(Builder, I: CI, ValueType: CI->getCompareOperand()->getType(), Addr,
1138 AddrAlign: CI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1139
1140 // Shift the incoming values over, into the right location in the word.
1141 Value *NewVal_Shifted =
1142 Builder.CreateShl(LHS: Builder.CreateZExt(V: NewVal, DestTy: PMV.WordType), RHS: PMV.ShiftAmt);
1143 Value *Cmp_Shifted =
1144 Builder.CreateShl(LHS: Builder.CreateZExt(V: Cmp, DestTy: PMV.WordType), RHS: PMV.ShiftAmt);
1145
1146 // Load the entire current word, and mask into place the expected and new
1147 // values
1148 LoadInst *InitLoaded = Builder.CreateLoad(Ty: PMV.WordType, Ptr: PMV.AlignedAddr);
1149 InitLoaded->setVolatile(CI->isVolatile());
1150 Value *InitLoaded_MaskOut = Builder.CreateAnd(LHS: InitLoaded, RHS: PMV.Inv_Mask);
1151 Builder.CreateBr(Dest: LoopBB);
1152
1153 // partword.cmpxchg.loop:
1154 Builder.SetInsertPoint(LoopBB);
1155 PHINode *Loaded_MaskOut = Builder.CreatePHI(Ty: PMV.WordType, NumReservedValues: 2);
1156 Loaded_MaskOut->addIncoming(V: InitLoaded_MaskOut, BB);
1157
1158 // Mask/Or the expected and new values into place in the loaded word.
1159 Value *FullWord_NewVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: NewVal_Shifted);
1160 Value *FullWord_Cmp = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: Cmp_Shifted);
1161 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1162 Ptr: PMV.AlignedAddr, Cmp: FullWord_Cmp, New: FullWord_NewVal, Align: PMV.AlignedAddrAlignment,
1163 SuccessOrdering: CI->getSuccessOrdering(), FailureOrdering: CI->getFailureOrdering(), SSID: CI->getSyncScopeID());
1164 NewCI->setVolatile(CI->isVolatile());
1165 // When we're building a strong cmpxchg, we need a loop, so you
1166 // might think we could use a weak cmpxchg inside. But, using strong
1167 // allows the below comparison for ShouldContinue, and we're
1168 // expecting the underlying cmpxchg to be a machine instruction,
1169 // which is strong anyways.
1170 NewCI->setWeak(CI->isWeak());
1171
1172 Value *OldVal = Builder.CreateExtractValue(Agg: NewCI, Idxs: 0);
1173 Value *Success = Builder.CreateExtractValue(Agg: NewCI, Idxs: 1);
1174
1175 if (CI->isWeak())
1176 Builder.CreateBr(Dest: EndBB);
1177 else
1178 Builder.CreateCondBr(Cond: Success, True: EndBB, False: FailureBB);
1179
1180 // partword.cmpxchg.failure:
1181 Builder.SetInsertPoint(FailureBB);
  // Upon failure, check whether the masked-out part of the loaded value was
  // modified. If it wasn't, the comparison must have failed on the masked-in
  // part, so abort the cmpxchg rather than retrying.
1185 Value *OldVal_MaskOut = Builder.CreateAnd(LHS: OldVal, RHS: PMV.Inv_Mask);
1186 Value *ShouldContinue = Builder.CreateICmpNE(LHS: Loaded_MaskOut, RHS: OldVal_MaskOut);
1187 Builder.CreateCondBr(Cond: ShouldContinue, True: LoopBB, False: EndBB);
1188
1189 // Add the second value to the phi from above
1190 Loaded_MaskOut->addIncoming(V: OldVal_MaskOut, BB: FailureBB);
1191
1192 // partword.cmpxchg.end:
1193 Builder.SetInsertPoint(CI);
1194
1195 Value *FinalOldVal = extractMaskedValue(Builder, WideWord: OldVal, PMV);
1196 Value *Res = PoisonValue::get(T: CI->getType());
1197 Res = Builder.CreateInsertValue(Agg: Res, Val: FinalOldVal, Idxs: 0);
1198 Res = Builder.CreateInsertValue(Agg: Res, Val: Success, Idxs: 1);
1199
1200 CI->replaceAllUsesWith(V: Res);
1201 CI->eraseFromParent();
1202 return true;
1203}
1204
1205void AtomicExpandImpl::expandAtomicOpToLLSC(
1206 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1207 AtomicOrdering MemOpOrder,
1208 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1209 ReplacementIRBuilder Builder(I, *DL);
1210 Value *Loaded = insertRMWLLSCLoop(Builder, ResultTy: ResultType, Addr, AddrAlign,
1211 MemOpOrder, PerformOp);
1212
1213 I->replaceAllUsesWith(V: Loaded);
1214 I->eraseFromParent();
1215}
1216
1217void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1218 ReplacementIRBuilder Builder(AI, *DL);
1219
1220 PartwordMaskValues PMV =
1221 createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
1222 AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1223
1224 // The value operand must be sign-extended for signed min/max so that the
1225 // target's signed comparison instructions can be used. Otherwise, just
1226 // zero-ext.
1227 Instruction::CastOps CastOp = Instruction::ZExt;
1228 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1229 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1230 CastOp = Instruction::SExt;
1231
1232 Value *ValOperand_Shifted = Builder.CreateShl(
1233 LHS: Builder.CreateCast(Op: CastOp, V: AI->getValOperand(), DestTy: PMV.WordType),
1234 RHS: PMV.ShiftAmt, Name: "ValOperand_Shifted");
1235 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1236 Builder, AI, AlignedAddr: PMV.AlignedAddr, Incr: ValOperand_Shifted, Mask: PMV.Mask, ShiftAmt: PMV.ShiftAmt,
1237 Ord: AI->getOrdering());
1238 Value *FinalOldResult = extractMaskedValue(Builder, WideWord: OldResult, PMV);
1239 AI->replaceAllUsesWith(V: FinalOldResult);
1240 AI->eraseFromParent();
1241}
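
// For example (illustrative): for an i16 min/max operand equal to -1, SExt to
// the i32 word gives 0xFFFFFFFF, so the intrinsic's word-sized signed compare
// still sees -1; ZExt would give 0x0000FFFF (65535) and produce the wrong
// ordering.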
1242
1243void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1244 AtomicCmpXchgInst *CI) {
1245 ReplacementIRBuilder Builder(CI, *DL);
1246
1247 PartwordMaskValues PMV = createMaskInstrs(
1248 Builder, I: CI, ValueType: CI->getCompareOperand()->getType(), Addr: CI->getPointerOperand(),
1249 AddrAlign: CI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
1250
1251 Value *CmpVal_Shifted = Builder.CreateShl(
1252 LHS: Builder.CreateZExt(V: CI->getCompareOperand(), DestTy: PMV.WordType), RHS: PMV.ShiftAmt,
1253 Name: "CmpVal_Shifted");
1254 Value *NewVal_Shifted = Builder.CreateShl(
1255 LHS: Builder.CreateZExt(V: CI->getNewValOperand(), DestTy: PMV.WordType), RHS: PMV.ShiftAmt,
1256 Name: "NewVal_Shifted");
1257 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1258 Builder, CI, AlignedAddr: PMV.AlignedAddr, CmpVal: CmpVal_Shifted, NewVal: NewVal_Shifted, Mask: PMV.Mask,
1259 Ord: CI->getMergedOrdering());
1260 Value *FinalOldVal = extractMaskedValue(Builder, WideWord: OldVal, PMV);
1261 Value *Res = PoisonValue::get(T: CI->getType());
1262 Res = Builder.CreateInsertValue(Agg: Res, Val: FinalOldVal, Idxs: 0);
1263 Value *Success = Builder.CreateICmpEQ(
1264 LHS: CmpVal_Shifted, RHS: Builder.CreateAnd(LHS: OldVal, RHS: PMV.Mask), Name: "Success");
1265 Res = Builder.CreateInsertValue(Agg: Res, Val: Success, Idxs: 1);
1266
1267 CI->replaceAllUsesWith(V: Res);
1268 CI->eraseFromParent();
1269}
1270
1271Value *AtomicExpandImpl::insertRMWLLSCLoop(
1272 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1273 AtomicOrdering MemOpOrder,
1274 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1275 LLVMContext &Ctx = Builder.getContext();
1276 BasicBlock *BB = Builder.GetInsertBlock();
1277 Function *F = BB->getParent();
1278
1279 assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
1280 "Expected at least natural alignment at this point.");
1281
1282 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1283 //
1284 // The standard expansion we produce is:
1285 // [...]
1286 // atomicrmw.start:
1287 // %loaded = @load.linked(%addr)
1288 // %new = some_op iN %loaded, %incr
1289 // %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp ne i32 %stored, 0
  //     br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
1292 // atomicrmw.end:
1293 // [...]
1294 BasicBlock *ExitBB =
1295 BB->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "atomicrmw.end");
1296 BasicBlock *LoopBB = BasicBlock::Create(Context&: Ctx, Name: "atomicrmw.start", Parent: F, InsertBefore: ExitBB);
1297
1298 // The split call above "helpfully" added a branch at the end of BB (to the
1299 // wrong place).
1300 std::prev(x: BB->end())->eraseFromParent();
1301 Builder.SetInsertPoint(BB);
1302 Builder.CreateBr(Dest: LoopBB);
1303
1304 // Start the main loop block now that we've taken care of the preliminaries.
1305 Builder.SetInsertPoint(LoopBB);
1306 Value *Loaded = TLI->emitLoadLinked(Builder, ValueTy: ResultTy, Addr, Ord: MemOpOrder);
1307
1308 Value *NewVal = PerformOp(Builder, Loaded);
1309
1310 Value *StoreSuccess =
1311 TLI->emitStoreConditional(Builder, Val: NewVal, Addr, Ord: MemOpOrder);
1312 Value *TryAgain = Builder.CreateICmpNE(
1313 LHS: StoreSuccess, RHS: ConstantInt::get(Ty: IntegerType::get(C&: Ctx, NumBits: 32), V: 0), Name: "tryagain");
1314
1315 Instruction *CondBr = Builder.CreateCondBr(Cond: TryAgain, True: LoopBB, False: ExitBB);
1316
  // Atomic RMW expands to a load-linked / store-conditional loop. Because it
  // is hard to predict precise branch weights, we mark the branch as "unknown"
  // (50/50) to prevent misleading optimizations.
  setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, DEBUG_TYPE);
1321
1322 Builder.SetInsertPoint(TheBB: ExitBB, IP: ExitBB->begin());
1323 return Loaded;
1324}
1325
/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
/// IR. As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg, so that we can update backends one by
/// one.
1331AtomicCmpXchgInst *
1332AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1333 auto *M = CI->getModule();
1334 Type *NewTy = getCorrespondingIntegerType(T: CI->getCompareOperand()->getType(),
1335 DL: M->getDataLayout());
1336
1337 ReplacementIRBuilder Builder(CI, *DL);
1338
1339 Value *Addr = CI->getPointerOperand();
1340
1341 Value *NewCmp = Builder.CreatePtrToInt(V: CI->getCompareOperand(), DestTy: NewTy);
1342 Value *NewNewVal = Builder.CreatePtrToInt(V: CI->getNewValOperand(), DestTy: NewTy);
1343
1344 auto *NewCI = Builder.CreateAtomicCmpXchg(
1345 Ptr: Addr, Cmp: NewCmp, New: NewNewVal, Align: CI->getAlign(), SuccessOrdering: CI->getSuccessOrdering(),
1346 FailureOrdering: CI->getFailureOrdering(), SSID: CI->getSyncScopeID());
1347 NewCI->setVolatile(CI->isVolatile());
1348 NewCI->setWeak(CI->isWeak());
1349 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1350
1351 Value *OldVal = Builder.CreateExtractValue(Agg: NewCI, Idxs: 0);
1352 Value *Succ = Builder.CreateExtractValue(Agg: NewCI, Idxs: 1);
1353
1354 OldVal = Builder.CreateIntToPtr(V: OldVal, DestTy: CI->getCompareOperand()->getType());
1355
1356 Value *Res = PoisonValue::get(T: CI->getType());
1357 Res = Builder.CreateInsertValue(Agg: Res, Val: OldVal, Idxs: 0);
1358 Res = Builder.CreateInsertValue(Agg: Res, Val: Succ, Idxs: 1);
1359
1360 CI->replaceAllUsesWith(V: Res);
1361 CI->eraseFromParent();
1362 return NewCI;
1363}
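
// For example (illustrative, assuming 64-bit pointers):
//   %res = cmpxchg ptr %p, ptr %cmp, ptr %new acq_rel monotonic
// becomes
//   %cmp.i = ptrtoint ptr %cmp to i64
//   %new.i = ptrtoint ptr %new to i64
//   %pair = cmpxchg ptr %p, i64 %cmp.i, i64 %new.i acq_rel monotonic
//   %old.i = extractvalue { i64, i1 } %pair, 0
//   %succ = extractvalue { i64, i1 } %pair, 1
//   %old = inttoptr i64 %old.i to ptr
// and the { ptr, i1 } result is rebuilt from %old and %succ.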
1364
1365bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1366 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1367 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1368 Value *Addr = CI->getPointerOperand();
1369 BasicBlock *BB = CI->getParent();
1370 Function *F = BB->getParent();
1371 LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-ops and we
  // should preserve the ordering.
1376 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(I: CI);
1377 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1378 ? AtomicOrdering::Monotonic
1379 : CI->getMergedOrdering();
1380
1381 // In implementations which use a barrier to achieve release semantics, we can
1382 // delay emitting this barrier until we know a store is actually going to be
1383 // attempted. The cost of this delay is that we need 2 copies of the block
1384 // emitting the load-linked, affecting code size.
1385 //
1386 // Ideally, this logic would be unconditional except for the minsize check
1387 // since in other cases the extra blocks naturally collapse down to the
1388 // minimal loop. Unfortunately, this puts too much stress on later
1389 // optimisations so we avoid emitting the extra logic in those cases too.
1390 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1391 SuccessOrder != AtomicOrdering::Monotonic &&
1392 SuccessOrder != AtomicOrdering::Acquire &&
1393 !F->hasMinSize();
1394
1395 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1396 // do it even on minsize.
1397 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1398
1399 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1400 //
1401 // The full expansion we produce is:
1402 // [...]
1403 // %aligned.addr = ...
1404 // cmpxchg.start:
1405 // %unreleasedload = @load.linked(%aligned.addr)
1406 // %unreleasedload.extract = extract value from %unreleasedload
1407 // %should_store = icmp eq %unreleasedload.extract, %desired
1408 // br i1 %should_store, label %cmpxchg.releasingstore,
1409 // label %cmpxchg.nostore
1410 // cmpxchg.releasingstore:
1411 // fence?
1412 // br label cmpxchg.trystore
1413 // cmpxchg.trystore:
1414 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1415 // [%releasedload, %cmpxchg.releasedload]
1416 // %updated.new = insert %new into %loaded.trystore
1417 // %stored = @store_conditional(%updated.new, %aligned.addr)
1418 // %success = icmp eq i32 %stored, 0
1419 // br i1 %success, label %cmpxchg.success,
1420 // label %cmpxchg.releasedload/%cmpxchg.failure
1421 // cmpxchg.releasedload:
1422 // %releasedload = @load.linked(%aligned.addr)
1423 // %releasedload.extract = extract value from %releasedload
1424 // %should_store = icmp eq %releasedload.extract, %desired
1425 // br i1 %should_store, label %cmpxchg.trystore,
1426 // label %cmpxchg.failure
1427 // cmpxchg.success:
1428 // fence?
1429 // br label %cmpxchg.end
1430 // cmpxchg.nostore:
1431 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1432 // [%releasedload,
1433 // %cmpxchg.releasedload/%cmpxchg.trystore]
1434 // @load_linked_fail_balance()?
1435 // br label %cmpxchg.failure
1436 // cmpxchg.failure:
1437 // fence?
1438 // br label %cmpxchg.end
1439 // cmpxchg.end:
1440 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1441 // [%loaded.trystore, %cmpxchg.trystore]
1442 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1443 // %loaded = extract value from %loaded.exit
1444 // %restmp = insertvalue { iN, i1 } poison, iN %loaded, 0
1445 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1446 // [...]
1447 BasicBlock *ExitBB = BB->splitBasicBlock(I: CI->getIterator(), BBName: "cmpxchg.end");
1448 auto FailureBB = BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.failure", Parent: F, InsertBefore: ExitBB);
1449 auto NoStoreBB = BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.nostore", Parent: F, InsertBefore: FailureBB);
1450 auto SuccessBB = BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.success", Parent: F, InsertBefore: NoStoreBB);
1451 auto ReleasedLoadBB =
1452 BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.releasedload", Parent: F, InsertBefore: SuccessBB);
1453 auto TryStoreBB =
1454 BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.trystore", Parent: F, InsertBefore: ReleasedLoadBB);
1455 auto ReleasingStoreBB =
1456 BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.fencedstore", Parent: F, InsertBefore: TryStoreBB);
1457 auto StartBB = BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.start", Parent: F, InsertBefore: ReleasingStoreBB);
1458
1459 ReplacementIRBuilder Builder(CI, *DL);
1460
1461 // The split call above "helpfully" added a branch at the end of BB (to the
1462 // wrong place), but we might want a fence too. It's easiest to just remove
1463 // the branch entirely.
1464 std::prev(x: BB->end())->eraseFromParent();
1465 Builder.SetInsertPoint(BB);
1466 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1467 TLI->emitLeadingFence(Builder, Inst: CI, Ord: SuccessOrder);
1468
1469 PartwordMaskValues PMV =
1470 createMaskInstrs(Builder, I: CI, ValueType: CI->getCompareOperand()->getType(), Addr,
1471 AddrAlign: CI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / 8);
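// Note: when the compared value is narrower than the minimum cmpxchg width,
// PMV describes where it lives within the containing aligned word; for a
// full-width value the mask/extract helpers below are effectively no-ops.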
1472 Builder.CreateBr(Dest: StartBB);
1473
1474 // Start the main loop block now that we've taken care of the preliminaries.
1475 Builder.SetInsertPoint(StartBB);
1476 Value *UnreleasedLoad =
1477 TLI->emitLoadLinked(Builder, ValueTy: PMV.WordType, Addr: PMV.AlignedAddr, Ord: MemOpOrder);
1478 Value *UnreleasedLoadExtract =
1479 extractMaskedValue(Builder, WideWord: UnreleasedLoad, PMV);
1480 Value *ShouldStore = Builder.CreateICmpEQ(
1481 LHS: UnreleasedLoadExtract, RHS: CI->getCompareOperand(), Name: "should_store");
1482
1483 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1484 // jump straight past that fence instruction (if it exists).
1485 Builder.CreateCondBr(Cond: ShouldStore, True: ReleasingStoreBB, False: NoStoreBB,
1486 BranchWeights: MDBuilder(F->getContext()).createLikelyBranchWeights());
1487
1488 Builder.SetInsertPoint(ReleasingStoreBB);
1489 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1490 TLI->emitLeadingFence(Builder, Inst: CI, Ord: SuccessOrder);
1491 Builder.CreateBr(Dest: TryStoreBB);
1492
1493 Builder.SetInsertPoint(TryStoreBB);
1494 PHINode *LoadedTryStore =
1495 Builder.CreatePHI(Ty: PMV.WordType, NumReservedValues: 2, Name: "loaded.trystore");
1496 LoadedTryStore->addIncoming(V: UnreleasedLoad, BB: ReleasingStoreBB);
1497 Value *NewValueInsert =
1498 insertMaskedValue(Builder, WideWord: LoadedTryStore, Updated: CI->getNewValOperand(), PMV);
1499 Value *StoreSuccess = TLI->emitStoreConditional(Builder, Val: NewValueInsert,
1500 Addr: PMV.AlignedAddr, Ord: MemOpOrder);
1501 StoreSuccess = Builder.CreateICmpEQ(
1502 LHS: StoreSuccess, RHS: ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx), V: 0), Name: "success");
1503 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1504 Builder.CreateCondBr(Cond: StoreSuccess, True: SuccessBB,
1505 False: CI->isWeak() ? FailureBB : RetryBB,
1506 BranchWeights: MDBuilder(F->getContext()).createLikelyBranchWeights());
1507
1508 Builder.SetInsertPoint(ReleasedLoadBB);
1509 Value *SecondLoad;
1510 if (HasReleasedLoadBB) {
1511 SecondLoad =
1512 TLI->emitLoadLinked(Builder, ValueTy: PMV.WordType, Addr: PMV.AlignedAddr, Ord: MemOpOrder);
1513 Value *SecondLoadExtract = extractMaskedValue(Builder, WideWord: SecondLoad, PMV);
1514 ShouldStore = Builder.CreateICmpEQ(LHS: SecondLoadExtract,
1515 RHS: CI->getCompareOperand(), Name: "should_store");
1516
1517 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1518 // jump straight past that fence instruction (if it exists).
1519 Builder.CreateCondBr(
1520 Cond: ShouldStore, True: TryStoreBB, False: NoStoreBB,
1521 BranchWeights: MDBuilder(F->getContext()).createLikelyBranchWeights());
1522 // Update PHI node in TryStoreBB.
1523 LoadedTryStore->addIncoming(V: SecondLoad, BB: ReleasedLoadBB);
1524 } else
1525 Builder.CreateUnreachable();
1526
1527 // Make sure later instructions don't get reordered with a fence if
1528 // necessary.
1529 Builder.SetInsertPoint(SuccessBB);
1530 if (ShouldInsertFencesForAtomic ||
1531 TLI->shouldInsertTrailingSeqCstFenceForAtomicStore(I: CI))
1532 TLI->emitTrailingFence(Builder, Inst: CI, Ord: SuccessOrder);
1533 Builder.CreateBr(Dest: ExitBB);
1534
1535 Builder.SetInsertPoint(NoStoreBB);
1536 PHINode *LoadedNoStore =
1537 Builder.CreatePHI(Ty: UnreleasedLoad->getType(), NumReservedValues: 2, Name: "loaded.nostore");
1538 LoadedNoStore->addIncoming(V: UnreleasedLoad, BB: StartBB);
1539 if (HasReleasedLoadBB)
1540 LoadedNoStore->addIncoming(V: SecondLoad, BB: ReleasedLoadBB);
1541
1542 // In the failing case, where we don't execute the store-conditional, the
1543 // target might want to balance out the load-linked with a dedicated
1544 // instruction (e.g., on ARM, clearing the exclusive monitor).
1545 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1546 Builder.CreateBr(Dest: FailureBB);
1547
1548 Builder.SetInsertPoint(FailureBB);
1549 PHINode *LoadedFailure =
1550 Builder.CreatePHI(Ty: UnreleasedLoad->getType(), NumReservedValues: 2, Name: "loaded.failure");
1551 LoadedFailure->addIncoming(V: LoadedNoStore, BB: NoStoreBB);
1552 if (CI->isWeak())
1553 LoadedFailure->addIncoming(V: LoadedTryStore, BB: TryStoreBB);
1554 if (ShouldInsertFencesForAtomic)
1555 TLI->emitTrailingFence(Builder, Inst: CI, Ord: FailureOrder);
1556 Builder.CreateBr(Dest: ExitBB);
1557
1558 // Finally, we have control-flow based knowledge of whether the cmpxchg
1559 // succeeded or not. We expose this to later passes by building PHIs for
1560 // the loaded value and the success flag in the exit block and replacing
1561 // any extractvalue uses of the cmpxchg's { iN, i1 } result with them.
1562 Builder.SetInsertPoint(TheBB: ExitBB, IP: ExitBB->begin());
1563 PHINode *LoadedExit =
1564 Builder.CreatePHI(Ty: UnreleasedLoad->getType(), NumReservedValues: 2, Name: "loaded.exit");
1565 LoadedExit->addIncoming(V: LoadedTryStore, BB: SuccessBB);
1566 LoadedExit->addIncoming(V: LoadedFailure, BB: FailureBB);
1567 PHINode *Success = Builder.CreatePHI(Ty: Type::getInt1Ty(C&: Ctx), NumReservedValues: 2, Name: "success");
1568 Success->addIncoming(V: ConstantInt::getTrue(Context&: Ctx), BB: SuccessBB);
1569 Success->addIncoming(V: ConstantInt::getFalse(Context&: Ctx), BB: FailureBB);
1570
1571 // This is the "exit value" from the cmpxchg expansion. It may be of
1572 // a type wider than the one in the cmpxchg instruction.
1573 Value *LoadedFull = LoadedExit;
1574
1575 Builder.SetInsertPoint(TheBB: ExitBB, IP: std::next(x: Success->getIterator()));
1576 Value *Loaded = extractMaskedValue(Builder, WideWord: LoadedFull, PMV);
1577
1578 // Look for any users of the cmpxchg that just extract the loaded value or
1579 // the success flag, and replace them with the CFG-derived versions.
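// For example (illustrative), a user such as
//   %ok = extractvalue { i32, i1 } %pair, 1
// is rewritten to use the %success PHI, while
//   %val = extractvalue { i32, i1 } %pair, 0
// is rewritten to use the extracted loaded value.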
1580 SmallVector<ExtractValueInst *, 2> PrunedInsts;
1581 for (auto *User : CI->users()) {
1582 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Val: User);
1583 if (!EV)
1584 continue;
1585
1586 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1587 "weird extraction from { iN, i1 }");
1588
1589 if (EV->getIndices()[0] == 0)
1590 EV->replaceAllUsesWith(V: Loaded);
1591 else
1592 EV->replaceAllUsesWith(V: Success);
1593
1594 PrunedInsts.push_back(Elt: EV);
1595 }
1596
1597 // We can remove the instructions now we're no longer iterating through them.
1598 for (auto *EV : PrunedInsts)
1599 EV->eraseFromParent();
1600
1601 if (!CI->use_empty()) {
1602 // Some use of the full struct return that we don't understand has happened,
1603 // so we've got to reconstruct it properly.
1604 Value *Res;
1605 Res = Builder.CreateInsertValue(Agg: PoisonValue::get(T: CI->getType()), Val: Loaded, Idxs: 0);
1606 Res = Builder.CreateInsertValue(Agg: Res, Val: Success, Idxs: 1);
1607
1608 CI->replaceAllUsesWith(V: Res);
1609 }
1610
1611 CI->eraseFromParent();
1612 return true;
1613}
1614
1615bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1616 // TODO: Add floating point support.
1617 auto C = dyn_cast<ConstantInt>(Val: RMWI->getValOperand());
1618 if (!C)
1619 return false;
1620
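// An RMW is idempotent when the operation cannot change the value in memory;
// e.g. (illustrative) "atomicrmw add ptr %p, i32 0 seq_cst" or
// "atomicrmw and ptr %p, i32 -1 seq_cst" always leave the value at %p
// unchanged.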
1621 switch (RMWI->getOperation()) {
1622 case AtomicRMWInst::Add:
1623 case AtomicRMWInst::Sub:
1624 case AtomicRMWInst::Or:
1625 case AtomicRMWInst::Xor:
1626 return C->isZero();
1627 case AtomicRMWInst::And:
1628 return C->isMinusOne();
1629 case AtomicRMWInst::Min:
1630 return C->isMaxValue(IsSigned: true);
1631 case AtomicRMWInst::Max:
1632 return C->isMinValue(IsSigned: true);
1633 case AtomicRMWInst::UMin:
1634 return C->isMaxValue(IsSigned: false);
1635 case AtomicRMWInst::UMax:
1636 return C->isMinValue(IsSigned: false);
1637 default:
1638 return false;
1639 }
1640}
1641
1642bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
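// Targets may be able to lower an idempotent RMW into a cheaper fence plus an
// ordinary atomic load; the resulting load may itself still require expansion,
// so it is handed back to tryExpandAtomicLoad.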
1643 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1644 tryExpandAtomicLoad(LI: ResultingLoad);
1645 return true;
1646 }
1647 return false;
1648}
1649
1650Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1651 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1652 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1653 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1654 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1655 LLVMContext &Ctx = Builder.getContext();
1656 BasicBlock *BB = Builder.GetInsertBlock();
1657 Function *F = BB->getParent();
1658
1659 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1660 //
1661 // The standard expansion we produce is:
1662 // [...]
1663 // %init_loaded = load atomic iN* %addr
1664 // br label %loop
1665 // loop:
1666 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1667 // %new = some_op iN %loaded, %incr
1668 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1669 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1670 // %success = extractvalue { iN, i1 } %pair, 1
1671 // br i1 %success, label %atomicrmw.end, label %loop
1672 // atomicrmw.end:
1673 // [...]
1674 BasicBlock *ExitBB =
1675 BB->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "atomicrmw.end");
1676 BasicBlock *LoopBB = BasicBlock::Create(Context&: Ctx, Name: "atomicrmw.start", Parent: F, InsertBefore: ExitBB);
1677
1678 // The split call above "helpfully" added a branch at the end of BB (to the
1679 // wrong place), but we want a load. It's easiest to just remove
1680 // the branch entirely.
1681 std::prev(x: BB->end())->eraseFromParent();
1682 Builder.SetInsertPoint(BB);
1683 LoadInst *InitLoaded = Builder.CreateAlignedLoad(Ty: ResultTy, Ptr: Addr, Align: AddrAlign);
1684 Builder.CreateBr(Dest: LoopBB);
1685
1686 // Start the main loop block now that we've taken care of the preliminaries.
1687 Builder.SetInsertPoint(LoopBB);
1688 PHINode *Loaded = Builder.CreatePHI(Ty: ResultTy, NumReservedValues: 2, Name: "loaded");
1689 Loaded->addIncoming(V: InitLoaded, BB);
1690
1691 Value *NewVal = PerformOp(Builder, Loaded);
1692
1693 Value *NewLoaded = nullptr;
1694 Value *Success = nullptr;
1695
1696 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1697 MemOpOrder == AtomicOrdering::Unordered
1698 ? AtomicOrdering::Monotonic
1699 : MemOpOrder,
1700 SSID, Success, NewLoaded, MetadataSrc);
1701 assert(Success && NewLoaded);
1702
1703 Loaded->addIncoming(V: NewLoaded, BB: LoopBB);
1704
1705 Instruction *CondBr = Builder.CreateCondBr(Cond: Success, True: ExitBB, False: LoopBB);
1706
1707 // Atomic RMW expands to a cmpxchg loop. Since precise branch weights
1708 // cannot be easily determined here, we mark the branch as "unknown" (50/50)
1709 // to prevent misleading optimizations.
1710 setExplicitlyUnknownBranchWeightsIfProfiled(I&: *CondBr, DEBUG_TYPE);
1711
1712 Builder.SetInsertPoint(TheBB: ExitBB, IP: ExitBB->begin());
1713 return NewLoaded;
1714}
1715
1716bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1717 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1718 unsigned ValueSize = getAtomicOpSize(CASI: CI);
1719
1720 switch (TLI->shouldExpandAtomicCmpXchgInIR(AI: CI)) {
1721 default:
1722 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1723 case TargetLoweringBase::AtomicExpansionKind::None:
1724 if (ValueSize < MinCASSize)
1725 return expandPartwordCmpXchg(CI);
1726 return false;
1727 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1728 return expandAtomicCmpXchg(CI);
1729 }
1730 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1731 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1732 return true;
1733 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1734 return lowerAtomicCmpXchgInst(CXI: CI);
1735 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1736 TLI->emitExpandAtomicCmpXchg(CI);
1737 return true;
1738 }
1739 }
1740}
1741
1742// Note: This function is exposed externally by AtomicExpandUtils.h
1743bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1744 CreateCmpXchgInstFun CreateCmpXchg) {
1745 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1746 Builder.setIsFPConstrained(
1747 AI->getFunction()->hasFnAttribute(Kind: Attribute::StrictFP));
1748
1749 // FIXME: If FP exceptions are observable, we should force them off for the
1750 // loop for the FP atomics.
1751 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1752 Builder, ResultTy: AI->getType(), Addr: AI->getPointerOperand(), AddrAlign: AI->getAlign(),
1753 MemOpOrder: AI->getOrdering(), SSID: AI->getSyncScopeID(),
1754 PerformOp: [&](IRBuilderBase &Builder, Value *Loaded) {
1755 return buildAtomicRMWValue(Op: AI->getOperation(), Builder, Loaded,
1756 Val: AI->getValOperand());
1757 },
1758 CreateCmpXchg, /*MetadataSrc=*/AI);
1759
1760 AI->replaceAllUsesWith(V: Loaded);
1761 AI->eraseFromParent();
1762 return true;
1763}
1764
1765// In order to use one of the sized library calls such as
1766// __atomic_fetch_add_4, the alignment must be sufficient, the size
1767// must be one of the potentially-specialized sizes, and the value
1768// type must actually exist in C on the target (otherwise, the
1769 // function wouldn't actually be defined).
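// For example (illustrative), a naturally aligned 4-byte operation can use
// __atomic_fetch_add_4, whereas a 4-byte operation known only to be 2-byte
// aligned must fall back to the generic, size_t-based __atomic_* calls.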
1770static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1771 const DataLayout &DL) {
1772 // TODO: "LargestSize" is an approximation for "largest type that
1773 // you can express in C". It seems to be the case that int128 is
1774 // supported on all 64-bit platforms, otherwise only up to 64-bit
1775 // integers are supported. If we get this wrong, then we'll try to
1776 // call a sized libcall that doesn't actually exist. There should
1777 // really be some more reliable way in LLVM of determining integer
1778 // sizes which are valid in the target's C ABI...
1779 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1780 return Alignment >= Size &&
1781 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1782 Size <= LargestSize;
1783}
1784
1785void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1786 static const RTLIB::Libcall Libcalls[6] = {
1787 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1788 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1789 unsigned Size = getAtomicOpSize(LI: I);
1790
1791 bool expanded = expandAtomicOpToLibcall(
1792 I, Size, Alignment: I->getAlign(), PointerOperand: I->getPointerOperand(), ValueOperand: nullptr, CASExpected: nullptr,
1793 Ordering: I->getOrdering(), Ordering2: AtomicOrdering::NotAtomic, Libcalls);
1794 if (!expanded)
1795 handleFailure(FailedInst&: *I, Msg: "unsupported atomic load");
1796}
1797
1798void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1799 static const RTLIB::Libcall Libcalls[6] = {
1800 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1801 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1802 unsigned Size = getAtomicOpSize(SI: I);
1803
1804 bool expanded = expandAtomicOpToLibcall(
1805 I, Size, Alignment: I->getAlign(), PointerOperand: I->getPointerOperand(), ValueOperand: I->getValueOperand(),
1806 CASExpected: nullptr, Ordering: I->getOrdering(), Ordering2: AtomicOrdering::NotAtomic, Libcalls);
1807 if (!expanded)
1808 handleFailure(FailedInst&: *I, Msg: "unsupported atomic store");
1809}
1810
1811void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1812 static const RTLIB::Libcall Libcalls[6] = {
1813 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1814 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1815 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1816 unsigned Size = getAtomicOpSize(CASI: I);
1817
1818 bool expanded = expandAtomicOpToLibcall(
1819 I, Size, Alignment: I->getAlign(), PointerOperand: I->getPointerOperand(), ValueOperand: I->getNewValOperand(),
1820 CASExpected: I->getCompareOperand(), Ordering: I->getSuccessOrdering(), Ordering2: I->getFailureOrdering(),
1821 Libcalls);
1822 if (!expanded)
1823 handleFailure(FailedInst&: *I, Msg: "unsupported cmpxchg");
1824}
1825
1826static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1827 static const RTLIB::Libcall LibcallsXchg[6] = {
1828 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1829 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1830 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1831 static const RTLIB::Libcall LibcallsAdd[6] = {
1832 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1833 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1834 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1835 static const RTLIB::Libcall LibcallsSub[6] = {
1836 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1837 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1838 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1839 static const RTLIB::Libcall LibcallsAnd[6] = {
1840 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1841 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1842 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1843 static const RTLIB::Libcall LibcallsOr[6] = {
1844 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1845 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1846 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1847 static const RTLIB::Libcall LibcallsXor[6] = {
1848 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1849 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1850 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1851 static const RTLIB::Libcall LibcallsNand[6] = {
1852 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1853 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1854 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1855
1856 switch (Op) {
1857 case AtomicRMWInst::BAD_BINOP:
1858 llvm_unreachable("Should not have BAD_BINOP.");
1859 case AtomicRMWInst::Xchg:
1860 return ArrayRef(LibcallsXchg);
1861 case AtomicRMWInst::Add:
1862 return ArrayRef(LibcallsAdd);
1863 case AtomicRMWInst::Sub:
1864 return ArrayRef(LibcallsSub);
1865 case AtomicRMWInst::And:
1866 return ArrayRef(LibcallsAnd);
1867 case AtomicRMWInst::Or:
1868 return ArrayRef(LibcallsOr);
1869 case AtomicRMWInst::Xor:
1870 return ArrayRef(LibcallsXor);
1871 case AtomicRMWInst::Nand:
1872 return ArrayRef(LibcallsNand);
1873 case AtomicRMWInst::Max:
1874 case AtomicRMWInst::Min:
1875 case AtomicRMWInst::UMax:
1876 case AtomicRMWInst::UMin:
1877 case AtomicRMWInst::FMax:
1878 case AtomicRMWInst::FMin:
1879 case AtomicRMWInst::FMaximum:
1880 case AtomicRMWInst::FMinimum:
1881 case AtomicRMWInst::FAdd:
1882 case AtomicRMWInst::FSub:
1883 case AtomicRMWInst::UIncWrap:
1884 case AtomicRMWInst::UDecWrap:
1885 case AtomicRMWInst::USubCond:
1886 case AtomicRMWInst::USubSat:
1887 // No atomic libcalls are available for these.
1888 return {};
1889 }
1890 llvm_unreachable("Unexpected AtomicRMW operation.");
1891}
1892
1893void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1894 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(Op: I->getOperation());
1895
1896 unsigned Size = getAtomicOpSize(RMWI: I);
1897
1898 bool Success = false;
1899 if (!Libcalls.empty())
1900 Success = expandAtomicOpToLibcall(
1901 I, Size, Alignment: I->getAlign(), PointerOperand: I->getPointerOperand(), ValueOperand: I->getValOperand(),
1902 CASExpected: nullptr, Ordering: I->getOrdering(), Ordering2: AtomicOrdering::NotAtomic, Libcalls);
1903
1904 // The expansion failed: either there were no libcalls at all for
1905 // the operation (min/max), or there were only size-specialized
1906 // libcalls (add/sub/etc) and we needed a generic one. So, expand to a
1907 // CAS libcall, via a CAS loop, instead.
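// For instance (illustrative), "atomicrmw max" has no __atomic_fetch_max_N
// libcall, so it becomes a compare-exchange loop whose cmpxchg is in turn
// expanded to an __atomic_compare_exchange call below.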
1908 if (!Success) {
1909 expandAtomicRMWToCmpXchg(
1910 AI: I, CreateCmpXchg: [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1911 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1912 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded,
1913 Instruction *MetadataSrc) {
1914 // Create the CAS instruction normally...
1915 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1916 Ptr: Addr, Cmp: Loaded, New: NewVal, Align: Alignment, SuccessOrdering: MemOpOrder,
1917 FailureOrdering: AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: MemOpOrder), SSID);
1918 if (MetadataSrc)
1919 copyMetadataForAtomic(Dest&: *Pair, Source: *MetadataSrc);
1920
1921 Success = Builder.CreateExtractValue(Agg: Pair, Idxs: 1, Name: "success");
1922 NewLoaded = Builder.CreateExtractValue(Agg: Pair, Idxs: 0, Name: "newloaded");
1923
1924 // ...and then expand the CAS into a libcall.
1925 expandAtomicCASToLibcall(I: Pair);
1926 });
1927 }
1928}
1929
1930// A helper routine for the above expandAtomic*ToLibcall functions.
1931//
1932// 'Libcalls' contains an array of enum values for the particular
1933// ATOMIC libcalls to be emitted. All of the other arguments besides
1934// 'I' are extracted from the Instruction subclass by the
1935// caller. Depending on the particular call, some will be null.
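// For example (illustrative), an atomic load passes null for both ValueOperand
// and CASExpected, a store or RMW passes its value as ValueOperand, and a
// cmpxchg passes its new value as ValueOperand and its comparison value as
// CASExpected.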
1936bool AtomicExpandImpl::expandAtomicOpToLibcall(
1937 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1938 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1939 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1940 assert(Libcalls.size() == 6);
1941
1942 LLVMContext &Ctx = I->getContext();
1943 Module *M = I->getModule();
1944 const DataLayout &DL = M->getDataLayout();
1945 IRBuilder<> Builder(I);
1946 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1947
1948 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1949 Type *SizedIntTy = Type::getIntNTy(C&: Ctx, N: Size * 8);
1950
1951 const Align AllocaAlignment = DL.getPrefTypeAlign(Ty: SizedIntTy);
1952
1953 // TODO: the "order" argument type is "int", not int32. So
1954 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
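// (Illustrative: toCABI maps e.g. monotonic to the C ABI value
// __ATOMIC_RELAXED == 0 and seq_cst to __ATOMIC_SEQ_CST == 5.)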
1955 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1956 Constant *OrderingVal =
1957 ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx), V: (int)toCABI(AO: Ordering));
1958 Constant *Ordering2Val = nullptr;
1959 if (CASExpected) {
1960 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1961 Ordering2Val =
1962 ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx), V: (int)toCABI(AO: Ordering2));
1963 }
1964 bool HasResult = I->getType() != Type::getVoidTy(C&: Ctx);
1965
1966 RTLIB::Libcall RTLibType;
1967 if (UseSizedLibcall) {
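// Libcalls[0] is the generic (size_t-based) entry point; indices 1..5 hold the
// _1, _2, _4, _8 and _16 specializations, in that order.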
1968 switch (Size) {
1969 case 1:
1970 RTLibType = Libcalls[1];
1971 break;
1972 case 2:
1973 RTLibType = Libcalls[2];
1974 break;
1975 case 4:
1976 RTLibType = Libcalls[3];
1977 break;
1978 case 8:
1979 RTLibType = Libcalls[4];
1980 break;
1981 case 16:
1982 RTLibType = Libcalls[5];
1983 break;
1984 }
1985 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1986 RTLibType = Libcalls[0];
1987 } else {
1988 // Can't use sized function, and there's no generic for this
1989 // operation, so give up.
1990 return false;
1991 }
1992
1993 RTLIB::LibcallImpl LibcallImpl = LibcallLowering->getLibcallImpl(Call: RTLibType);
1994 if (LibcallImpl == RTLIB::Unsupported) {
1995 // This target does not implement the requested atomic libcall so give up.
1996 return false;
1997 }
1998
1999 // Build up the function call. There are two kinds. First, the sized
2000 // variants. These calls are going to be one of the following (with
2001 // N=1,2,4,8,16):
2002 // iN __atomic_load_N(iN *ptr, int ordering)
2003 // void __atomic_store_N(iN *ptr, iN val, int ordering)
2004 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
2005 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
2006 // int success_order, int failure_order)
2007 //
2008 // Note that these functions can be used for non-integer atomic
2009 // operations, the values just need to be bitcast to integers on the
2010 // way in and out.
2011 //
2012 // And, then, the generic variants. They look like the following:
2013 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
2014 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
2015 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
2016 // int ordering)
2017 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
2018 // void *desired, int success_order,
2019 // int failure_order)
2020 //
2021 // The different signatures are built up depending on the
2022 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
2023 // variables.
2024
2025 AllocaInst *AllocaCASExpected = nullptr;
2026 AllocaInst *AllocaValue = nullptr;
2027 AllocaInst *AllocaResult = nullptr;
2028
2029 Type *ResultTy;
2030 SmallVector<Value *, 6> Args;
2031 AttributeList Attr;
2032
2033 // 'size' argument.
2034 if (!UseSizedLibcall) {
2035 // Note, getIntPtrType is assumed equivalent to size_t.
2036 Args.push_back(Elt: ConstantInt::get(Ty: DL.getIntPtrType(C&: Ctx), V: Size));
2037 }
2038
2039 // 'ptr' argument.
2040 // Note: This assumes all address spaces share a common libfunc
2041 // implementation and that addresses are convertible. For systems without
2042 // that property, we'd need to extend this mechanism to support AS-specific
2043 // families of atomic intrinsics.
2044 Value *PtrVal = PointerOperand;
2045 PtrVal = Builder.CreateAddrSpaceCast(V: PtrVal, DestTy: PointerType::getUnqual(C&: Ctx));
2046 Args.push_back(Elt: PtrVal);
2047
2048 // 'expected' argument, if present.
2049 if (CASExpected) {
2050 AllocaCASExpected = AllocaBuilder.CreateAlloca(Ty: CASExpected->getType());
2051 AllocaCASExpected->setAlignment(AllocaAlignment);
2052 Builder.CreateLifetimeStart(Ptr: AllocaCASExpected);
2053 Builder.CreateAlignedStore(Val: CASExpected, Ptr: AllocaCASExpected, Align: AllocaAlignment);
2054 Args.push_back(Elt: AllocaCASExpected);
2055 }
2056
2057 // 'val' argument ('desired' for cas), if present.
2058 if (ValueOperand) {
2059 if (UseSizedLibcall) {
2060 Value *IntValue =
2061 Builder.CreateBitOrPointerCast(V: ValueOperand, DestTy: SizedIntTy);
2062 Args.push_back(Elt: IntValue);
2063 } else {
2064 AllocaValue = AllocaBuilder.CreateAlloca(Ty: ValueOperand->getType());
2065 AllocaValue->setAlignment(AllocaAlignment);
2066 Builder.CreateLifetimeStart(Ptr: AllocaValue);
2067 Builder.CreateAlignedStore(Val: ValueOperand, Ptr: AllocaValue, Align: AllocaAlignment);
2068 Args.push_back(Elt: AllocaValue);
2069 }
2070 }
2071
2072 // 'ret' argument.
2073 if (!CASExpected && HasResult && !UseSizedLibcall) {
2074 AllocaResult = AllocaBuilder.CreateAlloca(Ty: I->getType());
2075 AllocaResult->setAlignment(AllocaAlignment);
2076 Builder.CreateLifetimeStart(Ptr: AllocaResult);
2077 Args.push_back(Elt: AllocaResult);
2078 }
2079
2080 // 'ordering' ('success_order' for cas) argument.
2081 Args.push_back(Elt: OrderingVal);
2082
2083 // 'failure_order' argument, if present.
2084 if (Ordering2Val)
2085 Args.push_back(Elt: Ordering2Val);
2086
2087 // Now, the return type.
2088 if (CASExpected) {
2089 ResultTy = Type::getInt1Ty(C&: Ctx);
2090 Attr = Attr.addRetAttribute(C&: Ctx, Kind: Attribute::ZExt);
2091 } else if (HasResult && UseSizedLibcall)
2092 ResultTy = SizedIntTy;
2093 else
2094 ResultTy = Type::getVoidTy(C&: Ctx);
2095
2096 // Done with setting up arguments and return types, create the call:
2097 SmallVector<Type *, 6> ArgTys;
2098 for (Value *Arg : Args)
2099 ArgTys.push_back(Elt: Arg->getType());
2100 FunctionType *FnType = FunctionType::get(Result: ResultTy, Params: ArgTys, isVarArg: false);
2101 FunctionCallee LibcallFn = M->getOrInsertFunction(
2102 Name: RTLIB::RuntimeLibcallsInfo::getLibcallImplName(CallImpl: LibcallImpl), T: FnType,
2103 AttributeList: Attr);
2104 CallInst *Call = Builder.CreateCall(Callee: LibcallFn, Args);
2105 Call->setAttributes(Attr);
2106 Value *Result = Call;
2107
2108 // And then, extract the results...
2109 if (ValueOperand && !UseSizedLibcall)
2110 Builder.CreateLifetimeEnd(Ptr: AllocaValue);
2111
2112 if (CASExpected) {
2113 // The final result from the CAS is {load of 'expected' alloca, bool result
2114 // from call}
2115 Type *FinalResultTy = I->getType();
2116 Value *V = PoisonValue::get(T: FinalResultTy);
2117 Value *ExpectedOut = Builder.CreateAlignedLoad(
2118 Ty: CASExpected->getType(), Ptr: AllocaCASExpected, Align: AllocaAlignment);
2119 Builder.CreateLifetimeEnd(Ptr: AllocaCASExpected);
2120 V = Builder.CreateInsertValue(Agg: V, Val: ExpectedOut, Idxs: 0);
2121 V = Builder.CreateInsertValue(Agg: V, Val: Result, Idxs: 1);
2122 I->replaceAllUsesWith(V);
2123 } else if (HasResult) {
2124 Value *V;
2125 if (UseSizedLibcall)
2126 V = Builder.CreateBitOrPointerCast(V: Result, DestTy: I->getType());
2127 else {
2128 V = Builder.CreateAlignedLoad(Ty: I->getType(), Ptr: AllocaResult,
2129 Align: AllocaAlignment);
2130 Builder.CreateLifetimeEnd(Ptr: AllocaResult);
2131 }
2132 I->replaceAllUsesWith(V);
2133 }
2134 I->eraseFromParent();
2135 return true;
2136}
2137