AtomicExpandPass.cpp source code [llvm_projects/llvm/lib/CodeGen/AtomicExpandPass.cpp]

1	//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target specific instruction which implement the
11	// same semantics in a way which better fits the target backend. This can
12	// include the use of (intrinsic-based) load-linked/store-conditional loops,
13	// AtomicCmpXchg, or type coercions.
14	//
15	//===----------------------------------------------------------------------===//
16
17	#include "llvm/ADT/ArrayRef.h"
18	#include "llvm/ADT/STLFunctionalExtras.h"
19	#include "llvm/ADT/SmallVector.h"
20	#include "llvm/Analysis/InstSimplifyFolder.h"
21	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
22	#include "llvm/CodeGen/AtomicExpand.h"
23	#include "llvm/CodeGen/AtomicExpandUtils.h"
24	#include "llvm/CodeGen/RuntimeLibcallUtil.h"
25	#include "llvm/CodeGen/TargetLowering.h"
26	#include "llvm/CodeGen/TargetPassConfig.h"
27	#include "llvm/CodeGen/TargetSubtargetInfo.h"
28	#include "llvm/CodeGen/ValueTypes.h"
29	#include "llvm/IR/Attributes.h"
30	#include "llvm/IR/BasicBlock.h"
31	#include "llvm/IR/Constant.h"
32	#include "llvm/IR/Constants.h"
33	#include "llvm/IR/DataLayout.h"
34	#include "llvm/IR/DerivedTypes.h"
35	#include "llvm/IR/Function.h"
36	#include "llvm/IR/IRBuilder.h"
37	#include "llvm/IR/InstIterator.h"
38	#include "llvm/IR/Instruction.h"
39	#include "llvm/IR/Instructions.h"
40	#include "llvm/IR/MDBuilder.h"
41	#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
42	#include "llvm/IR/Module.h"
43	#include "llvm/IR/Type.h"
44	#include "llvm/IR/User.h"
45	#include "llvm/IR/Value.h"
46	#include "llvm/InitializePasses.h"
47	#include "llvm/Pass.h"
48	#include "llvm/Support/AtomicOrdering.h"
49	#include "llvm/Support/Casting.h"
50	#include "llvm/Support/Debug.h"
51	#include "llvm/Support/ErrorHandling.h"
52	#include "llvm/Support/raw_ostream.h"
53	#include "llvm/Target/TargetMachine.h"
54	#include "llvm/Transforms/Utils/LowerAtomic.h"
55	#include <cassert>
56	#include <cstdint>
57	#include <iterator>
58
59	using namespace llvm;
60
61	#define DEBUG_TYPE "atomic-expand"
62
63	namespace {
64
65	class AtomicExpandImpl {
66	const TargetLowering TLI = nullptr*;
67	const DataLayout DL = nullptr*;
68
69	private:
70	bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
71	IntegerType getCorrespondingIntegerType(Type T, const DataLayout &DL);
72	LoadInst convertAtomicLoadToIntegerType(LoadInst LI);
73	bool tryExpandAtomicLoad(LoadInst *LI);
74	bool expandAtomicLoadToLL(LoadInst *LI);
75	bool expandAtomicLoadToCmpXchg(LoadInst *LI);
76	StoreInst convertAtomicStoreToIntegerType(StoreInst SI);
77	bool tryExpandAtomicStore(StoreInst *SI);
78	void expandAtomicStore(StoreInst *SI);
79	bool tryExpandAtomicRMW(AtomicRMWInst *AI);
80	AtomicRMWInst convertAtomicXchgToIntegerType(AtomicRMWInst RMWI);
81	Value *
82	insertRMWLLSCLoop(IRBuilderBase &Builder, Type ResultTy, Value Addr,
83	Align AddrAlign, AtomicOrdering MemOpOrder,
84	function_ref<Value (IRBuilderBase &, Value )> PerformOp);
85	void expandAtomicOpToLLSC(
86	Instruction I, Type ResultTy, Value *Addr, Align AddrAlign,
87	AtomicOrdering MemOpOrder,
88	function_ref<Value (IRBuilderBase &, Value )> PerformOp);
89	void expandPartwordAtomicRMW(
90	AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
91	AtomicRMWInst widenPartwordAtomicRMW(AtomicRMWInst AI);
92	bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
93	void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
94	void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
95
96	AtomicCmpXchgInst convertCmpXchgToIntegerType(AtomicCmpXchgInst CI);
97	static Value *insertRMWCmpXchgLoop(
98	IRBuilderBase &Builder, Type ResultType, Value Addr, Align AddrAlign,
99	AtomicOrdering MemOpOrder, SyncScope::ID SSID,
100	function_ref<Value (IRBuilderBase &, Value )> PerformOp,
101	CreateCmpXchgInstFun CreateCmpXchg);
102	bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
103
104	bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
105	bool isIdempotentRMW(AtomicRMWInst *RMWI);
106	bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
107
108	bool expandAtomicOpToLibcall(Instruction I, unsigned* Size, Align Alignment,
109	Value PointerOperand, Value ValueOperand,
110	Value *CASExpected, AtomicOrdering Ordering,
111	AtomicOrdering Ordering2,
112	ArrayRef<RTLIB::Libcall> Libcalls);
113	void expandAtomicLoadToLibcall(LoadInst *LI);
114	void expandAtomicStoreToLibcall(StoreInst *LI);
115	void expandAtomicRMWToLibcall(AtomicRMWInst *I);
116	void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
117
118	friend bool
119	llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
120	CreateCmpXchgInstFun CreateCmpXchg);
121
122	public:
123	bool run(Function &F, const TargetMachine *TM);
124	};
125
126	class AtomicExpandLegacy : public FunctionPass {
127	public:
128	static char ID; // Pass identification, replacement for typeid
129
130	AtomicExpandLegacy() : FunctionPass (ID) {
131	initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
132	}
133
134	bool runOnFunction(Function &F) override;
135	};
136
137	// IRBuilder to be used for replacement atomic instructions.
138	struct ReplacementIRBuilder
139	: IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
140	MDNode MMRAMD = nullptr*;
141
142	// Preserves the DebugLoc from I, and preserves still valid metadata.
143	// Enable StrictFP builder mode when appropriate.
144	explicit ReplacementIRBuilder(Instruction I, const* DataLayout &DL)
145	: IRBuilder (I->getContext(), DL,
146	IRBuilderCallbackInserter (
147	[this](Instruction *I) { addMMRAMD(I); })) {
148	SetInsertPoint(I);
149	this->CollectMetadataToCopy(Src: I, MetadataKinds: {LLVMContext::MD_pcsections});
150	if (BB->getParent()->getAttributes().hasFnAttr(Kind: Attribute::StrictFP))
151	this->setIsFPConstrained(true);
152
153	MMRAMD = I->getMetadata(KindID: LLVMContext::MD_mmra);
154	}
155
156	void addMMRAMD(Instruction *I) {
157	if (canInstructionHaveMMRAs(I: *I))
158	I->setMetadata(KindID: LLVMContext::MD_mmra, Node: MMRAMD);
159	}
160	};
161
162	} // end anonymous namespace
163
164	char AtomicExpandLegacy::ID = `0`;
165
166	char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
167
168	INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
169	"Expand Atomic instructions", false, false)
170	INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
171	INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
172	"Expand Atomic instructions", false, false)
173
174	// Helper functions to retrieve the size of atomic instructions.
175	static unsigned getAtomicOpSize(LoadInst *LI) {
176	const DataLayout &DL = LI->getDataLayout();
177	return DL.getTypeStoreSize(Ty: LI->getType());
178	}
179
180	static unsigned getAtomicOpSize(StoreInst *SI) {
181	const DataLayout &DL = SI->getDataLayout();
182	return DL.getTypeStoreSize(Ty: SI->getValueOperand()->getType());
183	}
184
185	static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
186	const DataLayout &DL = RMWI->getDataLayout();
187	return DL.getTypeStoreSize(Ty: RMWI->getValOperand()->getType());
188	}
189
190	static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
191	const DataLayout &DL = CASI->getDataLayout();
192	return DL.getTypeStoreSize(Ty: CASI->getCompareOperand()->getType());
193	}
194
195	// Determine if a particular atomic operation has a supported size,
196	// and is of appropriate alignment, to be passed through for target
197	// lowering. (Versus turning into a __atomic libcall)
198	template <typename Inst>
199	static bool atomicSizeSupported(const TargetLowering TLI, Inst I) {
200	unsigned Size = getAtomicOpSize(I);
201	Align Alignment = I->getAlign();
202	return Alignment >= Size &&
203	Size <= TLI->getMaxAtomicSizeInBitsSupported() / `8`;
204	}
205
206	bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
207	const auto *Subtarget = TM->getSubtargetImpl(F);
208	if (!Subtarget->enableAtomicExpand())
209	return false;
210	TLI = Subtarget->getTargetLowering();
211	DL = &F.getDataLayout();
212
213	SmallVector<Instruction *, `1`> AtomicInsts;
214
215	// Changing control-flow while iterating through it is a bad idea, so gather a
216	// list of all atomic instructions before we start.
217	for (Instruction &I : instructions(F))
218	if (I.isAtomic() && !isa<FenceInst>(Val: &I))
219	AtomicInsts.push_back(Elt: &I);
220
221	bool MadeChange = false;
222	for (auto *I : AtomicInsts) {
223	auto LI = dyn_cast<LoadInst>(Val: I);
224	auto SI = dyn_cast<StoreInst>(Val: I);
225	auto RMWI = dyn_cast<AtomicRMWInst>(Val: I);
226	auto CASI = dyn_cast<AtomicCmpXchgInst>(Val: I);
227	assert((LI \|\| SI \|\| RMWI \|\| CASI) && "Unknown atomic instruction");
228
229	// If the Size/Alignment is not supported, replace with a libcall.
230	if (LI) {
231	if (!atomicSizeSupported(TLI, I: LI)) {
232	expandAtomicLoadToLibcall(LI);
233	MadeChange = true;
234	continue;
235	}
236	} else if (SI) {
237	if (!atomicSizeSupported(TLI, I: SI)) {
238	expandAtomicStoreToLibcall(LI: SI);
239	MadeChange = true;
240	continue;
241	}
242	} else if (RMWI) {
243	if (!atomicSizeSupported(TLI, I: RMWI)) {
244	expandAtomicRMWToLibcall(I: RMWI);
245	MadeChange = true;
246	continue;
247	}
248	} else if (CASI) {
249	if (!atomicSizeSupported(TLI, I: CASI)) {
250	expandAtomicCASToLibcall(I: CASI);
251	MadeChange = true;
252	continue;
253	}
254	}
255
256	if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
257	TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
258	I = LI = convertAtomicLoadToIntegerType(LI);
259	MadeChange = true;
260	} else if (SI &&
261	TLI->shouldCastAtomicStoreInIR(SI) ==
262	TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
263	I = SI = convertAtomicStoreToIntegerType(SI);
264	MadeChange = true;
265	} else if (RMWI &&
266	TLI->shouldCastAtomicRMWIInIR(RMWI) ==
267	TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
268	I = RMWI = convertAtomicXchgToIntegerType(RMWI);
269	MadeChange = true;
270	} else if (CASI) {
271	// TODO: when we're ready to make the change at the IR level, we can
272	// extend convertCmpXchgToInteger for floating point too.
273	if (CASI->getCompareOperand()->getType()->isPointerTy()) {
274	// TODO: add a TLI hook to control this so that each target can
275	// convert to lowering the original type one at a time.
276	I = CASI = convertCmpXchgToIntegerType(CI: CASI);
277	MadeChange = true;
278	}
279	}
280
281	if (TLI->shouldInsertFencesForAtomic(I)) {
282	auto FenceOrdering = AtomicOrdering::Monotonic;
283	if (LI && isAcquireOrStronger(AO: LI->getOrdering())) {
284	FenceOrdering = LI->getOrdering();
285	LI->setOrdering(AtomicOrdering::Monotonic);
286	} else if (SI && isReleaseOrStronger(AO: SI->getOrdering())) {
287	FenceOrdering = SI->getOrdering();
288	SI->setOrdering(AtomicOrdering::Monotonic);
289	} else if (RMWI && (isReleaseOrStronger(AO: RMWI->getOrdering()) \|\|
290	isAcquireOrStronger(AO: RMWI->getOrdering()))) {
291	FenceOrdering = RMWI->getOrdering();
292	RMWI->setOrdering(AtomicOrdering::Monotonic);
293	} else if (CASI &&
294	TLI->shouldExpandAtomicCmpXchgInIR(AI: CASI) ==
295	TargetLoweringBase::AtomicExpansionKind::None &&
296	(isReleaseOrStronger(AO: CASI->getSuccessOrdering()) \|\|
297	isAcquireOrStronger(AO: CASI->getSuccessOrdering()) \|\|
298	isAcquireOrStronger(AO: CASI->getFailureOrdering()))) {
299	// If a compare and swap is lowered to LL/SC, we can do smarter fence
300	// insertion, with a stronger one on the success path than on the
301	// failure path. As a result, fence insertion is directly done by
302	// expandAtomicCmpXchg in that case.
303	FenceOrdering = CASI->getMergedOrdering();
304	CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
305	CASI->setFailureOrdering(AtomicOrdering::Monotonic);
306	}
307
308	if (FenceOrdering != AtomicOrdering::Monotonic) {
309	MadeChange \|= bracketInstWithFences(I, Order: FenceOrdering);
310	}
311	} else if (I->hasAtomicStore() &&
312	TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
313	auto FenceOrdering = AtomicOrdering::Monotonic;
314	if (SI)
315	FenceOrdering = SI->getOrdering();
316	else if (RMWI)
317	FenceOrdering = RMWI->getOrdering();
318	else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(AI: CASI) !=
319	TargetLoweringBase::AtomicExpansionKind::LLSC)
320	// LLSC is handled in expandAtomicCmpXchg().
321	FenceOrdering = CASI->getSuccessOrdering();
322
323	IRBuilder Builder(I);
324	if (auto TrailingFence =
325	TLI->emitTrailingFence(Builder, Inst: I, Ord: FenceOrdering)) {
326	TrailingFence->moveAfter(MovePos: I);
327	MadeChange = true;
328	}
329	}
330
331	if (LI)
332	MadeChange \|= tryExpandAtomicLoad(LI);
333	else if (SI)
334	MadeChange \|= tryExpandAtomicStore(SI);
335	else if (RMWI) {
336	// There are two different ways of expanding RMW instructions:
337	// - into a load if it is idempotent
338	// - into a Cmpxchg/LL-SC loop otherwise
339	// we try them in that order.
340
341	if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
342	MadeChange = true;
343	} else {
344	MadeChange \|= tryExpandAtomicRMW(AI: RMWI);
345	}
346	} else if (CASI)
347	MadeChange \|= tryExpandAtomicCmpXchg(CI: CASI);
348	}
349	return MadeChange;
350	}
351
352	bool AtomicExpandLegacy::runOnFunction(Function &F) {
353
354	auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
355	if (!TPC)
356	return false;
357	auto *TM = &TPC->getTM<TargetMachine>();
358	AtomicExpandImpl AE;
359	return AE.run(F, TM);
360	}
361
362	FunctionPass *llvm::createAtomicExpandLegacyPass() {
363	return new AtomicExpandLegacy ();
364	}
365
366	PreservedAnalyses AtomicExpandPass::run(Function &F,
367	FunctionAnalysisManager &AM) {
368	AtomicExpandImpl AE;
369
370	bool Changed = AE.run(F, TM);
371	if (!Changed)
372	return PreservedAnalyses::all();
373
374	return PreservedAnalyses::none();
375	}
376
377	bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
378	AtomicOrdering Order) {
379	ReplacementIRBuilder Builder(I, *DL);
380
381	auto LeadingFence = TLI->emitLeadingFence(Builder, Inst: I, Ord: Order);
382
383	auto TrailingFence = TLI->emitTrailingFence(Builder, Inst: I, Ord: Order);
384	// We have a guard here because not every atomic operation generates a
385	// trailing fence.
386	if (TrailingFence)
387	TrailingFence->moveAfter(MovePos: I);
388
389	return (LeadingFence \|\| TrailingFence);
390	}
391
392	/// Get the iX type with the same bitwidth as T.
393	IntegerType *
394	AtomicExpandImpl::getCorrespondingIntegerType(Type T, const* DataLayout &DL) {
395	EVT VT = TLI->getMemValueType(DL, Ty: T);
396	unsigned BitWidth = VT.getStoreSizeInBits();
397	assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
398	return IntegerType::get(C&: T->getContext(), NumBits: BitWidth);
399	}
400
401	/// Convert an atomic load of a non-integral type to an integer load of the
402	/// equivalent bitwidth. See the function comment on
403	/// convertAtomicStoreToIntegerType for background.
404	LoadInst AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst LI) {
405	auto *M = LI->getModule();
406	Type *NewTy = getCorrespondingIntegerType(T: LI->getType(), DL: M->getDataLayout());
407
408	ReplacementIRBuilder Builder(LI, *DL);
409
410	Value *Addr = LI->getPointerOperand();
411
412	auto *NewLI = Builder.CreateLoad(Ty: NewTy, Ptr: Addr);
413	NewLI->setAlignment(LI->getAlign());
414	NewLI->setVolatile(LI->isVolatile());
415	NewLI->setAtomic(Ordering: LI->getOrdering(), SSID: LI->getSyncScopeID());
416	LLVM_DEBUG(dbgs() << "Replaced " << LI << " with " << NewLI << "\n");
417
418	Value *NewVal = Builder.CreateBitCast(V: NewLI, DestTy: LI->getType());
419	LI->replaceAllUsesWith(V: NewVal);
420	LI->eraseFromParent();
421	return NewLI;
422	}
423
424	AtomicRMWInst *
425	AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
426	auto *M = RMWI->getModule();
427	Type *NewTy =
428	getCorrespondingIntegerType(T: RMWI->getType(), DL: M->getDataLayout());
429
430	ReplacementIRBuilder Builder(RMWI, *DL);
431
432	Value *Addr = RMWI->getPointerOperand();
433	Value *Val = RMWI->getValOperand();
434	Value *NewVal = Val->getType()->isPointerTy()
435	? Builder.CreatePtrToInt(V: Val, DestTy: NewTy)
436	: Builder.CreateBitCast(V: Val, DestTy: NewTy);
437
438	auto *NewRMWI = Builder.CreateAtomicRMW(Op: AtomicRMWInst::Xchg, Ptr: Addr, Val: NewVal,
439	Align: RMWI->getAlign(), Ordering: RMWI->getOrdering(),
440	SSID: RMWI->getSyncScopeID());
441	NewRMWI->setVolatile(RMWI->isVolatile());
442	LLVM_DEBUG(dbgs() << "Replaced " << RMWI << " with " << NewRMWI << "\n");
443
444	Value *NewRVal = RMWI->getType()->isPointerTy()
445	? Builder.CreateIntToPtr(V: NewRMWI, DestTy: RMWI->getType())
446	: Builder.CreateBitCast(V: NewRMWI, DestTy: RMWI->getType());
447	RMWI->replaceAllUsesWith(V: NewRVal);
448	RMWI->eraseFromParent();
449	return NewRMWI;
450	}
451
452	bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
453	switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
454	case TargetLoweringBase::AtomicExpansionKind::None:
455	return false;
456	case TargetLoweringBase::AtomicExpansionKind::LLSC:
457	expandAtomicOpToLLSC(
458	I: LI, ResultTy: LI->getType(), Addr: LI->getPointerOperand(), AddrAlign: LI->getAlign(),
459	MemOpOrder: LI->getOrdering(),
460	PerformOp: [](IRBuilderBase &Builder, Value Loaded) { return* Loaded; });
461	return true;
462	case TargetLoweringBase::AtomicExpansionKind::LLOnly:
463	return expandAtomicLoadToLL(LI);
464	case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
465	return expandAtomicLoadToCmpXchg(LI);
466	case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
467	LI->setAtomic(Ordering: AtomicOrdering::NotAtomic);
468	return true;
469	default:
470	llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
471	}
472	}
473
474	bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
475	switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
476	case TargetLoweringBase::AtomicExpansionKind::None:
477	return false;
478	case TargetLoweringBase::AtomicExpansionKind::Expand:
479	expandAtomicStore(SI);
480	return true;
481	case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
482	SI->setAtomic(Ordering: AtomicOrdering::NotAtomic);
483	return true;
484	default:
485	llvm_unreachable("Unhandled case in tryExpandAtomicStore");
486	}
487	}
488
489	bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
490	ReplacementIRBuilder Builder(LI, *DL);
491
492	// On some architectures, load-linked instructions are atomic for larger
493	// sizes than normal loads. For example, the only 64-bit load guaranteed
494	// to be single-copy atomic by ARM is an ldrexd (A3.5.3).
495	Value *Val = TLI->emitLoadLinked(Builder, ValueTy: LI->getType(),
496	Addr: LI->getPointerOperand(), Ord: LI->getOrdering());
497	TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
498
499	LI->replaceAllUsesWith(V: Val);
500	LI->eraseFromParent();
501
502	return true;
503	}
504
505	bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
506	ReplacementIRBuilder Builder(LI, *DL);
507	AtomicOrdering Order = LI->getOrdering();
508	if (Order == AtomicOrdering::Unordered)
509	Order = AtomicOrdering::Monotonic;
510
511	Value *Addr = LI->getPointerOperand();
512	Type *Ty = LI->getType();
513	Constant *DummyVal = Constant::getNullValue(Ty);
514
515	Value *Pair = Builder.CreateAtomicCmpXchg(
516	Ptr: Addr, Cmp: DummyVal, New: DummyVal, Align: LI->getAlign(), SuccessOrdering: Order,
517	FailureOrdering: AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: Order));
518	Value *Loaded = Builder.CreateExtractValue(Agg: Pair, Idxs: `0`, Name: "loaded");
519
520	LI->replaceAllUsesWith(V: Loaded);
521	LI->eraseFromParent();
522
523	return true;
524	}
525
526	/// Convert an atomic store of a non-integral type to an integer store of the
527	/// equivalent bitwidth. We used to not support floating point or vector
528	/// atomics in the IR at all. The backends learned to deal with the bitcast
529	/// idiom because that was the only way of expressing the notion of a atomic
530	/// float or vector store. The long term plan is to teach each backend to
531	/// instruction select from the original atomic store, but as a migration
532	/// mechanism, we convert back to the old format which the backends understand.
533	/// Each backend will need individual work to recognize the new format.
534	StoreInst AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst SI) {
535	ReplacementIRBuilder Builder(SI, *DL);
536	auto *M = SI->getModule();
537	Type *NewTy = getCorrespondingIntegerType(T: SI->getValueOperand()->getType(),
538	DL: M->getDataLayout());
539	Value *NewVal = Builder.CreateBitCast(V: SI->getValueOperand(), DestTy: NewTy);
540
541	Value *Addr = SI->getPointerOperand();
542
543	StoreInst *NewSI = Builder.CreateStore(Val: NewVal, Ptr: Addr);
544	NewSI->setAlignment(SI->getAlign());
545	NewSI->setVolatile(SI->isVolatile());
546	NewSI->setAtomic(Ordering: SI->getOrdering(), SSID: SI->getSyncScopeID());
547	LLVM_DEBUG(dbgs() << "Replaced " << SI << " with " << NewSI << "\n");
548	SI->eraseFromParent();
549	return NewSI;
550	}
551
552	void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
553	// This function is only called on atomic stores that are too large to be
554	// atomic if implemented as a native store. So we replace them by an
555	// atomic swap, that can be implemented for example as a ldrex/strex on ARM
556	// or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
557	// It is the responsibility of the target to only signal expansion via
558	// shouldExpandAtomicRMW in cases where this is required and possible.
559	ReplacementIRBuilder Builder(SI, *DL);
560	AtomicOrdering Ordering = SI->getOrdering();
561	assert(Ordering != AtomicOrdering::NotAtomic);
562	AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
563	? AtomicOrdering::Monotonic
564	: Ordering;
565	AtomicRMWInst *AI = Builder.CreateAtomicRMW(
566	Op: AtomicRMWInst::Xchg, Ptr: SI->getPointerOperand(), Val: SI->getValueOperand(),
567	Align: SI->getAlign(), Ordering: RMWOrdering);
568	SI->eraseFromParent();
569
570	// Now we have an appropriate swap instruction, lower it as usual.
571	tryExpandAtomicRMW(AI);
572	}
573
574	static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
575	Value Loaded, Value NewVal, Align AddrAlign,
576	AtomicOrdering MemOpOrder, SyncScope::ID SSID,
577	Value &Success, Value &NewLoaded) {
578	Type *OrigTy = NewVal->getType();
579
580	// This code can go away when cmpxchg supports FP and vector types.
581	assert(!OrigTy->isPointerTy());
582	bool NeedBitcast = OrigTy->isFloatingPointTy() \|\| OrigTy->isVectorTy();
583	if (NeedBitcast) {
584	IntegerType *IntTy = Builder.getIntNTy(N: OrigTy->getPrimitiveSizeInBits());
585	NewVal = Builder.CreateBitCast(V: NewVal, DestTy: IntTy);
586	Loaded = Builder.CreateBitCast(V: Loaded, DestTy: IntTy);
587	}
588
589	Value *Pair = Builder.CreateAtomicCmpXchg(
590	Ptr: Addr, Cmp: Loaded, New: NewVal, Align: AddrAlign, SuccessOrdering: MemOpOrder,
591	FailureOrdering: AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: MemOpOrder), SSID);
592	Success = Builder.CreateExtractValue(Agg: Pair, Idxs: `1`, Name: "success");
593	NewLoaded = Builder.CreateExtractValue(Agg: Pair, Idxs: `0`, Name: "newloaded");
594
595	if (NeedBitcast)
596	NewLoaded = Builder.CreateBitCast(V: NewLoaded, DestTy: OrigTy);
597	}
598
599	bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
600	LLVMContext &Ctx = AI->getModule()->getContext();
601	TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(RMW: AI);
602	switch (Kind) {
603	case TargetLoweringBase::AtomicExpansionKind::None:
604	return false;
605	case TargetLoweringBase::AtomicExpansionKind::LLSC: {
606	unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / `8`;
607	unsigned ValueSize = getAtomicOpSize(RMWI: AI);
608	if (ValueSize < MinCASSize) {
609	expandPartwordAtomicRMW(I: AI,
610	ExpansionKind: TargetLoweringBase::AtomicExpansionKind::LLSC);
611	} else {
612	auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
613	return buildAtomicRMWValue(Op: AI->getOperation(), Builder, Loaded,
614	Val: AI->getValOperand());
615	};
616	expandAtomicOpToLLSC(I: AI, ResultTy: AI->getType(), Addr: AI->getPointerOperand(),
617	AddrAlign: AI->getAlign(), MemOpOrder: AI->getOrdering(), PerformOp);
618	}
619	return true;
620	}
621	case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
622	unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / `8`;
623	unsigned ValueSize = getAtomicOpSize(RMWI: AI);
624	if (ValueSize < MinCASSize) {
625	expandPartwordAtomicRMW(I: AI,
626	ExpansionKind: TargetLoweringBase::AtomicExpansionKind::CmpXChg);
627	} else {
628	SmallVector<StringRef> SSNs;
629	Ctx.getSyncScopeNames(SSNs);
630	auto MemScope = SSNs [AI->getSyncScopeID()].empty()
631	? "system"
632	: SSNs [AI->getSyncScopeID()];
633	OptimizationRemarkEmitter ORE(AI->getFunction());
634	ORE.emit(RemarkBuilder: [&]() {
635	return OptimizationRemark (DEBUG_TYPE, "Passed", AI)
636	<< "A compare and swap loop was generated for an atomic "
637	<< AI->getOperationName(Op: AI->getOperation()) << " operation at "
638	<< MemScope << " memory scope";
639	});
640	expandAtomicRMWToCmpXchg(AI, CreateCmpXchg: createCmpXchgInstFun);
641	}
642	return true;
643	}
644	case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
645	unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / `8`;
646	unsigned ValueSize = getAtomicOpSize(RMWI: AI);
647	if (ValueSize < MinCASSize) {
648	AtomicRMWInst::BinOp Op = AI->getOperation();
649	// Widen And/Or/Xor and give the target another chance at expanding it.
650	if (Op == AtomicRMWInst::Or \|\| Op == AtomicRMWInst::Xor \|\|
651	Op == AtomicRMWInst::And) {
652	tryExpandAtomicRMW(AI: widenPartwordAtomicRMW(AI));
653	return true;
654	}
655	}
656	expandAtomicRMWToMaskedIntrinsic(AI);
657	return true;
658	}
659	case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
660	TLI->emitBitTestAtomicRMWIntrinsic(AI);
661	return true;
662	}
663	case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
664	TLI->emitCmpArithAtomicRMWIntrinsic(AI);
665	return true;
666	}
667	case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
668	return lowerAtomicRMWInst(RMWI: AI);
669	case TargetLoweringBase::AtomicExpansionKind::Expand:
670	TLI->emitExpandAtomicRMW(AI);
671	return true;
672	default:
673	llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
674	}
675	}
676
677	namespace {
678
679	struct PartwordMaskValues {
680	// These three fields are guaranteed to be set by createMaskInstrs.
681	Type WordType = nullptr*;
682	Type ValueType = nullptr*;
683	Type IntValueType = nullptr*;
684	Value AlignedAddr = nullptr*;
685	Align AlignedAddrAlignment;
686	// The remaining fields can be null.
687	Value ShiftAmt = nullptr*;
688	Value Mask = nullptr*;
689	Value Inv_Mask = nullptr*;
690	};
691
692	LLVM_ATTRIBUTE_UNUSED
693	raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
694	auto PrintObj = [&O](auto *V) {
695	if (V)
696	O << *V;
697	else
698	O << "nullptr";
699	O << `'\n'`;
700	};
701	O << "PartwordMaskValues {\n";
702	O << " WordType: ";
703	PrintObj (PMV.WordType);
704	O << " ValueType: ";
705	PrintObj (PMV.ValueType);
706	O << " AlignedAddr: ";
707	PrintObj (PMV.AlignedAddr);
708	O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << `'\n'`;
709	O << " ShiftAmt: ";
710	PrintObj (PMV.ShiftAmt);
711	O << " Mask: ";
712	PrintObj (PMV.Mask);
713	O << " Inv_Mask: ";
714	PrintObj (PMV.Inv_Mask);
715	O << "}\n";
716	return O;
717	}
718
719	} // end anonymous namespace
720
721	/// This is a helper function which builds instructions to provide
722	/// values necessary for partword atomic operations. It takes an
723	/// incoming address, Addr, and ValueType, and constructs the address,
724	/// shift-amounts and masks needed to work with a larger value of size
725	/// WordSize.
726	///
727	/// AlignedAddr: Addr rounded down to a multiple of WordSize
728	///
729	/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
730	/// from AlignAddr for it to have the same value as if
731	/// ValueType was loaded from Addr.
732	///
733	/// Mask: Value to mask with the value loaded from AlignAddr to
734	/// include only the part that would've been loaded from Addr.
735	///
736	/// Inv_Mask: The inverse of Mask.
737	static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
738	Instruction I, Type ValueType,
739	Value *Addr, Align AddrAlign,
740	unsigned MinWordSize) {
741	PartwordMaskValues PMV;
742
743	Module *M = I->getModule();
744	LLVMContext &Ctx = M->getContext();
745	const DataLayout &DL = M->getDataLayout();
746	unsigned ValueSize = DL.getTypeStoreSize(Ty: ValueType);
747
748	PMV.ValueType = PMV.IntValueType = ValueType;
749	if (PMV.ValueType->isFloatingPointTy() \|\| PMV.ValueType->isVectorTy())
750	PMV.IntValueType =
751	Type::getIntNTy(C&: Ctx, N: ValueType->getPrimitiveSizeInBits());
752
753	PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(C&: Ctx, N: MinWordSize * `8`)
754	: ValueType;
755	if (PMV.ValueType == PMV.WordType) {
756	PMV.AlignedAddr = Addr;
757	PMV.AlignedAddrAlignment = AddrAlign;
758	PMV.ShiftAmt = ConstantInt::get(Ty: PMV.ValueType, V: `0`);
759	PMV.Mask = ConstantInt::get(Ty: PMV.ValueType, V: ~`0`, /isSigned/ IsSigned: true);
760	return PMV;
761	}
762
763	PMV.AlignedAddrAlignment = Align (MinWordSize);
764
765	assert(ValueSize < MinWordSize);
766
767	PointerType *PtrTy = cast<PointerType>(Val: Addr->getType());
768	IntegerType *IntTy = DL.getIndexType(C&: Ctx, AddressSpace: PtrTy->getAddressSpace());
769	Value *PtrLSB;
770
771	if (AddrAlign < MinWordSize) {
772	PMV.AlignedAddr = Builder.CreateIntrinsic(
773	ID: Intrinsic::ptrmask, Types: {PtrTy, IntTy},
774	Args: {Addr, ConstantInt::get(Ty: IntTy, V: ~(uint64_t)(MinWordSize - `1`))}, FMFSource: nullptr,
775	Name: "AlignedAddr");
776
777	Value *AddrInt = Builder.CreatePtrToInt(V: Addr, DestTy: IntTy);
778	PtrLSB = Builder.CreateAnd(LHS: AddrInt, RHS: MinWordSize - `1`, Name: "PtrLSB");
779	} else {
780	// If the alignment is high enough, the LSB are known 0.
781	PMV.AlignedAddr = Addr;
782	PtrLSB = ConstantInt::getNullValue(Ty: IntTy);
783	}
784
785	if (DL.isLittleEndian()) {
786	// turn bytes into bits
787	PMV.ShiftAmt = Builder.CreateShl(LHS: PtrLSB, RHS: `3`);
788	} else {
789	// turn bytes into bits, and count from the other side.
790	PMV.ShiftAmt = Builder.CreateShl(
791	LHS: Builder.CreateXor(LHS: PtrLSB, RHS: MinWordSize - ValueSize), RHS: `3`);
792	}
793
794	PMV.ShiftAmt = Builder.CreateTrunc(V: PMV.ShiftAmt, DestTy: PMV.WordType, Name: "ShiftAmt");
795	PMV.Mask = Builder.CreateShl(
796	LHS: ConstantInt::get(Ty: PMV.WordType, V: (`1` << (ValueSize * `8`)) - `1`), RHS: PMV.ShiftAmt,
797	Name: "Mask");
798
799	PMV.Inv_Mask = Builder.CreateNot(V: PMV.Mask, Name: "Inv_Mask");
800
801	return PMV;
802	}
803
804	static Value extractMaskedValue(IRBuilderBase &Builder, Value WideWord,
805	const PartwordMaskValues &PMV) {
806	assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
807	if (PMV.WordType == PMV.ValueType)
808	return WideWord;
809
810	Value *Shift = Builder.CreateLShr(LHS: WideWord, RHS: PMV.ShiftAmt, Name: "shifted");
811	Value *Trunc = Builder.CreateTrunc(V: Shift, DestTy: PMV.IntValueType, Name: "extracted");
812	return Builder.CreateBitCast(V: Trunc, DestTy: PMV.ValueType);
813	}
814
815	static Value insertMaskedValue(IRBuilderBase &Builder, Value WideWord,
816	Value Updated, const* PartwordMaskValues &PMV) {
817	assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
818	assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
819	if (PMV.WordType == PMV.ValueType)
820	return Updated;
821
822	Updated = Builder.CreateBitCast(V: Updated, DestTy: PMV.IntValueType);
823
824	Value *ZExt = Builder.CreateZExt(V: Updated, DestTy: PMV.WordType, Name: "extended");
825	Value *Shift =
826	Builder.CreateShl(LHS: ZExt, RHS: PMV.ShiftAmt, Name: "shifted", /HasNUW/ true);
827	Value *And = Builder.CreateAnd(LHS: WideWord, RHS: PMV.Inv_Mask, Name: "unmasked");
828	Value *Or = Builder.CreateOr(LHS: And, RHS: Shift, Name: "inserted");
829	return Or;
830	}
831
832	/// Emit IR to implement a masked version of a given atomicrmw
833	/// operation. (That is, only the bits under the Mask should be
834	/// affected by the operation)
835	static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
836	IRBuilderBase &Builder, Value *Loaded,
837	Value Shifted_Inc, Value Inc,
838	const PartwordMaskValues &PMV) {
839	// TODO: update to use
840	// https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
841	// to merge bits from two values without requiring PMV.Inv_Mask.
842	switch (Op) {
843	case AtomicRMWInst::Xchg: {
844	Value *Loaded_MaskOut = Builder.CreateAnd(LHS: Loaded, RHS: PMV.Inv_Mask);
845	Value *FinalVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: Shifted_Inc);
846	return FinalVal;
847	}
848	case AtomicRMWInst::Or:
849	case AtomicRMWInst::Xor:
850	case AtomicRMWInst::And:
851	llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
852	case AtomicRMWInst::Add:
853	case AtomicRMWInst::Sub:
854	case AtomicRMWInst::Nand: {
855	// The other arithmetic ops need to be masked into place.
856	Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Val: Shifted_Inc);
857	Value *NewVal_Masked = Builder.CreateAnd(LHS: NewVal, RHS: PMV.Mask);
858	Value *Loaded_MaskOut = Builder.CreateAnd(LHS: Loaded, RHS: PMV.Inv_Mask);
859	Value *FinalVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: NewVal_Masked);
860	return FinalVal;
861	}
862	case AtomicRMWInst::Max:
863	case AtomicRMWInst::Min:
864	case AtomicRMWInst::UMax:
865	case AtomicRMWInst::UMin:
866	case AtomicRMWInst::FAdd:
867	case AtomicRMWInst::FSub:
868	case AtomicRMWInst::FMin:
869	case AtomicRMWInst::FMax:
870	case AtomicRMWInst::UIncWrap:
871	case AtomicRMWInst::UDecWrap: {
872	// Finally, other ops will operate on the full value, so truncate down to
873	// the original size, and expand out again after doing the
874	// operation. Bitcasts will be inserted for FP values.
875	Value *Loaded_Extract = extractMaskedValue(Builder, WideWord: Loaded, PMV);
876	Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded: Loaded_Extract, Val: Inc);
877	Value *FinalVal = insertMaskedValue(Builder, WideWord: Loaded, Updated: NewVal, PMV);
878	return FinalVal;
879	}
880	default:
881	llvm_unreachable("Unknown atomic op");
882	}
883	}
884
885	/// Expand a sub-word atomicrmw operation into an appropriate
886	/// word-sized operation.
887	///
888	/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
889	/// way as a typical atomicrmw expansion. The only difference here is
890	/// that the operation inside of the loop may operate upon only a
891	/// part of the value.
892	void AtomicExpandImpl::expandPartwordAtomicRMW(
893	AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
894	// Widen And/Or/Xor and give the target another chance at expanding it.
895	AtomicRMWInst::BinOp Op = AI->getOperation();
896	if (Op == AtomicRMWInst::Or \|\| Op == AtomicRMWInst::Xor \|\|
897	Op == AtomicRMWInst::And) {
898	tryExpandAtomicRMW(AI: widenPartwordAtomicRMW(AI));
899	return;
900	}
901	AtomicOrdering MemOpOrder = AI->getOrdering();
902	SyncScope::ID SSID = AI->getSyncScopeID();
903
904	ReplacementIRBuilder Builder(AI, *DL);
905
906	PartwordMaskValues PMV =
907	createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
908	AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / `8`);
909
910	Value ValOperand_Shifted = nullptr*;
911	if (Op == AtomicRMWInst::Xchg \|\| Op == AtomicRMWInst::Add \|\|
912	Op == AtomicRMWInst::Sub \|\| Op == AtomicRMWInst::Nand) {
913	Value *ValOp = Builder.CreateBitCast(V: AI->getValOperand(), DestTy: PMV.IntValueType);
914	ValOperand_Shifted =
915	Builder.CreateShl(LHS: Builder.CreateZExt(V: ValOp, DestTy: PMV.WordType), RHS: PMV.ShiftAmt,
916	Name: "ValOperand_Shifted");
917	}
918
919	auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
920	return performMaskedAtomicOp(Op, Builder, Loaded, Shifted_Inc: ValOperand_Shifted,
921	Inc: AI->getValOperand(), PMV);
922	};
923
924	Value *OldResult;
925	if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
926	OldResult = insertRMWCmpXchgLoop(Builder, ResultType: PMV.WordType, Addr: PMV.AlignedAddr,
927	AddrAlign: PMV.AlignedAddrAlignment, MemOpOrder, SSID,
928	PerformOp: PerformPartwordOp, CreateCmpXchg: createCmpXchgInstFun);
929	} else {
930	assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
931	OldResult = insertRMWLLSCLoop(Builder, ResultTy: PMV.WordType, Addr: PMV.AlignedAddr,
932	AddrAlign: PMV.AlignedAddrAlignment, MemOpOrder,
933	PerformOp: PerformPartwordOp);
934	}
935
936	Value *FinalOldResult = extractMaskedValue(Builder, WideWord: OldResult, PMV);
937	AI->replaceAllUsesWith(V: FinalOldResult);
938	AI->eraseFromParent();
939	}
940
941	/// Copy metadata that's safe to preserve when widening atomics.
942	static void copyMetadataForAtomic(Instruction &Dest,
943	const Instruction &Source) {
944	SmallVector<std::pair<unsigned, MDNode *>, `8`> MD;
945	Source.getAllMetadata(MDs&: MD);
946	LLVMContext &Ctx = Dest.getContext();
947	MDBuilder MDB(Ctx);
948
949	for (auto [ID, N] : MD) {
950	switch (ID) {
951	case LLVMContext::MD_dbg:
952	case LLVMContext::MD_tbaa:
953	case LLVMContext::MD_tbaa_struct:
954	case LLVMContext::MD_alias_scope:
955	case LLVMContext::MD_noalias:
956	case LLVMContext::MD_access_group:
957	case LLVMContext::MD_mmra:
958	Dest.setMetadata(KindID: ID, Node: N);
959	break;
960	default:
961	if (ID == Ctx.getMDKindID(Name: "amdgpu.no.remote.memory"))
962	Dest.setMetadata(KindID: ID, Node: N);
963	else if (ID == Ctx.getMDKindID(Name: "amdgpu.no.fine.grained.memory"))
964	Dest.setMetadata(KindID: ID, Node: N);
965
966	break;
967	}
968	}
969	}
970
971	// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
972	AtomicRMWInst AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst AI) {
973	ReplacementIRBuilder Builder(AI, *DL);
974	AtomicRMWInst::BinOp Op = AI->getOperation();
975
976	assert((Op == AtomicRMWInst::Or \|\| Op == AtomicRMWInst::Xor \|\|
977	Op == AtomicRMWInst::And) &&
978	"Unable to widen operation");
979
980	PartwordMaskValues PMV =
981	createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
982	AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / `8`);
983
984	Value *ValOperand_Shifted =
985	Builder.CreateShl(LHS: Builder.CreateZExt(V: AI->getValOperand(), DestTy: PMV.WordType),
986	RHS: PMV.ShiftAmt, Name: "ValOperand_Shifted");
987
988	Value *NewOperand;
989
990	if (Op == AtomicRMWInst::And)
991	NewOperand =
992	Builder.CreateOr(LHS: ValOperand_Shifted, RHS: PMV.Inv_Mask, Name: "AndOperand");
993	else
994	NewOperand = ValOperand_Shifted;
995
996	AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
997	Op, Ptr: PMV.AlignedAddr, Val: NewOperand, Align: PMV.AlignedAddrAlignment,
998	Ordering: AI->getOrdering(), SSID: AI->getSyncScopeID());
999
1000	copyMetadataForAtomic(Dest&: NewAI, Source: AI);
1001
1002	Value *FinalOldResult = extractMaskedValue(Builder, WideWord: NewAI, PMV);
1003	AI->replaceAllUsesWith(V: FinalOldResult);
1004	AI->eraseFromParent();
1005	return NewAI;
1006	}
1007
1008	bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
1009	// The basic idea here is that we're expanding a cmpxchg of a
1010	// smaller memory size up to a word-sized cmpxchg. To do this, we
1011	// need to add a retry-loop for strong cmpxchg, so that
1012	// modifications to other parts of the word don't cause a spurious
1013	// failure.
1014
1015	// This generates code like the following:
1016	// [[Setup mask values PMV.]]*
1017	// %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
1018	// %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
1019	// %InitLoaded = load i32 %addr*
1020	// %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
1021	// br partword.cmpxchg.loop
1022	// partword.cmpxchg.loop:
1023	// %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
1024	// [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
1025	// %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
1026	// %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
1027	// %NewCI = cmpxchg i32 %PMV.AlignedAddr, i32 %FullWord_Cmp,*
1028	// i32 %FullWord_NewVal success_ordering failure_ordering
1029	// %OldVal = extractvalue { i32, i1 } %NewCI, 0
1030	// %Success = extractvalue { i32, i1 } %NewCI, 1
1031	// br i1 %Success, label %partword.cmpxchg.end,
1032	// label %partword.cmpxchg.failure
1033	// partword.cmpxchg.failure:
1034	// %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1035	// %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1036	// br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1037	// label %partword.cmpxchg.end
1038	// partword.cmpxchg.end:
1039	// %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1040	// %FinalOldVal = trunc i32 %tmp1 to i8
1041	// %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
1042	// %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
1043
1044	Value *Addr = CI->getPointerOperand();
1045	Value *Cmp = CI->getCompareOperand();
1046	Value *NewVal = CI->getNewValOperand();
1047
1048	BasicBlock *BB = CI->getParent();
1049	Function *F = BB->getParent();
1050	ReplacementIRBuilder Builder(CI, *DL);
1051	LLVMContext &Ctx = Builder.getContext();
1052
1053	BasicBlock *EndBB =
1054	BB->splitBasicBlock(I: CI->getIterator(), BBName: "partword.cmpxchg.end");
1055	auto FailureBB =
1056	BasicBlock::Create(Context&: Ctx, Name: "partword.cmpxchg.failure", Parent: F, InsertBefore: EndBB);
1057	auto LoopBB = BasicBlock::Create(Context&: Ctx, Name: "partword.cmpxchg.loop", Parent: F, InsertBefore: FailureBB);
1058
1059	// The split call above "helpfully" added a branch at the end of BB
1060	// (to the wrong place).
1061	std::prev(x: BB->end())->eraseFromParent();
1062	Builder.SetInsertPoint(BB);
1063
1064	PartwordMaskValues PMV =
1065	createMaskInstrs(Builder, I: CI, ValueType: CI->getCompareOperand()->getType(), Addr,
1066	AddrAlign: CI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / `8`);
1067
1068	// Shift the incoming values over, into the right location in the word.
1069	Value *NewVal_Shifted =
1070	Builder.CreateShl(LHS: Builder.CreateZExt(V: NewVal, DestTy: PMV.WordType), RHS: PMV.ShiftAmt);
1071	Value *Cmp_Shifted =
1072	Builder.CreateShl(LHS: Builder.CreateZExt(V: Cmp, DestTy: PMV.WordType), RHS: PMV.ShiftAmt);
1073
1074	// Load the entire current word, and mask into place the expected and new
1075	// values
1076	LoadInst *InitLoaded = Builder.CreateLoad(Ty: PMV.WordType, Ptr: PMV.AlignedAddr);
1077	InitLoaded->setVolatile(CI->isVolatile());
1078	Value *InitLoaded_MaskOut = Builder.CreateAnd(LHS: InitLoaded, RHS: PMV.Inv_Mask);
1079	Builder.CreateBr(Dest: LoopBB);
1080
1081	// partword.cmpxchg.loop:
1082	Builder.SetInsertPoint(LoopBB);
1083	PHINode *Loaded_MaskOut = Builder.CreatePHI(Ty: PMV.WordType, NumReservedValues: `2`);
1084	Loaded_MaskOut->addIncoming(V: InitLoaded_MaskOut, BB);
1085
1086	// Mask/Or the expected and new values into place in the loaded word.
1087	Value *FullWord_NewVal = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: NewVal_Shifted);
1088	Value *FullWord_Cmp = Builder.CreateOr(LHS: Loaded_MaskOut, RHS: Cmp_Shifted);
1089	AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1090	Ptr: PMV.AlignedAddr, Cmp: FullWord_Cmp, New: FullWord_NewVal, Align: PMV.AlignedAddrAlignment,
1091	SuccessOrdering: CI->getSuccessOrdering(), FailureOrdering: CI->getFailureOrdering(), SSID: CI->getSyncScopeID());
1092	NewCI->setVolatile(CI->isVolatile());
1093	// When we're building a strong cmpxchg, we need a loop, so you
1094	// might think we could use a weak cmpxchg inside. But, using strong
1095	// allows the below comparison for ShouldContinue, and we're
1096	// expecting the underlying cmpxchg to be a machine instruction,
1097	// which is strong anyways.
1098	NewCI->setWeak(CI->isWeak());
1099
1100	Value *OldVal = Builder.CreateExtractValue(Agg: NewCI, Idxs: `0`);
1101	Value *Success = Builder.CreateExtractValue(Agg: NewCI, Idxs: `1`);
1102
1103	if (CI->isWeak())
1104	Builder.CreateBr(Dest: EndBB);
1105	else
1106	Builder.CreateCondBr(Cond: Success, True: EndBB, False: FailureBB);
1107
1108	// partword.cmpxchg.failure:
1109	Builder.SetInsertPoint(FailureBB);
1110	// Upon failure, verify that the masked-out part of the loaded value
1111	// has been modified. If it didn't, abort the cmpxchg, since the
1112	// masked-in part must've.
1113	Value *OldVal_MaskOut = Builder.CreateAnd(LHS: OldVal, RHS: PMV.Inv_Mask);
1114	Value *ShouldContinue = Builder.CreateICmpNE(LHS: Loaded_MaskOut, RHS: OldVal_MaskOut);
1115	Builder.CreateCondBr(Cond: ShouldContinue, True: LoopBB, False: EndBB);
1116
1117	// Add the second value to the phi from above
1118	Loaded_MaskOut->addIncoming(V: OldVal_MaskOut, BB: FailureBB);
1119
1120	// partword.cmpxchg.end:
1121	Builder.SetInsertPoint(CI);
1122
1123	Value *FinalOldVal = extractMaskedValue(Builder, WideWord: OldVal, PMV);
1124	Value *Res = PoisonValue::get(T: CI->getType());
1125	Res = Builder.CreateInsertValue(Agg: Res, Val: FinalOldVal, Idxs: `0`);
1126	Res = Builder.CreateInsertValue(Agg: Res, Val: Success, Idxs: `1`);
1127
1128	CI->replaceAllUsesWith(V: Res);
1129	CI->eraseFromParent();
1130	return true;
1131	}
1132
1133	void AtomicExpandImpl::expandAtomicOpToLLSC(
1134	Instruction I, Type ResultType, Value *Addr, Align AddrAlign,
1135	AtomicOrdering MemOpOrder,
1136	function_ref<Value (IRBuilderBase &, Value )> PerformOp) {
1137	ReplacementIRBuilder Builder(I, *DL);
1138	Value *Loaded = insertRMWLLSCLoop(Builder, ResultTy: ResultType, Addr, AddrAlign,
1139	MemOpOrder, PerformOp);
1140
1141	I->replaceAllUsesWith(V: Loaded);
1142	I->eraseFromParent();
1143	}
1144
1145	void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1146	ReplacementIRBuilder Builder(AI, *DL);
1147
1148	PartwordMaskValues PMV =
1149	createMaskInstrs(Builder, I: AI, ValueType: AI->getType(), Addr: AI->getPointerOperand(),
1150	AddrAlign: AI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / `8`);
1151
1152	// The value operand must be sign-extended for signed min/max so that the
1153	// target's signed comparison instructions can be used. Otherwise, just
1154	// zero-ext.
1155	Instruction::CastOps CastOp = Instruction::ZExt;
1156	AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1157	if (RMWOp == AtomicRMWInst::Max \|\| RMWOp == AtomicRMWInst::Min)
1158	CastOp = Instruction::SExt;
1159
1160	Value *ValOperand_Shifted = Builder.CreateShl(
1161	LHS: Builder.CreateCast(Op: CastOp, V: AI->getValOperand(), DestTy: PMV.WordType),
1162	RHS: PMV.ShiftAmt, Name: "ValOperand_Shifted");
1163	Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1164	Builder, AI, AlignedAddr: PMV.AlignedAddr, Incr: ValOperand_Shifted, Mask: PMV.Mask, ShiftAmt: PMV.ShiftAmt,
1165	Ord: AI->getOrdering());
1166	Value *FinalOldResult = extractMaskedValue(Builder, WideWord: OldResult, PMV);
1167	AI->replaceAllUsesWith(V: FinalOldResult);
1168	AI->eraseFromParent();
1169	}
1170
1171	void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1172	AtomicCmpXchgInst *CI) {
1173	ReplacementIRBuilder Builder(CI, *DL);
1174
1175	PartwordMaskValues PMV = createMaskInstrs(
1176	Builder, I: CI, ValueType: CI->getCompareOperand()->getType(), Addr: CI->getPointerOperand(),
1177	AddrAlign: CI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / `8`);
1178
1179	Value *CmpVal_Shifted = Builder.CreateShl(
1180	LHS: Builder.CreateZExt(V: CI->getCompareOperand(), DestTy: PMV.WordType), RHS: PMV.ShiftAmt,
1181	Name: "CmpVal_Shifted");
1182	Value *NewVal_Shifted = Builder.CreateShl(
1183	LHS: Builder.CreateZExt(V: CI->getNewValOperand(), DestTy: PMV.WordType), RHS: PMV.ShiftAmt,
1184	Name: "NewVal_Shifted");
1185	Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1186	Builder, CI, AlignedAddr: PMV.AlignedAddr, CmpVal: CmpVal_Shifted, NewVal: NewVal_Shifted, Mask: PMV.Mask,
1187	Ord: CI->getMergedOrdering());
1188	Value *FinalOldVal = extractMaskedValue(Builder, WideWord: OldVal, PMV);
1189	Value *Res = PoisonValue::get(T: CI->getType());
1190	Res = Builder.CreateInsertValue(Agg: Res, Val: FinalOldVal, Idxs: `0`);
1191	Value *Success = Builder.CreateICmpEQ(
1192	LHS: CmpVal_Shifted, RHS: Builder.CreateAnd(LHS: OldVal, RHS: PMV.Mask), Name: "Success");
1193	Res = Builder.CreateInsertValue(Agg: Res, Val: Success, Idxs: `1`);
1194
1195	CI->replaceAllUsesWith(V: Res);
1196	CI->eraseFromParent();
1197	}
1198
1199	Value *AtomicExpandImpl::insertRMWLLSCLoop(
1200	IRBuilderBase &Builder, Type ResultTy, Value Addr, Align AddrAlign,
1201	AtomicOrdering MemOpOrder,
1202	function_ref<Value (IRBuilderBase &, Value )> PerformOp) {
1203	LLVMContext &Ctx = Builder.getContext();
1204	BasicBlock *BB = Builder.GetInsertBlock();
1205	Function *F = BB->getParent();
1206
1207	assert(AddrAlign >=
1208	F->getDataLayout().getTypeStoreSize(ResultTy) &&
1209	"Expected at least natural alignment at this point.");
1210
1211	// Given: atomicrmw some_op iN %addr, iN %incr ordering*
1212	//
1213	// The standard expansion we produce is:
1214	// [...]
1215	// atomicrmw.start:
1216	// %loaded = @load.linked(%addr)
1217	// %new = some_op iN %loaded, %incr
1218	// %stored = @store_conditional(%new, %addr)
1219	// %try_again = icmp i32 ne %stored, 0
1220	// br i1 %try_again, label %loop, label %atomicrmw.end
1221	// atomicrmw.end:
1222	// [...]
1223	BasicBlock *ExitBB =
1224	BB->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "atomicrmw.end");
1225	BasicBlock *LoopBB = BasicBlock::Create(Context&: Ctx, Name: "atomicrmw.start", Parent: F, InsertBefore: ExitBB);
1226
1227	// The split call above "helpfully" added a branch at the end of BB (to the
1228	// wrong place).
1229	std::prev(x: BB->end())->eraseFromParent();
1230	Builder.SetInsertPoint(BB);
1231	Builder.CreateBr(Dest: LoopBB);
1232
1233	// Start the main loop block now that we've taken care of the preliminaries.
1234	Builder.SetInsertPoint(LoopBB);
1235	Value *Loaded = TLI->emitLoadLinked(Builder, ValueTy: ResultTy, Addr, Ord: MemOpOrder);
1236
1237	Value *NewVal = PerformOp (Builder, Loaded);
1238
1239	Value *StoreSuccess =
1240	TLI->emitStoreConditional(Builder, Val: NewVal, Addr, Ord: MemOpOrder);
1241	Value *TryAgain = Builder.CreateICmpNE(
1242	LHS: StoreSuccess, RHS: ConstantInt::get(Ty: IntegerType::get(C&: Ctx, NumBits: `32`), V: `0`), Name: "tryagain");
1243	Builder.CreateCondBr(Cond: TryAgain, True: LoopBB, False: ExitBB);
1244
1245	Builder.SetInsertPoint(TheBB: ExitBB, IP: ExitBB->begin());
1246	return Loaded;
1247	}
1248
1249	/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1250	/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1251	/// IR. As a migration step, we convert back to what use to be the standard
1252	/// way to represent a pointer cmpxchg so that we can update backends one by
1253	/// one.
1254	AtomicCmpXchgInst *
1255	AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1256	auto *M = CI->getModule();
1257	Type *NewTy = getCorrespondingIntegerType(T: CI->getCompareOperand()->getType(),
1258	DL: M->getDataLayout());
1259
1260	ReplacementIRBuilder Builder(CI, *DL);
1261
1262	Value *Addr = CI->getPointerOperand();
1263
1264	Value *NewCmp = Builder.CreatePtrToInt(V: CI->getCompareOperand(), DestTy: NewTy);
1265	Value *NewNewVal = Builder.CreatePtrToInt(V: CI->getNewValOperand(), DestTy: NewTy);
1266
1267	auto *NewCI = Builder.CreateAtomicCmpXchg(
1268	Ptr: Addr, Cmp: NewCmp, New: NewNewVal, Align: CI->getAlign(), SuccessOrdering: CI->getSuccessOrdering(),
1269	FailureOrdering: CI->getFailureOrdering(), SSID: CI->getSyncScopeID());
1270	NewCI->setVolatile(CI->isVolatile());
1271	NewCI->setWeak(CI->isWeak());
1272	LLVM_DEBUG(dbgs() << "Replaced " << CI << " with " << NewCI << "\n");
1273
1274	Value *OldVal = Builder.CreateExtractValue(Agg: NewCI, Idxs: `0`);
1275	Value *Succ = Builder.CreateExtractValue(Agg: NewCI, Idxs: `1`);
1276
1277	OldVal = Builder.CreateIntToPtr(V: OldVal, DestTy: CI->getCompareOperand()->getType());
1278
1279	Value *Res = PoisonValue::get(T: CI->getType());
1280	Res = Builder.CreateInsertValue(Agg: Res, Val: OldVal, Idxs: `0`);
1281	Res = Builder.CreateInsertValue(Agg: Res, Val: Succ, Idxs: `1`);
1282
1283	CI->replaceAllUsesWith(V: Res);
1284	CI->eraseFromParent();
1285	return NewCI;
1286	}
1287
1288	bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1289	AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1290	AtomicOrdering FailureOrder = CI->getFailureOrdering();
1291	Value *Addr = CI->getPointerOperand();
1292	BasicBlock *BB = CI->getParent();
1293	Function *F = BB->getParent();
1294	LLVMContext &Ctx = F->getContext();
1295	// If shouldInsertFencesForAtomic() returns true, then the target does not
1296	// want to deal with memory orders, and emitLeading/TrailingFence should take
1297	// care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1298	// should preserve the ordering.
1299	bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(I: CI);
1300	AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1301	? AtomicOrdering::Monotonic
1302	: CI->getMergedOrdering();
1303
1304	// In implementations which use a barrier to achieve release semantics, we can
1305	// delay emitting this barrier until we know a store is actually going to be
1306	// attempted. The cost of this delay is that we need 2 copies of the block
1307	// emitting the load-linked, affecting code size.
1308	//
1309	// Ideally, this logic would be unconditional except for the minsize check
1310	// since in other cases the extra blocks naturally collapse down to the
1311	// minimal loop. Unfortunately, this puts too much stress on later
1312	// optimisations so we avoid emitting the extra logic in those cases too.
1313	bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1314	SuccessOrder != AtomicOrdering::Monotonic &&
1315	SuccessOrder != AtomicOrdering::Acquire &&
1316	!F->hasMinSize();
1317
1318	// There's no overhead for sinking the release barrier in a weak cmpxchg, so
1319	// do it even on minsize.
1320	bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1321
1322	// Given: cmpxchg some_op iN %addr, iN %desired, iN %new success_ord fail_ord*
1323	//
1324	// The full expansion we produce is:
1325	// [...]
1326	// %aligned.addr = ...
1327	// cmpxchg.start:
1328	// %unreleasedload = @load.linked(%aligned.addr)
1329	// %unreleasedload.extract = extract value from %unreleasedload
1330	// %should_store = icmp eq %unreleasedload.extract, %desired
1331	// br i1 %should_store, label %cmpxchg.releasingstore,
1332	// label %cmpxchg.nostore
1333	// cmpxchg.releasingstore:
1334	// fence?
1335	// br label cmpxchg.trystore
1336	// cmpxchg.trystore:
1337	// %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1338	// [%releasedload, %cmpxchg.releasedload]
1339	// %updated.new = insert %new into %loaded.trystore
1340	// %stored = @store_conditional(%updated.new, %aligned.addr)
1341	// %success = icmp eq i32 %stored, 0
1342	// br i1 %success, label %cmpxchg.success,
1343	// label %cmpxchg.releasedload/%cmpxchg.failure
1344	// cmpxchg.releasedload:
1345	// %releasedload = @load.linked(%aligned.addr)
1346	// %releasedload.extract = extract value from %releasedload
1347	// %should_store = icmp eq %releasedload.extract, %desired
1348	// br i1 %should_store, label %cmpxchg.trystore,
1349	// label %cmpxchg.failure
1350	// cmpxchg.success:
1351	// fence?
1352	// br label %cmpxchg.end
1353	// cmpxchg.nostore:
1354	// %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1355	// [%releasedload,
1356	// %cmpxchg.releasedload/%cmpxchg.trystore]
1357	// @load_linked_fail_balance()?
1358	// br label %cmpxchg.failure
1359	// cmpxchg.failure:
1360	// fence?
1361	// br label %cmpxchg.end
1362	// cmpxchg.end:
1363	// %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1364	// [%loaded.trystore, %cmpxchg.trystore]
1365	// %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1366	// %loaded = extract value from %loaded.exit
1367	// %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1368	// %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1369	// [...]
1370	BasicBlock *ExitBB = BB->splitBasicBlock(I: CI->getIterator(), BBName: "cmpxchg.end");
1371	auto FailureBB = BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.failure", Parent: F, InsertBefore: ExitBB);
1372	auto NoStoreBB = BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.nostore", Parent: F, InsertBefore: FailureBB);
1373	auto SuccessBB = BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.success", Parent: F, InsertBefore: NoStoreBB);
1374	auto ReleasedLoadBB =
1375	BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.releasedload", Parent: F, InsertBefore: SuccessBB);
1376	auto TryStoreBB =
1377	BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.trystore", Parent: F, InsertBefore: ReleasedLoadBB);
1378	auto ReleasingStoreBB =
1379	BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.fencedstore", Parent: F, InsertBefore: TryStoreBB);
1380	auto StartBB = BasicBlock::Create(Context&: Ctx, Name: "cmpxchg.start", Parent: F, InsertBefore: ReleasingStoreBB);
1381
1382	ReplacementIRBuilder Builder(CI, *DL);
1383
1384	// The split call above "helpfully" added a branch at the end of BB (to the
1385	// wrong place), but we might want a fence too. It's easiest to just remove
1386	// the branch entirely.
1387	std::prev(x: BB->end())->eraseFromParent();
1388	Builder.SetInsertPoint(BB);
1389	if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1390	TLI->emitLeadingFence(Builder, Inst: CI, Ord: SuccessOrder);
1391
1392	PartwordMaskValues PMV =
1393	createMaskInstrs(Builder, I: CI, ValueType: CI->getCompareOperand()->getType(), Addr,
1394	AddrAlign: CI->getAlign(), MinWordSize: TLI->getMinCmpXchgSizeInBits() / `8`);
1395	Builder.CreateBr(Dest: StartBB);
1396
1397	// Start the main loop block now that we've taken care of the preliminaries.
1398	Builder.SetInsertPoint(StartBB);
1399	Value *UnreleasedLoad =
1400	TLI->emitLoadLinked(Builder, ValueTy: PMV.WordType, Addr: PMV.AlignedAddr, Ord: MemOpOrder);
1401	Value *UnreleasedLoadExtract =
1402	extractMaskedValue(Builder, WideWord: UnreleasedLoad, PMV);
1403	Value *ShouldStore = Builder.CreateICmpEQ(
1404	LHS: UnreleasedLoadExtract, RHS: CI->getCompareOperand(), Name: "should_store");
1405
1406	// If the cmpxchg doesn't actually need any ordering when it fails, we can
1407	// jump straight past that fence instruction (if it exists).
1408	Builder.CreateCondBr(Cond: ShouldStore, True: ReleasingStoreBB, False: NoStoreBB);
1409
1410	Builder.SetInsertPoint(ReleasingStoreBB);
1411	if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1412	TLI->emitLeadingFence(Builder, Inst: CI, Ord: SuccessOrder);
1413	Builder.CreateBr(Dest: TryStoreBB);
1414
1415	Builder.SetInsertPoint(TryStoreBB);
1416	PHINode *LoadedTryStore =
1417	Builder.CreatePHI(Ty: PMV.WordType, NumReservedValues: `2`, Name: "loaded.trystore");
1418	LoadedTryStore->addIncoming(V: UnreleasedLoad, BB: ReleasingStoreBB);
1419	Value *NewValueInsert =
1420	insertMaskedValue(Builder, WideWord: LoadedTryStore, Updated: CI->getNewValOperand(), PMV);
1421	Value *StoreSuccess = TLI->emitStoreConditional(Builder, Val: NewValueInsert,
1422	Addr: PMV.AlignedAddr, Ord: MemOpOrder);
1423	StoreSuccess = Builder.CreateICmpEQ(
1424	LHS: StoreSuccess, RHS: ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx), V: `0`), Name: "success");
1425	BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1426	Builder.CreateCondBr(Cond: StoreSuccess, True: SuccessBB,
1427	False: CI->isWeak() ? FailureBB : RetryBB);
1428
1429	Builder.SetInsertPoint(ReleasedLoadBB);
1430	Value *SecondLoad;
1431	if (HasReleasedLoadBB) {
1432	SecondLoad =
1433	TLI->emitLoadLinked(Builder, ValueTy: PMV.WordType, Addr: PMV.AlignedAddr, Ord: MemOpOrder);
1434	Value *SecondLoadExtract = extractMaskedValue(Builder, WideWord: SecondLoad, PMV);
1435	ShouldStore = Builder.CreateICmpEQ(LHS: SecondLoadExtract,
1436	RHS: CI->getCompareOperand(), Name: "should_store");
1437
1438	// If the cmpxchg doesn't actually need any ordering when it fails, we can
1439	// jump straight past that fence instruction (if it exists).
1440	Builder.CreateCondBr(Cond: ShouldStore, True: TryStoreBB, False: NoStoreBB);
1441	// Update PHI node in TryStoreBB.
1442	LoadedTryStore->addIncoming(V: SecondLoad, BB: ReleasedLoadBB);
1443	} else
1444	Builder.CreateUnreachable();
1445
1446	// Make sure later instructions don't get reordered with a fence if
1447	// necessary.
1448	Builder.SetInsertPoint(SuccessBB);
1449	if (ShouldInsertFencesForAtomic \|\|
1450	TLI->shouldInsertTrailingFenceForAtomicStore(I: CI))
1451	TLI->emitTrailingFence(Builder, Inst: CI, Ord: SuccessOrder);
1452	Builder.CreateBr(Dest: ExitBB);
1453
1454	Builder.SetInsertPoint(NoStoreBB);
1455	PHINode *LoadedNoStore =
1456	Builder.CreatePHI(Ty: UnreleasedLoad->getType(), NumReservedValues: `2`, Name: "loaded.nostore");
1457	LoadedNoStore->addIncoming(V: UnreleasedLoad, BB: StartBB);
1458	if (HasReleasedLoadBB)
1459	LoadedNoStore->addIncoming(V: SecondLoad, BB: ReleasedLoadBB);
1460
1461	// In the failing case, where we don't execute the store-conditional, the
1462	// target might want to balance out the load-linked with a dedicated
1463	// instruction (e.g., on ARM, clearing the exclusive monitor).
1464	TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1465	Builder.CreateBr(Dest: FailureBB);
1466
1467	Builder.SetInsertPoint(FailureBB);
1468	PHINode *LoadedFailure =
1469	Builder.CreatePHI(Ty: UnreleasedLoad->getType(), NumReservedValues: `2`, Name: "loaded.failure");
1470	LoadedFailure->addIncoming(V: LoadedNoStore, BB: NoStoreBB);
1471	if (CI->isWeak())
1472	LoadedFailure->addIncoming(V: LoadedTryStore, BB: TryStoreBB);
1473	if (ShouldInsertFencesForAtomic)
1474	TLI->emitTrailingFence(Builder, Inst: CI, Ord: FailureOrder);
1475	Builder.CreateBr(Dest: ExitBB);
1476
1477	// Finally, we have control-flow based knowledge of whether the cmpxchg
1478	// succeeded or not. We expose this to later passes by converting any
1479	// subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1480	// PHI.
1481	Builder.SetInsertPoint(TheBB: ExitBB, IP: ExitBB->begin());
1482	PHINode *LoadedExit =
1483	Builder.CreatePHI(Ty: UnreleasedLoad->getType(), NumReservedValues: `2`, Name: "loaded.exit");
1484	LoadedExit->addIncoming(V: LoadedTryStore, BB: SuccessBB);
1485	LoadedExit->addIncoming(V: LoadedFailure, BB: FailureBB);
1486	PHINode *Success = Builder.CreatePHI(Ty: Type::getInt1Ty(C&: Ctx), NumReservedValues: `2`, Name: "success");
1487	Success->addIncoming(V: ConstantInt::getTrue(Context&: Ctx), BB: SuccessBB);
1488	Success->addIncoming(V: ConstantInt::getFalse(Context&: Ctx), BB: FailureBB);
1489
1490	// This is the "exit value" from the cmpxchg expansion. It may be of
1491	// a type wider than the one in the cmpxchg instruction.
1492	Value *LoadedFull = LoadedExit;
1493
1494	Builder.SetInsertPoint(TheBB: ExitBB, IP: std::next(x: Success->getIterator()));
1495	Value *Loaded = extractMaskedValue(Builder, WideWord: LoadedFull, PMV);
1496
1497	// Look for any users of the cmpxchg that are just comparing the loaded value
1498	// against the desired one, and replace them with the CFG-derived version.
1499	SmallVector<ExtractValueInst *, `2`> PrunedInsts;
1500	for (auto *User : CI->users()) {
1501	ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Val: User);
1502	if (!EV)
1503	continue;
1504
1505	assert(EV->getNumIndices() == `1` && EV->getIndices()[`0`] <= `1` &&
1506	"weird extraction from { iN, i1 }");
1507
1508	if (EV->getIndices()[`0`] == `0`)
1509	EV->replaceAllUsesWith(V: Loaded);
1510	else
1511	EV->replaceAllUsesWith(V: Success);
1512
1513	PrunedInsts.push_back(Elt: EV);
1514	}
1515
1516	// We can remove the instructions now we're no longer iterating through them.
1517	for (auto *EV : PrunedInsts)
1518	EV->eraseFromParent();
1519
1520	if (!CI->use_empty()) {
1521	// Some use of the full struct return that we don't understand has happened,
1522	// so we've got to reconstruct it properly.
1523	Value *Res;
1524	Res = Builder.CreateInsertValue(Agg: PoisonValue::get(T: CI->getType()), Val: Loaded, Idxs: `0`);
1525	Res = Builder.CreateInsertValue(Agg: Res, Val: Success, Idxs: `1`);
1526
1527	CI->replaceAllUsesWith(V: Res);
1528	}
1529
1530	CI->eraseFromParent();
1531	return true;
1532	}
1533
1534	bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1535	auto C = dyn_cast<ConstantInt>(Val: RMWI->getValOperand());
1536	if (!C)
1537	return false;
1538
1539	AtomicRMWInst::BinOp Op = RMWI->getOperation();
1540	switch (Op) {
1541	case AtomicRMWInst::Add:
1542	case AtomicRMWInst::Sub:
1543	case AtomicRMWInst::Or:
1544	case AtomicRMWInst::Xor:
1545	return C->isZero();
1546	case AtomicRMWInst::And:
1547	return C->isMinusOne();
1548	// FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1549	default:
1550	return false;
1551	}
1552	}
1553
1554	bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1555	if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1556	tryExpandAtomicLoad(LI: ResultingLoad);
1557	return true;
1558	}
1559	return false;
1560	}
1561
1562	Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1563	IRBuilderBase &Builder, Type ResultTy, Value Addr, Align AddrAlign,
1564	AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1565	function_ref<Value (IRBuilderBase &, Value )> PerformOp,
1566	CreateCmpXchgInstFun CreateCmpXchg) {
1567	LLVMContext &Ctx = Builder.getContext();
1568	BasicBlock *BB = Builder.GetInsertBlock();
1569	Function *F = BB->getParent();
1570
1571	// Given: atomicrmw some_op iN %addr, iN %incr ordering*
1572	//
1573	// The standard expansion we produce is:
1574	// [...]
1575	// %init_loaded = load atomic iN %addr*
1576	// br label %loop
1577	// loop:
1578	// %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1579	// %new = some_op iN %loaded, %incr
1580	// %pair = cmpxchg iN %addr, iN %loaded, iN %new*
1581	// %new_loaded = extractvalue { iN, i1 } %pair, 0
1582	// %success = extractvalue { iN, i1 } %pair, 1
1583	// br i1 %success, label %atomicrmw.end, label %loop
1584	// atomicrmw.end:
1585	// [...]
1586	BasicBlock *ExitBB =
1587	BB->splitBasicBlock(I: Builder.GetInsertPoint(), BBName: "atomicrmw.end");
1588	BasicBlock *LoopBB = BasicBlock::Create(Context&: Ctx, Name: "atomicrmw.start", Parent: F, InsertBefore: ExitBB);
1589
1590	// The split call above "helpfully" added a branch at the end of BB (to the
1591	// wrong place), but we want a load. It's easiest to just remove
1592	// the branch entirely.
1593	std::prev(x: BB->end())->eraseFromParent();
1594	Builder.SetInsertPoint(BB);
1595	LoadInst *InitLoaded = Builder.CreateAlignedLoad(Ty: ResultTy, Ptr: Addr, Align: AddrAlign);
1596	Builder.CreateBr(Dest: LoopBB);
1597
1598	// Start the main loop block now that we've taken care of the preliminaries.
1599	Builder.SetInsertPoint(LoopBB);
1600	PHINode *Loaded = Builder.CreatePHI(Ty: ResultTy, NumReservedValues: `2`, Name: "loaded");
1601	Loaded->addIncoming(V: InitLoaded, BB);
1602
1603	Value *NewVal = PerformOp (Builder, Loaded);
1604
1605	Value NewLoaded = nullptr*;
1606	Value Success = nullptr*;
1607
1608	CreateCmpXchg (Builder, Addr, Loaded, NewVal, AddrAlign,
1609	MemOpOrder == AtomicOrdering::Unordered
1610	? AtomicOrdering::Monotonic
1611	: MemOpOrder,
1612	SSID, Success, NewLoaded);
1613	assert(Success && NewLoaded);
1614
1615	Loaded->addIncoming(V: NewLoaded, BB: LoopBB);
1616
1617	Builder.CreateCondBr(Cond: Success, True: ExitBB, False: LoopBB);
1618
1619	Builder.SetInsertPoint(TheBB: ExitBB, IP: ExitBB->begin());
1620	return NewLoaded;
1621	}
1622
1623	bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1624	unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / `8`;
1625	unsigned ValueSize = getAtomicOpSize(CASI: CI);
1626
1627	switch (TLI->shouldExpandAtomicCmpXchgInIR(AI: CI)) {
1628	default:
1629	llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1630	case TargetLoweringBase::AtomicExpansionKind::None:
1631	if (ValueSize < MinCASSize)
1632	return expandPartwordCmpXchg(CI);
1633	return false;
1634	case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1635	return expandAtomicCmpXchg(CI);
1636	}
1637	case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1638	expandAtomicCmpXchgToMaskedIntrinsic(CI);
1639	return true;
1640	case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1641	return lowerAtomicCmpXchgInst(CXI: CI);
1642	}
1643	}
1644
1645	// Note: This function is exposed externally by AtomicExpandUtils.h
1646	bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1647	CreateCmpXchgInstFun CreateCmpXchg) {
1648	ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1649	Builder.setIsFPConstrained(
1650	AI->getFunction()->hasFnAttribute(Kind: Attribute::StrictFP));
1651
1652	// FIXME: If FP exceptions are observable, we should force them off for the
1653	// loop for the FP atomics.
1654	Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1655	Builder, ResultTy: AI->getType(), Addr: AI->getPointerOperand(), AddrAlign: AI->getAlign(),
1656	MemOpOrder: AI->getOrdering(), SSID: AI->getSyncScopeID(),
1657	PerformOp: [&](IRBuilderBase &Builder, Value *Loaded) {
1658	return buildAtomicRMWValue(Op: AI->getOperation(), Builder, Loaded,
1659	Val: AI->getValOperand());
1660	},
1661	CreateCmpXchg);
1662
1663	AI->replaceAllUsesWith(V: Loaded);
1664	AI->eraseFromParent();
1665	return true;
1666	}
1667
1668	// In order to use one of the sized library calls such as
1669	// __atomic_fetch_add_4, the alignment must be sufficient, the size
1670	// must be one of the potentially-specialized sizes, and the value
1671	// type must actually exist in C on the target (otherwise, the
1672	// function wouldn't actually be defined.)
1673	static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1674	const DataLayout &DL) {
1675	// TODO: "LargestSize" is an approximation for "largest type that
1676	// you can express in C". It seems to be the case that int128 is
1677	// supported on all 64-bit platforms, otherwise only up to 64-bit
1678	// integers are supported. If we get this wrong, then we'll try to
1679	// call a sized libcall that doesn't actually exist. There should
1680	// really be some more reliable way in LLVM of determining integer
1681	// sizes which are valid in the target's C ABI...
1682	unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= `64` ? `16` : `8`;
1683	return Alignment >= Size &&
1684	(Size == `1` \|\| Size == `2` \|\| Size == `4` \|\| Size == `8` \|\| Size == `16`) &&
1685	Size <= LargestSize;
1686	}
1687
1688	void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1689	static const RTLIB::Libcall Libcalls[`6`] = {
1690	RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1691	RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1692	unsigned Size = getAtomicOpSize(LI: I);
1693
1694	bool expanded = expandAtomicOpToLibcall(
1695	I, Size, Alignment: I->getAlign(), PointerOperand: I->getPointerOperand(), ValueOperand: nullptr, CASExpected: nullptr,
1696	Ordering: I->getOrdering(), Ordering2: AtomicOrdering::NotAtomic, Libcalls);
1697	if (!expanded)
1698	report_fatal_error(reason: "expandAtomicOpToLibcall shouldn't fail for Load");
1699	}
1700
1701	void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1702	static const RTLIB::Libcall Libcalls[`6`] = {
1703	RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1704	RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1705	unsigned Size = getAtomicOpSize(SI: I);
1706
1707	bool expanded = expandAtomicOpToLibcall(
1708	I, Size, Alignment: I->getAlign(), PointerOperand: I->getPointerOperand(), ValueOperand: I->getValueOperand(),
1709	CASExpected: nullptr, Ordering: I->getOrdering(), Ordering2: AtomicOrdering::NotAtomic, Libcalls);
1710	if (!expanded)
1711	report_fatal_error(reason: "expandAtomicOpToLibcall shouldn't fail for Store");
1712	}
1713
1714	void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1715	static const RTLIB::Libcall Libcalls[`6`] = {
1716	RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1717	RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1718	RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1719	unsigned Size = getAtomicOpSize(CASI: I);
1720
1721	bool expanded = expandAtomicOpToLibcall(
1722	I, Size, Alignment: I->getAlign(), PointerOperand: I->getPointerOperand(), ValueOperand: I->getNewValOperand(),
1723	CASExpected: I->getCompareOperand(), Ordering: I->getSuccessOrdering(), Ordering2: I->getFailureOrdering(),
1724	Libcalls);
1725	if (!expanded)
1726	report_fatal_error(reason: "expandAtomicOpToLibcall shouldn't fail for CAS");
1727	}
1728
1729	static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1730	static const RTLIB::Libcall LibcallsXchg[`6`] = {
1731	RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1732	RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1733	RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1734	static const RTLIB::Libcall LibcallsAdd[`6`] = {
1735	RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1736	RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1737	RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1738	static const RTLIB::Libcall LibcallsSub[`6`] = {
1739	RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1740	RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1741	RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1742	static const RTLIB::Libcall LibcallsAnd[`6`] = {
1743	RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1744	RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1745	RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1746	static const RTLIB::Libcall LibcallsOr[`6`] = {
1747	RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1748	RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1749	RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1750	static const RTLIB::Libcall LibcallsXor[`6`] = {
1751	RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1752	RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1753	RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1754	static const RTLIB::Libcall LibcallsNand[`6`] = {
1755	RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1756	RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1757	RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1758
1759	switch (Op) {
1760	case AtomicRMWInst::BAD_BINOP:
1761	llvm_unreachable("Should not have BAD_BINOP.");
1762	case AtomicRMWInst::Xchg:
1763	return ArrayRef(LibcallsXchg);
1764	case AtomicRMWInst::Add:
1765	return ArrayRef(LibcallsAdd);
1766	case AtomicRMWInst::Sub:
1767	return ArrayRef(LibcallsSub);
1768	case AtomicRMWInst::And:
1769	return ArrayRef(LibcallsAnd);
1770	case AtomicRMWInst::Or:
1771	return ArrayRef(LibcallsOr);
1772	case AtomicRMWInst::Xor:
1773	return ArrayRef(LibcallsXor);
1774	case AtomicRMWInst::Nand:
1775	return ArrayRef(LibcallsNand);
1776	case AtomicRMWInst::Max:
1777	case AtomicRMWInst::Min:
1778	case AtomicRMWInst::UMax:
1779	case AtomicRMWInst::UMin:
1780	case AtomicRMWInst::FMax:
1781	case AtomicRMWInst::FMin:
1782	case AtomicRMWInst::FAdd:
1783	case AtomicRMWInst::FSub:
1784	case AtomicRMWInst::UIncWrap:
1785	case AtomicRMWInst::UDecWrap:
1786	// No atomic libcalls are available for max/min/umax/umin.
1787	return {};
1788	}
1789	llvm_unreachable("Unexpected AtomicRMW operation.");
1790	}
1791
1792	void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1793	ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(Op: I->getOperation());
1794
1795	unsigned Size = getAtomicOpSize(RMWI: I);
1796
1797	bool Success = false;
1798	if (!Libcalls.empty())
1799	Success = expandAtomicOpToLibcall(
1800	I, Size, Alignment: I->getAlign(), PointerOperand: I->getPointerOperand(), ValueOperand: I->getValOperand(),
1801	CASExpected: nullptr, Ordering: I->getOrdering(), Ordering2: AtomicOrdering::NotAtomic, Libcalls);
1802
1803	// The expansion failed: either there were no libcalls at all for
1804	// the operation (min/max), or there were only size-specialized
1805	// libcalls (add/sub/etc) and we needed a generic. So, expand to a
1806	// CAS libcall, via a CAS loop, instead.
1807	if (!Success) {
1808	expandAtomicRMWToCmpXchg(
1809	AI: I, CreateCmpXchg: [this](IRBuilderBase &Builder, Value Addr, Value Loaded,
1810	Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1811	SyncScope::ID SSID, Value &Success, Value &NewLoaded) {
1812	// Create the CAS instruction normally...
1813	AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1814	Ptr: Addr, Cmp: Loaded, New: NewVal, Align: Alignment, SuccessOrdering: MemOpOrder,
1815	FailureOrdering: AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: MemOpOrder), SSID);
1816	Success = Builder.CreateExtractValue(Agg: Pair, Idxs: `1`, Name: "success");
1817	NewLoaded = Builder.CreateExtractValue(Agg: Pair, Idxs: `0`, Name: "newloaded");
1818
1819	// ...and then expand the CAS into a libcall.
1820	expandAtomicCASToLibcall(I: Pair);
1821	});
1822	}
1823	}
1824
1825	// A helper routine for the above expandAtomicToLibcall functions.*
1826	//
1827	// 'Libcalls' contains an array of enum values for the particular
1828	// ATOMIC libcalls to be emitted. All of the other arguments besides
1829	// 'I' are extracted from the Instruction subclass by the
1830	// caller. Depending on the particular call, some will be null.
1831	bool AtomicExpandImpl::expandAtomicOpToLibcall(
1832	Instruction I, unsigned* Size, Align Alignment, Value *PointerOperand,
1833	Value ValueOperand, Value CASExpected, AtomicOrdering Ordering,
1834	AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1835	assert(Libcalls.size() == `6`);
1836
1837	LLVMContext &Ctx = I->getContext();
1838	Module *M = I->getModule();
1839	const DataLayout &DL = M->getDataLayout();
1840	IRBuilder<> Builder(I);
1841	IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1842
1843	bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1844	Type SizedIntTy = Type::getIntNTy(C&: Ctx, N: Size `8`);
1845
1846	const Align AllocaAlignment = DL.getPrefTypeAlign(Ty: SizedIntTy);
1847
1848	// TODO: the "order" argument type is "int", not int32. So
1849	// getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1850	ConstantInt *SizeVal64 = ConstantInt::get(Ty: Type::getInt64Ty(C&: Ctx), V: Size);
1851	assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1852	Constant *OrderingVal =
1853	ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx), V: (int)toCABI(AO: Ordering));
1854	Constant Ordering2Val = nullptr*;
1855	if (CASExpected) {
1856	assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1857	Ordering2Val =
1858	ConstantInt::get(Ty: Type::getInt32Ty(C&: Ctx), V: (int)toCABI(AO: Ordering2));
1859	}
1860	bool HasResult = I->getType() != Type::getVoidTy(C&: Ctx);
1861
1862	RTLIB::Libcall RTLibType;
1863	if (UseSizedLibcall) {
1864	switch (Size) {
1865	case `1`:
1866	RTLibType = Libcalls [`1`];
1867	break;
1868	case `2`:
1869	RTLibType = Libcalls [`2`];
1870	break;
1871	case `4`:
1872	RTLibType = Libcalls [`3`];
1873	break;
1874	case `8`:
1875	RTLibType = Libcalls [`4`];
1876	break;
1877	case `16`:
1878	RTLibType = Libcalls [`5`];
1879	break;
1880	}
1881	} else if (Libcalls [`0`] != RTLIB::UNKNOWN_LIBCALL) {
1882	RTLibType = Libcalls [`0`];
1883	} else {
1884	// Can't use sized function, and there's no generic for this
1885	// operation, so give up.
1886	return false;
1887	}
1888
1889	if (!TLI->getLibcallName(Call: RTLibType)) {
1890	// This target does not implement the requested atomic libcall so give up.
1891	return false;
1892	}
1893
1894	// Build up the function call. There's two kinds. First, the sized
1895	// variants. These calls are going to be one of the following (with
1896	// N=1,2,4,8,16):
1897	// iN __atomic_load_N(iN ptr, int ordering)*
1898	// void __atomic_store_N(iN ptr, iN val, int ordering)*
1899	// iN __atomic_{exchange\|fetch_}_N(iN ptr, iN val, int ordering)
1900	// bool __atomic_compare_exchange_N(iN ptr, iN expected, iN desired,
1901	// int success_order, int failure_order)
1902	//
1903	// Note that these functions can be used for non-integer atomic
1904	// operations, the values just need to be bitcast to integers on the
1905	// way in and out.
1906	//
1907	// And, then, the generic variants. They look like the following:
1908	// void __atomic_load(size_t size, void ptr, void ret, int ordering)
1909	// void __atomic_store(size_t size, void ptr, void val, int ordering)
1910	// void __atomic_exchange(size_t size, void ptr, void val, void ret,*
1911	// int ordering)
1912	// bool __atomic_compare_exchange(size_t size, void ptr, void expected,
1913	// void desired, int success_order,*
1914	// int failure_order)
1915	//
1916	// The different signatures are built up depending on the
1917	// 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1918	// variables.
1919
1920	AllocaInst AllocaCASExpected = nullptr*;
1921	AllocaInst AllocaValue = nullptr*;
1922	AllocaInst AllocaResult = nullptr*;
1923
1924	Type *ResultTy;
1925	SmallVector<Value *, `6`> Args;
1926	AttributeList Attr;
1927
1928	// 'size' argument.
1929	if (!UseSizedLibcall) {
1930	// Note, getIntPtrType is assumed equivalent to size_t.
1931	Args.push_back(Elt: ConstantInt::get(Ty: DL.getIntPtrType(C&: Ctx), V: Size));
1932	}
1933
1934	// 'ptr' argument.
1935	// note: This assumes all address spaces share a common libfunc
1936	// implementation and that addresses are convertable. For systems without
1937	// that property, we'd need to extend this mechanism to support AS-specific
1938	// families of atomic intrinsics.
1939	Value *PtrVal = PointerOperand;
1940	PtrVal = Builder.CreateAddrSpaceCast(V: PtrVal, DestTy: PointerType::getUnqual(C&: Ctx));
1941	Args.push_back(Elt: PtrVal);
1942
1943	// 'expected' argument, if present.
1944	if (CASExpected) {
1945	AllocaCASExpected = AllocaBuilder.CreateAlloca(Ty: CASExpected->getType());
1946	AllocaCASExpected->setAlignment(AllocaAlignment);
1947	Builder.CreateLifetimeStart(Ptr: AllocaCASExpected, Size: SizeVal64);
1948	Builder.CreateAlignedStore(Val: CASExpected, Ptr: AllocaCASExpected, Align: AllocaAlignment);
1949	Args.push_back(Elt: AllocaCASExpected);
1950	}
1951
1952	// 'val' argument ('desired' for cas), if present.
1953	if (ValueOperand) {
1954	if (UseSizedLibcall) {
1955	Value *IntValue =
1956	Builder.CreateBitOrPointerCast(V: ValueOperand, DestTy: SizedIntTy);
1957	Args.push_back(Elt: IntValue);
1958	} else {
1959	AllocaValue = AllocaBuilder.CreateAlloca(Ty: ValueOperand->getType());
1960	AllocaValue->setAlignment(AllocaAlignment);
1961	Builder.CreateLifetimeStart(Ptr: AllocaValue, Size: SizeVal64);
1962	Builder.CreateAlignedStore(Val: ValueOperand, Ptr: AllocaValue, Align: AllocaAlignment);
1963	Args.push_back(Elt: AllocaValue);
1964	}
1965	}
1966
1967	// 'ret' argument.
1968	if (!CASExpected && HasResult && !UseSizedLibcall) {
1969	AllocaResult = AllocaBuilder.CreateAlloca(Ty: I->getType());
1970	AllocaResult->setAlignment(AllocaAlignment);
1971	Builder.CreateLifetimeStart(Ptr: AllocaResult, Size: SizeVal64);
1972	Args.push_back(Elt: AllocaResult);
1973	}
1974
1975	// 'ordering' ('success_order' for cas) argument.
1976	Args.push_back(Elt: OrderingVal);
1977
1978	// 'failure_order' argument, if present.
1979	if (Ordering2Val)
1980	Args.push_back(Elt: Ordering2Val);
1981
1982	// Now, the return type.
1983	if (CASExpected) {
1984	ResultTy = Type::getInt1Ty(C&: Ctx);
1985	Attr = Attr.addRetAttribute(C&: Ctx, Kind: Attribute::ZExt);
1986	} else if (HasResult && UseSizedLibcall)
1987	ResultTy = SizedIntTy;
1988	else
1989	ResultTy = Type::getVoidTy(C&: Ctx);
1990
1991	// Done with setting up arguments and return types, create the call:
1992	SmallVector<Type *, `6`> ArgTys;
1993	for (Value *Arg : Args)
1994	ArgTys.push_back(Elt: Arg->getType());
1995	FunctionType FnType = FunctionType::get(Result: ResultTy, Params: ArgTys, isVarArg: false*);
1996	FunctionCallee LibcallFn =
1997	M->getOrInsertFunction(Name: TLI->getLibcallName(Call: RTLibType), T: FnType, AttributeList: Attr);
1998	CallInst *Call = Builder.CreateCall(Callee: LibcallFn, Args);
1999	Call->setAttributes(Attr);
2000	Value *Result = Call;
2001
2002	// And then, extract the results...
2003	if (ValueOperand && !UseSizedLibcall)
2004	Builder.CreateLifetimeEnd(Ptr: AllocaValue, Size: SizeVal64);
2005
2006	if (CASExpected) {
2007	// The final result from the CAS is {load of 'expected' alloca, bool result
2008	// from call}
2009	Type *FinalResultTy = I->getType();
2010	Value *V = PoisonValue::get(T: FinalResultTy);
2011	Value *ExpectedOut = Builder.CreateAlignedLoad(
2012	Ty: CASExpected->getType(), Ptr: AllocaCASExpected, Align: AllocaAlignment);
2013	Builder.CreateLifetimeEnd(Ptr: AllocaCASExpected, Size: SizeVal64);
2014	V = Builder.CreateInsertValue(Agg: V, Val: ExpectedOut, Idxs: `0`);
2015	V = Builder.CreateInsertValue(Agg: V, Val: Result, Idxs: `1`);
2016	I->replaceAllUsesWith(V);
2017	} else if (HasResult) {
2018	Value *V;
2019	if (UseSizedLibcall)
2020	V = Builder.CreateBitOrPointerCast(V: Result, DestTy: I->getType());
2021	else {
2022	V = Builder.CreateAlignedLoad(Ty: I->getType(), Ptr: AllocaResult,
2023	Align: AllocaAlignment);
2024	Builder.CreateLifetimeEnd(Ptr: AllocaResult, Size: SizeVal64);
2025	}
2026	I->replaceAllUsesWith(V);
2027	}
2028	I->eraseFromParent();
2029	return true;
2030	}
2031

Browse the source code of llvm_projects/llvm/lib/CodeGen/AtomicExpandPass.cpp