IRTranslator.cpp source code [llvm_projects/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp]

1	//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---- C++ --==//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	/// \file
9	/// This file implements the IRTranslator class.
10	//===----------------------------------------------------------------------===//
11
12	#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
13	#include "llvm/ADT/PostOrderIterator.h"
14	#include "llvm/ADT/STLExtras.h"
15	#include "llvm/ADT/ScopeExit.h"
16	#include "llvm/ADT/SmallVector.h"
17	#include "llvm/Analysis/AliasAnalysis.h"
18	#include "llvm/Analysis/AssumptionCache.h"
19	#include "llvm/Analysis/BranchProbabilityInfo.h"
20	#include "llvm/Analysis/Loads.h"
21	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
22	#include "llvm/Analysis/ValueTracking.h"
23	#include "llvm/Analysis/VectorUtils.h"
24	#include "llvm/CodeGen/Analysis.h"
25	#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
26	#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
27	#include "llvm/CodeGen/GlobalISel/CallLowering.h"
28	#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
29	#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
30	#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
31	#include "llvm/CodeGen/LowLevelTypeUtils.h"
32	#include "llvm/CodeGen/MachineBasicBlock.h"
33	#include "llvm/CodeGen/MachineFrameInfo.h"
34	#include "llvm/CodeGen/MachineFunction.h"
35	#include "llvm/CodeGen/MachineInstrBuilder.h"
36	#include "llvm/CodeGen/MachineMemOperand.h"
37	#include "llvm/CodeGen/MachineModuleInfo.h"
38	#include "llvm/CodeGen/MachineOperand.h"
39	#include "llvm/CodeGen/MachineRegisterInfo.h"
40	#include "llvm/CodeGen/StackProtector.h"
41	#include "llvm/CodeGen/SwitchLoweringUtils.h"
42	#include "llvm/CodeGen/TargetFrameLowering.h"
43	#include "llvm/CodeGen/TargetInstrInfo.h"
44	#include "llvm/CodeGen/TargetLowering.h"
45	#include "llvm/CodeGen/TargetOpcodes.h"
46	#include "llvm/CodeGen/TargetPassConfig.h"
47	#include "llvm/CodeGen/TargetRegisterInfo.h"
48	#include "llvm/CodeGen/TargetSubtargetInfo.h"
49	#include "llvm/CodeGenTypes/LowLevelType.h"
50	#include "llvm/IR/BasicBlock.h"
51	#include "llvm/IR/CFG.h"
52	#include "llvm/IR/Constant.h"
53	#include "llvm/IR/Constants.h"
54	#include "llvm/IR/DataLayout.h"
55	#include "llvm/IR/DerivedTypes.h"
56	#include "llvm/IR/DiagnosticInfo.h"
57	#include "llvm/IR/Function.h"
58	#include "llvm/IR/GetElementPtrTypeIterator.h"
59	#include "llvm/IR/InlineAsm.h"
60	#include "llvm/IR/InstrTypes.h"
61	#include "llvm/IR/Instructions.h"
62	#include "llvm/IR/IntrinsicInst.h"
63	#include "llvm/IR/Intrinsics.h"
64	#include "llvm/IR/IntrinsicsAMDGPU.h"
65	#include "llvm/IR/LLVMContext.h"
66	#include "llvm/IR/Metadata.h"
67	#include "llvm/IR/PatternMatch.h"
68	#include "llvm/IR/Statepoint.h"
69	#include "llvm/IR/Type.h"
70	#include "llvm/IR/User.h"
71	#include "llvm/IR/Value.h"
72	#include "llvm/InitializePasses.h"
73	#include "llvm/MC/MCContext.h"
74	#include "llvm/Pass.h"
75	#include "llvm/Support/Casting.h"
76	#include "llvm/Support/CodeGen.h"
77	#include "llvm/Support/Debug.h"
78	#include "llvm/Support/ErrorHandling.h"
79	#include "llvm/Support/MathExtras.h"
80	#include "llvm/Support/raw_ostream.h"
81	#include "llvm/Target/TargetMachine.h"
82	#include "llvm/Transforms/Utils/Local.h"
83	#include "llvm/Transforms/Utils/MemoryOpRemark.h"
84	#include <algorithm>
85	#include <cassert>
86	#include <cstdint>
87	#include <iterator>
88	#include <optional>
89	#include <string>
90	#include <utility>
91	#include <vector>
92
93	#define DEBUG_TYPE "irtranslator"
94
95	using namespace llvm;
96
97	static cl::opt<bool>
98	EnableCSEInIRTranslator("enable-cse-in-irtranslator",
99	cl::desc ("Should enable CSE in irtranslator"),
100	cl::Optional, cl::init(Val: false));
101	char IRTranslator::ID = `0`;
102
103	INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
104	false, false)
105	INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
106	INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
107	INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
108	INITIALIZE_PASS_DEPENDENCY(StackProtector)
109	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
110	INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
111	false, false)
112
113	static void reportTranslationError(MachineFunction &MF,
114	OptimizationRemarkEmitter &ORE,
115	OptimizationRemarkMissed &R) {
116	MF.getProperties().setFailedISel();
117	bool IsGlobalISelAbortEnabled =
118	MF.getTarget().Options.GlobalISelAbort == GlobalISelAbortMode::Enable;
119
120	// Print the function name explicitly if we don't have a debug location (which
121	// makes the diagnostic less useful) or if we're going to emit a raw error.
122	if (!R.getLocation().isValid() \|\| IsGlobalISelAbortEnabled)
123	R << (" (in function: " + MF.getName() + ")").str();
124
125	if (IsGlobalISelAbortEnabled)
126	report_fatal_error(reason: Twine (R.getMsg()));
127	else
128	ORE.emit(OptDiag&: R);
129	}
130
131	IRTranslator::IRTranslator(CodeGenOptLevel optlevel)
132	: MachineFunctionPass (ID), OptLevel(optlevel) {}
133
134	#ifndef NDEBUG
135	namespace {
136	/// Verify that every instruction created has the same DILocation as the
137	/// instruction being translated.
138	class DILocationVerifier : public GISelChangeObserver {
139	const Instruction CurrInst = nullptr*;
140
141	public:
142	DILocationVerifier() = default;
143	~DILocationVerifier() override = default;
144
145	const Instruction getCurrentInst() const* { return CurrInst; }
146	void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; }
147
148	void erasingInstr(MachineInstr &MI) override {}
149	void changingInstr(MachineInstr &MI) override {}
150	void changedInstr(MachineInstr &MI) override {}
151
152	void createdInstr(MachineInstr &MI) override {
153	assert(getCurrentInst() && "Inserted instruction without a current MI");
154
155	// Only print the check message if we're actually checking it.
156	#ifndef NDEBUG
157	LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
158	<< " was copied to " << MI);
159	#endif
160	// We allow insts in the entry block to have no debug loc because
161	// they could have originated from constants, and we don't want a jumpy
162	// debug experience.
163	assert((CurrInst->getDebugLoc() == MI.getDebugLoc() \|\|
164	(MI.getParent()->isEntryBlock() && !MI.getDebugLoc()) \|\|
165	(MI.isDebugInstr())) &&
166	"Line info was not transferred to all instructions");
167	}
168	};
169	} // namespace
170	#endif // ifndef NDEBUG
171
172
173	void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
174	AU.addRequired<StackProtector>();
175	AU.addRequired<TargetPassConfig>();
176	AU.addRequired<GISelCSEAnalysisWrapperPass>();
177	AU.addRequired<AssumptionCacheTracker>();
178	if (OptLevel != CodeGenOptLevel::None) {
179	AU.addRequired<BranchProbabilityInfoWrapperPass>();
180	AU.addRequired<AAResultsWrapperPass>();
181	}
182	AU.addRequired<TargetLibraryInfoWrapperPass>();
183	AU.addPreserved<TargetLibraryInfoWrapperPass>();
184	AU.addRequired<LibcallLoweringInfoWrapper>();
185
186	getSelectionDAGFallbackAnalysisUsage(AU);
187	MachineFunctionPass::getAnalysisUsage(AU);
188	}
189
190	IRTranslator::ValueToVRegInfo::VRegListT &
191	IRTranslator::allocateVRegs(const Value &Val) {
192	auto VRegsIt = VMap.findVRegs(V: Val);
193	if (VRegsIt != VMap.vregs_end())
194	return *VRegsIt ->second;
195	auto *Regs = VMap.getVRegs(V: Val);
196	auto *Offsets = VMap.getOffsets(V: Val);
197	SmallVector<LLT, `4`> SplitTys;
198	computeValueLLTs(DL: DL, Ty&: Val.getType(), ValueLLTs&: SplitTys,
199	FixedOffsets: Offsets->empty() ? Offsets : nullptr);
200	for (unsigned i = `0`; i < SplitTys.size(); ++i)
201	Regs->push_back(Elt: `0`);
202	return *Regs;
203	}
204
205	ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
206	auto VRegsIt = VMap.findVRegs(V: Val);
207	if (VRegsIt != VMap.vregs_end())
208	return *VRegsIt ->second;
209
210	if (Val.getType()->isVoidTy())
211	return *VMap.getVRegs(V: Val);
212
213	// Create entry for this type.
214	auto *VRegs = VMap.getVRegs(V: Val);
215	auto *Offsets = VMap.getOffsets(V: Val);
216
217	if (!Val.getType()->isTokenTy())
218	assert(Val.getType()->isSized() &&
219	"Don't know how to create an empty vreg");
220
221	SmallVector<LLT, `4`> SplitTys;
222	computeValueLLTs(DL: DL, Ty&: Val.getType(), ValueLLTs&: SplitTys,
223	FixedOffsets: Offsets->empty() ? Offsets : nullptr);
224
225	if (!isa<Constant>(Val)) {
226	for (auto Ty : SplitTys)
227	VRegs->push_back(Elt: MRI->createGenericVirtualRegister(Ty));
228	return *VRegs;
229	}
230
231	if (Val.getType()->isAggregateType()) {
232	// UndefValue, ConstantAggregateZero
233	auto &C = cast<Constant>(Val);
234	unsigned Idx = `0`;
235	while (auto Elt = C.getAggregateElement(Elt: Idx++)) {
236	auto EltRegs = getOrCreateVRegs(Val: *Elt);
237	llvm::append_range(C&: *VRegs, R&: EltRegs);
238	}
239	} else {
240	assert(SplitTys.size() == `1` && "unexpectedly split LLT");
241	VRegs->push_back(Elt: MRI->createGenericVirtualRegister(Ty: SplitTys [`0`]));
242	bool Success = translate(C: cast<Constant>(Val), Reg: VRegs->front());
243	if (!Success) {
244	OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
245	MF->getFunction().getSubprogram(),
246	&MF->getFunction().getEntryBlock());
247	R << "unable to translate constant: " << ore::NV ("Type", Val.getType());
248	reportTranslationError(MF&: MF, ORE&: ORE, R);
249	return *VRegs;
250	}
251	}
252
253	return *VRegs;
254	}
255
256	int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
257	auto [MapEntry, Inserted] = FrameIndices.try_emplace(Key: &AI);
258	if (!Inserted)
259	return MapEntry ->second;
260
261	TypeSize TySize = AI.getAllocationSize(DL: *DL).value_or(u: TypeSize::getZero());
262	uint64_t Size = TySize.getKnownMinValue();
263
264	// Always allocate at least one byte.
265	Size = std::max<uint64_t>(a: Size, b: `1u`);
266
267	int &FI = MapEntry ->second;
268	FI = MF->getFrameInfo().CreateStackObject(Size, Alignment: AI.getAlign(), isSpillSlot: false, Alloca: &AI);
269
270	// Scalable vectors and structures that contain scalable vectors may
271	// need a special StackID to distinguish them from other (fixed size)
272	// stack objects.
273	if (TySize.isScalable()) {
274	auto StackID =
275	MF->getSubtarget().getFrameLowering()->getStackIDForScalableVectors();
276	MF->getFrameInfo().setStackID(ObjectIdx: FI, ID: StackID);
277	}
278
279	return FI;
280	}
281
282	Align IRTranslator::getMemOpAlign(const Instruction &I) {
283	if (const StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
284	return SI->getAlign();
285	if (const LoadInst *LI = dyn_cast<LoadInst>(Val: &I))
286	return LI->getAlign();
287	if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Val: &I))
288	return AI->getAlign();
289	if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Val: &I))
290	return AI->getAlign();
291
292	OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
293	R << "unable to translate memop: " << ore::NV ("Opcode", &I);
294	reportTranslationError(MF&: MF, ORE&: ORE, R);
295	return Align (`1`);
296	}
297
298	MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) {
299	MachineBasicBlock *MBB = FuncInfo.getMBB(BB: &BB);
300	assert(MBB && "BasicBlock was not encountered before");
301	return *MBB;
302	}
303
304	void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
305	assert(NewPred && "new predecessor must be a real MachineBasicBlock");
306	MachinePreds [Edge].push_back(Elt: NewPred);
307	}
308
309	static bool targetSupportsBF16Type(const MachineFunction *MF) {
310	return MF->getTarget().getTargetTriple().isSPIRV();
311	}
312
313	static bool containsBF16Type(const User &U) {
314	// BF16 cannot currently be represented by LLT, to avoid miscompiles we
315	// prevent any instructions using them. FIXME: This can be removed once LLT
316	// supports bfloat.
317	return U.getType()->getScalarType()->isBFloatTy() \|\|
318	any_of(Range: U.operands(), P: [](Value *V) {
319	return V->getType()->getScalarType()->isBFloatTy();
320	});
321	}
322
323	bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
324	MachineIRBuilder &MIRBuilder) {
325	if (containsBF16Type(U) && !targetSupportsBF16Type(MF))
326	return false;
327
328	// Get or create a virtual register for each value.
329	// Unless the value is a Constant => loadimm cst?
330	// or inline constant each time?
331	// Creation of a virtual register needs to have a size.
332	Register Op0 = getOrCreateVReg(Val: *U.getOperand(i: `0`));
333	Register Op1 = getOrCreateVReg(Val: *U.getOperand(i: `1`));
334	Register Res = getOrCreateVReg(Val: U);
335	uint32_t Flags = `0`;
336	if (isa<Instruction>(Val: U)) {
337	const Instruction &I = cast<Instruction>(Val: U);
338	Flags = MachineInstr::copyFlagsFromInstruction(I);
339	}
340
341	MIRBuilder.buildInstr(Opc: Opcode, DstOps: {Res}, SrcOps: {Op0, Op1}, Flags);
342	return true;
343	}
344
345	bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
346	MachineIRBuilder &MIRBuilder) {
347	if (containsBF16Type(U) && !targetSupportsBF16Type(MF))
348	return false;
349
350	Register Op0 = getOrCreateVReg(Val: *U.getOperand(i: `0`));
351	Register Res = getOrCreateVReg(Val: U);
352	uint32_t Flags = `0`;
353	if (isa<Instruction>(Val: U)) {
354	const Instruction &I = cast<Instruction>(Val: U);
355	Flags = MachineInstr::copyFlagsFromInstruction(I);
356	}
357	MIRBuilder.buildInstr(Opc: Opcode, DstOps: {Res}, SrcOps: {Op0}, Flags);
358	return true;
359	}
360
361	bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
362	return translateUnaryOp(Opcode: TargetOpcode::G_FNEG, U, MIRBuilder);
363	}
364
365	bool IRTranslator::translateCompare(const User &U,
366	MachineIRBuilder &MIRBuilder) {
367	if (containsBF16Type(U) && !targetSupportsBF16Type(MF))
368	return false;
369
370	auto *CI = cast<CmpInst>(Val: &U);
371	Register Op0 = getOrCreateVReg(Val: *U.getOperand(i: `0`));
372	Register Op1 = getOrCreateVReg(Val: *U.getOperand(i: `1`));
373	Register Res = getOrCreateVReg(Val: U);
374	CmpInst::Predicate Pred = CI->getPredicate();
375	uint32_t Flags = MachineInstr::copyFlagsFromInstruction(I: *CI);
376	if (CmpInst::isIntPredicate(P: Pred))
377	MIRBuilder.buildICmp(Pred, Res, Op0, Op1, Flags);
378	else if (Pred == CmpInst::FCMP_FALSE)
379	MIRBuilder.buildCopy(
380	Res, Op: getOrCreateVReg(Val: *Constant::getNullValue(Ty: U.getType())));
381	else if (Pred == CmpInst::FCMP_TRUE)
382	MIRBuilder.buildCopy(
383	Res, Op: getOrCreateVReg(Val: *Constant::getAllOnesValue(Ty: U.getType())));
384	else
385	MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags);
386
387	return true;
388	}
389
390	bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
391	const ReturnInst &RI = cast<ReturnInst>(Val: U);
392	const Value *Ret = RI.getReturnValue();
393	if (Ret && DL->getTypeStoreSize(Ty: Ret->getType()).isZero())
394	Ret = nullptr;
395
396	ArrayRef<Register> VRegs;
397	if (Ret)
398	VRegs = getOrCreateVRegs(Val: *Ret);
399
400	Register SwiftErrorVReg = `0`;
401	if (CLI->supportSwiftError() && SwiftError.getFunctionArg()) {
402	SwiftErrorVReg = SwiftError.getOrCreateVRegUseAt(
403	&RI, &MIRBuilder.getMBB(), SwiftError.getFunctionArg());
404	}
405
406	// The target may mess up with the insertion point, but
407	// this is not important as a return is the last instruction
408	// of the block anyway.
409	return CLI->lowerReturn(MIRBuilder, Val: Ret, VRegs, FLI&: FuncInfo, SwiftErrorVReg);
410	}
411
412	void IRTranslator::emitBranchForMergedCondition(
413	const Value Cond, MachineBasicBlock TBB, MachineBasicBlock *FBB,
414	MachineBasicBlock CurBB, MachineBasicBlock SwitchBB,
415	BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
416	// If the leaf of the tree is a comparison, merge the condition into
417	// the caseblock.
418	if (const CmpInst *BOp = dyn_cast<CmpInst>(Val: Cond)) {
419	CmpInst::Predicate Condition;
420	if (const ICmpInst *IC = dyn_cast<ICmpInst>(Val: Cond)) {
421	Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
422	} else {
423	const FCmpInst *FC = cast<FCmpInst>(Val: Cond);
424	Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
425	}
426
427	SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(i_nocapture: `0`),
428	BOp->getOperand(i_nocapture: `1`), nullptr, TBB, FBB, CurBB,
429	CurBuilder ->getDebugLoc(), TProb, FProb);
430	SL ->SwitchCases.push_back(x: CB);
431	return;
432	}
433
434	// Create a CaseBlock record representing this branch.
435	CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
436	SwitchCG::CaseBlock CB(
437	Pred, false, Cond, ConstantInt::getTrue(Context&: MF->getFunction().getContext()),
438	nullptr, TBB, FBB, CurBB, CurBuilder ->getDebugLoc(), TProb, FProb);
439	SL ->SwitchCases.push_back(x: CB);
440	}
441
442	static bool isValInBlock(const Value V, const* BasicBlock *BB) {
443	if (const Instruction *I = dyn_cast<Instruction>(Val: V))
444	return I->getParent() == BB;
445	return true;
446	}
447
448	void IRTranslator::findMergedConditions(
449	const Value Cond, MachineBasicBlock TBB, MachineBasicBlock *FBB,
450	MachineBasicBlock CurBB, MachineBasicBlock SwitchBB,
451	Instruction::BinaryOps Opc, BranchProbability TProb,
452	BranchProbability FProb, bool InvertCond) {
453	using namespace PatternMatch;
454	assert((Opc == Instruction::And \|\| Opc == Instruction::Or) &&
455	"Expected Opc to be AND/OR");
456	// Skip over not part of the tree and remember to invert op and operands at
457	// next level.
458	Value *NotCond;
459	if (match(V: Cond, P: m_OneUse(SubPattern: m_Not(V: m_Value(V&: NotCond)))) &&
460	isValInBlock(V: NotCond, BB: CurBB->getBasicBlock())) {
461	findMergedConditions(Cond: NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
462	InvertCond: !InvertCond);
463	return;
464	}
465
466	const Instruction *BOp = dyn_cast<Instruction>(Val: Cond);
467	const Value BOpOp0, BOpOp1;
468	// Compute the effective opcode for Cond, taking into account whether it needs
469	// to be inverted, e.g.
470	// and (not (or A, B)), C
471	// gets lowered as
472	// and (and (not A, not B), C)
473	Instruction::BinaryOps BOpc = (Instruction::BinaryOps)`0`;
474	if (BOp) {
475	BOpc = match(V: BOp, P: m_LogicalAnd(L: m_Value(V&: BOpOp0), R: m_Value(V&: BOpOp1)))
476	? Instruction::And
477	: (match(V: BOp, P: m_LogicalOr(L: m_Value(V&: BOpOp0), R: m_Value(V&: BOpOp1)))
478	? Instruction::Or
479	: (Instruction::BinaryOps)`0`);
480	if (InvertCond) {
481	if (BOpc == Instruction::And)
482	BOpc = Instruction::Or;
483	else if (BOpc == Instruction::Or)
484	BOpc = Instruction::And;
485	}
486	}
487
488	// If this node is not part of the or/and tree, emit it as a branch.
489	// Note that all nodes in the tree should have same opcode.
490	bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
491	if (!BOpIsInOrAndTree \|\| BOp->getParent() != CurBB->getBasicBlock() \|\|
492	!isValInBlock(V: BOpOp0, BB: CurBB->getBasicBlock()) \|\|
493	!isValInBlock(V: BOpOp1, BB: CurBB->getBasicBlock())) {
494	emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
495	InvertCond);
496	return;
497	}
498
499	// Create TmpBB after CurBB.
500	MachineFunction::iterator BBI(CurBB);
501	MachineBasicBlock *TmpBB =
502	MF->CreateMachineBasicBlock(BB: CurBB->getBasicBlock());
503	CurBB->getParent()->insert(MBBI: ++BBI, MBB: TmpBB);
504
505	if (Opc == Instruction::Or) {
506	// Codegen X \| Y as:
507	// BB1:
508	// jmp_if_X TBB
509	// jmp TmpBB
510	// TmpBB:
511	// jmp_if_Y TBB
512	// jmp FBB
513	//
514
515	// We have flexibility in setting Prob for BB1 and Prob for TmpBB.
516	// The requirement is that
517	// TrueProb for BB1 + (FalseProb for BB1 TrueProb for TmpBB)*
518	// = TrueProb for original BB.
519	// Assuming the original probabilities are A and B, one choice is to set
520	// BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
521	// A/(1+B) and 2B/(1+B). This choice assumes that
522	// TrueProb for BB1 == FalseProb for BB1 TrueProb for TmpBB.*
523	// Another choice is to assume TrueProb for BB1 equals to TrueProb for
524	// TmpBB, but the math is more complicated.
525
526	auto NewTrueProb = TProb / `2`;
527	auto NewFalseProb = TProb / `2` + FProb;
528	// Emit the LHS condition.
529	findMergedConditions(Cond: BOpOp0, TBB, FBB: TmpBB, CurBB, SwitchBB, Opc, TProb: NewTrueProb,
530	FProb: NewFalseProb, InvertCond);
531
532	// Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
533	SmallVector<BranchProbability, `2`> Probs{TProb / `2`, FProb};
534	BranchProbability::normalizeProbabilities(Begin: Probs.begin(), End: Probs.end());
535	// Emit the RHS condition into TmpBB.
536	findMergedConditions(Cond: BOpOp1, TBB, FBB, CurBB: TmpBB, SwitchBB, Opc, TProb: Probs [`0`],
537	FProb: Probs [`1`], InvertCond);
538	} else {
539	assert(Opc == Instruction::And && "Unknown merge op!");
540	// Codegen X & Y as:
541	// BB1:
542	// jmp_if_X TmpBB
543	// jmp FBB
544	// TmpBB:
545	// jmp_if_Y TBB
546	// jmp FBB
547	//
548	// This requires creation of TmpBB after CurBB.
549
550	// We have flexibility in setting Prob for BB1 and Prob for TmpBB.
551	// The requirement is that
552	// FalseProb for BB1 + (TrueProb for BB1 FalseProb for TmpBB)*
553	// = FalseProb for original BB.
554	// Assuming the original probabilities are A and B, one choice is to set
555	// BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
556	// 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
557	// TrueProb for BB1 FalseProb for TmpBB.*
558
559	auto NewTrueProb = TProb + FProb / `2`;
560	auto NewFalseProb = FProb / `2`;
561	// Emit the LHS condition.
562	findMergedConditions(Cond: BOpOp0, TBB: TmpBB, FBB, CurBB, SwitchBB, Opc, TProb: NewTrueProb,
563	FProb: NewFalseProb, InvertCond);
564
565	// Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
566	SmallVector<BranchProbability, `2`> Probs{TProb, FProb / `2`};
567	BranchProbability::normalizeProbabilities(Begin: Probs.begin(), End: Probs.end());
568	// Emit the RHS condition into TmpBB.
569	findMergedConditions(Cond: BOpOp1, TBB, FBB, CurBB: TmpBB, SwitchBB, Opc, TProb: Probs [`0`],
570	FProb: Probs [`1`], InvertCond);
571	}
572	}
573
574	bool IRTranslator::shouldEmitAsBranches(
575	const std::vector<SwitchCG::CaseBlock> &Cases) {
576	// For multiple cases, it's better to emit as branches.
577	if (Cases.size() != `2`)
578	return true;
579
580	// If this is two comparisons of the same values or'd or and'd together, they
581	// will get folded into a single comparison, so don't emit two blocks.
582	if ((Cases [`0`].CmpLHS == Cases [`1`].CmpLHS &&
583	Cases [`0`].CmpRHS == Cases [`1`].CmpRHS) \|\|
584	(Cases [`0`].CmpRHS == Cases [`1`].CmpLHS &&
585	Cases [`0`].CmpLHS == Cases [`1`].CmpRHS)) {
586	return false;
587	}
588
589	// Handle: (X != null) \| (Y != null) --> (X\|Y) != 0
590	// Handle: (X == null) & (Y == null) --> (X\|Y) == 0
591	if (Cases [`0`].CmpRHS == Cases [`1`].CmpRHS &&
592	Cases [`0`].PredInfo.Pred == Cases [`1`].PredInfo.Pred &&
593	isa<Constant>(Val: Cases [`0`].CmpRHS) &&
594	cast<Constant>(Val: Cases [`0`].CmpRHS)->isNullValue()) {
595	if (Cases [`0`].PredInfo.Pred == CmpInst::ICMP_EQ &&
596	Cases [`0`].TrueBB == Cases [`1`].ThisBB)
597	return false;
598	if (Cases [`0`].PredInfo.Pred == CmpInst::ICMP_NE &&
599	Cases [`0`].FalseBB == Cases [`1`].ThisBB)
600	return false;
601	}
602
603	return true;
604	}
605
606	bool IRTranslator::translateUncondBr(const User &U,
607	MachineIRBuilder &MIRBuilder) {
608	const UncondBrInst &BrInst = cast<UncondBrInst>(Val: U);
609	auto &CurMBB = MIRBuilder.getMBB();
610	auto Succ0MBB = &getMBB(BB: BrInst.getSuccessor(i: `0`));
611
612	// If the unconditional target is the layout successor, fallthrough.
613	if (OptLevel == CodeGenOptLevel::None \|\| !CurMBB.isLayoutSuccessor(MBB: Succ0MBB))
614	MIRBuilder.buildBr(Dest&: *Succ0MBB);
615
616	// Link successors.
617	for (const BasicBlock *Succ : successors(I: &BrInst))
618	CurMBB.addSuccessor(Succ: &getMBB(BB: *Succ));
619	return true;
620	}
621
622	bool IRTranslator::translateCondBr(const User &U,
623	MachineIRBuilder &MIRBuilder) {
624	const CondBrInst &BrInst = cast<CondBrInst>(Val: U);
625	auto &CurMBB = MIRBuilder.getMBB();
626	auto Succ0MBB = &getMBB(BB: BrInst.getSuccessor(i: `0`));
627
628	// If this condition is one of the special cases we handle, do special stuff
629	// now.
630	const Value *CondVal = BrInst.getCondition();
631	MachineBasicBlock Succ1MBB = &getMBB(BB: BrInst.getSuccessor(i: `1`));
632
633	// If this is a series of conditions that are or'd or and'd together, emit
634	// this as a sequence of branches instead of setcc's with and/or operations.
635	// As long as jumps are not expensive (exceptions for multi-use logic ops,
636	// unpredictable branches, and vector extracts because those jumps are likely
637	// expensive for any target), this should improve performance.
638	// For example, instead of something like:
639	// cmp A, B
640	// C = seteq
641	// cmp D, E
642	// F = setle
643	// or C, F
644	// jnz foo
645	// Emit:
646	// cmp A, B
647	// je foo
648	// cmp D, E
649	// jle foo
650	using namespace PatternMatch;
651	const Instruction *CondI = dyn_cast<Instruction>(Val: CondVal);
652	if (!TLI->isJumpExpensive() && CondI && CondI->hasOneUse() &&
653	!BrInst.hasMetadata(KindID: LLVMContext::MD_unpredictable)) {
654	Instruction::BinaryOps Opcode = (Instruction::BinaryOps)`0`;
655	Value *Vec;
656	const Value BOp0, BOp1;
657	if (match(V: CondI, P: m_LogicalAnd(L: m_Value(V&: BOp0), R: m_Value(V&: BOp1))))
658	Opcode = Instruction::And;
659	else if (match(V: CondI, P: m_LogicalOr(L: m_Value(V&: BOp0), R: m_Value(V&: BOp1))))
660	Opcode = Instruction::Or;
661
662	if (Opcode && !(match(V: BOp0, P: m_ExtractElt(Val: m_Value(V&: Vec), Idx: m_Value())) &&
663	match(V: BOp1, P: m_ExtractElt(Val: m_Specific(V: Vec), Idx: m_Value())))) {
664	findMergedConditions(Cond: CondI, TBB: Succ0MBB, FBB: Succ1MBB, CurBB: &CurMBB, SwitchBB: &CurMBB, Opc: Opcode,
665	TProb: getEdgeProbability(Src: &CurMBB, Dst: Succ0MBB),
666	FProb: getEdgeProbability(Src: &CurMBB, Dst: Succ1MBB),
667	/InvertCond=/false);
668	assert(SL->SwitchCases[`0`].ThisBB == &CurMBB && "Unexpected lowering!");
669
670	// Allow some cases to be rejected.
671	if (shouldEmitAsBranches(Cases: SL ->SwitchCases)) {
672	// Emit the branch for this block.
673	emitSwitchCase(CB&: SL ->SwitchCases [`0`], SwitchBB: &CurMBB, MIB&: *CurBuilder);
674	SL ->SwitchCases.erase(position: SL ->SwitchCases.begin());
675	return true;
676	}
677
678	// Okay, we decided not to do this, remove any inserted MBB's and clear
679	// SwitchCases.
680	for (unsigned I = `1`, E = SL ->SwitchCases.size(); I != E; ++I)
681	MF->erase(MBBI: SL ->SwitchCases [I].ThisBB);
682
683	SL ->SwitchCases.clear();
684	}
685	}
686
687	// Create a CaseBlock record representing this branch.
688	SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
689	ConstantInt::getTrue(Context&: MF->getFunction().getContext()),
690	nullptr, Succ0MBB, Succ1MBB, &CurMBB,
691	CurBuilder ->getDebugLoc());
692
693	// Use emitSwitchCase to actually insert the fast branch sequence for this
694	// cond branch.
695	emitSwitchCase(CB, SwitchBB: &CurMBB, MIB&: *CurBuilder);
696	return true;
697	}
698
699	void IRTranslator::addSuccessorWithProb(MachineBasicBlock *Src,
700	MachineBasicBlock *Dst,
701	BranchProbability Prob) {
702	if (!FuncInfo.BPI) {
703	Src->addSuccessorWithoutProb(Succ: Dst);
704	return;
705	}
706	if (Prob.isUnknown())
707	Prob = getEdgeProbability(Src, Dst);
708	Src->addSuccessor(Succ: Dst, Prob);
709	}
710
711	BranchProbability
712	IRTranslator::getEdgeProbability(const MachineBasicBlock *Src,
713	const MachineBasicBlock Dst) const* {
714	const BasicBlock *SrcBB = Src->getBasicBlock();
715	const BasicBlock *DstBB = Dst->getBasicBlock();
716	if (!FuncInfo.BPI) {
717	// If BPI is not available, set the default probability as 1 / N, where N is
718	// the number of successors.
719	auto SuccSize = std::max<uint32_t>(a: succ_size(BB: SrcBB), b: `1`);
720	return BranchProbability (`1`, SuccSize);
721	}
722	return FuncInfo.BPI->getEdgeProbability(Src: SrcBB, Dst: DstBB);
723	}
724
725	bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
726	using namespace SwitchCG;
727	// Extract cases from the switch.
728	const SwitchInst &SI = cast<SwitchInst>(Val: U);
729	BranchProbabilityInfo *BPI = FuncInfo.BPI;
730	CaseClusterVector Clusters;
731	Clusters.reserve(n: SI.getNumCases());
732	for (const auto &I : SI.cases()) {
733	MachineBasicBlock Succ = &getMBB(BB: I.getCaseSuccessor());
734	assert(Succ && "Could not find successor mbb in mapping");
735	const ConstantInt *CaseVal = I.getCaseValue();
736	BranchProbability Prob =
737	BPI ? BPI->getEdgeProbability(Src: SI.getParent(), IndexInSuccessors: I.getSuccessorIndex())
738	: BranchProbability (`1`, SI.getNumCases() + `1`);
739	Clusters.push_back(x: CaseCluster::range(Low: CaseVal, High: CaseVal, MBB: Succ, Prob));
740	}
741
742	MachineBasicBlock DefaultMBB = &getMBB(BB: SI.getDefaultDest());
743
744	// Cluster adjacent cases with the same destination. We do this at all
745	// optimization levels because it's cheap to do and will make codegen faster
746	// if there are many clusters.
747	sortAndRangeify(Clusters);
748
749	MachineBasicBlock SwitchMBB = &getMBB(BB: SI.getParent());
750
751	// If there is only the default destination, jump there directly.
752	if (Clusters.empty()) {
753	SwitchMBB->addSuccessor(Succ: DefaultMBB);
754	if (DefaultMBB != SwitchMBB->getNextNode())
755	MIB.buildBr(Dest&: *DefaultMBB);
756	return true;
757	}
758
759	SL ->findJumpTables(Clusters, SI: &SI, SL: std::nullopt, DefaultMBB, PSI: nullptr, BFI: nullptr);
760	SL ->findBitTestClusters(Clusters, SI: &SI);
761
762	LLVM_DEBUG({
763	dbgs() << "Case clusters: ";
764	for (const CaseCluster &C : Clusters) {
765	if (C.Kind == CC_JumpTable)
766	dbgs() << "JT:";
767	if (C.Kind == CC_BitTests)
768	dbgs() << "BT:";
769
770	C.Low->getValue().print(dbgs(), true);
771	if (C.Low != C.High) {
772	dbgs() << `'-'`;
773	C.High->getValue().print(dbgs(), true);
774	}
775	dbgs() << `' '`;
776	}
777	dbgs() << `'\n'`;
778	});
779
780	assert(!Clusters.empty());
781	SwitchWorkList WorkList;
782	CaseClusterIt First = Clusters.begin();
783	CaseClusterIt Last = Clusters.end() - `1`;
784	auto DefaultProb = getEdgeProbability(Src: SwitchMBB, Dst: DefaultMBB);
785	WorkList.push_back(Elt: {.MBB: SwitchMBB, .FirstCluster: First, .LastCluster: Last, .GE: nullptr, .LT: nullptr, .DefaultProb: DefaultProb});
786
787	while (!WorkList.empty()) {
788	SwitchWorkListItem W = WorkList.pop_back_val();
789
790	unsigned NumClusters = W.LastCluster - W.FirstCluster + `1`;
791	// For optimized builds, lower large range as a balanced binary tree.
792	if (NumClusters > `3` &&
793	MF->getTarget().getOptLevel() != CodeGenOptLevel::None &&
794	!DefaultMBB->getParent()->getFunction().hasMinSize()) {
795	splitWorkItem(WorkList, W, Cond: SI.getCondition(), SwitchMBB, MIB);
796	continue;
797	}
798
799	if (!lowerSwitchWorkItem(W, Cond: SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
800	return false;
801	}
802	return true;
803	}
804
805	void IRTranslator::splitWorkItem(SwitchCG::SwitchWorkList &WorkList,
806	const SwitchCG::SwitchWorkListItem &W,
807	Value Cond, MachineBasicBlock SwitchMBB,
808	MachineIRBuilder &MIB) {
809	using namespace SwitchCG;
810	assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
811	"Clusters not sorted?");
812	assert(W.LastCluster - W.FirstCluster + `1` >= `2` && "Too small to split!");
813
814	auto [LastLeft, FirstRight, LeftProb, RightProb] =
815	SL ->computeSplitWorkItemInfo(W);
816
817	// Use the first element on the right as pivot since we will make less-than
818	// comparisons against it.
819	CaseClusterIt PivotCluster = FirstRight;
820	assert(PivotCluster > W.FirstCluster);
821	assert(PivotCluster <= W.LastCluster);
822
823	CaseClusterIt FirstLeft = W.FirstCluster;
824	CaseClusterIt LastRight = W.LastCluster;
825
826	const ConstantInt *Pivot = PivotCluster ->Low;
827
828	// New blocks will be inserted immediately after the current one.
829	MachineFunction::iterator BBI(W.MBB);
830	++BBI;
831
832	// We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
833	// we can branch to its destination directly if it's squeezed exactly in
834	// between the known lower bound and Pivot - 1.
835	MachineBasicBlock *LeftMBB;
836	if (FirstLeft == LastLeft && FirstLeft ->Kind == CC_Range &&
837	FirstLeft ->Low == W.GE &&
838	(FirstLeft ->High->getValue() + `1LL`) == Pivot->getValue()) {
839	LeftMBB = FirstLeft ->MBB;
840	} else {
841	LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(BB: W.MBB->getBasicBlock());
842	FuncInfo.MF->insert(MBBI: BBI, MBB: LeftMBB);
843	WorkList.push_back(
844	Elt: {.MBB: LeftMBB, .FirstCluster: FirstLeft, .LastCluster: LastLeft, .GE: W.GE, .LT: Pivot, .DefaultProb: W.DefaultProb / `2`});
845	}
846
847	// Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
848	// single cluster, RHS.Low == Pivot, and we can branch to its destination
849	// directly if RHS.High equals the current upper bound.
850	MachineBasicBlock *RightMBB;
851	if (FirstRight == LastRight && FirstRight ->Kind == CC_Range && W.LT &&
852	(FirstRight ->High->getValue() + `1ULL`) == W.LT->getValue()) {
853	RightMBB = FirstRight ->MBB;
854	} else {
855	RightMBB = FuncInfo.MF->CreateMachineBasicBlock(BB: W.MBB->getBasicBlock());
856	FuncInfo.MF->insert(MBBI: BBI, MBB: RightMBB);
857	WorkList.push_back(
858	Elt: {.MBB: RightMBB, .FirstCluster: FirstRight, .LastCluster: LastRight, .GE: Pivot, .LT: W.LT, .DefaultProb: W.DefaultProb / `2`});
859	}
860
861	// Create the CaseBlock record that will be used to lower the branch.
862	CaseBlock CB(ICmpInst::Predicate::ICMP_SLT, false, Cond, Pivot, nullptr,
863	LeftMBB, RightMBB, W.MBB, MIB.getDebugLoc(), LeftProb,
864	RightProb);
865
866	if (W.MBB == SwitchMBB)
867	emitSwitchCase(CB, SwitchBB: SwitchMBB, MIB);
868	else
869	SL ->SwitchCases.push_back(x: CB);
870	}
871
872	void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
873	MachineBasicBlock *MBB) {
874	// Emit the code for the jump table
875	assert(JT.Reg && "Should lower JT Header first!");
876	MachineIRBuilder MIB(*MBB->getParent());
877	MIB.setMBB(*MBB);
878	MIB.setDebugLoc(CurBuilder ->getDebugLoc());
879
880	Type *PtrIRTy = PointerType::getUnqual(C&: MF->getFunction().getContext());
881	const LLT PtrTy = getLLTForType(Ty&: PtrIRTy, DL: DL);
882
883	auto Table = MIB.buildJumpTable(PtrTy, JTI: JT.JTI);
884	MIB.buildBrJT(TablePtr: Table.getReg(Idx: `0`), JTI: JT.JTI, IndexReg: JT.Reg);
885	}
886
887	bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
888	SwitchCG::JumpTableHeader &JTH,
889	MachineBasicBlock *HeaderBB) {
890	MachineIRBuilder MIB(*HeaderBB->getParent());
891	MIB.setMBB(*HeaderBB);
892	MIB.setDebugLoc(CurBuilder ->getDebugLoc());
893
894	const Value &SValue = *JTH.SValue;
895	// Subtract the lowest switch case value from the value being switched on.
896	const LLT SwitchTy = getLLTForType(Ty&: SValue.getType(), DL: DL);
897	Register SwitchOpReg = getOrCreateVReg(Val: SValue);
898	auto FirstCst = MIB.buildConstant(Res: SwitchTy, Val: JTH.First);
899	auto Sub = MIB.buildSub(Dst: {SwitchTy}, Src0: SwitchOpReg, Src1: FirstCst);
900
901	// This value may be smaller or larger than the target's pointer type, and
902	// therefore require extension or truncating.
903	auto *PtrIRTy = PointerType::getUnqual(C&: SValue.getContext());
904	const LLT PtrScalarTy = LLT::scalar(SizeInBits: DL->getTypeSizeInBits(Ty: PtrIRTy));
905	Sub = MIB.buildZExtOrTrunc(Res: PtrScalarTy, Op: Sub);
906
907	JT.Reg = Sub.getReg(Idx: `0`);
908
909	if (JTH.FallthroughUnreachable) {
910	if (JT.MBB != HeaderBB->getNextNode())
911	MIB.buildBr(Dest&: *JT.MBB);
912	return true;
913	}
914
915	// Emit the range check for the jump table, and branch to the default block
916	// for the switch statement if the value being switched on exceeds the
917	// largest case in the switch.
918	auto Cst = getOrCreateVReg(
919	Val: *ConstantInt::get(Ty: SValue.getType(), V: JTH.Last - JTH.First));
920	Cst = MIB.buildZExtOrTrunc(Res: PtrScalarTy, Op: Cst).getReg(Idx: `0`);
921	auto Cmp = MIB.buildICmp(Pred: CmpInst::ICMP_UGT, Res: LLT::scalar(SizeInBits: `1`), Op0: Sub, Op1: Cst);
922
923	auto BrCond = MIB.buildBrCond(Tst: Cmp.getReg(Idx: `0`), Dest&: *JT.Default);
924
925	// Avoid emitting unnecessary branches to the next block.
926	if (JT.MBB != HeaderBB->getNextNode())
927	BrCond = MIB.buildBr(Dest&: *JT.MBB);
928	return true;
929	}
930
931	void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
932	MachineBasicBlock *SwitchBB,
933	MachineIRBuilder &MIB) {
934	Register CondLHS = getOrCreateVReg(Val: *CB.CmpLHS);
935	Register Cond;
936	DebugLoc OldDbgLoc = MIB.getDebugLoc();
937	MIB.setDebugLoc(CB.DbgLoc);
938	MIB.setMBB(*CB.ThisBB);
939
940	if (CB.PredInfo.NoCmp) {
941	// Branch or fall through to TrueBB.
942	addSuccessorWithProb(Src: CB.ThisBB, Dst: CB.TrueBB, Prob: CB.TrueProb);
943	addMachineCFGPred(Edge: {SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
944	NewPred: CB.ThisBB);
945	CB.ThisBB->normalizeSuccProbs();
946	if (CB.TrueBB != CB.ThisBB->getNextNode())
947	MIB.buildBr(Dest&: *CB.TrueBB);
948	MIB.setDebugLoc(OldDbgLoc);
949	return;
950	}
951
952	const LLT i1Ty = LLT::scalar(SizeInBits: `1`);
953	// Build the compare.
954	if (!CB.CmpMHS) {
955	const auto *CI = dyn_cast<ConstantInt>(Val: CB.CmpRHS);
956	// For conditional branch lowering, we might try to do something silly like
957	// emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so,
958	// just re-use the existing condition vreg.
959	if (MRI->getType(Reg: CondLHS).getSizeInBits() == `1` && CI && CI->isOne() &&
960	CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
961	Cond = CondLHS;
962	} else {
963	Register CondRHS = getOrCreateVReg(Val: *CB.CmpRHS);
964	if (CmpInst::isFPPredicate(P: CB.PredInfo.Pred))
965	Cond =
966	MIB.buildFCmp(Pred: CB.PredInfo.Pred, Res: i1Ty, Op0: CondLHS, Op1: CondRHS).getReg(Idx: `0`);
967	else
968	Cond =
969	MIB.buildICmp(Pred: CB.PredInfo.Pred, Res: i1Ty, Op0: CondLHS, Op1: CondRHS).getReg(Idx: `0`);
970	}
971	} else {
972	assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
973	"Can only handle SLE ranges");
974
975	const APInt& Low = cast<ConstantInt>(Val: CB.CmpLHS)->getValue();
976	const APInt& High = cast<ConstantInt>(Val: CB.CmpRHS)->getValue();
977
978	Register CmpOpReg = getOrCreateVReg(Val: *CB.CmpMHS);
979	if (cast<ConstantInt>(Val: CB.CmpLHS)->isMinValue(IsSigned: true)) {
980	Register CondRHS = getOrCreateVReg(Val: *CB.CmpRHS);
981	Cond =
982	MIB.buildICmp(Pred: CmpInst::ICMP_SLE, Res: i1Ty, Op0: CmpOpReg, Op1: CondRHS).getReg(Idx: `0`);
983	} else {
984	const LLT CmpTy = MRI->getType(Reg: CmpOpReg);
985	auto Sub = MIB.buildSub(Dst: {CmpTy}, Src0: CmpOpReg, Src1: CondLHS);
986	auto Diff = MIB.buildConstant(Res: CmpTy, Val: High - Low);
987	Cond = MIB.buildICmp(Pred: CmpInst::ICMP_ULE, Res: i1Ty, Op0: Sub, Op1: Diff).getReg(Idx: `0`);
988	}
989	}
990
991	// Update successor info
992	addSuccessorWithProb(Src: CB.ThisBB, Dst: CB.TrueBB, Prob: CB.TrueProb);
993
994	addMachineCFGPred(Edge: {SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
995	NewPred: CB.ThisBB);
996
997	// TrueBB and FalseBB are always different unless the incoming IR is
998	// degenerate. This only happens when running llc on weird IR.
999	if (CB.TrueBB != CB.FalseBB)
1000	addSuccessorWithProb(Src: CB.ThisBB, Dst: CB.FalseBB, Prob: CB.FalseProb);
1001	CB.ThisBB->normalizeSuccProbs();
1002
1003	addMachineCFGPred(Edge: {SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
1004	NewPred: CB.ThisBB);
1005
1006	MIB.buildBrCond(Tst: Cond, Dest&: *CB.TrueBB);
1007	MIB.buildBr(Dest&: *CB.FalseBB);
1008	MIB.setDebugLoc(OldDbgLoc);
1009	}
1010
1011	bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
1012	MachineBasicBlock *SwitchMBB,
1013	MachineBasicBlock *CurMBB,
1014	MachineBasicBlock *DefaultMBB,
1015	MachineIRBuilder &MIB,
1016	MachineFunction::iterator BBI,
1017	BranchProbability UnhandledProbs,
1018	SwitchCG::CaseClusterIt I,
1019	MachineBasicBlock *Fallthrough,
1020	bool FallthroughUnreachable) {
1021	using namespace SwitchCG;
1022	MachineFunction *CurMF = SwitchMBB->getParent();
1023	// FIXME: Optimize away range check based on pivot comparisons.
1024	JumpTableHeader *JTH = &SL ->JTCases [I ->JTCasesIndex].first;
1025	SwitchCG::JumpTable *JT = &SL ->JTCases [I ->JTCasesIndex].second;
1026	BranchProbability DefaultProb = W.DefaultProb;
1027
1028	// The jump block hasn't been inserted yet; insert it here.
1029	MachineBasicBlock *JumpMBB = JT->MBB;
1030	CurMF->insert(MBBI: BBI, MBB: JumpMBB);
1031
1032	// Since the jump table block is separate from the switch block, we need
1033	// to keep track of it as a machine predecessor to the default block,
1034	// otherwise we lose the phi edges.
1035	addMachineCFGPred(Edge: {SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
1036	NewPred: CurMBB);
1037	addMachineCFGPred(Edge: {SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
1038	NewPred: JumpMBB);
1039
1040	auto JumpProb = I ->Prob;
1041	auto FallthroughProb = UnhandledProbs;
1042
1043	// If the default statement is a target of the jump table, we evenly
1044	// distribute the default probability to successors of CurMBB. Also
1045	// update the probability on the edge from JumpMBB to Fallthrough.
1046	for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
1047	SE = JumpMBB->succ_end();
1048	SI != SE; ++SI) {
1049	if (*SI == DefaultMBB) {
1050	JumpProb += DefaultProb / `2`;
1051	FallthroughProb -= DefaultProb / `2`;
1052	JumpMBB->setSuccProbability(I: SI, Prob: DefaultProb / `2`);
1053	JumpMBB->normalizeSuccProbs();
1054	} else {
1055	// Also record edges from the jump table block to it's successors.
1056	addMachineCFGPred(Edge: {SwitchMBB->getBasicBlock(), (*SI)->getBasicBlock()},
1057	NewPred: JumpMBB);
1058	}
1059	}
1060
1061	if (FallthroughUnreachable)
1062	JTH->FallthroughUnreachable = true;
1063
1064	if (!JTH->FallthroughUnreachable)
1065	addSuccessorWithProb(Src: CurMBB, Dst: Fallthrough, Prob: FallthroughProb);
1066	addSuccessorWithProb(Src: CurMBB, Dst: JumpMBB, Prob: JumpProb);
1067	CurMBB->normalizeSuccProbs();
1068
1069	// The jump table header will be inserted in our current block, do the
1070	// range check, and fall through to our fallthrough block.
1071	JTH->HeaderBB = CurMBB;
1072	JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
1073
1074	// If we're in the right place, emit the jump table header right now.
1075	if (CurMBB == SwitchMBB) {
1076	if (!emitJumpTableHeader(JT&: JT, JTH&: JTH, HeaderBB: CurMBB))
1077	return false;
1078	JTH->Emitted = true;
1079	}
1080	return true;
1081	}
1082	bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
1083	Value *Cond,
1084	MachineBasicBlock *Fallthrough,
1085	bool FallthroughUnreachable,
1086	BranchProbability UnhandledProbs,
1087	MachineBasicBlock *CurMBB,
1088	MachineIRBuilder &MIB,
1089	MachineBasicBlock *SwitchMBB) {
1090	using namespace SwitchCG;
1091	const Value RHS, LHS, *MHS;
1092	CmpInst::Predicate Pred;
1093	if (I ->Low == I ->High) {
1094	// Check Cond == I->Low.
1095	Pred = CmpInst::ICMP_EQ;
1096	LHS = Cond;
1097	RHS = I ->Low;
1098	MHS = nullptr;
1099	} else {
1100	// Check I->Low <= Cond <= I->High.
1101	Pred = CmpInst::ICMP_SLE;
1102	LHS = I ->Low;
1103	MHS = Cond;
1104	RHS = I ->High;
1105	}
1106
1107	// If Fallthrough is unreachable, fold away the comparison.
1108	// The false probability is the sum of all unhandled cases.
1109	CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I ->MBB, Fallthrough,
1110	CurMBB, MIB.getDebugLoc(), I ->Prob, UnhandledProbs);
1111
1112	emitSwitchCase(CB, SwitchBB: SwitchMBB, MIB);
1113	return true;
1114	}
1115
1116	void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
1117	MachineBasicBlock *SwitchBB) {
1118	MachineIRBuilder &MIB = *CurBuilder;
1119	MIB.setMBB(*SwitchBB);
1120
1121	// Subtract the minimum value.
1122	Register SwitchOpReg = getOrCreateVReg(Val: *B.SValue);
1123
1124	LLT SwitchOpTy = MRI->getType(Reg: SwitchOpReg);
1125	Register MinValReg = MIB.buildConstant(Res: SwitchOpTy, Val: B.First).getReg(Idx: `0`);
1126	auto RangeSub = MIB.buildSub(Dst: SwitchOpTy, Src0: SwitchOpReg, Src1: MinValReg);
1127
1128	Type *PtrIRTy = PointerType::getUnqual(C&: MF->getFunction().getContext());
1129	const LLT PtrTy = getLLTForType(Ty&: PtrIRTy, DL: DL);
1130
1131	LLT MaskTy = SwitchOpTy;
1132	if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() \|\|
1133	!llvm::has_single_bit<uint32_t>(Value: MaskTy.getSizeInBits()))
1134	MaskTy = LLT::scalar(SizeInBits: PtrTy.getSizeInBits());
1135	else {
1136	// Ensure that the type will fit the mask value.
1137	for (const SwitchCG::BitTestCase &Case : B.Cases) {
1138	if (!isUIntN(N: SwitchOpTy.getSizeInBits(), x: Case.Mask)) {
1139	// Switch table case range are encoded into series of masks.
1140	// Just use pointer type, it's guaranteed to fit.
1141	MaskTy = LLT::scalar(SizeInBits: PtrTy.getSizeInBits());
1142	break;
1143	}
1144	}
1145	}
1146	Register SubReg = RangeSub.getReg(Idx: `0`);
1147	if (SwitchOpTy != MaskTy)
1148	SubReg = MIB.buildZExtOrTrunc(Res: MaskTy, Op: SubReg).getReg(Idx: `0`);
1149
1150	B.RegVT = getMVTForLLT(Ty: MaskTy);
1151	B.Reg = SubReg;
1152
1153	MachineBasicBlock *MBB = B.Cases [`0`].ThisBB;
1154
1155	if (!B.FallthroughUnreachable)
1156	addSuccessorWithProb(Src: SwitchBB, Dst: B.Default, Prob: B.DefaultProb);
1157	addSuccessorWithProb(Src: SwitchBB, Dst: MBB, Prob: B.Prob);
1158
1159	SwitchBB->normalizeSuccProbs();
1160
1161	if (!B.FallthroughUnreachable) {
1162	// Conditional branch to the default block.
1163	auto RangeCst = MIB.buildConstant(Res: SwitchOpTy, Val: B.Range);
1164	auto RangeCmp = MIB.buildICmp(Pred: CmpInst::Predicate::ICMP_UGT, Res: LLT::scalar(SizeInBits: `1`),
1165	Op0: RangeSub, Op1: RangeCst);
1166	MIB.buildBrCond(Tst: RangeCmp, Dest&: *B.Default);
1167	}
1168
1169	// Avoid emitting unnecessary branches to the next block.
1170	if (MBB != SwitchBB->getNextNode())
1171	MIB.buildBr(Dest&: *MBB);
1172	}
1173
1174	void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
1175	MachineBasicBlock *NextMBB,
1176	BranchProbability BranchProbToNext,
1177	Register Reg, SwitchCG::BitTestCase &B,
1178	MachineBasicBlock *SwitchBB) {
1179	MachineIRBuilder &MIB = *CurBuilder;
1180	MIB.setMBB(*SwitchBB);
1181
1182	LLT SwitchTy = getLLTForMVT(Ty: BB.RegVT);
1183	Register Cmp;
1184	unsigned PopCount = llvm::popcount(Value: B.Mask);
1185	if (PopCount == `1`) {
1186	// Testing for a single bit; just compare the shift count with what it
1187	// would need to be to shift a 1 bit in that position.
1188	auto MaskTrailingZeros =
1189	MIB.buildConstant(Res: SwitchTy, Val: llvm::countr_zero(Val: B.Mask));
1190	Cmp =
1191	MIB.buildICmp(Pred: ICmpInst::ICMP_EQ, Res: LLT::scalar(SizeInBits: `1`), Op0: Reg, Op1: MaskTrailingZeros)
1192	.getReg(Idx: `0`);
1193	} else if (PopCount == BB.Range) {
1194	// There is only one zero bit in the range, test for it directly.
1195	auto MaskTrailingOnes =
1196	MIB.buildConstant(Res: SwitchTy, Val: llvm::countr_one(Value: B.Mask));
1197	Cmp = MIB.buildICmp(Pred: CmpInst::ICMP_NE, Res: LLT::scalar(SizeInBits: `1`), Op0: Reg, Op1: MaskTrailingOnes)
1198	.getReg(Idx: `0`);
1199	} else {
1200	// Make desired shift.
1201	auto CstOne = MIB.buildConstant(Res: SwitchTy, Val: `1`);
1202	auto SwitchVal = MIB.buildShl(Dst: SwitchTy, Src0: CstOne, Src1: Reg);
1203
1204	// Emit bit tests and jumps.
1205	auto CstMask = MIB.buildConstant(Res: SwitchTy, Val: B.Mask);
1206	auto AndOp = MIB.buildAnd(Dst: SwitchTy, Src0: SwitchVal, Src1: CstMask);
1207	auto CstZero = MIB.buildConstant(Res: SwitchTy, Val: `0`);
1208	Cmp = MIB.buildICmp(Pred: CmpInst::ICMP_NE, Res: LLT::scalar(SizeInBits: `1`), Op0: AndOp, Op1: CstZero)
1209	.getReg(Idx: `0`);
1210	}
1211
1212	// The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
1213	addSuccessorWithProb(Src: SwitchBB, Dst: B.TargetBB, Prob: B.ExtraProb);
1214	// The branch probability from SwitchBB to NextMBB is BranchProbToNext.
1215	addSuccessorWithProb(Src: SwitchBB, Dst: NextMBB, Prob: BranchProbToNext);
1216	// It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
1217	// one as they are relative probabilities (and thus work more like weights),
1218	// and hence we need to normalize them to let the sum of them become one.
1219	SwitchBB->normalizeSuccProbs();
1220
1221	// Record the fact that the IR edge from the header to the bit test target
1222	// will go through our new block. Neeeded for PHIs to have nodes added.
1223	addMachineCFGPred(Edge: {BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()},
1224	NewPred: SwitchBB);
1225
1226	MIB.buildBrCond(Tst: Cmp, Dest&: *B.TargetBB);
1227
1228	// Avoid emitting unnecessary branches to the next block.
1229	if (NextMBB != SwitchBB->getNextNode())
1230	MIB.buildBr(Dest&: *NextMBB);
1231	}
1232
1233	bool IRTranslator::lowerBitTestWorkItem(
1234	SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
1235	MachineBasicBlock CurMBB, MachineBasicBlock DefaultMBB,
1236	MachineIRBuilder &MIB, MachineFunction::iterator BBI,
1237	BranchProbability DefaultProb, BranchProbability UnhandledProbs,
1238	SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
1239	bool FallthroughUnreachable) {
1240	using namespace SwitchCG;
1241	MachineFunction *CurMF = SwitchMBB->getParent();
1242	// FIXME: Optimize away range check based on pivot comparisons.
1243	BitTestBlock *BTB = &SL ->BitTestCases [I ->BTCasesIndex];
1244	// The bit test blocks haven't been inserted yet; insert them here.
1245	for (BitTestCase &BTC : BTB->Cases)
1246	CurMF->insert(MBBI: BBI, MBB: BTC.ThisBB);
1247
1248	// Fill in fields of the BitTestBlock.
1249	BTB->Parent = CurMBB;
1250	BTB->Default = Fallthrough;
1251
1252	BTB->DefaultProb = UnhandledProbs;
1253	// If the cases in bit test don't form a contiguous range, we evenly
1254	// distribute the probability on the edge to Fallthrough to two
1255	// successors of CurMBB.
1256	if (!BTB->ContiguousRange) {
1257	BTB->Prob += DefaultProb / `2`;
1258	BTB->DefaultProb -= DefaultProb / `2`;
1259	}
1260
1261	if (FallthroughUnreachable)
1262	BTB->FallthroughUnreachable = true;
1263
1264	// If we're in the right place, emit the bit test header right now.
1265	if (CurMBB == SwitchMBB) {
1266	emitBitTestHeader(B&: *BTB, SwitchBB: SwitchMBB);
1267	BTB->Emitted = true;
1268	}
1269	return true;
1270	}
1271
1272	bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
1273	Value *Cond,
1274	MachineBasicBlock *SwitchMBB,
1275	MachineBasicBlock *DefaultMBB,
1276	MachineIRBuilder &MIB) {
1277	using namespace SwitchCG;
1278	MachineFunction *CurMF = FuncInfo.MF;
1279	MachineBasicBlock NextMBB = nullptr*;
1280	MachineFunction::iterator BBI(W.MBB);
1281	if (++BBI != FuncInfo.MF->end())
1282	NextMBB = &*BBI;
1283
1284	if (EnableOpts) {
1285	// Here, we order cases by probability so the most likely case will be
1286	// checked first. However, two clusters can have the same probability in
1287	// which case their relative ordering is non-deterministic. So we use Low
1288	// as a tie-breaker as clusters are guaranteed to never overlap.
1289	llvm::sort(Start: W.FirstCluster, End: W.LastCluster + `1`,
1290	Comp: [](const CaseCluster &a, const CaseCluster &b) {
1291	return a.Prob != b.Prob
1292	? a.Prob > b.Prob
1293	: a.Low->getValue().slt(RHS: b.Low->getValue());
1294	});
1295
1296	// Rearrange the case blocks so that the last one falls through if possible
1297	// without changing the order of probabilities.
1298	for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster;) {
1299	--I;
1300	if (I ->Prob > W.LastCluster ->Prob)
1301	break;
1302	if (I ->Kind == CC_Range && I ->MBB == NextMBB) {
1303	std::swap(a&: I, b&: W.LastCluster);
1304	break;
1305	}
1306	}
1307	}
1308
1309	// Compute total probability.
1310	BranchProbability DefaultProb = W.DefaultProb;
1311	BranchProbability UnhandledProbs = DefaultProb;
1312	for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
1313	UnhandledProbs += I ->Prob;
1314
1315	MachineBasicBlock *CurMBB = W.MBB;
1316	for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
1317	bool FallthroughUnreachable = false;
1318	MachineBasicBlock *Fallthrough;
1319	if (I == W.LastCluster) {
1320	// For the last cluster, fall through to the default destination.
1321	Fallthrough = DefaultMBB;
1322	FallthroughUnreachable = isa<UnreachableInst>(
1323	Val: DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
1324	} else {
1325	Fallthrough = CurMF->CreateMachineBasicBlock(BB: CurMBB->getBasicBlock());
1326	CurMF->insert(MBBI: BBI, MBB: Fallthrough);
1327	}
1328	UnhandledProbs -= I ->Prob;
1329
1330	switch (I ->Kind) {
1331	case CC_BitTests: {
1332	if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
1333	DefaultProb, UnhandledProbs, I, Fallthrough,
1334	FallthroughUnreachable)) {
1335	LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch");
1336	return false;
1337	}
1338	break;
1339	}
1340
1341	case CC_JumpTable: {
1342	if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
1343	UnhandledProbs, I, Fallthrough,
1344	FallthroughUnreachable)) {
1345	LLVM_DEBUG(dbgs() << "Failed to lower jump table");
1346	return false;
1347	}
1348	break;
1349	}
1350	case CC_Range: {
1351	if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough,
1352	FallthroughUnreachable, UnhandledProbs,
1353	CurMBB, MIB, SwitchMBB)) {
1354	LLVM_DEBUG(dbgs() << "Failed to lower switch range");
1355	return false;
1356	}
1357	break;
1358	}
1359	}
1360	CurMBB = Fallthrough;
1361	}
1362
1363	return true;
1364	}
1365
1366	bool IRTranslator::translateIndirectBr(const User &U,
1367	MachineIRBuilder &MIRBuilder) {
1368	const IndirectBrInst &BrInst = cast<IndirectBrInst>(Val: U);
1369
1370	const Register Tgt = getOrCreateVReg(Val: *BrInst.getAddress());
1371	MIRBuilder.buildBrIndirect(Tgt);
1372
1373	// Link successors.
1374	SmallPtrSet<const BasicBlock *, `32`> AddedSuccessors;
1375	MachineBasicBlock &CurBB = MIRBuilder.getMBB();
1376	for (const BasicBlock *Succ : successors(I: &BrInst)) {
1377	// It's legal for indirectbr instructions to have duplicate blocks in the
1378	// destination list. We don't allow this in MIR. Skip anything that's
1379	// already a successor.
1380	if (!AddedSuccessors.insert(Ptr: Succ).second)
1381	continue;
1382	CurBB.addSuccessor(Succ: &getMBB(BB: *Succ));
1383	}
1384
1385	return true;
1386	}
1387
1388	static bool isSwiftError(const Value *V) {
1389	if (auto Arg = dyn_cast<Argument>(Val: V))
1390	return Arg->hasSwiftErrorAttr();
1391	if (auto AI = dyn_cast<AllocaInst>(Val: V))
1392	return AI->isSwiftError();
1393	return false;
1394	}
1395
1396	bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
1397	const LoadInst &LI = cast<LoadInst>(Val: U);
1398	TypeSize StoreSize = DL->getTypeStoreSize(Ty: LI.getType());
1399	if (StoreSize.isZero())
1400	return true;
1401
1402	ArrayRef<Register> Regs = getOrCreateVRegs(Val: LI);
1403	ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V: LI);
1404	Register Base = getOrCreateVReg(Val: *LI.getPointerOperand());
1405	AAMDNodes AAInfo = LI.getAAMetadata();
1406
1407	const Value *Ptr = LI.getPointerOperand();
1408	Type *OffsetIRTy = DL->getIndexType(PtrTy: Ptr->getType());
1409	LLT OffsetTy = getLLTForType(Ty&: OffsetIRTy, DL: DL);
1410
1411	if (CLI->supportSwiftError() && isSwiftError(V: Ptr)) {
1412	assert(Regs.size() == `1` && "swifterror should be single pointer");
1413	Register VReg =
1414	SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(), Ptr);
1415	MIRBuilder.buildCopy(Res: Regs [`0`], Op: VReg);
1416	return true;
1417	}
1418
1419	MachineMemOperand::Flags Flags =
1420	TLI->getLoadMemOperandFlags(LI, DL: *DL, AC, LibInfo);
1421	if (AA && !(Flags & MachineMemOperand::MOInvariant)) {
1422	if (AA->pointsToConstantMemory(
1423	Loc: MemoryLocation (Ptr, LocationSize::precise(Value: StoreSize), AAInfo))) {
1424	Flags \|= MachineMemOperand::MOInvariant;
1425	}
1426	}
1427
1428	const MDNode *Ranges =
1429	Regs.size() == `1` ? LI.getMetadata(KindID: LLVMContext::MD_range) : nullptr;
1430	for (unsigned i = `0`; i < Regs.size(); ++i) {
1431	Register Addr;
1432	MIRBuilder.materializeObjectPtrOffset(Res&: Addr, Op0: Base, ValueTy: OffsetTy, Value: Offsets [i]);
1433
1434	MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets [i]);
1435	Align BaseAlign = getMemOpAlign(I: LI);
1436	auto MMO =
1437	MF->getMachineMemOperand(PtrInfo: Ptr, f: Flags, MemTy: MRI->getType(Reg: Regs [i]),
1438	base_alignment: commonAlignment(A: BaseAlign, Offset: Offsets [i]), AAInfo,
1439	Ranges, SSID: LI.getSyncScopeID(), Ordering: LI.getOrdering());
1440	MIRBuilder.buildLoad(Res: Regs [i], Addr, MMO&: *MMO);
1441	}
1442
1443	return true;
1444	}
1445
1446	bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
1447	const StoreInst &SI = cast<StoreInst>(Val: U);
1448	if (DL->getTypeStoreSize(Ty: SI.getValueOperand()->getType()).isZero())
1449	return true;
1450
1451	ArrayRef<Register> Vals = getOrCreateVRegs(Val: *SI.getValueOperand());
1452	ArrayRef<uint64_t> Offsets = VMap.getOffsets(V: SI.getValueOperand());
1453	Register Base = getOrCreateVReg(Val: *SI.getPointerOperand());
1454
1455	Type *OffsetIRTy = DL->getIndexType(PtrTy: SI.getPointerOperandType());
1456	LLT OffsetTy = getLLTForType(Ty&: OffsetIRTy, DL: DL);
1457
1458	if (CLI->supportSwiftError() && isSwiftError(V: SI.getPointerOperand())) {
1459	assert(Vals.size() == `1` && "swifterror should be single pointer");
1460
1461	Register VReg = SwiftError.getOrCreateVRegDefAt(&SI, &MIRBuilder.getMBB(),
1462	SI.getPointerOperand());
1463	MIRBuilder.buildCopy(Res: VReg, Op: Vals [`0`]);
1464	return true;
1465	}
1466
1467	MachineMemOperand::Flags Flags = TLI->getStoreMemOperandFlags(SI, DL: *DL);
1468
1469	for (unsigned i = `0`; i < Vals.size(); ++i) {
1470	Register Addr;
1471	MIRBuilder.materializeObjectPtrOffset(Res&: Addr, Op0: Base, ValueTy: OffsetTy, Value: Offsets [i]);
1472
1473	MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets [i]);
1474	Align BaseAlign = getMemOpAlign(I: SI);
1475	auto MMO = MF->getMachineMemOperand(PtrInfo: Ptr, f: Flags, MemTy: MRI->getType(Reg: Vals [i]),
1476	base_alignment: commonAlignment(A: BaseAlign, Offset: Offsets [i]),
1477	AAInfo: SI.getAAMetadata(), Ranges: nullptr,
1478	SSID: SI.getSyncScopeID(), Ordering: SI.getOrdering());
1479	MIRBuilder.buildStore(Val: Vals [i], Addr, MMO&: *MMO);
1480	}
1481	return true;
1482	}
1483
1484	static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {
1485	const Value *Src = U.getOperand(i: `0`);
1486	Type *Int32Ty = Type::getInt32Ty(C&: U.getContext());
1487
1488	// getIndexedOffsetInType is designed for GEPs, so the first index is the
1489	// usual array element rather than looking into the actual aggregate.
1490	SmallVector<Value *, `1`> Indices;
1491	Indices.push_back(Elt: ConstantInt::get(Ty: Int32Ty, V: `0`));
1492
1493	if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val: &U)) {
1494	for (auto Idx : EVI->indices())
1495	Indices.push_back(Elt: ConstantInt::get(Ty: Int32Ty, V: Idx));
1496	} else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Val: &U)) {
1497	for (auto Idx : IVI->indices())
1498	Indices.push_back(Elt: ConstantInt::get(Ty: Int32Ty, V: Idx));
1499	} else {
1500	llvm::append_range(C&: Indices, R: drop_begin(RangeOrContainer: U.operands()));
1501	}
1502
1503	return static_cast<uint64_t>(
1504	DL.getIndexedOffsetInType(ElemTy: Src->getType(), Indices));
1505	}
1506
1507	bool IRTranslator::translateExtractValue(const User &U,
1508	MachineIRBuilder &MIRBuilder) {
1509	const Value *Src = U.getOperand(i: `0`);
1510	uint64_t Offset = getOffsetFromIndices(U, DL: *DL);
1511	ArrayRef<Register> SrcRegs = getOrCreateVRegs(Val: *Src);
1512	ArrayRef<uint64_t> Offsets = VMap.getOffsets(V: Src);
1513	unsigned Idx = llvm::lower_bound(Range&: Offsets, Value&: Offset) - Offsets.begin();
1514	auto &DstRegs = allocateVRegs(Val: U);
1515
1516	for (unsigned i = `0`; i < DstRegs.size(); ++i)
1517	DstRegs [i] = SrcRegs [Idx++];
1518
1519	return true;
1520	}
1521
1522	bool IRTranslator::translateInsertValue(const User &U,
1523	MachineIRBuilder &MIRBuilder) {
1524	const Value *Src = U.getOperand(i: `0`);
1525	uint64_t Offset = getOffsetFromIndices(U, DL: *DL);
1526	auto &DstRegs = allocateVRegs(Val: U);
1527	ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(V: U);
1528	ArrayRef<Register> SrcRegs = getOrCreateVRegs(Val: *Src);
1529	ArrayRef<Register> InsertedRegs = getOrCreateVRegs(Val: *U.getOperand(i: `1`));
1530	auto *InsertedIt = InsertedRegs.begin();
1531
1532	for (unsigned i = `0`; i < DstRegs.size(); ++i) {
1533	if (DstOffsets [i] >= Offset && InsertedIt != InsertedRegs.end())
1534	DstRegs [i] = *InsertedIt++;
1535	else
1536	DstRegs [i] = SrcRegs [i];
1537	}
1538
1539	return true;
1540	}
1541
1542	bool IRTranslator::translateSelect(const User &U,
1543	MachineIRBuilder &MIRBuilder) {
1544	Register Tst = getOrCreateVReg(Val: *U.getOperand(i: `0`));
1545	ArrayRef<Register> ResRegs = getOrCreateVRegs(Val: U);
1546	ArrayRef<Register> Op0Regs = getOrCreateVRegs(Val: *U.getOperand(i: `1`));
1547	ArrayRef<Register> Op1Regs = getOrCreateVRegs(Val: *U.getOperand(i: `2`));
1548
1549	uint32_t Flags = `0`;
1550	if (const SelectInst *SI = dyn_cast<SelectInst>(Val: &U))
1551	Flags = MachineInstr::copyFlagsFromInstruction(I: *SI);
1552
1553	for (unsigned i = `0`; i < ResRegs.size(); ++i) {
1554	MIRBuilder.buildSelect(Res: ResRegs [i], Tst, Op0: Op0Regs [i], Op1: Op1Regs [i], Flags);
1555	}
1556
1557	return true;
1558	}
1559
1560	bool IRTranslator::translateCopy(const User &U, const Value &V,
1561	MachineIRBuilder &MIRBuilder) {
1562	Register Src = getOrCreateVReg(Val: V);
1563	auto &Regs = *VMap.getVRegs(V: U);
1564	if (Regs.empty()) {
1565	Regs.push_back(Elt: Src);
1566	VMap.getOffsets(V: U)->push_back(Elt: `0`);
1567	} else {
1568	// If we already assigned a vreg for this instruction, we can't change that.
1569	// Emit a copy to satisfy the users we already emitted.
1570	MIRBuilder.buildCopy(Res: Regs [`0`], Op: Src);
1571	}
1572	return true;
1573	}
1574
1575	bool IRTranslator::translateBitCast(const User &U,
1576	MachineIRBuilder &MIRBuilder) {
1577	// If we're bitcasting to the source type, we can reuse the source vreg.
1578	if (getLLTForType(Ty&: U.getOperand(i: `0`)->getType(), DL: DL) ==
1579	getLLTForType(Ty&: U.getType(), DL: DL)) {
1580	// If the source is a ConstantInt then it was probably created by
1581	// ConstantHoisting and we should leave it alone.
1582	if (isa<ConstantInt>(Val: U.getOperand(i: `0`)))
1583	return translateCast(Opcode: TargetOpcode::G_CONSTANT_FOLD_BARRIER, U,
1584	MIRBuilder);
1585	return translateCopy(U, V: *U.getOperand(i: `0`), MIRBuilder);
1586	}
1587
1588	return translateCast(Opcode: TargetOpcode::G_BITCAST, U, MIRBuilder);
1589	}
1590
1591	bool IRTranslator::translateCast(unsigned Opcode, const User &U,
1592	MachineIRBuilder &MIRBuilder) {
1593	if (containsBF16Type(U) && !targetSupportsBF16Type(MF))
1594	return false;
1595
1596	uint32_t Flags = `0`;
1597	if (const Instruction *I = dyn_cast<Instruction>(Val: &U))
1598	Flags = MachineInstr::copyFlagsFromInstruction(I: *I);
1599
1600	Register Op = getOrCreateVReg(Val: *U.getOperand(i: `0`));
1601	Register Res = getOrCreateVReg(Val: U);
1602	MIRBuilder.buildInstr(Opc: Opcode, DstOps: {Res}, SrcOps: {Op}, Flags);
1603	return true;
1604	}
1605
1606	bool IRTranslator::translateGetElementPtr(const User &U,
1607	MachineIRBuilder &MIRBuilder) {
1608	Value &Op0 = *U.getOperand(i: `0`);
1609	Register BaseReg = getOrCreateVReg(Val: Op0);
1610	Type *PtrIRTy = Op0.getType();
1611	LLT PtrTy = getLLTForType(Ty&: PtrIRTy, DL: DL);
1612	Type *OffsetIRTy = DL->getIndexType(PtrTy: PtrIRTy);
1613	LLT OffsetTy = getLLTForType(Ty&: OffsetIRTy, DL: DL);
1614
1615	uint32_t PtrAddFlags = `0`;
1616	// Each PtrAdd generated to implement the GEP inherits its nuw, nusw, inbounds
1617	// flags.
1618	if (const Instruction *I = dyn_cast<Instruction>(Val: &U))
1619	PtrAddFlags = MachineInstr::copyFlagsFromInstruction(I: *I);
1620
1621	auto PtrAddFlagsWithConst = [&](int64_t Offset) {
1622	// For nusw/inbounds GEP with an offset that is nonnegative when interpreted
1623	// as signed, assume there is no unsigned overflow.
1624	if (Offset >= `0` && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap))
1625	return PtrAddFlags \| MachineInstr::MIFlag::NoUWrap;
1626	return PtrAddFlags;
1627	};
1628
1629	// Normalize Vector GEP - all scalar operands should be converted to the
1630	// splat vector.
1631	unsigned VectorWidth = `0`;
1632
1633	// True if we should use a splat vector; using VectorWidth alone is not
1634	// sufficient.
1635	bool WantSplatVector = false;
1636	if (auto *VT = dyn_cast<VectorType>(Val: U.getType())) {
1637	VectorWidth = cast<FixedVectorType>(Val: VT)->getNumElements();
1638	// We don't produce 1 x N vectors; those are treated as scalars.
1639	WantSplatVector = VectorWidth > `1`;
1640	}
1641
1642	// We might need to splat the base pointer into a vector if the offsets
1643	// are vectors.
1644	if (WantSplatVector && !PtrTy.isVector()) {
1645	BaseReg = MIRBuilder
1646	.buildSplatBuildVector(Res: LLT::fixed_vector(NumElements: VectorWidth, ScalarTy: PtrTy),
1647	Src: BaseReg)
1648	.getReg(Idx: `0`);
1649	PtrIRTy = FixedVectorType::get(ElementType: PtrIRTy, NumElts: VectorWidth);
1650	PtrTy = getLLTForType(Ty&: PtrIRTy, DL: DL);
1651	OffsetIRTy = DL->getIndexType(PtrTy: PtrIRTy);
1652	OffsetTy = getLLTForType(Ty&: OffsetIRTy, DL: DL);
1653	}
1654
1655	int64_t Offset = `0`;
1656	for (gep_type_iterator GTI = gep_type_begin(GEP: &U), E = gep_type_end(GEP: &U);
1657	GTI != E; ++GTI) {
1658	const Value *Idx = GTI.getOperand();
1659	if (StructType *StTy = GTI.getStructTypeOrNull()) {
1660	unsigned Field = cast<Constant>(Val: Idx)->getUniqueInteger().getZExtValue();
1661	Offset += DL->getStructLayout(Ty: StTy)->getElementOffset(Idx: Field);
1662	continue;
1663	} else {
1664	uint64_t ElementSize = GTI.getSequentialElementStride(DL: *DL);
1665
1666	// If this is a scalar constant or a splat vector of constants,
1667	// handle it quickly.
1668	if (const auto *CI = dyn_cast<ConstantInt>(Val: Idx)) {
1669	if (std::optional<int64_t> Val = CI->getValue().trySExtValue()) {
1670	Offset += ElementSize * *Val;
1671	continue;
1672	}
1673	}
1674
1675	if (Offset != `0`) {
1676	auto OffsetMIB = MIRBuilder.buildConstant(Res: {OffsetTy}, Val: Offset);
1677	BaseReg = MIRBuilder
1678	.buildPtrAdd(Res: PtrTy, Op0: BaseReg, Op1: OffsetMIB.getReg(Idx: `0`),
1679	Flags: PtrAddFlagsWithConst (Offset))
1680	.getReg(Idx: `0`);
1681	Offset = `0`;
1682	}
1683
1684	Register IdxReg = getOrCreateVReg(Val: *Idx);
1685	LLT IdxTy = MRI->getType(Reg: IdxReg);
1686	if (IdxTy != OffsetTy) {
1687	if (!IdxTy.isVector() && WantSplatVector) {
1688	IdxReg = MIRBuilder
1689	.buildSplatBuildVector(Res: OffsetTy.changeElementType(NewEltTy: IdxTy),
1690	Src: IdxReg)
1691	.getReg(Idx: `0`);
1692	}
1693
1694	IdxReg = MIRBuilder.buildSExtOrTrunc(Res: OffsetTy, Op: IdxReg).getReg(Idx: `0`);
1695	}
1696
1697	// N = N + Idx ElementSize;*
1698	// Avoid doing it for ElementSize of 1.
1699	Register GepOffsetReg;
1700	if (ElementSize != `1`) {
1701	auto ElementSizeMIB = MIRBuilder.buildConstant(
1702	Res: getLLTForType(Ty&: OffsetIRTy, DL: DL), Val: ElementSize);
1703
1704	// The multiplication is NUW if the GEP is NUW and NSW if the GEP is
1705	// NUSW.
1706	uint32_t ScaleFlags = PtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1707	if (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap)
1708	ScaleFlags \|= MachineInstr::MIFlag::NoSWrap;
1709
1710	GepOffsetReg =
1711	MIRBuilder.buildMul(Dst: OffsetTy, Src0: IdxReg, Src1: ElementSizeMIB, Flags: ScaleFlags)
1712	.getReg(Idx: `0`);
1713	} else {
1714	GepOffsetReg = IdxReg;
1715	}
1716
1717	BaseReg =
1718	MIRBuilder.buildPtrAdd(Res: PtrTy, Op0: BaseReg, Op1: GepOffsetReg, Flags: PtrAddFlags)
1719	.getReg(Idx: `0`);
1720	}
1721	}
1722
1723	if (Offset != `0`) {
1724	auto OffsetMIB =
1725	MIRBuilder.buildConstant(Res: OffsetTy, Val: Offset);
1726
1727	MIRBuilder.buildPtrAdd(Res: getOrCreateVReg(Val: U), Op0: BaseReg, Op1: OffsetMIB.getReg(Idx: `0`),
1728	Flags: PtrAddFlagsWithConst (Offset));
1729	return true;
1730	}
1731
1732	MIRBuilder.buildCopy(Res: getOrCreateVReg(Val: U), Op: BaseReg);
1733	return true;
1734	}
1735
1736	bool IRTranslator::translateMemFunc(const CallInst &CI,
1737	MachineIRBuilder &MIRBuilder,
1738	unsigned Opcode) {
1739	const Value *SrcPtr = CI.getArgOperand(i: `1`);
1740	// If the source is undef, then just emit a nop.
1741	if (isa<UndefValue>(Val: SrcPtr))
1742	return true;
1743
1744	SmallVector<Register, `3`> SrcRegs;
1745
1746	unsigned MinPtrSize = UINT_MAX;
1747	for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(x: AI) != AE; ++AI) {
1748	Register SrcReg = getOrCreateVReg(Val: **AI);
1749	LLT SrcTy = MRI->getType(Reg: SrcReg);
1750	if (SrcTy.isPointer())
1751	MinPtrSize = std::min<unsigned>(a: SrcTy.getSizeInBits(), b: MinPtrSize);
1752	SrcRegs.push_back(Elt: SrcReg);
1753	}
1754
1755	LLT SizeTy = LLT::scalar(SizeInBits: MinPtrSize);
1756
1757	// The size operand should be the minimum of the pointer sizes.
1758	Register &SizeOpReg = SrcRegs [SrcRegs.size() - `1`];
1759	if (MRI->getType(Reg: SizeOpReg) != SizeTy)
1760	SizeOpReg = MIRBuilder.buildZExtOrTrunc(Res: SizeTy, Op: SizeOpReg).getReg(Idx: `0`);
1761
1762	auto ICall = MIRBuilder.buildInstr(Opcode);
1763	for (Register SrcReg : SrcRegs)
1764	ICall.addUse(RegNo: SrcReg);
1765
1766	Align DstAlign;
1767	Align SrcAlign;
1768	unsigned IsVol =
1769	cast<ConstantInt>(Val: CI.getArgOperand(i: CI.arg_size() - `1`))->getZExtValue();
1770
1771	ConstantInt CopySize = nullptr*;
1772
1773	if (auto *MCI = dyn_cast<MemCpyInst>(Val: &CI)) {
1774	DstAlign = MCI->getDestAlign().valueOrOne();
1775	SrcAlign = MCI->getSourceAlign().valueOrOne();
1776	CopySize = dyn_cast<ConstantInt>(Val: MCI->getArgOperand(i: `2`));
1777	} else if (auto *MMI = dyn_cast<MemMoveInst>(Val: &CI)) {
1778	DstAlign = MMI->getDestAlign().valueOrOne();
1779	SrcAlign = MMI->getSourceAlign().valueOrOne();
1780	CopySize = dyn_cast<ConstantInt>(Val: MMI->getArgOperand(i: `2`));
1781	} else {
1782	auto *MSI = cast<MemSetInst>(Val: &CI);
1783	DstAlign = MSI->getDestAlign().valueOrOne();
1784	}
1785
1786	if (Opcode != TargetOpcode::G_MEMCPY_INLINE) {
1787	// We need to propagate the tail call flag from the IR inst as an argument.
1788	// Otherwise, we have to pessimize and assume later that we cannot tail call
1789	// any memory intrinsics.
1790	ICall.addImm(Val: CI.isTailCall() ? `1` : `0`);
1791	}
1792
1793	// Create mem operands to store the alignment and volatile info.
1794	MachineMemOperand::Flags LoadFlags = MachineMemOperand::MOLoad;
1795	MachineMemOperand::Flags StoreFlags = MachineMemOperand::MOStore;
1796	if (IsVol) {
1797	LoadFlags \|= MachineMemOperand::MOVolatile;
1798	StoreFlags \|= MachineMemOperand::MOVolatile;
1799	}
1800
1801	AAMDNodes AAInfo = CI.getAAMetadata();
1802	if (AA && CopySize &&
1803	AA->pointsToConstantMemory(Loc: MemoryLocation (
1804	SrcPtr, LocationSize::precise(Value: CopySize->getZExtValue()), AAInfo))) {
1805	LoadFlags \|= MachineMemOperand::MOInvariant;
1806
1807	// FIXME: pointsToConstantMemory probably does not imply dereferenceable,
1808	// but the previous usage implied it did. Probably should check
1809	// isDereferenceableAndAlignedPointer.
1810	LoadFlags \|= MachineMemOperand::MODereferenceable;
1811	}
1812
1813	ICall.addMemOperand(
1814	MMO: MF->getMachineMemOperand(PtrInfo: MachinePointerInfo (CI.getArgOperand(i: `0`)),
1815	F: StoreFlags, Size: `1`, BaseAlignment: DstAlign, AAInfo));
1816	if (Opcode != TargetOpcode::G_MEMSET)
1817	ICall.addMemOperand(MMO: MF->getMachineMemOperand(
1818	PtrInfo: MachinePointerInfo (SrcPtr), F: LoadFlags, Size: `1`, BaseAlignment: SrcAlign, AAInfo));
1819
1820	return true;
1821	}
1822
1823	bool IRTranslator::translateTrap(const CallInst &CI,
1824	MachineIRBuilder &MIRBuilder,
1825	unsigned Opcode) {
1826	StringRef TrapFuncName =
1827	CI.getAttributes().getFnAttr(Kind: "trap-func-name").getValueAsString();
1828	if (TrapFuncName.empty()) {
1829	if (Opcode == TargetOpcode::G_UBSANTRAP) {
1830	uint64_t Code = cast<ConstantInt>(Val: CI.getOperand(i_nocapture: `0`))->getZExtValue();
1831	MIRBuilder.buildInstr(Opc: Opcode, DstOps: {}, SrcOps: ArrayRef<llvm::SrcOp>{Code});
1832	} else {
1833	MIRBuilder.buildInstr(Opcode);
1834	}
1835	return true;
1836	}
1837
1838	CallLowering::CallLoweringInfo Info;
1839	if (Opcode == TargetOpcode::G_UBSANTRAP)
1840	Info.OrigArgs.push_back(Elt: {getOrCreateVRegs(Val: *CI.getArgOperand(i: `0`)),
1841	CI.getArgOperand(i: `0`)->getType(), `0`});
1842
1843	Info.Callee = MachineOperand::CreateES(SymName: TrapFuncName.data());
1844	Info.CB = &CI;
1845	Info.OrigRet = {Register (), Type::getVoidTy(C&: CI.getContext()), `0`};
1846	return CLI->lowerCall(MIRBuilder, Info);
1847	}
1848
1849	bool IRTranslator::translateVectorInterleave2Intrinsic(
1850	const CallInst &CI, MachineIRBuilder &MIRBuilder) {
1851	assert(CI.getIntrinsicID() == Intrinsic::vector_interleave2 &&
1852	"This function can only be called on the interleave2 intrinsic!");
1853	// Canonicalize interleave2 to G_SHUFFLE_VECTOR (similar to SelectionDAG).
1854	Register Op0 = getOrCreateVReg(Val: *CI.getOperand(i_nocapture: `0`));
1855	Register Op1 = getOrCreateVReg(Val: *CI.getOperand(i_nocapture: `1`));
1856	Register Res = getOrCreateVReg(Val: CI);
1857
1858	LLT OpTy = MRI->getType(Reg: Op0);
1859	MIRBuilder.buildShuffleVector(Res, Src1: Op0, Src2: Op1,
1860	Mask: createInterleaveMask(VF: OpTy.getNumElements(), NumVecs: `2`));
1861
1862	return true;
1863	}
1864
1865	bool IRTranslator::translateVectorDeinterleave2Intrinsic(
1866	const CallInst &CI, MachineIRBuilder &MIRBuilder) {
1867	assert(CI.getIntrinsicID() == Intrinsic::vector_deinterleave2 &&
1868	"This function can only be called on the deinterleave2 intrinsic!");
1869	// Canonicalize deinterleave2 to shuffles that extract sub-vectors (similar to
1870	// SelectionDAG).
1871	Register Op = getOrCreateVReg(Val: *CI.getOperand(i_nocapture: `0`));
1872	auto Undef = MIRBuilder.buildUndef(Res: MRI->getType(Reg: Op));
1873	ArrayRef<Register> Res = getOrCreateVRegs(Val: CI);
1874
1875	LLT ResTy = MRI->getType(Reg: Res [`0`]);
1876	MIRBuilder.buildShuffleVector(Res: Res [`0`], Src1: Op, Src2: Undef,
1877	Mask: createStrideMask(Start: `0`, Stride: `2`, VF: ResTy.getNumElements()));
1878	MIRBuilder.buildShuffleVector(Res: Res [`1`], Src1: Op, Src2: Undef,
1879	Mask: createStrideMask(Start: `1`, Stride: `2`, VF: ResTy.getNumElements()));
1880
1881	return true;
1882	}
1883
1884	void IRTranslator::getStackGuard(Register DstReg,
1885	MachineIRBuilder &MIRBuilder) {
1886	Value *Global =
1887	TLI->getSDagStackGuard(M: MF->getFunction().getParent(), Libcalls: Libcalls);
1888	if (!Global) {
1889	LLVMContext &Ctx = MIRBuilder.getContext();
1890	Ctx.diagnose(DI: DiagnosticInfoGeneric ("unable to lower stackguard"));
1891	MIRBuilder.buildUndef(Res: DstReg);
1892	return;
1893	}
1894
1895	const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1896	MRI->setRegClass(Reg: DstReg, RC: TRI->getPointerRegClass());
1897	auto MIB =
1898	MIRBuilder.buildInstr(Opc: TargetOpcode::LOAD_STACK_GUARD, DstOps: {DstReg}, SrcOps: {});
1899
1900	unsigned AddrSpace = Global->getType()->getPointerAddressSpace();
1901	LLT PtrTy = LLT::pointer(AddressSpace: AddrSpace, SizeInBits: DL->getPointerSizeInBits(AS: AddrSpace));
1902
1903	MachinePointerInfo MPInfo(Global);
1904	auto Flags = MachineMemOperand::MOLoad \| MachineMemOperand::MOInvariant \|
1905	MachineMemOperand::MODereferenceable;
1906	MachineMemOperand *MemRef = MF->getMachineMemOperand(
1907	PtrInfo: MPInfo, f: Flags, MemTy: PtrTy, base_alignment: DL->getPointerABIAlignment(AS: AddrSpace));
1908	MIB.setMemRefs({MemRef});
1909	}
1910
1911	bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
1912	MachineIRBuilder &MIRBuilder) {
1913	ArrayRef<Register> ResRegs = getOrCreateVRegs(Val: CI);
1914	MIRBuilder.buildInstr(
1915	Opc: Op, DstOps: {ResRegs [`0`], ResRegs [`1`]},
1916	SrcOps: {getOrCreateVReg(Val: CI.getOperand(i_nocapture: `0`)), getOrCreateVReg(Val: CI.getOperand(i_nocapture: `1`))});
1917
1918	return true;
1919	}
1920
1921	bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
1922	MachineIRBuilder &MIRBuilder) {
1923	Register Dst = getOrCreateVReg(Val: CI);
1924	Register Src0 = getOrCreateVReg(Val: *CI.getOperand(i_nocapture: `0`));
1925	Register Src1 = getOrCreateVReg(Val: *CI.getOperand(i_nocapture: `1`));
1926	uint64_t Scale = cast<ConstantInt>(Val: CI.getOperand(i_nocapture: `2`))->getZExtValue();
1927	MIRBuilder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: { Src0, Src1, Scale });
1928	return true;
1929	}
1930
1931	unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
1932	switch (ID) {
1933	default:
1934	break;
1935	case Intrinsic::acos:
1936	return TargetOpcode::G_FACOS;
1937	case Intrinsic::asin:
1938	return TargetOpcode::G_FASIN;
1939	case Intrinsic::atan:
1940	return TargetOpcode::G_FATAN;
1941	case Intrinsic::atan2:
1942	return TargetOpcode::G_FATAN2;
1943	case Intrinsic::bswap:
1944	return TargetOpcode::G_BSWAP;
1945	case Intrinsic::bitreverse:
1946	return TargetOpcode::G_BITREVERSE;
1947	case Intrinsic::fshl:
1948	return TargetOpcode::G_FSHL;
1949	case Intrinsic::fshr:
1950	return TargetOpcode::G_FSHR;
1951	case Intrinsic::ceil:
1952	return TargetOpcode::G_FCEIL;
1953	case Intrinsic::cos:
1954	return TargetOpcode::G_FCOS;
1955	case Intrinsic::cosh:
1956	return TargetOpcode::G_FCOSH;
1957	case Intrinsic::ctpop:
1958	return TargetOpcode::G_CTPOP;
1959	case Intrinsic::exp:
1960	return TargetOpcode::G_FEXP;
1961	case Intrinsic::exp2:
1962	return TargetOpcode::G_FEXP2;
1963	case Intrinsic::exp10:
1964	return TargetOpcode::G_FEXP10;
1965	case Intrinsic::fabs:
1966	return TargetOpcode::G_FABS;
1967	case Intrinsic::copysign:
1968	return TargetOpcode::G_FCOPYSIGN;
1969	case Intrinsic::minnum:
1970	return TargetOpcode::G_FMINNUM;
1971	case Intrinsic::maxnum:
1972	return TargetOpcode::G_FMAXNUM;
1973	case Intrinsic::minimum:
1974	return TargetOpcode::G_FMINIMUM;
1975	case Intrinsic::maximum:
1976	return TargetOpcode::G_FMAXIMUM;
1977	case Intrinsic::minimumnum:
1978	return TargetOpcode::G_FMINIMUMNUM;
1979	case Intrinsic::maximumnum:
1980	return TargetOpcode::G_FMAXIMUMNUM;
1981	case Intrinsic::canonicalize:
1982	return TargetOpcode::G_FCANONICALIZE;
1983	case Intrinsic::floor:
1984	return TargetOpcode::G_FFLOOR;
1985	case Intrinsic::fma:
1986	return TargetOpcode::G_FMA;
1987	case Intrinsic::log:
1988	return TargetOpcode::G_FLOG;
1989	case Intrinsic::log2:
1990	return TargetOpcode::G_FLOG2;
1991	case Intrinsic::log10:
1992	return TargetOpcode::G_FLOG10;
1993	case Intrinsic::ldexp:
1994	return TargetOpcode::G_FLDEXP;
1995	case Intrinsic::nearbyint:
1996	return TargetOpcode::G_FNEARBYINT;
1997	case Intrinsic::pow:
1998	return TargetOpcode::G_FPOW;
1999	case Intrinsic::powi:
2000	return TargetOpcode::G_FPOWI;
2001	case Intrinsic::rint:
2002	return TargetOpcode::G_FRINT;
2003	case Intrinsic::round:
2004	return TargetOpcode::G_INTRINSIC_ROUND;
2005	case Intrinsic::roundeven:
2006	return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
2007	case Intrinsic::sin:
2008	return TargetOpcode::G_FSIN;
2009	case Intrinsic::sinh:
2010	return TargetOpcode::G_FSINH;
2011	case Intrinsic::sqrt:
2012	return TargetOpcode::G_FSQRT;
2013	case Intrinsic::tan:
2014	return TargetOpcode::G_FTAN;
2015	case Intrinsic::tanh:
2016	return TargetOpcode::G_FTANH;
2017	case Intrinsic::trunc:
2018	return TargetOpcode::G_INTRINSIC_TRUNC;
2019	case Intrinsic::readcyclecounter:
2020	return TargetOpcode::G_READCYCLECOUNTER;
2021	case Intrinsic::readsteadycounter:
2022	return TargetOpcode::G_READSTEADYCOUNTER;
2023	case Intrinsic::ptrmask:
2024	return TargetOpcode::G_PTRMASK;
2025	case Intrinsic::lrint:
2026	return TargetOpcode::G_INTRINSIC_LRINT;
2027	case Intrinsic::llrint:
2028	return TargetOpcode::G_INTRINSIC_LLRINT;
2029	// FADD/FMUL require checking the FMF, so are handled elsewhere.
2030	case Intrinsic::vector_reduce_fmin:
2031	return TargetOpcode::G_VECREDUCE_FMIN;
2032	case Intrinsic::vector_reduce_fmax:
2033	return TargetOpcode::G_VECREDUCE_FMAX;
2034	case Intrinsic::vector_reduce_fminimum:
2035	return TargetOpcode::G_VECREDUCE_FMINIMUM;
2036	case Intrinsic::vector_reduce_fmaximum:
2037	return TargetOpcode::G_VECREDUCE_FMAXIMUM;
2038	case Intrinsic::vector_reduce_add:
2039	return TargetOpcode::G_VECREDUCE_ADD;
2040	case Intrinsic::vector_reduce_mul:
2041	return TargetOpcode::G_VECREDUCE_MUL;
2042	case Intrinsic::vector_reduce_and:
2043	return TargetOpcode::G_VECREDUCE_AND;
2044	case Intrinsic::vector_reduce_or:
2045	return TargetOpcode::G_VECREDUCE_OR;
2046	case Intrinsic::vector_reduce_xor:
2047	return TargetOpcode::G_VECREDUCE_XOR;
2048	case Intrinsic::vector_reduce_smax:
2049	return TargetOpcode::G_VECREDUCE_SMAX;
2050	case Intrinsic::vector_reduce_smin:
2051	return TargetOpcode::G_VECREDUCE_SMIN;
2052	case Intrinsic::vector_reduce_umax:
2053	return TargetOpcode::G_VECREDUCE_UMAX;
2054	case Intrinsic::vector_reduce_umin:
2055	return TargetOpcode::G_VECREDUCE_UMIN;
2056	case Intrinsic::experimental_vector_compress:
2057	return TargetOpcode::G_VECTOR_COMPRESS;
2058	case Intrinsic::lround:
2059	return TargetOpcode::G_LROUND;
2060	case Intrinsic::llround:
2061	return TargetOpcode::G_LLROUND;
2062	case Intrinsic::get_fpenv:
2063	return TargetOpcode::G_GET_FPENV;
2064	case Intrinsic::get_fpmode:
2065	return TargetOpcode::G_GET_FPMODE;
2066	}
2067	return Intrinsic::not_intrinsic;
2068	}
2069
2070	bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
2071	Intrinsic::ID ID,
2072	MachineIRBuilder &MIRBuilder) {
2073
2074	unsigned Op = getSimpleIntrinsicOpcode(ID);
2075
2076	// Is this a simple intrinsic?
2077	if (Op == Intrinsic::not_intrinsic)
2078	return false;
2079
2080	// Yes. Let's translate it.
2081	SmallVector<llvm::SrcOp, `4`> VRegs;
2082	for (const auto &Arg : CI.args())
2083	VRegs.push_back(Elt: getOrCreateVReg(Val: *Arg));
2084
2085	MIRBuilder.buildInstr(Opc: Op, DstOps: {getOrCreateVReg(Val: CI)}, SrcOps: VRegs,
2086	Flags: MachineInstr::copyFlagsFromInstruction(I: CI));
2087	return true;
2088	}
2089
2090	// TODO: Include ConstainedOps.def when all strict instructions are defined.
2091	static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
2092	switch (ID) {
2093	case Intrinsic::experimental_constrained_fadd:
2094	return TargetOpcode::G_STRICT_FADD;
2095	case Intrinsic::experimental_constrained_fsub:
2096	return TargetOpcode::G_STRICT_FSUB;
2097	case Intrinsic::experimental_constrained_fmul:
2098	return TargetOpcode::G_STRICT_FMUL;
2099	case Intrinsic::experimental_constrained_fdiv:
2100	return TargetOpcode::G_STRICT_FDIV;
2101	case Intrinsic::experimental_constrained_frem:
2102	return TargetOpcode::G_STRICT_FREM;
2103	case Intrinsic::experimental_constrained_fma:
2104	return TargetOpcode::G_STRICT_FMA;
2105	case Intrinsic::experimental_constrained_sqrt:
2106	return TargetOpcode::G_STRICT_FSQRT;
2107	case Intrinsic::experimental_constrained_ldexp:
2108	return TargetOpcode::G_STRICT_FLDEXP;
2109	default:
2110	return `0`;
2111	}
2112	}
2113
2114	bool IRTranslator::translateConstrainedFPIntrinsic(
2115	const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
2116	fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
2117
2118	unsigned Opcode = getConstrainedOpcode(ID: FPI.getIntrinsicID());
2119	if (!Opcode)
2120	return false;
2121
2122	uint32_t Flags = MachineInstr::copyFlagsFromInstruction(I: FPI);
2123	if (EB == fp::ExceptionBehavior::ebIgnore)
2124	Flags \|= MachineInstr::NoFPExcept;
2125
2126	SmallVector<llvm::SrcOp, `4`> VRegs;
2127	for (unsigned I = `0`, E = FPI.getNonMetadataArgCount(); I != E; ++I)
2128	VRegs.push_back(Elt: getOrCreateVReg(Val: *FPI.getArgOperand(i: I)));
2129
2130	MIRBuilder.buildInstr(Opc: Opcode, DstOps: {getOrCreateVReg(Val: FPI)}, SrcOps: VRegs, Flags);
2131	return true;
2132	}
2133
2134	std::optional<MCRegister> IRTranslator::getArgPhysReg(Argument &Arg) {
2135	auto VRegs = getOrCreateVRegs(Val: Arg);
2136	if (VRegs.size() != `1`)
2137	return std::nullopt;
2138
2139	// Arguments are lowered as a copy of a livein physical register.
2140	auto *VRegDef = MF->getRegInfo().getVRegDef(Reg: VRegs [`0`]);
2141	if (!VRegDef \|\| !VRegDef->isCopy())
2142	return std::nullopt;
2143	return VRegDef->getOperand(i: `1`).getReg().asMCReg();
2144	}
2145
2146	bool IRTranslator::translateIfEntryValueArgument(bool isDeclare, Value *Val,
2147	const DILocalVariable *Var,
2148	const DIExpression *Expr,
2149	const DebugLoc &DL,
2150	MachineIRBuilder &MIRBuilder) {
2151	auto *Arg = dyn_cast<Argument>(Val);
2152	if (!Arg)
2153	return false;
2154
2155	if (!Expr->isEntryValue())
2156	return false;
2157
2158	std::optional<MCRegister> PhysReg = getArgPhysReg(Arg&: *Arg);
2159	if (!PhysReg) {
2160	LLVM_DEBUG(dbgs() << "Dropping dbg." << (isDeclare ? "declare" : "value")
2161	<< ": expression is entry_value but "
2162	<< "couldn't find a physical register\n");
2163	LLVM_DEBUG(dbgs() << *Var << "\n");
2164	return true;
2165	}
2166
2167	if (isDeclare) {
2168	// Append an op deref to account for the fact that this is a dbg_declare.
2169	Expr = DIExpression::append(Expr, Ops: dwarf::DW_OP_deref);
2170	MF->setVariableDbgInfo(Var, Expr, Reg: *PhysReg, Loc: DL);
2171	} else {
2172	MIRBuilder.buildDirectDbgValue(Reg: *PhysReg, Variable: Var, Expr);
2173	}
2174
2175	return true;
2176	}
2177
2178	static unsigned getConvOpcode(Intrinsic::ID ID) {
2179	switch (ID) {
2180	default:
2181	llvm_unreachable("Unexpected intrinsic");
2182	case Intrinsic::experimental_convergence_anchor:
2183	return TargetOpcode::CONVERGENCECTRL_ANCHOR;
2184	case Intrinsic::experimental_convergence_entry:
2185	return TargetOpcode::CONVERGENCECTRL_ENTRY;
2186	case Intrinsic::experimental_convergence_loop:
2187	return TargetOpcode::CONVERGENCECTRL_LOOP;
2188	}
2189	}
2190
2191	bool IRTranslator::translateConvergenceControlIntrinsic(
2192	const CallInst &CI, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder) {
2193	MachineInstrBuilder MIB = MIRBuilder.buildInstr(Opcode: getConvOpcode(ID));
2194	Register OutputReg = getOrCreateConvergenceTokenVReg(Token: CI);
2195	MIB.addDef(RegNo: OutputReg);
2196
2197	if (ID == Intrinsic::experimental_convergence_loop) {
2198	auto Bundle = CI.getOperandBundle(ID: LLVMContext::OB_convergencectrl);
2199	assert(Bundle && "Expected a convergence control token.");
2200	Register InputReg =
2201	getOrCreateConvergenceTokenVReg(Token: *Bundle ->Inputs [`0`].get());
2202	MIB.addUse(RegNo: InputReg);
2203	}
2204
2205	return true;
2206	}
2207
2208	bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
2209	MachineIRBuilder &MIRBuilder) {
2210	if (auto *MI = dyn_cast<AnyMemIntrinsic>(Val: &CI)) {
2211	if (ORE ->enabled()) {
2212	if (MemoryOpRemark::canHandle(I: MI, TLI: *LibInfo)) {
2213	MemoryOpRemark R(ORE, "gisel-irtranslator-memsize", DL, *LibInfo);
2214	R.visit(I: MI);
2215	}
2216	}
2217	}
2218
2219	// If this is a simple intrinsic (that is, we just need to add a def of
2220	// a vreg, and uses for each arg operand, then translate it.
2221	if (translateSimpleIntrinsic(CI, ID, MIRBuilder))
2222	return true;
2223
2224	switch (ID) {
2225	default:
2226	break;
2227	case Intrinsic::lifetime_start:
2228	case Intrinsic::lifetime_end: {
2229	// No stack colouring in O0, discard region information.
2230	if (MF->getTarget().getOptLevel() == CodeGenOptLevel::None \|\|
2231	MF->getFunction().hasOptNone())
2232	return true;
2233
2234	unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
2235	: TargetOpcode::LIFETIME_END;
2236
2237	const AllocaInst *AI = dyn_cast<AllocaInst>(Val: CI.getArgOperand(i: `0`));
2238	if (!AI \|\| !AI->isStaticAlloca())
2239	return true;
2240
2241	MIRBuilder.buildInstr(Opcode: Op).addFrameIndex(Idx: getOrCreateFrameIndex(AI: *AI));
2242	return true;
2243	}
2244	case Intrinsic::fake_use: {
2245	SmallVector<llvm::SrcOp, `4`> VRegs;
2246	for (const auto &Arg : CI.args())
2247	llvm::append_range(C&: VRegs, R: getOrCreateVRegs(Val: *Arg));
2248	MIRBuilder.buildInstr(Opc: TargetOpcode::FAKE_USE, DstOps: {}, SrcOps: VRegs);
2249	MF->setHasFakeUses(true);
2250	return true;
2251	}
2252	case Intrinsic::dbg_declare: {
2253	const DbgDeclareInst &DI = cast<DbgDeclareInst>(Val: CI);
2254	assert(DI.getVariable() && "Missing variable");
2255	translateDbgDeclareRecord(Address: DI.getAddress(), HasArgList: DI.hasArgList(), Variable: DI.getVariable(),
2256	Expression: DI.getExpression(), DL: DI.getDebugLoc(), MIRBuilder);
2257	return true;
2258	}
2259	case Intrinsic::dbg_label: {
2260	const DbgLabelInst &DI = cast<DbgLabelInst>(Val: CI);
2261	assert(DI.getLabel() && "Missing label");
2262
2263	assert(DI.getLabel()->isValidLocationForIntrinsic(
2264	MIRBuilder.getDebugLoc()) &&
2265	"Expected inlined-at fields to agree");
2266
2267	MIRBuilder.buildDbgLabel(Label: DI.getLabel());
2268	return true;
2269	}
2270	case Intrinsic::vaend:
2271	// No target I know of cares about va_end. Certainly no in-tree target
2272	// does. Simplest intrinsic ever!
2273	return true;
2274	case Intrinsic::vastart: {
2275	Value *Ptr = CI.getArgOperand(i: `0`);
2276	unsigned ListSize = TLI->getVaListSizeInBits(DL: *DL) / `8`;
2277	Align Alignment = getKnownAlignment(V: Ptr, DL: *DL);
2278
2279	MIRBuilder.buildInstr(Opc: TargetOpcode::G_VASTART, DstOps: {}, SrcOps: {getOrCreateVReg(Val: *Ptr)})
2280	.addMemOperand(MMO: MF->getMachineMemOperand(PtrInfo: MachinePointerInfo (Ptr),
2281	F: MachineMemOperand::MOStore,
2282	Size: ListSize, BaseAlignment: Alignment));
2283	return true;
2284	}
2285	case Intrinsic::dbg_assign:
2286	// A dbg.assign is a dbg.value with more information about stack locations,
2287	// typically produced during optimisation of variables with leaked
2288	// addresses. We can treat it like a normal dbg_value intrinsic here; to
2289	// benefit from the full analysis of stack/SSA locations, GlobalISel would
2290	// need to register for and use the AssignmentTrackingAnalysis pass.
2291	[[fallthrough]];
2292	case Intrinsic::dbg_value: {
2293	// This form of DBG_VALUE is target-independent.
2294	const DbgValueInst &DI = cast<DbgValueInst>(Val: CI);
2295	translateDbgValueRecord(V: DI.getValue(), HasArgList: DI.hasArgList(), Variable: DI.getVariable(),
2296	Expression: DI.getExpression(), DL: DI.getDebugLoc(), MIRBuilder);
2297	return true;
2298	}
2299	case Intrinsic::uadd_with_overflow:
2300	return translateOverflowIntrinsic(CI, Op: TargetOpcode::G_UADDO, MIRBuilder);
2301	case Intrinsic::sadd_with_overflow:
2302	return translateOverflowIntrinsic(CI, Op: TargetOpcode::G_SADDO, MIRBuilder);
2303	case Intrinsic::usub_with_overflow:
2304	return translateOverflowIntrinsic(CI, Op: TargetOpcode::G_USUBO, MIRBuilder);
2305	case Intrinsic::ssub_with_overflow:
2306	return translateOverflowIntrinsic(CI, Op: TargetOpcode::G_SSUBO, MIRBuilder);
2307	case Intrinsic::umul_with_overflow:
2308	return translateOverflowIntrinsic(CI, Op: TargetOpcode::G_UMULO, MIRBuilder);
2309	case Intrinsic::smul_with_overflow:
2310	return translateOverflowIntrinsic(CI, Op: TargetOpcode::G_SMULO, MIRBuilder);
2311	case Intrinsic::uadd_sat:
2312	return translateBinaryOp(Opcode: TargetOpcode::G_UADDSAT, U: CI, MIRBuilder);
2313	case Intrinsic::sadd_sat:
2314	return translateBinaryOp(Opcode: TargetOpcode::G_SADDSAT, U: CI, MIRBuilder);
2315	case Intrinsic::usub_sat:
2316	return translateBinaryOp(Opcode: TargetOpcode::G_USUBSAT, U: CI, MIRBuilder);
2317	case Intrinsic::ssub_sat:
2318	return translateBinaryOp(Opcode: TargetOpcode::G_SSUBSAT, U: CI, MIRBuilder);
2319	case Intrinsic::ushl_sat:
2320	return translateBinaryOp(Opcode: TargetOpcode::G_USHLSAT, U: CI, MIRBuilder);
2321	case Intrinsic::sshl_sat:
2322	return translateBinaryOp(Opcode: TargetOpcode::G_SSHLSAT, U: CI, MIRBuilder);
2323	case Intrinsic::umin:
2324	return translateBinaryOp(Opcode: TargetOpcode::G_UMIN, U: CI, MIRBuilder);
2325	case Intrinsic::umax:
2326	return translateBinaryOp(Opcode: TargetOpcode::G_UMAX, U: CI, MIRBuilder);
2327	case Intrinsic::smin:
2328	return translateBinaryOp(Opcode: TargetOpcode::G_SMIN, U: CI, MIRBuilder);
2329	case Intrinsic::smax:
2330	return translateBinaryOp(Opcode: TargetOpcode::G_SMAX, U: CI, MIRBuilder);
2331	case Intrinsic::abs:
2332	// TODO: Preserve "int min is poison" arg in GMIR?
2333	return translateUnaryOp(Opcode: TargetOpcode::G_ABS, U: CI, MIRBuilder);
2334	case Intrinsic::smul_fix:
2335	return translateFixedPointIntrinsic(Op: TargetOpcode::G_SMULFIX, CI, MIRBuilder);
2336	case Intrinsic::umul_fix:
2337	return translateFixedPointIntrinsic(Op: TargetOpcode::G_UMULFIX, CI, MIRBuilder);
2338	case Intrinsic::smul_fix_sat:
2339	return translateFixedPointIntrinsic(Op: TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
2340	case Intrinsic::umul_fix_sat:
2341	return translateFixedPointIntrinsic(Op: TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
2342	case Intrinsic::sdiv_fix:
2343	return translateFixedPointIntrinsic(Op: TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
2344	case Intrinsic::udiv_fix:
2345	return translateFixedPointIntrinsic(Op: TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
2346	case Intrinsic::sdiv_fix_sat:
2347	return translateFixedPointIntrinsic(Op: TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
2348	case Intrinsic::udiv_fix_sat:
2349	return translateFixedPointIntrinsic(Op: TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
2350	case Intrinsic::fmuladd: {
2351	const TargetMachine &TM = MF->getTarget();
2352	Register Dst = getOrCreateVReg(Val: CI);
2353	Register Op0 = getOrCreateVReg(Val: *CI.getArgOperand(i: `0`));
2354	Register Op1 = getOrCreateVReg(Val: *CI.getArgOperand(i: `1`));
2355	Register Op2 = getOrCreateVReg(Val: *CI.getArgOperand(i: `2`));
2356	if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
2357	TLI->isFMAFasterThanFMulAndFAdd(MF: *MF,
2358	TLI->getValueType(DL: *DL, Ty: CI.getType()))) {
2359	// TODO: Revisit this to see if we should move this part of the
2360	// lowering to the combiner.
2361	MIRBuilder.buildFMA(Dst, Src0: Op0, Src1: Op1, Src2: Op2,
2362	Flags: MachineInstr::copyFlagsFromInstruction(I: CI));
2363	} else {
2364	LLT Ty = getLLTForType(Ty&: CI.getType(), DL: DL);
2365	auto FMul = MIRBuilder.buildFMul(
2366	Dst: Ty, Src0: Op0, Src1: Op1, Flags: MachineInstr::copyFlagsFromInstruction(I: CI));
2367	MIRBuilder.buildFAdd(Dst, Src0: FMul, Src1: Op2,
2368	Flags: MachineInstr::copyFlagsFromInstruction(I: CI));
2369	}
2370	return true;
2371	}
2372	case Intrinsic::frexp: {
2373	ArrayRef<Register> VRegs = getOrCreateVRegs(Val: CI);
2374	MIRBuilder.buildFFrexp(Fract: VRegs [`0`], Exp: VRegs [`1`],
2375	Src: getOrCreateVReg(Val: *CI.getArgOperand(i: `0`)),
2376	Flags: MachineInstr::copyFlagsFromInstruction(I: CI));
2377	return true;
2378	}
2379	case Intrinsic::modf: {
2380	ArrayRef<Register> VRegs = getOrCreateVRegs(Val: CI);
2381	MIRBuilder.buildModf(Fract: VRegs [`0`], Int: VRegs [`1`],
2382	Src: getOrCreateVReg(Val: *CI.getArgOperand(i: `0`)),
2383	Flags: MachineInstr::copyFlagsFromInstruction(I: CI));
2384	return true;
2385	}
2386	case Intrinsic::sincos: {
2387	ArrayRef<Register> VRegs = getOrCreateVRegs(Val: CI);
2388	MIRBuilder.buildFSincos(Sin: VRegs [`0`], Cos: VRegs [`1`],
2389	Src: getOrCreateVReg(Val: *CI.getArgOperand(i: `0`)),
2390	Flags: MachineInstr::copyFlagsFromInstruction(I: CI));
2391	return true;
2392	}
2393	case Intrinsic::fptosi_sat:
2394	MIRBuilder.buildFPTOSI_SAT(Dst: getOrCreateVReg(Val: CI),
2395	Src0: getOrCreateVReg(Val: *CI.getArgOperand(i: `0`)));
2396	return true;
2397	case Intrinsic::fptoui_sat:
2398	MIRBuilder.buildFPTOUI_SAT(Dst: getOrCreateVReg(Val: CI),
2399	Src0: getOrCreateVReg(Val: *CI.getArgOperand(i: `0`)));
2400	return true;
2401	case Intrinsic::memcpy_inline:
2402	return translateMemFunc(CI, MIRBuilder, Opcode: TargetOpcode::G_MEMCPY_INLINE);
2403	case Intrinsic::memcpy:
2404	return translateMemFunc(CI, MIRBuilder, Opcode: TargetOpcode::G_MEMCPY);
2405	case Intrinsic::memmove:
2406	return translateMemFunc(CI, MIRBuilder, Opcode: TargetOpcode::G_MEMMOVE);
2407	case Intrinsic::memset:
2408	return translateMemFunc(CI, MIRBuilder, Opcode: TargetOpcode::G_MEMSET);
2409	case Intrinsic::eh_typeid_for: {
2410	GlobalValue *GV = ExtractTypeInfo(V: CI.getArgOperand(i: `0`));
2411	Register Reg = getOrCreateVReg(Val: CI);
2412	unsigned TypeID = MF->getTypeIDFor(TI: GV);
2413	MIRBuilder.buildConstant(Res: Reg, Val: TypeID);
2414	return true;
2415	}
2416	case Intrinsic::objectsize:
2417	llvm_unreachable("llvm.objectsize.* should have been lowered already");
2418
2419	case Intrinsic::is_constant:
2420	llvm_unreachable("llvm.is.constant.* should have been lowered already");
2421
2422	case Intrinsic::stackguard:
2423	getStackGuard(DstReg: getOrCreateVReg(Val: CI), MIRBuilder);
2424	return true;
2425	case Intrinsic::stackprotector: {
2426	LLT PtrTy = getLLTForType(Ty&: CI.getArgOperand(i: `0`)->getType(), DL: DL);
2427	Register GuardVal;
2428	if (TLI->useLoadStackGuardNode(M: *CI.getModule())) {
2429	GuardVal = MRI->createGenericVirtualRegister(Ty: PtrTy);
2430	getStackGuard(DstReg: GuardVal, MIRBuilder);
2431	} else
2432	GuardVal = getOrCreateVReg(Val: CI.getArgOperand(i: `0`)); // The guard's value.*
2433
2434	AllocaInst *Slot = cast<AllocaInst>(Val: CI.getArgOperand(i: `1`));
2435	int FI = getOrCreateFrameIndex(AI: *Slot);
2436	MF->getFrameInfo().setStackProtectorIndex(FI);
2437
2438	MIRBuilder.buildStore(
2439	Val: GuardVal, Addr: getOrCreateVReg(Val: *Slot),
2440	MMO&: MF->getMachineMemOperand(PtrInfo: MachinePointerInfo::getFixedStack(MF&: MF, FI),
2441	f: MachineMemOperand::MOStore \|
2442	MachineMemOperand::MOVolatile,
2443	MemTy: PtrTy, base_alignment: Align (`8`)));
2444	return true;
2445	}
2446	case Intrinsic::stacksave: {
2447	MIRBuilder.buildInstr(Opc: TargetOpcode::G_STACKSAVE, DstOps: {getOrCreateVReg(Val: CI)}, SrcOps: {});
2448	return true;
2449	}
2450	case Intrinsic::stackrestore: {
2451	MIRBuilder.buildInstr(Opc: TargetOpcode::G_STACKRESTORE, DstOps: {},
2452	SrcOps: {getOrCreateVReg(Val: *CI.getArgOperand(i: `0`))});
2453	return true;
2454	}
2455	case Intrinsic::cttz:
2456	case Intrinsic::ctlz: {
2457	ConstantInt *Cst = cast<ConstantInt>(Val: CI.getArgOperand(i: `1`));
2458	bool isTrailing = ID == Intrinsic::cttz;
2459	unsigned Opcode = isTrailing
2460	? Cst->isZero() ? TargetOpcode::G_CTTZ
2461	: TargetOpcode::G_CTTZ_ZERO_UNDEF
2462	: Cst->isZero() ? TargetOpcode::G_CTLZ
2463	: TargetOpcode::G_CTLZ_ZERO_UNDEF;
2464	MIRBuilder.buildInstr(Opc: Opcode, DstOps: {getOrCreateVReg(Val: CI)},
2465	SrcOps: {getOrCreateVReg(Val: *CI.getArgOperand(i: `0`))});
2466	return true;
2467	}
2468	case Intrinsic::invariant_start: {
2469	MIRBuilder.buildUndef(Res: getOrCreateVReg(Val: CI));
2470	return true;
2471	}
2472	case Intrinsic::invariant_end:
2473	return true;
2474	case Intrinsic::expect:
2475	case Intrinsic::expect_with_probability:
2476	case Intrinsic::annotation:
2477	case Intrinsic::ptr_annotation:
2478	case Intrinsic::launder_invariant_group:
2479	case Intrinsic::strip_invariant_group: {
2480	// Drop the intrinsic, but forward the value.
2481	MIRBuilder.buildCopy(Res: getOrCreateVReg(Val: CI),
2482	Op: getOrCreateVReg(Val: *CI.getArgOperand(i: `0`)));
2483	return true;
2484	}
2485	case Intrinsic::assume:
2486	case Intrinsic::experimental_noalias_scope_decl:
2487	case Intrinsic::var_annotation:
2488	case Intrinsic::sideeffect:
2489	// Discard annotate attributes, assumptions, and artificial side-effects.
2490	return true;
2491	case Intrinsic::read_volatile_register:
2492	case Intrinsic::read_register: {
2493	Value *Arg = CI.getArgOperand(i: `0`);
2494	MIRBuilder
2495	.buildInstr(Opc: TargetOpcode::G_READ_REGISTER, DstOps: {getOrCreateVReg(Val: CI)}, SrcOps: {})
2496	.addMetadata(MD: cast<MDNode>(Val: cast<MetadataAsValue>(Val: Arg)->getMetadata()));
2497	return true;
2498	}
2499	case Intrinsic::write_register: {
2500	Value *Arg = CI.getArgOperand(i: `0`);
2501	MIRBuilder.buildInstr(Opcode: TargetOpcode::G_WRITE_REGISTER)
2502	.addMetadata(MD: cast<MDNode>(Val: cast<MetadataAsValue>(Val: Arg)->getMetadata()))
2503	.addUse(RegNo: getOrCreateVReg(Val: *CI.getArgOperand(i: `1`)));
2504	return true;
2505	}
2506	case Intrinsic::localescape: {
2507	MachineBasicBlock &EntryMBB = MF->front();
2508	StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(Name: MF->getName());
2509
2510	// Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
2511	// is the same on all targets.
2512	for (unsigned Idx = `0`, E = CI.arg_size(); Idx < E; ++Idx) {
2513	Value *Arg = CI.getArgOperand(i: Idx)->stripPointerCasts();
2514	if (isa<ConstantPointerNull>(Val: Arg))
2515	continue; // Skip null pointers. They represent a hole in index space.
2516
2517	int FI = getOrCreateFrameIndex(AI: *cast<AllocaInst>(Val: Arg));
2518	MCSymbol *FrameAllocSym =
2519	MF->getContext().getOrCreateFrameAllocSymbol(FuncName: EscapedName, Idx);
2520
2521	// This should be inserted at the start of the entry block.
2522	auto LocalEscape =
2523	MIRBuilder.buildInstrNoInsert(Opcode: TargetOpcode::LOCAL_ESCAPE)
2524	.addSym(Sym: FrameAllocSym)
2525	.addFrameIndex(Idx: FI);
2526
2527	EntryMBB.insert(I: EntryMBB.begin(), MI: LocalEscape);
2528	}
2529
2530	return true;
2531	}
2532	case Intrinsic::vector_reduce_fadd:
2533	case Intrinsic::vector_reduce_fmul: {
2534	// Need to check for the reassoc flag to decide whether we want a
2535	// sequential reduction opcode or not.
2536	Register Dst = getOrCreateVReg(Val: CI);
2537	Register ScalarSrc = getOrCreateVReg(Val: *CI.getArgOperand(i: `0`));
2538	Register VecSrc = getOrCreateVReg(Val: *CI.getArgOperand(i: `1`));
2539	unsigned Opc = `0`;
2540	if (!CI.hasAllowReassoc()) {
2541	// The sequential ordering case.
2542	Opc = ID == Intrinsic::vector_reduce_fadd
2543	? TargetOpcode::G_VECREDUCE_SEQ_FADD
2544	: TargetOpcode::G_VECREDUCE_SEQ_FMUL;
2545	if (!MRI->getType(Reg: VecSrc).isVector())
2546	Opc = ID == Intrinsic::vector_reduce_fadd ? TargetOpcode::G_FADD
2547	: TargetOpcode::G_FMUL;
2548	MIRBuilder.buildInstr(Opc, DstOps: {Dst}, SrcOps: {ScalarSrc, VecSrc},
2549	Flags: MachineInstr::copyFlagsFromInstruction(I: CI));
2550	return true;
2551	}
2552	// We split the operation into a separate G_FADD/G_FMUL + the reduce,
2553	// since the associativity doesn't matter.
2554	unsigned ScalarOpc;
2555	if (ID == Intrinsic::vector_reduce_fadd) {
2556	Opc = TargetOpcode::G_VECREDUCE_FADD;
2557	ScalarOpc = TargetOpcode::G_FADD;
2558	} else {
2559	Opc = TargetOpcode::G_VECREDUCE_FMUL;
2560	ScalarOpc = TargetOpcode::G_FMUL;
2561	}
2562	LLT DstTy = MRI->getType(Reg: Dst);
2563	auto Rdx = MIRBuilder.buildInstr(
2564	Opc, DstOps: {DstTy}, SrcOps: {VecSrc}, Flags: MachineInstr::copyFlagsFromInstruction(I: CI));
2565	MIRBuilder.buildInstr(Opc: ScalarOpc, DstOps: {Dst}, SrcOps: {ScalarSrc, Rdx},
2566	Flags: MachineInstr::copyFlagsFromInstruction(I: CI));
2567
2568	return true;
2569	}
2570	case Intrinsic::trap:
2571	return translateTrap(CI, MIRBuilder, Opcode: TargetOpcode::G_TRAP);
2572	case Intrinsic::debugtrap:
2573	return translateTrap(CI, MIRBuilder, Opcode: TargetOpcode::G_DEBUGTRAP);
2574	case Intrinsic::ubsantrap:
2575	return translateTrap(CI, MIRBuilder, Opcode: TargetOpcode::G_UBSANTRAP);
2576	case Intrinsic::allow_runtime_check:
2577	case Intrinsic::allow_ubsan_check:
2578	MIRBuilder.buildCopy(Res: getOrCreateVReg(Val: CI),
2579	Op: getOrCreateVReg(Val: *ConstantInt::getTrue(Ty: CI.getType())));
2580	return true;
2581	case Intrinsic::amdgcn_cs_chain:
2582	case Intrinsic::amdgcn_call_whole_wave:
2583	return translateCallBase(CB: CI, MIRBuilder);
2584	case Intrinsic::fptrunc_round: {
2585	uint32_t Flags = MachineInstr::copyFlagsFromInstruction(I: CI);
2586
2587	// Convert the metadata argument to a constant integer
2588	Metadata *MD = cast<MetadataAsValue>(Val: CI.getArgOperand(i: `1`))->getMetadata();
2589	std::optional<RoundingMode> RoundMode =
2590	convertStrToRoundingMode(cast<MDString>(Val: MD)->getString());
2591
2592	// Add the Rounding mode as an integer
2593	MIRBuilder
2594	.buildInstr(Opc: TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND,
2595	DstOps: {getOrCreateVReg(Val: CI)},
2596	SrcOps: {getOrCreateVReg(Val: *CI.getArgOperand(i: `0`))}, Flags)
2597	.addImm(Val: (int)*RoundMode);
2598
2599	return true;
2600	}
2601	case Intrinsic::is_fpclass: {
2602	Value *FpValue = CI.getOperand(i_nocapture: `0`);
2603	ConstantInt *TestMaskValue = cast<ConstantInt>(Val: CI.getOperand(i_nocapture: `1`));
2604
2605	MIRBuilder
2606	.buildInstr(Opc: TargetOpcode::G_IS_FPCLASS, DstOps: {getOrCreateVReg(Val: CI)},
2607	SrcOps: {getOrCreateVReg(Val: *FpValue)})
2608	.addImm(Val: TestMaskValue->getZExtValue());
2609
2610	return true;
2611	}
2612	case Intrinsic::set_fpenv: {
2613	Value *FPEnv = CI.getOperand(i_nocapture: `0`);
2614	MIRBuilder.buildSetFPEnv(Src: getOrCreateVReg(Val: *FPEnv));
2615	return true;
2616	}
2617	case Intrinsic::reset_fpenv:
2618	MIRBuilder.buildResetFPEnv();
2619	return true;
2620	case Intrinsic::set_fpmode: {
2621	Value *FPState = CI.getOperand(i_nocapture: `0`);
2622	MIRBuilder.buildSetFPMode(Src: getOrCreateVReg(Val: *FPState));
2623	return true;
2624	}
2625	case Intrinsic::reset_fpmode:
2626	MIRBuilder.buildResetFPMode();
2627	return true;
2628	case Intrinsic::get_rounding:
2629	MIRBuilder.buildGetRounding(Dst: getOrCreateVReg(Val: CI));
2630	return true;
2631	case Intrinsic::set_rounding:
2632	MIRBuilder.buildSetRounding(Src: getOrCreateVReg(Val: *CI.getOperand(i_nocapture: `0`)));
2633	return true;
2634	case Intrinsic::vscale: {
2635	MIRBuilder.buildVScale(Res: getOrCreateVReg(Val: CI), MinElts: `1`);
2636	return true;
2637	}
2638	case Intrinsic::scmp:
2639	MIRBuilder.buildSCmp(Res: getOrCreateVReg(Val: CI),
2640	Op0: getOrCreateVReg(Val: *CI.getOperand(i_nocapture: `0`)),
2641	Op1: getOrCreateVReg(Val: *CI.getOperand(i_nocapture: `1`)));
2642	return true;
2643	case Intrinsic::ucmp:
2644	MIRBuilder.buildUCmp(Res: getOrCreateVReg(Val: CI),
2645	Op0: getOrCreateVReg(Val: *CI.getOperand(i_nocapture: `0`)),
2646	Op1: getOrCreateVReg(Val: *CI.getOperand(i_nocapture: `1`)));
2647	return true;
2648	case Intrinsic::vector_extract:
2649	return translateExtractVector(U: CI, MIRBuilder);
2650	case Intrinsic::vector_insert:
2651	return translateInsertVector(U: CI, MIRBuilder);
2652	case Intrinsic::stepvector: {
2653	MIRBuilder.buildStepVector(Res: getOrCreateVReg(Val: CI), Step: `1`);
2654	return true;
2655	}
2656	case Intrinsic::prefetch: {
2657	Value *Addr = CI.getOperand(i_nocapture: `0`);
2658	unsigned RW = cast<ConstantInt>(Val: CI.getOperand(i_nocapture: `1`))->getZExtValue();
2659	unsigned Locality = cast<ConstantInt>(Val: CI.getOperand(i_nocapture: `2`))->getZExtValue();
2660	unsigned CacheType = cast<ConstantInt>(Val: CI.getOperand(i_nocapture: `3`))->getZExtValue();
2661
2662	auto Flags = RW ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
2663	auto &MMO = *MF->getMachineMemOperand(PtrInfo: MachinePointerInfo (Addr), f: Flags,
2664	MemTy: LLT (), base_alignment: Align ());
2665
2666	MIRBuilder.buildPrefetch(Addr: getOrCreateVReg(Val: *Addr), RW, Locality, CacheType,
2667	MMO);
2668
2669	return true;
2670	}
2671
2672	case Intrinsic::vector_interleave2:
2673	case Intrinsic::vector_deinterleave2: {
2674	// Both intrinsics have at least one operand.
2675	Value *Op0 = CI.getOperand(i_nocapture: `0`);
2676	LLT ResTy = getLLTForType(Ty&: *Op0->getType(), DL: MIRBuilder.getDataLayout());
2677	if (!ResTy.isFixedVector())
2678	return false;
2679
2680	if (CI.getIntrinsicID() == Intrinsic::vector_interleave2)
2681	return translateVectorInterleave2Intrinsic(CI, MIRBuilder);
2682
2683	return translateVectorDeinterleave2Intrinsic(CI, MIRBuilder);
2684	}
2685
2686	#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
2687	case Intrinsic::INTRINSIC:
2688	#include "llvm/IR/ConstrainedOps.def"
2689	return translateConstrainedFPIntrinsic(FPI: cast<ConstrainedFPIntrinsic>(Val: CI),
2690	MIRBuilder);
2691	case Intrinsic::experimental_convergence_anchor:
2692	case Intrinsic::experimental_convergence_entry:
2693	case Intrinsic::experimental_convergence_loop:
2694	return translateConvergenceControlIntrinsic(CI, ID, MIRBuilder);
2695	case Intrinsic::reloc_none: {
2696	Metadata *MD = cast<MetadataAsValue>(Val: CI.getArgOperand(i: `0`))->getMetadata();
2697	StringRef SymbolName = cast<MDString>(Val: MD)->getString();
2698	MIRBuilder.buildInstr(Opcode: TargetOpcode::RELOC_NONE)
2699	.addExternalSymbol(FnName: SymbolName.data());
2700	return true;
2701	}
2702	}
2703	return false;
2704	}
2705
2706	bool IRTranslator::translateInlineAsm(const CallBase &CB,
2707	MachineIRBuilder &MIRBuilder) {
2708	if (containsBF16Type(U: CB) && !targetSupportsBF16Type(MF))
2709	return false;
2710
2711	const InlineAsmLowering *ALI = MF->getSubtarget().getInlineAsmLowering();
2712
2713	if (!ALI) {
2714	LLVM_DEBUG(
2715	dbgs() << "Inline asm lowering is not supported for this target yet\n");
2716	return false;
2717	}
2718
2719	return ALI->lowerInlineAsm(
2720	MIRBuilder, CB, GetOrCreateVRegs: [&](const Value &Val) { return getOrCreateVRegs(Val); });
2721	}
2722
2723	bool IRTranslator::translateCallBase(const CallBase &CB,
2724	MachineIRBuilder &MIRBuilder) {
2725	ArrayRef<Register> Res = getOrCreateVRegs(Val: CB);
2726
2727	SmallVector<ArrayRef<Register>, `8`> Args;
2728	Register SwiftInVReg = `0`;
2729	Register SwiftErrorVReg = `0`;
2730	for (const auto &Arg : CB.args()) {
2731	if (CLI->supportSwiftError() && isSwiftError(V: Arg)) {
2732	assert(SwiftInVReg == `0` && "Expected only one swift error argument");
2733	LLT Ty = getLLTForType(Ty&: Arg ->getType(), DL: DL);
2734	SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
2735	MIRBuilder.buildCopy(Res: SwiftInVReg, Op: SwiftError.getOrCreateVRegUseAt(
2736	&CB, &MIRBuilder.getMBB(), Arg));
2737	Args.emplace_back(Args: ArrayRef(SwiftInVReg));
2738	SwiftErrorVReg =
2739	SwiftError.getOrCreateVRegDefAt(&CB, &MIRBuilder.getMBB(), Arg);
2740	continue;
2741	}
2742	Args.push_back(Elt: getOrCreateVRegs(Val: *Arg));
2743	}
2744
2745	if (auto *CI = dyn_cast<CallInst>(Val: &CB)) {
2746	if (ORE ->enabled()) {
2747	if (MemoryOpRemark::canHandle(I: CI, TLI: *LibInfo)) {
2748	MemoryOpRemark R(ORE, "gisel-irtranslator-memsize", DL, *LibInfo);
2749	R.visit(I: CI);
2750	}
2751	}
2752	}
2753
2754	std::optional<CallLowering::PtrAuthInfo> PAI;
2755	if (auto Bundle = CB.getOperandBundle(ID: LLVMContext::OB_ptrauth)) {
2756	// Functions should never be ptrauth-called directly.
2757	assert(!CB.getCalledFunction() && "invalid direct ptrauth call");
2758
2759	const Value *Key = Bundle ->Inputs [`0`];
2760	const Value *Discriminator = Bundle ->Inputs [`1`];
2761
2762	// Look through ptrauth constants to try to eliminate the matching bundle
2763	// and turn this into a direct call with no ptrauth.
2764	// CallLowering will use the raw pointer if it doesn't find the PAI.
2765	const auto *CalleeCPA = dyn_cast<ConstantPtrAuth>(Val: CB.getCalledOperand());
2766	if (!CalleeCPA \|\| !isa<Function>(Val: CalleeCPA->getPointer()) \|\|
2767	!CalleeCPA->isKnownCompatibleWith(Key, Discriminator, DL: *DL)) {
2768	// If we can't make it direct, package the bundle into PAI.
2769	Register DiscReg = getOrCreateVReg(Val: *Discriminator);
2770	PAI = CallLowering::PtrAuthInfo{.Key: cast<ConstantInt>(Val: Key)->getZExtValue(),
2771	.Discriminator: DiscReg};
2772	}
2773	}
2774
2775	Register ConvergenceCtrlToken = `0`;
2776	if (auto Bundle = CB.getOperandBundle(ID: LLVMContext::OB_convergencectrl)) {
2777	const auto &Token = *Bundle ->Inputs [`0`].get();
2778	ConvergenceCtrlToken = getOrCreateConvergenceTokenVReg(Token);
2779	}
2780
2781	// We don't set HasCalls on MFI here yet because call lowering may decide to
2782	// optimize into tail calls. Instead, we defer that to selection where a final
2783	// scan is done to check if any instructions are calls.
2784	bool Success = CLI->lowerCall(
2785	MIRBuilder, Call: CB, ResRegs: Res, ArgRegs: Args, SwiftErrorVReg, PAI, ConvergenceCtrlToken,
2786	GetCalleeReg: [&]() { return getOrCreateVReg(Val: *CB.getCalledOperand()); });
2787
2788	// Check if we just inserted a tail call.
2789	if (Success) {
2790	assert(!HasTailCall && "Can't tail call return twice from block?");
2791	const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2792	HasTailCall = TII->isTailCall(Inst: *std::prev(x: MIRBuilder.getInsertPt()));
2793	}
2794
2795	return Success;
2796	}
2797
2798	bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
2799	if (containsBF16Type(U) && !targetSupportsBF16Type(MF))
2800	return false;
2801
2802	const CallInst &CI = cast<CallInst>(Val: U);
2803	const Function *F = CI.getCalledFunction();
2804
2805	// FIXME: support Windows dllimport function calls and calls through
2806	// weak symbols.
2807	if (F && (F->hasDLLImportStorageClass() \|\|
2808	(MF->getTarget().getTargetTriple().isOSWindows() &&
2809	F->hasExternalWeakLinkage())))
2810	return false;
2811
2812	// FIXME: support control flow guard targets.
2813	if (CI.countOperandBundlesOfType(ID: LLVMContext::OB_cfguardtarget))
2814	return false;
2815
2816	// FIXME: support statepoints and related.
2817	if (isa<GCStatepointInst, GCRelocateInst, GCResultInst>(Val: U))
2818	return false;
2819
2820	if (CI.isInlineAsm())
2821	return translateInlineAsm(CB: CI, MIRBuilder);
2822
2823	Intrinsic::ID ID = F ? F->getIntrinsicID() : Intrinsic::not_intrinsic;
2824	if (!F \|\| ID == Intrinsic::not_intrinsic) {
2825	if (translateCallBase(CB: CI, MIRBuilder)) {
2826	diagnoseDontCall(CI);
2827	return true;
2828	}
2829	return false;
2830	}
2831
2832	assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
2833
2834	if (translateKnownIntrinsic(CI, ID, MIRBuilder))
2835	return true;
2836
2837	SmallVector<TargetLowering::IntrinsicInfo> Infos;
2838	TLI->getTgtMemIntrinsic(Infos, I: CI, MF&: *MF, Intrinsic: ID);
2839
2840	return translateIntrinsic(CB: CI, ID, MIRBuilder, TgtMemIntrinsicInfos: Infos);
2841	}
2842
2843	/// Translate a call or callbr to an intrinsic.
2844	bool IRTranslator::translateIntrinsic(
2845	const CallBase &CB, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder,
2846	ArrayRef<TargetLowering::IntrinsicInfo> TgtMemIntrinsicInfos) {
2847	ArrayRef<Register> ResultRegs;
2848	if (!CB.getType()->isVoidTy())
2849	ResultRegs = getOrCreateVRegs(Val: CB);
2850
2851	// Ignore the callsite attributes. Backend code is most likely not expecting
2852	// an intrinsic to sometimes have side effects and sometimes not.
2853	MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, Res: ResultRegs);
2854	if (isa<FPMathOperator>(Val: CB))
2855	MIB ->copyIRFlags(I: CB);
2856
2857	for (const auto &Arg : enumerate(First: CB.args())) {
2858	// If this is required to be an immediate, don't materialize it in a
2859	// register.
2860	if (CB.paramHasAttr(ArgNo: Arg.index(), Kind: Attribute::ImmArg)) {
2861	if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: Arg.value())) {
2862	// imm arguments are more convenient than cimm (and realistically
2863	// probably sufficient), so use them.
2864	assert(CI->getBitWidth() <= `64` &&
2865	"large intrinsic immediates not handled");
2866	MIB.addImm(Val: CI->getSExtValue());
2867	} else {
2868	MIB.addFPImm(Val: cast<ConstantFP>(Val: Arg.value()));
2869	}
2870	} else if (auto *MDVal = dyn_cast<MetadataAsValue>(Val: Arg.value())) {
2871	auto *MD = MDVal->getMetadata();
2872	auto *MDN = dyn_cast<MDNode>(Val: MD);
2873	if (!MDN) {
2874	if (auto *ConstMD = dyn_cast<ConstantAsMetadata>(Val: MD))
2875	MDN = MDNode::get(Context&: MF->getFunction().getContext(), MDs: ConstMD);
2876	else // This was probably an MDString.
2877	return false;
2878	}
2879	MIB.addMetadata(MD: MDN);
2880	} else {
2881	ArrayRef<Register> VRegs = getOrCreateVRegs(Val: *Arg.value());
2882	if (VRegs.size() > `1`)
2883	return false;
2884	MIB.addUse(RegNo: VRegs [`0`]);
2885	}
2886	}
2887
2888	// Add MachineMemOperands for each memory access described by the target.
2889	for (const auto &Info : TgtMemIntrinsicInfos) {
2890	Align Alignment = Info.align.value_or(
2891	u: DL->getABITypeAlign(Ty: Info.memVT.getTypeForEVT(Context&: CB.getContext())));
2892	LLT MemTy = Info.memVT.isSimple()
2893	? getLLTForMVT(Ty: Info.memVT.getSimpleVT())
2894	: LLT::scalar(SizeInBits: Info.memVT.getStoreSizeInBits());
2895
2896	// TODO: We currently just fallback to address space 0 if
2897	// getTgtMemIntrinsic didn't yield anything useful.
2898	MachinePointerInfo MPI;
2899	if (Info.ptrVal) {
2900	MPI = MachinePointerInfo (Info.ptrVal, Info.offset);
2901	} else if (Info.fallbackAddressSpace) {
2902	MPI = MachinePointerInfo (*Info.fallbackAddressSpace);
2903	}
2904	MIB.addMemOperand(MMO: MF->getMachineMemOperand(
2905	PtrInfo: MPI, f: Info.flags, MemTy, base_alignment: Alignment, AAInfo: CB.getAAMetadata(),
2906	/Ranges=/nullptr, SSID: Info.ssid, Ordering: Info.order, FailureOrdering: Info.failureOrder));
2907	}
2908
2909	if (CB.isConvergent()) {
2910	if (auto Bundle = CB.getOperandBundle(ID: LLVMContext::OB_convergencectrl)) {
2911	auto *Token = Bundle ->Inputs [`0`].get();
2912	Register TokenReg = getOrCreateVReg(Val: *Token);
2913	MIB.addUse(RegNo: TokenReg, Flags: RegState::Implicit);
2914	}
2915	}
2916
2917	if (auto Bundle = CB.getOperandBundle(ID: LLVMContext::OB_deactivation_symbol))
2918	MIB ->setDeactivationSymbol(MF&: *MF, DS: Bundle ->Inputs [`0`].get());
2919
2920	return true;
2921	}
2922
2923	bool IRTranslator::findUnwindDestinations(
2924	const BasicBlock *EHPadBB,
2925	BranchProbability Prob,
2926	SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
2927	&UnwindDests) {
2928	EHPersonality Personality = classifyEHPersonality(
2929	Pers: EHPadBB->getParent()->getFunction().getPersonalityFn());
2930	bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
2931	bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
2932	bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
2933	bool IsSEH = isAsynchronousEHPersonality(Pers: Personality);
2934
2935	if (IsWasmCXX) {
2936	// Ignore this for now.
2937	return false;
2938	}
2939
2940	while (EHPadBB) {
2941	BasicBlock::const_iterator Pad = EHPadBB->getFirstNonPHIIt();
2942	BasicBlock NewEHPadBB = nullptr*;
2943	if (isa<LandingPadInst>(Val: Pad)) {
2944	// Stop on landingpads. They are not funclets.
2945	UnwindDests.emplace_back(Args: &getMBB(BB: *EHPadBB), Args&: Prob);
2946	break;
2947	}
2948	if (isa<CleanupPadInst>(Val: Pad)) {
2949	// Stop on cleanup pads. Cleanups are always funclet entries for all known
2950	// personalities.
2951	UnwindDests.emplace_back(Args: &getMBB(BB: *EHPadBB), Args&: Prob);
2952	UnwindDests.back().first->setIsEHScopeEntry();
2953	UnwindDests.back().first->setIsEHFuncletEntry();
2954	break;
2955	}
2956	if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Val&: Pad)) {
2957	// Add the catchpad handlers to the possible destinations.
2958	for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
2959	UnwindDests.emplace_back(Args: &getMBB(BB: *CatchPadBB), Args&: Prob);
2960	// For MSVC++ and the CLR, catchblocks are funclets and need prologues.
2961	if (IsMSVCCXX \|\| IsCoreCLR)
2962	UnwindDests.back().first->setIsEHFuncletEntry();
2963	if (!IsSEH)
2964	UnwindDests.back().first->setIsEHScopeEntry();
2965	}
2966	NewEHPadBB = CatchSwitch->getUnwindDest();
2967	} else {
2968	continue;
2969	}
2970
2971	BranchProbabilityInfo *BPI = FuncInfo.BPI;
2972	if (BPI && NewEHPadBB)
2973	Prob *= BPI->getEdgeProbability(Src: EHPadBB, Dst: NewEHPadBB);
2974	EHPadBB = NewEHPadBB;
2975	}
2976	return true;
2977	}
2978
2979	bool IRTranslator::translateInvoke(const User &U,
2980	MachineIRBuilder &MIRBuilder) {
2981	const InvokeInst &I = cast<InvokeInst>(Val: U);
2982	MCContext &Context = MF->getContext();
2983
2984	const BasicBlock *ReturnBB = I.getSuccessor(i: `0`);
2985	const BasicBlock *EHPadBB = I.getSuccessor(i: `1`);
2986
2987	const Function *Fn = I.getCalledFunction();
2988
2989	// FIXME: support invoking patchpoint and statepoint intrinsics.
2990	if (Fn && Fn->isIntrinsic())
2991	return false;
2992
2993	// FIXME: support whatever these are.
2994	if (I.hasDeoptState())
2995	return false;
2996
2997	// FIXME: support control flow guard targets.
2998	if (I.countOperandBundlesOfType(ID: LLVMContext::OB_cfguardtarget))
2999	return false;
3000
3001	// FIXME: support Windows exception handling.
3002	if (!isa<LandingPadInst>(Val: EHPadBB->getFirstNonPHIIt()))
3003	return false;
3004
3005	// FIXME: support Windows dllimport function calls and calls through
3006	// weak symbols.
3007	if (Fn && (Fn->hasDLLImportStorageClass() \|\|
3008	(MF->getTarget().getTargetTriple().isOSWindows() &&
3009	Fn->hasExternalWeakLinkage())))
3010	return false;
3011
3012	bool LowerInlineAsm = I.isInlineAsm();
3013	bool NeedEHLabel = true;
3014
3015	// Emit the actual call, bracketed by EH_LABELs so that the MF knows about
3016	// the region covered by the try.
3017	MCSymbol BeginSymbol = nullptr*;
3018	if (NeedEHLabel) {
3019	MIRBuilder.buildInstr(Opcode: TargetOpcode::G_INVOKE_REGION_START);
3020	BeginSymbol = Context.createTempSymbol();
3021	MIRBuilder.buildInstr(Opcode: TargetOpcode::EH_LABEL).addSym(Sym: BeginSymbol);
3022	}
3023
3024	if (LowerInlineAsm) {
3025	if (!translateInlineAsm(CB: I, MIRBuilder))
3026	return false;
3027	} else if (!translateCallBase(CB: I, MIRBuilder))
3028	return false;
3029
3030	MCSymbol EndSymbol = nullptr*;
3031	if (NeedEHLabel) {
3032	EndSymbol = Context.createTempSymbol();
3033	MIRBuilder.buildInstr(Opcode: TargetOpcode::EH_LABEL).addSym(Sym: EndSymbol);
3034	}
3035
3036	SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, `1`> UnwindDests;
3037	BranchProbabilityInfo *BPI = FuncInfo.BPI;
3038	MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB();
3039	BranchProbability EHPadBBProb =
3040	BPI ? BPI->getEdgeProbability(Src: InvokeMBB->getBasicBlock(), Dst: EHPadBB)
3041	: BranchProbability::getZero();
3042
3043	if (!findUnwindDestinations(EHPadBB, Prob: EHPadBBProb, UnwindDests))
3044	return false;
3045
3046	MachineBasicBlock &EHPadMBB = getMBB(BB: *EHPadBB),
3047	&ReturnMBB = getMBB(BB: *ReturnBB);
3048	// Update successor info.
3049	addSuccessorWithProb(Src: InvokeMBB, Dst: &ReturnMBB);
3050	for (auto &UnwindDest : UnwindDests) {
3051	UnwindDest.first->setIsEHPad();
3052	addSuccessorWithProb(Src: InvokeMBB, Dst: UnwindDest.first, Prob: UnwindDest.second);
3053	}
3054	InvokeMBB->normalizeSuccProbs();
3055
3056	if (NeedEHLabel) {
3057	assert(BeginSymbol && "Expected a begin symbol!");
3058	assert(EndSymbol && "Expected an end symbol!");
3059	MF->addInvoke(LandingPad: &EHPadMBB, BeginLabel: BeginSymbol, EndLabel: EndSymbol);
3060	}
3061
3062	MIRBuilder.buildBr(Dest&: ReturnMBB);
3063	return true;
3064	}
3065
3066	/// The intrinsics currently supported by callbr are implicit control flow
3067	/// intrinsics such as amdgcn.kill.
3068	bool IRTranslator::translateCallBr(const User &U,
3069	MachineIRBuilder &MIRBuilder) {
3070	if (containsBF16Type(U))
3071	return false; // see translateCall
3072
3073	const CallBrInst &I = cast<CallBrInst>(Val: U);
3074	MachineBasicBlock *CallBrMBB = &MIRBuilder.getMBB();
3075
3076	Intrinsic::ID IID = I.getIntrinsicID();
3077	if (I.isInlineAsm()) {
3078	// FIXME: inline asm is not yet supported for callbr in GlobalISel. As soon
3079	// as we add support, we need to handle the indirect asm targets, see
3080	// SelectionDAGBuilder::visitCallBr().
3081	return false;
3082	}
3083	if (!translateIntrinsic(CB: I, ID: IID, MIRBuilder))
3084	return false;
3085
3086	// Retrieve successors.
3087	SmallPtrSet<BasicBlock *, `8`> Dests = {I.getDefaultDest()};
3088	MachineBasicBlock Return = &getMBB(BB: I.getDefaultDest());
3089
3090	// Update successor info.
3091	addSuccessorWithProb(Src: CallBrMBB, Dst: Return, Prob: BranchProbability::getOne());
3092
3093	// Add indirect targets as successors. For intrinsic callbr, these represent
3094	// implicit control flow (e.g., the "kill" path for amdgcn.kill). We mark them
3095	// with setIsInlineAsmBrIndirectTarget so the machine verifier accepts them as
3096	// valid successors, even though they're not from inline asm.
3097	for (BasicBlock *Dest : I.getIndirectDests()) {
3098	MachineBasicBlock &Target = getMBB(BB: *Dest);
3099	Target.setIsInlineAsmBrIndirectTarget();
3100	Target.setLabelMustBeEmitted();
3101	// Don't add duplicate machine successors.
3102	if (Dests.insert(Ptr: Dest).second)
3103	addSuccessorWithProb(Src: CallBrMBB, Dst: &Target, Prob: BranchProbability::getZero());
3104	}
3105
3106	CallBrMBB->normalizeSuccProbs();
3107
3108	// Drop into default successor.
3109	MIRBuilder.buildBr(Dest&: *Return);
3110
3111	return true;
3112	}
3113
3114	bool IRTranslator::translateLandingPad(const User &U,
3115	MachineIRBuilder &MIRBuilder) {
3116	const LandingPadInst &LP = cast<LandingPadInst>(Val: U);
3117
3118	MachineBasicBlock &MBB = MIRBuilder.getMBB();
3119
3120	MBB.setIsEHPad();
3121
3122	// If there aren't registers to copy the values into (e.g., during SjLj
3123	// exceptions), then don't bother.
3124	const Constant *PersonalityFn = MF->getFunction().getPersonalityFn();
3125	if (TLI->getExceptionPointerRegister(PersonalityFn) == `0` &&
3126	TLI->getExceptionSelectorRegister(PersonalityFn) == `0`)
3127	return true;
3128
3129	// If landingpad's return type is token type, we don't create DAG nodes
3130	// for its exception pointer and selector value. The extraction of exception
3131	// pointer or selector value from token type landingpads is not currently
3132	// supported.
3133	if (LP.getType()->isTokenTy())
3134	return true;
3135
3136	// Add a label to mark the beginning of the landing pad. Deletion of the
3137	// landing pad can thus be detected via the MachineModuleInfo.
3138	MIRBuilder.buildInstr(Opcode: TargetOpcode::EH_LABEL)
3139	.addSym(Sym: MF->addLandingPad(LandingPad: &MBB));
3140
3141	// If the unwinder does not preserve all registers, ensure that the
3142	// function marks the clobbered registers as used.
3143	const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
3144	if (auto RegMask = TRI.getCustomEHPadPreservedMask(MF: MF))
3145	MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
3146
3147	LLT Ty = getLLTForType(Ty&: LP.getType(), DL: DL);
3148	Register Undef = MRI->createGenericVirtualRegister(Ty);
3149	MIRBuilder.buildUndef(Res: Undef);
3150
3151	SmallVector<LLT, `2`> Tys;
3152	for (Type *Ty : cast<StructType>(Val: LP.getType())->elements())
3153	Tys.push_back(Elt: getLLTForType(Ty&: Ty, DL: DL));
3154	assert(Tys.size() == `2` && "Only two-valued landingpads are supported");
3155
3156	// Mark exception register as live in.
3157	Register ExceptionReg = TLI->getExceptionPointerRegister(PersonalityFn);
3158	if (!ExceptionReg)
3159	return false;
3160
3161	MBB.addLiveIn(PhysReg: ExceptionReg);
3162	ArrayRef<Register> ResRegs = getOrCreateVRegs(Val: LP);
3163	MIRBuilder.buildCopy(Res: ResRegs [`0`], Op: ExceptionReg);
3164
3165	Register SelectorReg = TLI->getExceptionSelectorRegister(PersonalityFn);
3166	if (!SelectorReg)
3167	return false;
3168
3169	MBB.addLiveIn(PhysReg: SelectorReg);
3170	Register PtrVReg = MRI->createGenericVirtualRegister(Ty: Tys [`0`]);
3171	MIRBuilder.buildCopy(Res: PtrVReg, Op: SelectorReg);
3172	MIRBuilder.buildCast(Dst: ResRegs [`1`], Src: PtrVReg);
3173
3174	return true;
3175	}
3176
3177	bool IRTranslator::translateAlloca(const User &U,
3178	MachineIRBuilder &MIRBuilder) {
3179	auto &AI = cast<AllocaInst>(Val: U);
3180
3181	if (AI.isSwiftError())
3182	return true;
3183
3184	if (AI.isStaticAlloca()) {
3185	Register Res = getOrCreateVReg(Val: AI);
3186	int FI = getOrCreateFrameIndex(AI);
3187	MIRBuilder.buildFrameIndex(Res, Idx: FI);
3188	return true;
3189	}
3190
3191	// FIXME: support stack probing for Windows.
3192	if (MF->getTarget().getTargetTriple().isOSWindows())
3193	return false;
3194
3195	// Now we're in the harder dynamic case.
3196	Register NumElts = getOrCreateVReg(Val: *AI.getArraySize());
3197	Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
3198	LLT IntPtrTy = getLLTForType(Ty&: IntPtrIRTy, DL: DL);
3199	if (MRI->getType(Reg: NumElts) != IntPtrTy) {
3200	Register ExtElts = MRI->createGenericVirtualRegister(Ty: IntPtrTy);
3201	MIRBuilder.buildZExtOrTrunc(Res: ExtElts, Op: NumElts);
3202	NumElts = ExtElts;
3203	}
3204
3205	Type *Ty = AI.getAllocatedType();
3206	TypeSize TySize = DL->getTypeAllocSize(Ty);
3207
3208	Register AllocSize = MRI->createGenericVirtualRegister(Ty: IntPtrTy);
3209	Register TySizeReg;
3210	if (TySize.isScalable()) {
3211	// For scalable types, use vscale min_value*
3212	TySizeReg = MRI->createGenericVirtualRegister(Ty: IntPtrTy);
3213	MIRBuilder.buildVScale(Res: TySizeReg, MinElts: TySize.getKnownMinValue());
3214	} else {
3215	// For fixed types, use a constant
3216	TySizeReg =
3217	getOrCreateVReg(Val: *ConstantInt::get(Ty: IntPtrIRTy, V: TySize.getFixedValue()));
3218	}
3219	MIRBuilder.buildMul(Dst: AllocSize, Src0: NumElts, Src1: TySizeReg);
3220
3221	// Round the size of the allocation up to the stack alignment size
3222	// by add SA-1 to the size. This doesn't overflow because we're computing
3223	// an address inside an alloca.
3224	Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
3225	auto SAMinusOne = MIRBuilder.buildConstant(Res: IntPtrTy, Val: StackAlign.value() - `1`);
3226	auto AllocAdd = MIRBuilder.buildAdd(Dst: IntPtrTy, Src0: AllocSize, Src1: SAMinusOne,
3227	Flags: MachineInstr::NoUWrap);
3228	auto AlignCst =
3229	MIRBuilder.buildConstant(Res: IntPtrTy, Val: ~(uint64_t)(StackAlign.value() - `1`));
3230	auto AlignedAlloc = MIRBuilder.buildAnd(Dst: IntPtrTy, Src0: AllocAdd, Src1: AlignCst);
3231
3232	Align Alignment = AI.getAlign();
3233	if (Alignment <= StackAlign)
3234	Alignment = Align (`1`);
3235	MIRBuilder.buildDynStackAlloc(Res: getOrCreateVReg(Val: AI), Size: AlignedAlloc, Alignment);
3236
3237	MF->getFrameInfo().CreateVariableSizedObject(Alignment, Alloca: &AI);
3238	assert(MF->getFrameInfo().hasVarSizedObjects());
3239	return true;
3240	}
3241
3242	bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
3243	// FIXME: We may need more info about the type. Because of how LLT works,
3244	// we're completely discarding the i64/double distinction here (amongst
3245	// others). Fortunately the ABIs I know of where that matters don't use va_arg
3246	// anyway but that's not guaranteed.
3247	MIRBuilder.buildInstr(Opc: TargetOpcode::G_VAARG, DstOps: {getOrCreateVReg(Val: U)},
3248	SrcOps: {getOrCreateVReg(Val: *U.getOperand(i: `0`)),
3249	DL->getABITypeAlign(Ty: U.getType()).value()});
3250	return true;
3251	}
3252
3253	bool IRTranslator::translateUnreachable(const User &U,
3254	MachineIRBuilder &MIRBuilder) {
3255	auto &UI = cast<UnreachableInst>(Val: U);
3256	if (!UI.shouldLowerToTrap(TrapUnreachable: MF->getTarget().Options.TrapUnreachable,
3257	NoTrapAfterNoreturn: MF->getTarget().Options.NoTrapAfterNoreturn))
3258	return true;
3259
3260	MIRBuilder.buildTrap();
3261	return true;
3262	}
3263
3264	bool IRTranslator::translateInsertElement(const User &U,
3265	MachineIRBuilder &MIRBuilder) {
3266	// If it is a <1 x Ty> vector, use the scalar as it is
3267	// not a legal vector type in LLT.
3268	if (auto *FVT = dyn_cast<FixedVectorType>(Val: U.getType());
3269	FVT && FVT->getNumElements() == `1`)
3270	return translateCopy(U, V: *U.getOperand(i: `1`), MIRBuilder);
3271
3272	Register Res = getOrCreateVReg(Val: U);
3273	Register Val = getOrCreateVReg(Val: *U.getOperand(i: `0`));
3274	Register Elt = getOrCreateVReg(Val: *U.getOperand(i: `1`));
3275	unsigned PreferredVecIdxWidth = TLI->getVectorIdxWidth(DL: *DL);
3276	Register Idx;
3277	if (auto *CI = dyn_cast<ConstantInt>(Val: U.getOperand(i: `2`))) {
3278	if (CI->getBitWidth() != PreferredVecIdxWidth) {
3279	APInt NewIdx = CI->getValue().zextOrTrunc(width: PreferredVecIdxWidth);
3280	auto *NewIdxCI = ConstantInt::get(Context&: CI->getContext(), V: NewIdx);
3281	Idx = getOrCreateVReg(Val: *NewIdxCI);
3282	}
3283	}
3284	if (!Idx)
3285	Idx = getOrCreateVReg(Val: *U.getOperand(i: `2`));
3286	if (MRI->getType(Reg: Idx).getSizeInBits() != PreferredVecIdxWidth) {
3287	const LLT VecIdxTy = LLT::scalar(SizeInBits: PreferredVecIdxWidth);
3288	Idx = MIRBuilder.buildZExtOrTrunc(Res: VecIdxTy, Op: Idx).getReg(Idx: `0`);
3289	}
3290	MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
3291	return true;
3292	}
3293
3294	bool IRTranslator::translateInsertVector(const User &U,
3295	MachineIRBuilder &MIRBuilder) {
3296	Register Dst = getOrCreateVReg(Val: U);
3297	Register Vec = getOrCreateVReg(Val: *U.getOperand(i: `0`));
3298	Register Elt = getOrCreateVReg(Val: *U.getOperand(i: `1`));
3299
3300	ConstantInt *CI = cast<ConstantInt>(Val: U.getOperand(i: `2`));
3301	unsigned PreferredVecIdxWidth = TLI->getVectorIdxWidth(DL: *DL);
3302
3303	// Resize Index to preferred index width.
3304	if (CI->getBitWidth() != PreferredVecIdxWidth) {
3305	APInt NewIdx = CI->getValue().zextOrTrunc(width: PreferredVecIdxWidth);
3306	CI = ConstantInt::get(Context&: CI->getContext(), V: NewIdx);
3307	}
3308
3309	// If it is a <1 x Ty> vector, we have to use other means.
3310	if (auto *ResultType = dyn_cast<FixedVectorType>(Val: U.getOperand(i: `1`)->getType());
3311	ResultType && ResultType->getNumElements() == `1`) {
3312	if (auto *InputType = dyn_cast<FixedVectorType>(Val: U.getOperand(i: `0`)->getType());
3313	InputType && InputType->getNumElements() == `1`) {
3314	// We are inserting an illegal fixed vector into an illegal
3315	// fixed vector, use the scalar as it is not a legal vector type
3316	// in LLT.
3317	return translateCopy(U, V: *U.getOperand(i: `0`), MIRBuilder);
3318	}
3319	if (isa<FixedVectorType>(Val: U.getOperand(i: `0`)->getType())) {
3320	// We are inserting an illegal fixed vector into a legal fixed
3321	// vector, use the scalar as it is not a legal vector type in
3322	// LLT.
3323	Register Idx = getOrCreateVReg(Val: *CI);
3324	MIRBuilder.buildInsertVectorElement(Res: Dst, Val: Vec, Elt, Idx);
3325	return true;
3326	}
3327	if (isa<ScalableVectorType>(Val: U.getOperand(i: `0`)->getType())) {
3328	// We are inserting an illegal fixed vector into a scalable
3329	// vector, use a scalar element insert.
3330	LLT VecIdxTy = LLT::scalar(SizeInBits: PreferredVecIdxWidth);
3331	Register Idx = getOrCreateVReg(Val: *CI);
3332	auto ScaledIndex = MIRBuilder.buildMul(
3333	Dst: VecIdxTy, Src0: MIRBuilder.buildVScale(Res: VecIdxTy, MinElts: `1`), Src1: Idx);
3334	MIRBuilder.buildInsertVectorElement(Res: Dst, Val: Vec, Elt, Idx: ScaledIndex);
3335	return true;
3336	}
3337	}
3338
3339	MIRBuilder.buildInsertSubvector(
3340	Res: getOrCreateVReg(Val: U), Src0: getOrCreateVReg(Val: *U.getOperand(i: `0`)),
3341	Src1: getOrCreateVReg(Val: *U.getOperand(i: `1`)), Index: CI->getZExtValue());
3342	return true;
3343	}
3344
3345	bool IRTranslator::translateExtractElement(const User &U,
3346	MachineIRBuilder &MIRBuilder) {
3347	// If it is a <1 x Ty> vector, use the scalar as it is
3348	// not a legal vector type in LLT.
3349	if (const FixedVectorType *FVT =
3350	dyn_cast<FixedVectorType>(Val: U.getOperand(i: `0`)->getType()))
3351	if (FVT->getNumElements() == `1`)
3352	return translateCopy(U, V: *U.getOperand(i: `0`), MIRBuilder);
3353
3354	Register Res = getOrCreateVReg(Val: U);
3355	Register Val = getOrCreateVReg(Val: *U.getOperand(i: `0`));
3356	unsigned PreferredVecIdxWidth = TLI->getVectorIdxWidth(DL: *DL);
3357	Register Idx;
3358	if (auto *CI = dyn_cast<ConstantInt>(Val: U.getOperand(i: `1`))) {
3359	if (CI->getBitWidth() != PreferredVecIdxWidth) {
3360	APInt NewIdx = CI->getValue().zextOrTrunc(width: PreferredVecIdxWidth);
3361	auto *NewIdxCI = ConstantInt::get(Context&: CI->getContext(), V: NewIdx);
3362	Idx = getOrCreateVReg(Val: *NewIdxCI);
3363	}
3364	}
3365	if (!Idx)
3366	Idx = getOrCreateVReg(Val: *U.getOperand(i: `1`));
3367	if (MRI->getType(Reg: Idx).getSizeInBits() != PreferredVecIdxWidth) {
3368	const LLT VecIdxTy = LLT::scalar(SizeInBits: PreferredVecIdxWidth);
3369	Idx = MIRBuilder.buildZExtOrTrunc(Res: VecIdxTy, Op: Idx).getReg(Idx: `0`);
3370	}
3371	MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
3372	return true;
3373	}
3374
3375	bool IRTranslator::translateExtractVector(const User &U,
3376	MachineIRBuilder &MIRBuilder) {
3377	Register Res = getOrCreateVReg(Val: U);
3378	Register Vec = getOrCreateVReg(Val: *U.getOperand(i: `0`));
3379	ConstantInt *CI = cast<ConstantInt>(Val: U.getOperand(i: `1`));
3380	unsigned PreferredVecIdxWidth = TLI->getVectorIdxWidth(DL: *DL);
3381
3382	// Resize Index to preferred index width.
3383	if (CI->getBitWidth() != PreferredVecIdxWidth) {
3384	APInt NewIdx = CI->getValue().zextOrTrunc(width: PreferredVecIdxWidth);
3385	CI = ConstantInt::get(Context&: CI->getContext(), V: NewIdx);
3386	}
3387
3388	// If it is a <1 x Ty> vector, we have to use other means.
3389	if (auto *ResultType = dyn_cast<FixedVectorType>(Val: U.getType());
3390	ResultType && ResultType->getNumElements() == `1`) {
3391	if (auto *InputType = dyn_cast<FixedVectorType>(Val: U.getOperand(i: `0`)->getType());
3392	InputType && InputType->getNumElements() == `1`) {
3393	// We are extracting an illegal fixed vector from an illegal fixed vector,
3394	// use the scalar as it is not a legal vector type in LLT.
3395	return translateCopy(U, V: *U.getOperand(i: `0`), MIRBuilder);
3396	}
3397	if (isa<FixedVectorType>(Val: U.getOperand(i: `0`)->getType())) {
3398	// We are extracting an illegal fixed vector from a legal fixed
3399	// vector, use the scalar as it is not a legal vector type in
3400	// LLT.
3401	Register Idx = getOrCreateVReg(Val: *CI);
3402	MIRBuilder.buildExtractVectorElement(Res, Val: Vec, Idx);
3403	return true;
3404	}
3405	if (isa<ScalableVectorType>(Val: U.getOperand(i: `0`)->getType())) {
3406	// We are extracting an illegal fixed vector from a scalable
3407	// vector, use a scalar element extract.
3408	LLT VecIdxTy = LLT::scalar(SizeInBits: PreferredVecIdxWidth);
3409	Register Idx = getOrCreateVReg(Val: *CI);
3410	auto ScaledIndex = MIRBuilder.buildMul(
3411	Dst: VecIdxTy, Src0: MIRBuilder.buildVScale(Res: VecIdxTy, MinElts: `1`), Src1: Idx);
3412	MIRBuilder.buildExtractVectorElement(Res, Val: Vec, Idx: ScaledIndex);
3413	return true;
3414	}
3415	}
3416
3417	MIRBuilder.buildExtractSubvector(Res: getOrCreateVReg(Val: U),
3418	Src: getOrCreateVReg(Val: *U.getOperand(i: `0`)),
3419	Index: CI->getZExtValue());
3420	return true;
3421	}
3422
3423	bool IRTranslator::translateShuffleVector(const User &U,
3424	MachineIRBuilder &MIRBuilder) {
3425	// A ShuffleVector that operates on scalable vectors is a splat vector where
3426	// the value of the splat vector is the 0th element of the first operand,
3427	// since the index mask operand is the zeroinitializer (undef and
3428	// poison are treated as zeroinitializer here).
3429	if (U.getOperand(i: `0`)->getType()->isScalableTy()) {
3430	Register Val = getOrCreateVReg(Val: *U.getOperand(i: `0`));
3431	auto SplatVal = MIRBuilder.buildExtractVectorElementConstant(
3432	Res: MRI->getType(Reg: Val).getElementType(), Val, Idx: `0`);
3433	MIRBuilder.buildSplatVector(Res: getOrCreateVReg(Val: U), Val: SplatVal);
3434	return true;
3435	}
3436
3437	ArrayRef<int> Mask;
3438	if (auto *SVI = dyn_cast<ShuffleVectorInst>(Val: &U))
3439	Mask = SVI->getShuffleMask();
3440	else
3441	Mask = cast<ConstantExpr>(Val: U).getShuffleMask();
3442
3443	// As GISel does not represent <1 x > vectors as a separate type from scalars,
3444	// we transform shuffle_vector with a scalar output to an
3445	// ExtractVectorElement. If the input type is also scalar it becomes a Copy.
3446	unsigned DstElts = cast<FixedVectorType>(Val: U.getType())->getNumElements();
3447	unsigned SrcElts =
3448	cast<FixedVectorType>(Val: U.getOperand(i: `0`)->getType())->getNumElements();
3449	if (DstElts == `1`) {
3450	unsigned M = Mask [`0`];
3451	if (SrcElts == `1`) {
3452	if (M == `0` \|\| M == `1`)
3453	return translateCopy(U, V: *U.getOperand(i: M), MIRBuilder);
3454	MIRBuilder.buildUndef(Res: getOrCreateVReg(Val: U));
3455	} else {
3456	Register Dst = getOrCreateVReg(Val: U);
3457	if (M < SrcElts) {
3458	MIRBuilder.buildExtractVectorElementConstant(
3459	Res: Dst, Val: getOrCreateVReg(Val: *U.getOperand(i: `0`)), Idx: M);
3460	} else if (M < SrcElts * `2`) {
3461	MIRBuilder.buildExtractVectorElementConstant(
3462	Res: Dst, Val: getOrCreateVReg(Val: *U.getOperand(i: `1`)), Idx: M - SrcElts);
3463	} else {
3464	MIRBuilder.buildUndef(Res: Dst);
3465	}
3466	}
3467	return true;
3468	}
3469
3470	// A single element src is transformed to a build_vector.
3471	if (SrcElts == `1`) {
3472	SmallVector<Register> Ops;
3473	Register Undef;
3474	for (int M : Mask) {
3475	LLT SrcTy = getLLTForType(Ty&: U.getOperand(i: `0`)->getType(), DL: DL);
3476	if (M == `0` \|\| M == `1`) {
3477	Ops.push_back(Elt: getOrCreateVReg(Val: *U.getOperand(i: M)));
3478	} else {
3479	if (!Undef.isValid()) {
3480	Undef = MRI->createGenericVirtualRegister(Ty: SrcTy);
3481	MIRBuilder.buildUndef(Res: Undef);
3482	}
3483	Ops.push_back(Elt: Undef);
3484	}
3485	}
3486	MIRBuilder.buildBuildVector(Res: getOrCreateVReg(Val: U), Ops);
3487	return true;
3488	}
3489
3490	ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask);
3491	MIRBuilder
3492	.buildInstr(Opc: TargetOpcode::G_SHUFFLE_VECTOR, DstOps: {getOrCreateVReg(Val: U)},
3493	SrcOps: {getOrCreateVReg(Val: *U.getOperand(i: `0`)),
3494	getOrCreateVReg(Val: *U.getOperand(i: `1`))})
3495	.addShuffleMask(Val: MaskAlloc);
3496	return true;
3497	}
3498
3499	bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
3500	const PHINode &PI = cast<PHINode>(Val: U);
3501
3502	SmallVector<MachineInstr *, `4`> Insts;
3503	for (auto Reg : getOrCreateVRegs(Val: PI)) {
3504	auto MIB = MIRBuilder.buildInstr(Opc: TargetOpcode::G_PHI, DstOps: {Reg}, SrcOps: {});
3505	Insts.push_back(Elt: MIB.getInstr());
3506	}
3507
3508	PendingPHIs.emplace_back(Args: &PI, Args: std::move(Insts));
3509	return true;
3510	}
3511
3512	bool IRTranslator::translateAtomicCmpXchg(const User &U,
3513	MachineIRBuilder &MIRBuilder) {
3514	const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(Val: U);
3515
3516	auto Flags = TLI->getAtomicMemOperandFlags(AI: I, DL: *DL);
3517
3518	auto Res = getOrCreateVRegs(Val: I);
3519	Register OldValRes = Res [`0`];
3520	Register SuccessRes = Res [`1`];
3521	Register Addr = getOrCreateVReg(Val: *I.getPointerOperand());
3522	Register Cmp = getOrCreateVReg(Val: *I.getCompareOperand());
3523	Register NewVal = getOrCreateVReg(Val: *I.getNewValOperand());
3524
3525	MIRBuilder.buildAtomicCmpXchgWithSuccess(
3526	OldValRes, SuccessRes, Addr, CmpVal: Cmp, NewVal,
3527	MMO&: *MF->getMachineMemOperand(
3528	PtrInfo: MachinePointerInfo (I.getPointerOperand()), f: Flags, MemTy: MRI->getType(Reg: Cmp),
3529	base_alignment: getMemOpAlign(I), AAInfo: I.getAAMetadata(), Ranges: nullptr, SSID: I.getSyncScopeID(),
3530	Ordering: I.getSuccessOrdering(), FailureOrdering: I.getFailureOrdering()));
3531	return true;
3532	}
3533
3534	bool IRTranslator::translateAtomicRMW(const User &U,
3535	MachineIRBuilder &MIRBuilder) {
3536	if (containsBF16Type(U) && !targetSupportsBF16Type(MF))
3537	return false;
3538
3539	const AtomicRMWInst &I = cast<AtomicRMWInst>(Val: U);
3540	auto Flags = TLI->getAtomicMemOperandFlags(AI: I, DL: *DL);
3541
3542	Register Res = getOrCreateVReg(Val: I);
3543	Register Addr = getOrCreateVReg(Val: *I.getPointerOperand());
3544	Register Val = getOrCreateVReg(Val: *I.getValOperand());
3545
3546	unsigned Opcode = `0`;
3547	switch (I.getOperation()) {
3548	default:
3549	return false;
3550	case AtomicRMWInst::Xchg:
3551	Opcode = TargetOpcode::G_ATOMICRMW_XCHG;
3552	break;
3553	case AtomicRMWInst::Add:
3554	Opcode = TargetOpcode::G_ATOMICRMW_ADD;
3555	break;
3556	case AtomicRMWInst::Sub:
3557	Opcode = TargetOpcode::G_ATOMICRMW_SUB;
3558	break;
3559	case AtomicRMWInst::And:
3560	Opcode = TargetOpcode::G_ATOMICRMW_AND;
3561	break;
3562	case AtomicRMWInst::Nand:
3563	Opcode = TargetOpcode::G_ATOMICRMW_NAND;
3564	break;
3565	case AtomicRMWInst::Or:
3566	Opcode = TargetOpcode::G_ATOMICRMW_OR;
3567	break;
3568	case AtomicRMWInst::Xor:
3569	Opcode = TargetOpcode::G_ATOMICRMW_XOR;
3570	break;
3571	case AtomicRMWInst::Max:
3572	Opcode = TargetOpcode::G_ATOMICRMW_MAX;
3573	break;
3574	case AtomicRMWInst::Min:
3575	Opcode = TargetOpcode::G_ATOMICRMW_MIN;
3576	break;
3577	case AtomicRMWInst::UMax:
3578	Opcode = TargetOpcode::G_ATOMICRMW_UMAX;
3579	break;
3580	case AtomicRMWInst::UMin:
3581	Opcode = TargetOpcode::G_ATOMICRMW_UMIN;
3582	break;
3583	case AtomicRMWInst::FAdd:
3584	Opcode = TargetOpcode::G_ATOMICRMW_FADD;
3585	break;
3586	case AtomicRMWInst::FSub:
3587	Opcode = TargetOpcode::G_ATOMICRMW_FSUB;
3588	break;
3589	case AtomicRMWInst::FMax:
3590	Opcode = TargetOpcode::G_ATOMICRMW_FMAX;
3591	break;
3592	case AtomicRMWInst::FMin:
3593	Opcode = TargetOpcode::G_ATOMICRMW_FMIN;
3594	break;
3595	case AtomicRMWInst::FMaximum:
3596	Opcode = TargetOpcode::G_ATOMICRMW_FMAXIMUM;
3597	break;
3598	case AtomicRMWInst::FMinimum:
3599	Opcode = TargetOpcode::G_ATOMICRMW_FMINIMUM;
3600	break;
3601	case AtomicRMWInst::UIncWrap:
3602	Opcode = TargetOpcode::G_ATOMICRMW_UINC_WRAP;
3603	break;
3604	case AtomicRMWInst::UDecWrap:
3605	Opcode = TargetOpcode::G_ATOMICRMW_UDEC_WRAP;
3606	break;
3607	case AtomicRMWInst::USubCond:
3608	Opcode = TargetOpcode::G_ATOMICRMW_USUB_COND;
3609	break;
3610	case AtomicRMWInst::USubSat:
3611	Opcode = TargetOpcode::G_ATOMICRMW_USUB_SAT;
3612	break;
3613	}
3614
3615	MIRBuilder.buildAtomicRMW(
3616	Opcode, OldValRes: Res, Addr, Val,
3617	MMO&: *MF->getMachineMemOperand(PtrInfo: MachinePointerInfo (I.getPointerOperand()),
3618	f: Flags, MemTy: MRI->getType(Reg: Val), base_alignment: getMemOpAlign(I),
3619	AAInfo: I.getAAMetadata(), Ranges: nullptr, SSID: I.getSyncScopeID(),
3620	Ordering: I.getOrdering()));
3621	return true;
3622	}
3623
3624	bool IRTranslator::translateFence(const User &U,
3625	MachineIRBuilder &MIRBuilder) {
3626	const FenceInst &Fence = cast<FenceInst>(Val: U);
3627	MIRBuilder.buildFence(Ordering: static_cast<unsigned>(Fence.getOrdering()),
3628	Scope: Fence.getSyncScopeID());
3629	return true;
3630	}
3631
3632	bool IRTranslator::translateFreeze(const User &U,
3633	MachineIRBuilder &MIRBuilder) {
3634	const ArrayRef<Register> DstRegs = getOrCreateVRegs(Val: U);
3635	const ArrayRef<Register> SrcRegs = getOrCreateVRegs(Val: *U.getOperand(i: `0`));
3636
3637	assert(DstRegs.size() == SrcRegs.size() &&
3638	"Freeze with different source and destination type?");
3639
3640	for (unsigned I = `0`; I < DstRegs.size(); ++I) {
3641	MIRBuilder.buildFreeze(Dst: DstRegs [I], Src: SrcRegs [I]);
3642	}
3643
3644	return true;
3645	}
3646
3647	void IRTranslator::finishPendingPhis() {
3648	#ifndef NDEBUG
3649	DILocationVerifier Verifier;
3650	GISelObserverWrapper WrapperObserver(&Verifier);
3651	RAIIMFObsDelInstaller ObsInstall(*MF, WrapperObserver);
3652	#endif // ifndef NDEBUG
3653	for (auto &Phi : PendingPHIs) {
3654	const PHINode *PI = Phi.first;
3655	if (PI->getType()->isEmptyTy())
3656	continue;
3657	ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
3658	MachineBasicBlock *PhiMBB = ComponentPHIs [`0`]->getParent();
3659	EntryBuilder ->setDebugLoc(PI->getDebugLoc());
3660	#ifndef NDEBUG
3661	Verifier.setCurrentInst(PI);
3662	#endif // ifndef NDEBUG
3663
3664	SmallPtrSet<const MachineBasicBlock *, `16`> SeenPreds;
3665	for (unsigned i = `0`; i < PI->getNumIncomingValues(); ++i) {
3666	auto IRPred = PI->getIncomingBlock(i);
3667	ArrayRef<Register> ValRegs = getOrCreateVRegs(Val: *PI->getIncomingValue(i));
3668	for (auto *Pred : getMachinePredBBs(Edge: {IRPred, PI->getParent()})) {
3669	if (SeenPreds.count(Ptr: Pred) \|\| !PhiMBB->isPredecessor(MBB: Pred))
3670	continue;
3671	SeenPreds.insert(Ptr: Pred);
3672	for (unsigned j = `0`; j < ValRegs.size(); ++j) {
3673	MachineInstrBuilder MIB(*MF, ComponentPHIs [j]);
3674	MIB.addUse(RegNo: ValRegs [j]);
3675	MIB.addMBB(MBB: Pred);
3676	}
3677	}
3678	}
3679	}
3680	}
3681
3682	void IRTranslator::translateDbgValueRecord(Value V, bool* HasArgList,
3683	const DILocalVariable *Variable,
3684	const DIExpression *Expression,
3685	const DebugLoc &DL,
3686	MachineIRBuilder &MIRBuilder) {
3687	assert(Variable->isValidLocationForIntrinsic(DL) &&
3688	"Expected inlined-at fields to agree");
3689	// Act as if we're handling a debug intrinsic.
3690	MIRBuilder.setDebugLoc(DL);
3691
3692	if (!V \|\| HasArgList) {
3693	// DI cannot produce a valid DBG_VALUE, so produce an undef DBG_VALUE to
3694	// terminate any prior location.
3695	MIRBuilder.buildIndirectDbgValue(Reg: `0`, Variable, Expr: Expression);
3696	return;
3697	}
3698
3699	if (const auto *CI = dyn_cast<Constant>(Val: V)) {
3700	MIRBuilder.buildConstDbgValue(C: *CI, Variable, Expr: Expression);
3701	return;
3702	}
3703
3704	if (auto *AI = dyn_cast<AllocaInst>(Val: V);
3705	AI && AI->isStaticAlloca() && Expression->startsWithDeref()) {
3706	// If the value is an alloca and the expression starts with a
3707	// dereference, track a stack slot instead of a register, as registers
3708	// may be clobbered.
3709	auto ExprOperands = Expression->getElements();
3710	auto *ExprDerefRemoved =
3711	DIExpression::get(Context&: AI->getContext(), Elements: ExprOperands.drop_front());
3712	MIRBuilder.buildFIDbgValue(FI: getOrCreateFrameIndex(AI: *AI), Variable,
3713	Expr: ExprDerefRemoved);
3714	return;
3715	}
3716	if (translateIfEntryValueArgument(isDeclare: false, Val: V, Var: Variable, Expr: Expression, DL,
3717	MIRBuilder))
3718	return;
3719	for (Register Reg : getOrCreateVRegs(Val: *V)) {
3720	// FIXME: This does not handle register-indirect values at offset 0. The
3721	// direct/indirect thing shouldn't really be handled by something as
3722	// implicit as reg+noreg vs reg+imm in the first place, but it seems
3723	// pretty baked in right now.
3724	MIRBuilder.buildDirectDbgValue(Reg, Variable, Expr: Expression);
3725	}
3726	}
3727
3728	void IRTranslator::translateDbgDeclareRecord(Value Address, bool* HasArgList,
3729	const DILocalVariable *Variable,
3730	const DIExpression *Expression,
3731	const DebugLoc &DL,
3732	MachineIRBuilder &MIRBuilder) {
3733	if (!Address \|\| isa<UndefValue>(Val: Address)) {
3734	LLVM_DEBUG(dbgs() << "Dropping debug info for " << *Variable << "\n");
3735	return;
3736	}
3737
3738	assert(Variable->isValidLocationForIntrinsic(DL) &&
3739	"Expected inlined-at fields to agree");
3740	auto AI = dyn_cast<AllocaInst>(Val: Address);
3741	if (AI && AI->isStaticAlloca()) {
3742	// Static allocas are tracked at the MF level, no need for DBG_VALUE
3743	// instructions (in fact, they get ignored if they do* exist).*
3744	MF->setVariableDbgInfo(Var: Variable, Expr: Expression,
3745	Slot: getOrCreateFrameIndex(AI: *AI), Loc: DL);
3746	return;
3747	}
3748
3749	if (translateIfEntryValueArgument(isDeclare: true, Val: Address, Var: Variable,
3750	Expr: Expression, DL,
3751	MIRBuilder))
3752	return;
3753
3754	// A dbg.declare describes the address of a source variable, so lower it
3755	// into an indirect DBG_VALUE.
3756	MIRBuilder.setDebugLoc(DL);
3757	MIRBuilder.buildIndirectDbgValue(Reg: getOrCreateVReg(Val: *Address), Variable,
3758	Expr: Expression);
3759	}
3760
3761	void IRTranslator::translateDbgInfo(const Instruction &Inst,
3762	MachineIRBuilder &MIRBuilder) {
3763	for (DbgRecord &DR : Inst.getDbgRecordRange()) {
3764	if (DbgLabelRecord *DLR = dyn_cast<DbgLabelRecord>(Val: &DR)) {
3765	MIRBuilder.setDebugLoc(DLR->getDebugLoc());
3766	assert(DLR->getLabel() && "Missing label");
3767	assert(DLR->getLabel()->isValidLocationForIntrinsic(
3768	MIRBuilder.getDebugLoc()) &&
3769	"Expected inlined-at fields to agree");
3770	MIRBuilder.buildDbgLabel(Label: DLR->getLabel());
3771	continue;
3772	}
3773	DbgVariableRecord &DVR = cast<DbgVariableRecord>(Val&: DR);
3774	const DILocalVariable *Variable = DVR.getVariable();
3775	const DIExpression *Expression = DVR.getExpression();
3776	Value *V = DVR.getVariableLocationOp(OpIdx: `0`);
3777	if (DVR.isDbgDeclare())
3778	translateDbgDeclareRecord(Address: V, HasArgList: DVR.hasArgList(), Variable, Expression,
3779	DL: DVR.getDebugLoc(), MIRBuilder);
3780	else
3781	translateDbgValueRecord(V, HasArgList: DVR.hasArgList(), Variable, Expression,
3782	DL: DVR.getDebugLoc(), MIRBuilder);
3783	}
3784	}
3785
3786	bool IRTranslator::translate(const Instruction &Inst) {
3787	CurBuilder ->setDebugLoc(Inst.getDebugLoc());
3788	CurBuilder ->setPCSections(Inst.getMetadata(KindID: LLVMContext::MD_pcsections));
3789	CurBuilder ->setMMRAMetadata(Inst.getMetadata(KindID: LLVMContext::MD_mmra));
3790
3791	if (TLI->fallBackToDAGISel(Inst))
3792	return false;
3793
3794	switch (Inst.getOpcode()) {
3795	#define HANDLE_INST(NUM, OPCODE, CLASS) \
3796	case Instruction::OPCODE: \
3797	return translate##OPCODE(Inst, *CurBuilder.get());
3798	#include "llvm/IR/Instruction.def"
3799	default:
3800	return false;
3801	}
3802	}
3803
3804	bool IRTranslator::translate(const Constant &C, Register Reg) {
3805	// We only emit constants into the entry block from here. To prevent jumpy
3806	// debug behaviour remove debug line.
3807	if (auto CurrInstDL = CurBuilder ->getDL())
3808	EntryBuilder ->setDebugLoc(DebugLoc ());
3809
3810	if (auto CI = dyn_cast<ConstantInt>(Val: &C)) {
3811	// buildConstant expects a to-be-splatted scalar ConstantInt.
3812	if (isa<VectorType>(Val: CI->getType()))
3813	CI = ConstantInt::get(Context&: CI->getContext(), V: CI->getValue());
3814	EntryBuilder ->buildConstant(Res: Reg, Val: *CI);
3815	} else if (auto CF = dyn_cast<ConstantFP>(Val: &C)) {
3816	// buildFConstant expects a to-be-splatted scalar ConstantFP.
3817	if (isa<VectorType>(Val: CF->getType()))
3818	CF = ConstantFP::get(Context&: CF->getContext(), V: CF->getValue());
3819	EntryBuilder ->buildFConstant(Res: Reg, Val: *CF);
3820	} else if (isa<UndefValue>(Val: C))
3821	EntryBuilder ->buildUndef(Res: Reg);
3822	else if (isa<ConstantPointerNull>(Val: C))
3823	EntryBuilder ->buildConstant(Res: Reg, Val: `0`);
3824	else if (auto GV = dyn_cast<GlobalValue>(Val: &C))
3825	EntryBuilder ->buildGlobalValue(Res: Reg, GV);
3826	else if (auto CPA = dyn_cast<ConstantPtrAuth>(Val: &C)) {
3827	Register Addr = getOrCreateVReg(Val: *CPA->getPointer());
3828	Register AddrDisc = getOrCreateVReg(Val: *CPA->getAddrDiscriminator());
3829	EntryBuilder ->buildConstantPtrAuth(Res: Reg, CPA, Addr, AddrDisc);
3830	} else if (auto CAZ = dyn_cast<ConstantAggregateZero>(Val: &C)) {
3831	Constant &Elt = *CAZ->getElementValue(Idx: `0u`);
3832	if (isa<ScalableVectorType>(Val: CAZ->getType())) {
3833	EntryBuilder ->buildSplatVector(Res: Reg, Val: getOrCreateVReg(Val: Elt));
3834	return true;
3835	}
3836	// Return the scalar if it is a <1 x Ty> vector.
3837	unsigned NumElts = CAZ->getElementCount().getFixedValue();
3838	if (NumElts == `1`)
3839	return translateCopy(U: C, V: Elt, MIRBuilder&: *EntryBuilder);
3840	// All elements are zero so we can just use the first one.
3841	EntryBuilder ->buildSplatBuildVector(Res: Reg, Src: getOrCreateVReg(Val: Elt));
3842	} else if (auto CV = dyn_cast<ConstantDataVector>(Val: &C)) {
3843	// Return the scalar if it is a <1 x Ty> vector.
3844	if (CV->getNumElements() == `1`)
3845	return translateCopy(U: C, V: CV->getElementAsConstant(i: `0`), MIRBuilder&: EntryBuilder);
3846	SmallVector<Register, `4`> Ops;
3847	for (unsigned i = `0`; i < CV->getNumElements(); ++i) {
3848	Constant &Elt = *CV->getElementAsConstant(i);
3849	Ops.push_back(Elt: getOrCreateVReg(Val: Elt));
3850	}
3851	EntryBuilder ->buildBuildVector(Res: Reg, Ops);
3852	} else if (auto CE = dyn_cast<ConstantExpr>(Val: &C)) {
3853	switch(CE->getOpcode()) {
3854	#define HANDLE_INST(NUM, OPCODE, CLASS) \
3855	case Instruction::OPCODE: \
3856	return translate##OPCODE(CE, EntryBuilder.get());
3857	#include "llvm/IR/Instruction.def"
3858	default:
3859	return false;
3860	}
3861	} else if (auto CV = dyn_cast<ConstantVector>(Val: &C)) {
3862	if (CV->getNumOperands() == `1`)
3863	return translateCopy(U: C, V: CV->getOperand(i_nocapture: `0`), MIRBuilder&: EntryBuilder);
3864	SmallVector<Register, `4`> Ops;
3865	for (unsigned i = `0`; i < CV->getNumOperands(); ++i) {
3866	Ops.push_back(Elt: getOrCreateVReg(Val: *CV->getOperand(i_nocapture: i)));
3867	}
3868	EntryBuilder ->buildBuildVector(Res: Reg, Ops);
3869	} else if (auto *BA = dyn_cast<BlockAddress>(Val: &C)) {
3870	EntryBuilder ->buildBlockAddress(Res: Reg, BA);
3871	} else
3872	return false;
3873
3874	return true;
3875	}
3876
3877	bool IRTranslator::finalizeBasicBlock(const BasicBlock &BB,
3878	MachineBasicBlock &MBB) {
3879	for (auto &BTB : SL ->BitTestCases) {
3880	// Emit header first, if it wasn't already emitted.
3881	if (!BTB.Emitted)
3882	emitBitTestHeader(B&: BTB, SwitchBB: BTB.Parent);
3883
3884	BranchProbability UnhandledProb = BTB.Prob;
3885	for (unsigned j = `0`, ej = BTB.Cases.size(); j != ej; ++j) {
3886	UnhandledProb -= BTB.Cases [j].ExtraProb;
3887	// Set the current basic block to the mbb we wish to insert the code into
3888	MachineBasicBlock *MBB = BTB.Cases [j].ThisBB;
3889	// If all cases cover a contiguous range, it is not necessary to jump to
3890	// the default block after the last bit test fails. This is because the
3891	// range check during bit test header creation has guaranteed that every
3892	// case here doesn't go outside the range. In this case, there is no need
3893	// to perform the last bit test, as it will always be true. Instead, make
3894	// the second-to-last bit-test fall through to the target of the last bit
3895	// test, and delete the last bit test.
3896
3897	MachineBasicBlock *NextMBB;
3898	if ((BTB.ContiguousRange \|\| BTB.FallthroughUnreachable) && j + `2` == ej) {
3899	// Second-to-last bit-test with contiguous range: fall through to the
3900	// target of the final bit test.
3901	NextMBB = BTB.Cases [j + `1`].TargetBB;
3902	} else if (j + `1` == ej) {
3903	// For the last bit test, fall through to Default.
3904	NextMBB = BTB.Default;
3905	} else {
3906	// Otherwise, fall through to the next bit test.
3907	NextMBB = BTB.Cases [j + `1`].ThisBB;
3908	}
3909
3910	emitBitTestCase(BB&: BTB, NextMBB, BranchProbToNext: UnhandledProb, Reg: BTB.Reg, B&: BTB.Cases [j], SwitchBB: MBB);
3911
3912	if ((BTB.ContiguousRange \|\| BTB.FallthroughUnreachable) && j + `2` == ej) {
3913	// We need to record the replacement phi edge here that normally
3914	// happens in emitBitTestCase before we delete the case, otherwise the
3915	// phi edge will be lost.
3916	addMachineCFGPred(Edge: {BTB.Parent->getBasicBlock(),
3917	BTB.Cases [ej - `1`].TargetBB->getBasicBlock()},
3918	NewPred: MBB);
3919	// Since we're not going to use the final bit test, remove it.
3920	BTB.Cases.pop_back();
3921	break;
3922	}
3923	}
3924	// This is "default" BB. We have two jumps to it. From "header" BB and from
3925	// last "case" BB, unless the latter was skipped.
3926	CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(),
3927	BTB.Default->getBasicBlock()};
3928	addMachineCFGPred(Edge: HeaderToDefaultEdge, NewPred: BTB.Parent);
3929	if (!BTB.ContiguousRange) {
3930	addMachineCFGPred(Edge: HeaderToDefaultEdge, NewPred: BTB.Cases.back().ThisBB);
3931	}
3932	}
3933	SL ->BitTestCases.clear();
3934
3935	for (auto &JTCase : SL ->JTCases) {
3936	// Emit header first, if it wasn't already emitted.
3937	if (!JTCase.first.Emitted)
3938	emitJumpTableHeader(JT&: JTCase.second, JTH&: JTCase.first, HeaderBB: JTCase.first.HeaderBB);
3939
3940	emitJumpTable(JT&: JTCase.second, MBB: JTCase.second.MBB);
3941	}
3942	SL ->JTCases.clear();
3943
3944	for (auto &SwCase : SL ->SwitchCases)
3945	emitSwitchCase(CB&: SwCase, SwitchBB: &CurBuilder ->getMBB(), MIB&: *CurBuilder);
3946	SL ->SwitchCases.clear();
3947
3948	// Check if we need to generate stack-protector guard checks.
3949	StackProtector &SP = getAnalysis<StackProtector>();
3950	if (SP.shouldEmitSDCheck(BB)) {
3951	bool FunctionBasedInstrumentation =
3952	TLI->getSSPStackGuardCheck(M: MF->getFunction().getParent(), Libcalls: Libcalls);
3953	SPDescriptor.initialize(BB: &BB, MBB: &MBB, FunctionBasedInstrumentation);
3954	}
3955	// Handle stack protector.
3956	if (SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) {
3957	LLVM_DEBUG(dbgs() << "Unimplemented stack protector case\n");
3958	return false;
3959	} else if (SPDescriptor.shouldEmitStackProtector()) {
3960	MachineBasicBlock *ParentMBB = SPDescriptor.getParentMBB();
3961	MachineBasicBlock *SuccessMBB = SPDescriptor.getSuccessMBB();
3962
3963	// Find the split point to split the parent mbb. At the same time copy all
3964	// physical registers used in the tail of parent mbb into virtual registers
3965	// before the split point and back into physical registers after the split
3966	// point. This prevents us needing to deal with Live-ins and many other
3967	// register allocation issues caused by us splitting the parent mbb. The
3968	// register allocator will clean up said virtual copies later on.
3969	MachineBasicBlock::iterator SplitPoint = findSplitPointForStackProtector(
3970	BB: ParentMBB, TII: *MF->getSubtarget().getInstrInfo());
3971
3972	// Splice the terminator of ParentMBB into SuccessMBB.
3973	SuccessMBB->splice(Where: SuccessMBB->end(), Other: ParentMBB, From: SplitPoint,
3974	To: ParentMBB->end());
3975
3976	// Add compare/jump on neq/jump to the parent BB.
3977	if (!emitSPDescriptorParent(SPD&: SPDescriptor, ParentBB: ParentMBB))
3978	return false;
3979
3980	// CodeGen Failure MBB if we have not codegened it yet.
3981	MachineBasicBlock *FailureMBB = SPDescriptor.getFailureMBB();
3982	if (FailureMBB->empty()) {
3983	if (!emitSPDescriptorFailure(SPD&: SPDescriptor, FailureBB: FailureMBB))
3984	return false;
3985	}
3986
3987	// Clear the Per-BB State.
3988	SPDescriptor.resetPerBBState();
3989	}
3990	return true;
3991	}
3992
3993	bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
3994	MachineBasicBlock *ParentBB) {
3995	CurBuilder ->setInsertPt(MBB&: *ParentBB, II: ParentBB->end());
3996	// First create the loads to the guard/stack slot for the comparison.
3997	Type *PtrIRTy = PointerType::getUnqual(C&: MF->getFunction().getContext());
3998	const LLT PtrTy = getLLTForType(Ty&: PtrIRTy, DL: DL);
3999	LLT PtrMemTy = getLLTForMVT(Ty: TLI->getPointerMemTy(DL: *DL));
4000
4001	MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
4002	int FI = MFI.getStackProtectorIndex();
4003
4004	Register Guard;
4005	Register StackSlotPtr = CurBuilder ->buildFrameIndex(Res: PtrTy, Idx: FI).getReg(Idx: `0`);
4006	const Module &M = *ParentBB->getParent()->getFunction().getParent();
4007	Align Align = DL->getPrefTypeAlign(Ty: PointerType::getUnqual(C&: M.getContext()));
4008
4009	// Generate code to load the content of the guard slot.
4010	Register GuardVal =
4011	CurBuilder
4012	->buildLoad(Res: PtrMemTy, Addr: StackSlotPtr,
4013	PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), Alignment: Align,
4014	MMOFlags: MachineMemOperand::MOLoad \| MachineMemOperand::MOVolatile)
4015	.getReg(Idx: `0`);
4016
4017	if (TLI->useStackGuardXorFP()) {
4018	LLVM_DEBUG(dbgs() << "Stack protector xor'ing with FP not yet implemented");
4019	return false;
4020	}
4021
4022	// Retrieve guard check function, nullptr if instrumentation is inlined.
4023	if (const Function GuardCheckFn = TLI->getSSPStackGuardCheck(M, Libcalls: Libcalls)) {
4024	// This path is currently untestable on GlobalISel, since the only platform
4025	// that needs this seems to be Windows, and we fall back on that currently.
4026	// The code still lives here in case that changes.
4027	// Silence warning about unused variable until the code below that uses
4028	// 'GuardCheckFn' is enabled.
4029	(void)GuardCheckFn;
4030	return false;
4031	#if 0
4032	// The target provides a guard check function to validate the guard value.
4033	// Generate a call to that function with the content of the guard slot as
4034	// argument.
4035	FunctionType *FnTy = GuardCheckFn->getFunctionType();
4036	assert(FnTy->getNumParams() == `1` && "Invalid function signature");
4037	ISD::ArgFlagsTy Flags;
4038	if (GuardCheckFn->hasAttribute(`1`, Attribute::AttrKind::InReg))
4039	Flags.setInReg();
4040	CallLowering::ArgInfo GuardArgInfo(
4041	{GuardVal, FnTy->getParamType(`0`), {Flags}});
4042
4043	CallLowering::CallLoweringInfo Info;
4044	Info.OrigArgs.push_back(GuardArgInfo);
4045	Info.CallConv = GuardCheckFn->getCallingConv();
4046	Info.Callee = MachineOperand::CreateGA(GuardCheckFn, `0`);
4047	Info.OrigRet = {Register(), FnTy->getReturnType()};
4048	if (!CLI->lowerCall(MIRBuilder, Info)) {
4049	LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector check\n");
4050	return false;
4051	}
4052	return true;
4053	#endif
4054	}
4055
4056	// If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
4057	// Otherwise, emit a volatile load to retrieve the stack guard value.
4058	if (TLI->useLoadStackGuardNode(M: *ParentBB->getBasicBlock()->getModule())) {
4059	Guard =
4060	MRI->createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: PtrTy.getSizeInBits()));
4061	getStackGuard(DstReg: Guard, MIRBuilder&: *CurBuilder);
4062	} else {
4063	// TODO: test using android subtarget when we support @llvm.thread.pointer.
4064	const Value IRGuard = TLI->getSDagStackGuard(M, Libcalls: Libcalls);
4065	Register GuardPtr = getOrCreateVReg(Val: *IRGuard);
4066
4067	Guard = CurBuilder
4068	->buildLoad(Res: PtrMemTy, Addr: GuardPtr,
4069	PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), Alignment: Align,
4070	MMOFlags: MachineMemOperand::MOLoad \|
4071	MachineMemOperand::MOVolatile)
4072	.getReg(Idx: `0`);
4073	}
4074
4075	// Perform the comparison.
4076	auto Cmp =
4077	CurBuilder ->buildICmp(Pred: CmpInst::ICMP_NE, Res: LLT::scalar(SizeInBits: `1`), Op0: Guard, Op1: GuardVal);
4078	// If the guard/stackslot do not equal, branch to failure MBB.
4079	CurBuilder ->buildBrCond(Tst: Cmp, Dest&: *SPD.getFailureMBB());
4080	// Otherwise branch to success MBB.
4081	CurBuilder ->buildBr(Dest&: *SPD.getSuccessMBB());
4082	return true;
4083	}
4084
4085	bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD,
4086	MachineBasicBlock *FailureBB) {
4087	const RTLIB::LibcallImpl LibcallImpl =
4088	Libcalls->getLibcallImpl(Call: RTLIB::STACKPROTECTOR_CHECK_FAIL);
4089	if (LibcallImpl == RTLIB::Unsupported)
4090	return false;
4091
4092	CurBuilder ->setInsertPt(MBB&: *FailureBB, II: FailureBB->end());
4093
4094	CallLowering::CallLoweringInfo Info;
4095	Info.CallConv = Libcalls->getLibcallImplCallingConv(Call: LibcallImpl);
4096
4097	StringRef LibcallName =
4098	RTLIB::RuntimeLibcallsInfo::getLibcallImplName(CallImpl: LibcallImpl);
4099	Info.Callee = MachineOperand::CreateES(SymName: LibcallName.data());
4100	Info.OrigRet = {Register (), Type::getVoidTy(C&: MF->getFunction().getContext()),
4101	`0`};
4102	if (!CLI->lowerCall(MIRBuilder&: *CurBuilder, Info)) {
4103	LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector fail\n");
4104	return false;
4105	}
4106
4107	// Emit a trap instruction if we are required to do so.
4108	const TargetOptions &TargetOpts = TLI->getTargetMachine().Options;
4109	if (TargetOpts.TrapUnreachable && !TargetOpts.NoTrapAfterNoreturn)
4110	CurBuilder ->buildInstr(Opcode: TargetOpcode::G_TRAP);
4111
4112	return true;
4113	}
4114
4115	void IRTranslator::finalizeFunction() {
4116	// Release the memory used by the different maps we
4117	// needed during the translation.
4118	PendingPHIs.clear();
4119	VMap.reset();
4120	FrameIndices.clear();
4121	MachinePreds.clear();
4122	// MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
4123	// to avoid accessing free’d memory (in runOnMachineFunction) and to avoid
4124	// destroying it twice (in ~IRTranslator() and ~LLVMContext())
4125	EntryBuilder.reset();
4126	CurBuilder.reset();
4127	FuncInfo.clear();
4128	SPDescriptor.resetPerFunctionState();
4129	}
4130
4131	/// Returns true if a BasicBlock \p BB within a variadic function contains a
4132	/// variadic musttail call.
4133	static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
4134	if (!IsVarArg)
4135	return false;
4136
4137	// Walk the block backwards, because tail calls usually only appear at the end
4138	// of a block.
4139	return llvm::any_of(Range: llvm::reverse(C: BB), P: [](const Instruction &I) {
4140	const auto *CI = dyn_cast<CallInst>(Val: &I);
4141	return CI && CI->isMustTailCall();
4142	});
4143	}
4144
4145	bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
4146	MF = &CurMF;
4147	const Function &F = MF->getFunction();
4148	ORE = std::make_unique<OptimizationRemarkEmitter>(args: &F);
4149	CLI = MF->getSubtarget().getCallLowering();
4150
4151	if (CLI->fallBackToDAGISel(MF: *MF)) {
4152	OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
4153	F.getSubprogram(), &F.getEntryBlock());
4154	R << "unable to lower function: "
4155	<< ore::NV ("Prototype", F.getFunctionType());
4156
4157	reportTranslationError(MF&: MF, ORE&: ORE, R);
4158	return false;
4159	}
4160
4161	GISelCSEAnalysisWrapper &Wrapper =
4162	getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
4163	// Set the CSEConfig and run the analysis.
4164	GISelCSEInfo CSEInfo = nullptr*;
4165	TPC = &getAnalysis<TargetPassConfig>();
4166
4167	bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences()
4168	? EnableCSEInIRTranslator
4169	: TPC->isGISelCSEEnabled();
4170
4171	const TargetSubtargetInfo &Subtarget = MF->getSubtarget();
4172	TLI = Subtarget.getTargetLowering();
4173
4174	if (EnableCSE) {
4175	EntryBuilder = std::make_unique<CSEMIRBuilder>(args&: CurMF);
4176	CSEInfo = &Wrapper.get(CSEOpt: TPC->getCSEConfig());
4177	EntryBuilder ->setCSEInfo(CSEInfo);
4178	CurBuilder = std::make_unique<CSEMIRBuilder>(args&: CurMF);
4179	CurBuilder ->setCSEInfo(CSEInfo);
4180	} else {
4181	EntryBuilder = std::make_unique<MachineIRBuilder>();
4182	CurBuilder = std::make_unique<MachineIRBuilder>();
4183	}
4184	CLI = Subtarget.getCallLowering();
4185	CurBuilder ->setMF(*MF);
4186	EntryBuilder ->setMF(*MF);
4187	MRI = &MF->getRegInfo();
4188	DL = &F.getDataLayout();
4189	const TargetMachine &TM = MF->getTarget();
4190	TM.resetTargetOptions(F);
4191	EnableOpts = OptLevel != CodeGenOptLevel::None && !skipFunction(F);
4192	FuncInfo.MF = MF;
4193	if (EnableOpts) {
4194	AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
4195	FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
4196	} else {
4197	AA = nullptr;
4198	FuncInfo.BPI = nullptr;
4199	}
4200
4201	AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
4202	F&: MF->getFunction());
4203	LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
4204	Libcalls = &getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
4205	M: *F.getParent(), Subtarget);
4206
4207	FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(MF&: *MF);
4208
4209	SL = std::make_unique<GISelSwitchLowering>(args: this, args&: FuncInfo);
4210	SL ->init(tli: TLI, tm: TM, dl: DL);
4211
4212	assert(PendingPHIs.empty() && "stale PHIs");
4213
4214	// Targets which want to use big endian can enable it using
4215	// enableBigEndian()
4216	if (!DL->isLittleEndian() && !CLI->enableBigEndian()) {
4217	// Currently we don't properly handle big endian code.
4218	OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
4219	F.getSubprogram(), &F.getEntryBlock());
4220	R << "unable to translate in big endian mode";
4221	reportTranslationError(MF&: MF, ORE&: ORE, R);
4222	return false;
4223	}
4224
4225	// Release the per-function state when we return, whether we succeeded or not.
4226	llvm::scope_exit FinalizeOnReturn([this]() { finalizeFunction(); });
4227
4228	// Setup a separate basic-block for the arguments and constants
4229	MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
4230	MF->push_back(MBB: EntryBB);
4231	EntryBuilder ->setMBB(*EntryBB);
4232
4233	DebugLoc DbgLoc = F.getEntryBlock().getFirstNonPHIIt()->getDebugLoc();
4234	SwiftError.setFunction(CurMF);
4235	SwiftError.createEntriesInEntryBlock(DbgLoc);
4236
4237	bool IsVarArg = F.isVarArg();
4238	bool HasMustTailInVarArgFn = false;
4239
4240	// Create all blocks, in IR order, to preserve the layout.
4241	FuncInfo.MBBMap.resize(N: F.getMaxBlockNumber());
4242	for (const BasicBlock &BB: F) {
4243	auto *&MBB = FuncInfo.MBBMap [BB.getNumber()];
4244
4245	MBB = MF->CreateMachineBasicBlock(BB: &BB);
4246	MF->push_back(MBB);
4247
4248	// Only mark the block if the BlockAddress actually has users. The
4249	// hasAddressTaken flag may be stale if the BlockAddress was optimized away
4250	// but the constant still exists in the uniquing table.
4251	if (BB.hasAddressTaken()) {
4252	if (BlockAddress *BA = BlockAddress::lookup(BB: &BB))
4253	if (!BA->hasZeroLiveUses())
4254	MBB->setAddressTakenIRBlock(const_cast<BasicBlock *>(&BB));
4255	}
4256
4257	if (!HasMustTailInVarArgFn)
4258	HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB);
4259	}
4260
4261	MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn);
4262
4263	// Make our arguments/constants entry block fallthrough to the IR entry block.
4264	EntryBB->addSuccessor(Succ: &getMBB(BB: F.front()));
4265
4266	// Lower the actual args into this basic block.
4267	SmallVector<ArrayRef<Register>, `8`> VRegArgs;
4268	for (const Argument &Arg: F.args()) {
4269	if (DL->getTypeStoreSize(Ty: Arg.getType()).isZero())
4270	continue; // Don't handle zero sized types.
4271	ArrayRef<Register> VRegs = getOrCreateVRegs(Val: Arg);
4272	VRegArgs.push_back(Elt: VRegs);
4273
4274	if (Arg.hasSwiftErrorAttr()) {
4275	assert(VRegs.size() == `1` && "Too many vregs for Swift error");
4276	SwiftError.setCurrentVReg(MBB: EntryBB, SwiftError.getFunctionArg(), VRegs [`0`]);
4277	}
4278	}
4279
4280	if (!CLI->lowerFormalArguments(MIRBuilder&: *EntryBuilder, F, VRegs: VRegArgs, FLI&: FuncInfo)) {
4281	OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
4282	F.getSubprogram(), &F.getEntryBlock());
4283	R << "unable to lower arguments: "
4284	<< ore::NV ("Prototype", F.getFunctionType());
4285	reportTranslationError(MF&: MF, ORE&: ORE, R);
4286	return false;
4287	}
4288
4289	// Need to visit defs before uses when translating instructions.
4290	GISelObserverWrapper WrapperObserver;
4291	if (EnableCSE && CSEInfo)
4292	WrapperObserver.addObserver(O: CSEInfo);
4293	{
4294	ReversePostOrderTraversal<const Function *> RPOT(&F);
4295	#ifndef NDEBUG
4296	DILocationVerifier Verifier;
4297	WrapperObserver.addObserver(&Verifier);
4298	#endif // ifndef NDEBUG
4299	RAIIMFObsDelInstaller ObsInstall(*MF, WrapperObserver);
4300	for (const BasicBlock *BB : RPOT) {
4301	MachineBasicBlock &MBB = getMBB(BB: *BB);
4302	// Set the insertion point of all the following translations to
4303	// the end of this basic block.
4304	CurBuilder ->setMBB(MBB);
4305	HasTailCall = false;
4306	for (const Instruction &Inst : *BB) {
4307	// If we translated a tail call in the last step, then we know
4308	// everything after the call is either a return, or something that is
4309	// handled by the call itself. (E.g. a lifetime marker or assume
4310	// intrinsic.) In this case, we should stop translating the block and
4311	// move on.
4312	if (HasTailCall)
4313	break;
4314	#ifndef NDEBUG
4315	Verifier.setCurrentInst(&Inst);
4316	#endif // ifndef NDEBUG
4317
4318	// Translate any debug-info attached to the instruction.
4319	translateDbgInfo(Inst, MIRBuilder&: *CurBuilder);
4320
4321	if (translate(Inst))
4322	continue;
4323
4324	OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
4325	Inst.getDebugLoc(), BB);
4326	R << "unable to translate instruction: " << ore::NV ("Opcode", &Inst);
4327
4328	if (ORE ->allowExtraAnalysis(PassName: "gisel-irtranslator")) {
4329	std::string InstStrStorage;
4330	raw_string_ostream InstStr(InstStrStorage);
4331	InstStr << Inst;
4332
4333	R << ": '" << InstStrStorage << "'";
4334	}
4335
4336	reportTranslationError(MF&: MF, ORE&: ORE, R);
4337	return false;
4338	}
4339
4340	if (!finalizeBasicBlock(BB: *BB, MBB)) {
4341	OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
4342	BB->getTerminator()->getDebugLoc(), BB);
4343	R << "unable to translate basic block";
4344	reportTranslationError(MF&: MF, ORE&: ORE, R);
4345	return false;
4346	}
4347	}
4348	#ifndef NDEBUG
4349	WrapperObserver.removeObserver(&Verifier);
4350	#endif
4351	}
4352
4353	finishPendingPhis();
4354
4355	SwiftError.propagateVRegs();
4356
4357	// Merge the argument lowering and constants block with its single
4358	// successor, the LLVM-IR entry block. We want the basic block to
4359	// be maximal.
4360	assert(EntryBB->succ_size() == `1` &&
4361	"Custom BB used for lowering should have only one successor");
4362	// Get the successor of the current entry block.
4363	MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
4364	assert(NewEntryBB.pred_size() == `1` &&
4365	"LLVM-IR entry block has a predecessor!?");
4366	// Move all the instruction from the current entry block to the
4367	// new entry block.
4368	NewEntryBB.splice(Where: NewEntryBB.begin(), Other: EntryBB, From: EntryBB->begin(),
4369	To: EntryBB->end());
4370
4371	// Update the live-in information for the new entry block.
4372	for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
4373	NewEntryBB.addLiveIn(RegMaskPair: LiveIn);
4374	NewEntryBB.sortUniqueLiveIns();
4375
4376	// Get rid of the now empty basic block.
4377	EntryBB->removeSuccessor(Succ: &NewEntryBB);
4378	MF->remove(MBBI: EntryBB);
4379	MF->deleteMachineBasicBlock(MBB: EntryBB);
4380
4381	assert(&MF->front() == &NewEntryBB &&
4382	"New entry wasn't next in the list of basic block!");
4383
4384	// Initialize stack protector information.
4385	StackProtector &SP = getAnalysis<StackProtector>();
4386	SP.copyToMachineFrameInfo(MFI&: MF->getFrameInfo());
4387
4388	return false;
4389	}
4390

Browse the source code of llvm_projects/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp