X86FlagsCopyLowering.cpp source code [llvm_projects/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp]

1	//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	/// \file
9	///
10	/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
11	/// flag bits.
12	///
13	/// We have to do this by carefully analyzing and rewriting the usage of the
14	/// copied EFLAGS register because there is no general way to rematerialize the
15	/// entire EFLAGS register safely and efficiently. Using `popf` both forces
16	/// dynamic stack adjustment and can create correctness issues due to IF, TF,
17	/// and other non-status flags being overwritten. Using sequences involving
18	/// SAHF doesn't work on all x86 processors and is often quite slow compared
19	/// to directly testing a single status preserved in its own GPR.
20	///
21	//===----------------------------------------------------------------------===//
22
23	#include "X86.h"
24	#include "X86InstrInfo.h"
25	#include "X86Subtarget.h"
26	#include "llvm/ADT/DepthFirstIterator.h"
27	#include "llvm/ADT/PostOrderIterator.h"
28	#include "llvm/ADT/STLExtras.h"
29	#include "llvm/ADT/ScopeExit.h"
30	#include "llvm/ADT/SmallPtrSet.h"
31	#include "llvm/ADT/SmallVector.h"
32	#include "llvm/ADT/Statistic.h"
33	#include "llvm/CodeGen/MachineBasicBlock.h"
34	#include "llvm/CodeGen/MachineConstantPool.h"
35	#include "llvm/CodeGen/MachineDominators.h"
36	#include "llvm/CodeGen/MachineFunction.h"
37	#include "llvm/CodeGen/MachineFunctionPass.h"
38	#include "llvm/CodeGen/MachineInstr.h"
39	#include "llvm/CodeGen/MachineInstrBuilder.h"
40	#include "llvm/CodeGen/MachineModuleInfo.h"
41	#include "llvm/CodeGen/MachineOperand.h"
42	#include "llvm/CodeGen/MachineRegisterInfo.h"
43	#include "llvm/CodeGen/MachineSSAUpdater.h"
44	#include "llvm/CodeGen/TargetInstrInfo.h"
45	#include "llvm/CodeGen/TargetRegisterInfo.h"
46	#include "llvm/CodeGen/TargetSchedule.h"
47	#include "llvm/CodeGen/TargetSubtargetInfo.h"
48	#include "llvm/IR/DebugLoc.h"
49	#include "llvm/MC/MCSchedule.h"
50	#include "llvm/Pass.h"
51	#include "llvm/Support/Debug.h"
52	#include "llvm/Support/raw_ostream.h"
53	#include <algorithm>
54	#include <cassert>
55	#include <iterator>
56	#include <utility>
57
58	using namespace llvm;
59
60	#define PASS_KEY "x86-flags-copy-lowering"
61	#define DEBUG_TYPE PASS_KEY
62
63	STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
64	STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
65	STATISTIC(NumTestsInserted, "Number of test instructions inserted");
66	STATISTIC(NumAddsInserted, "Number of adds instructions inserted");
67	STATISTIC(NumNFsConvertedTo, "Number of NF instructions converted to");
68
69	extern cl::opt<bool> X86EnableAPXForRelocation;
70
71	namespace {
72
73	// Convenient array type for storing registers associated with each condition.
74	using CondRegArray = std::array<Register, X86::LAST_VALID_COND + `1`>;
75
76	class X86FlagsCopyLoweringPass : public MachineFunctionPass {
77	public:
78	X86FlagsCopyLoweringPass() : MachineFunctionPass (ID) {}
79
80	StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
81	bool runOnMachineFunction(MachineFunction &MF) override;
82	void getAnalysisUsage(AnalysisUsage &AU) const override;
83
84	/// Pass identification, replacement for typeid.
85	static char ID;
86
87	private:
88	MachineRegisterInfo MRI = nullptr*;
89	const X86Subtarget Subtarget = nullptr*;
90	const X86InstrInfo TII = nullptr*;
91	const TargetRegisterInfo TRI = nullptr*;
92	const TargetRegisterClass PromoteRC = nullptr*;
93	MachineDominatorTree MDT = nullptr*;
94
95	CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
96	MachineBasicBlock::iterator CopyDefI);
97
98	Register promoteCondToReg(MachineBasicBlock &MBB,
99	MachineBasicBlock::iterator TestPos,
100	const DebugLoc &TestLoc, X86::CondCode Cond);
101	std::pair<Register, bool> getCondOrInverseInReg(
102	MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
103	const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs);
104	void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
105	const DebugLoc &Loc, Register Reg);
106
107	void rewriteSetCC(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
108	const DebugLoc &Loc, MachineInstr &MI,
109	CondRegArray &CondRegs);
110	void rewriteArithmetic(MachineBasicBlock &MBB,
111	MachineBasicBlock::iterator Pos, const DebugLoc &Loc,
112	MachineInstr &MI, CondRegArray &CondRegs);
113	void rewriteMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
114	const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs);
115	};
116
117	} // end anonymous namespace
118
119	INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
120	"X86 EFLAGS copy lowering", false, false)
121	INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
122	"X86 EFLAGS copy lowering", false, false)
123
124	FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
125	return new X86FlagsCopyLoweringPass ();
126	}
127
128	char X86FlagsCopyLoweringPass::ID = `0`;
129
130	void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
131	AU.addUsedIfAvailable<MachineDominatorTreeWrapperPass>();
132	MachineFunctionPass::getAnalysisUsage(AU);
133	}
134
135	static bool isArithmeticOp(unsigned Opc) {
136	return X86::isADC(Opcode: Opc) \|\| X86::isSBB(Opcode: Opc) \|\| X86::isRCL(Opcode: Opc) \|\|
137	X86::isRCR(Opcode: Opc) \|\| (Opc == X86::SETB_C32r \|\| Opc == X86::SETB_C64r);
138	}
139
140	static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
141	MachineInstr &SplitI,
142	const X86InstrInfo &TII) {
143	MachineFunction &MF = *MBB.getParent();
144
145	assert(SplitI.getParent() == &MBB &&
146	"Split instruction must be in the split block!");
147	assert(SplitI.isBranch() &&
148	"Only designed to split a tail of branch instructions!");
149	assert(X86::getCondFromBranch(SplitI) != X86::COND_INVALID &&
150	"Must split on an actual jCC instruction!");
151
152	// Dig out the previous instruction to the split point.
153	MachineInstr &PrevI = *std::prev(x: SplitI.getIterator());
154	assert(PrevI.isBranch() && "Must split after a branch!");
155	assert(X86::getCondFromBranch(PrevI) != X86::COND_INVALID &&
156	"Must split after an actual jCC instruction!");
157	assert(!std::prev(PrevI.getIterator())->isTerminator() &&
158	"Must only have this one terminator prior to the split!");
159
160	// Grab the one successor edge that will stay in `MBB`.
161	MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(i: `0`).getMBB();
162
163	// Analyze the original block to see if we are actually splitting an edge
164	// into two edges. This can happen when we have multiple conditional jumps to
165	// the same successor.
166	bool IsEdgeSplit =
167	std::any_of(first: SplitI.getIterator(), last: MBB.instr_end(),
168	pred: [&](MachineInstr &MI) {
169	assert(MI.isTerminator() &&
170	"Should only have spliced terminators!");
171	return llvm::any_of(
172	Range: MI.operands(), P: [&](MachineOperand &MOp) {
173	return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
174	});
175	}) \|\|
176	MBB.getFallThrough() == &UnsplitSucc;
177
178	MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
179
180	// Insert the new block immediately after the current one. Any existing
181	// fallthrough will be sunk into this new block anyways.
182	MF.insert(MBBI: std::next(x: MachineFunction::iterator (&MBB)), MBB: &NewMBB);
183
184	// Splice the tail of instructions into the new block.
185	NewMBB.splice(Where: NewMBB.end(), Other: &MBB, From: SplitI.getIterator(), To: MBB.end());
186
187	// Copy the necessary succesors (and their probability info) into the new
188	// block.
189	for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
190	if (IsEdgeSplit \|\| *SI != &UnsplitSucc)
191	NewMBB.copySuccessor(Orig: &MBB, I: SI);
192	// Normalize the probabilities if we didn't end up splitting the edge.
193	if (!IsEdgeSplit)
194	NewMBB.normalizeSuccProbs();
195
196	// Now replace all of the moved successors in the original block with the new
197	// block. This will merge their probabilities.
198	for (MachineBasicBlock *Succ : NewMBB.successors())
199	if (Succ != &UnsplitSucc)
200	MBB.replaceSuccessor(Old: Succ, New: &NewMBB);
201
202	// We should always end up replacing at least one successor.
203	assert(MBB.isSuccessor(&NewMBB) &&
204	"Failed to make the new block a successor!");
205
206	// Now update all the PHIs.
207	for (MachineBasicBlock *Succ : NewMBB.successors()) {
208	for (MachineInstr &MI : *Succ) {
209	if (!MI.isPHI())
210	break;
211
212	for (int OpIdx = `1`, NumOps = MI.getNumOperands(); OpIdx < NumOps;
213	OpIdx += `2`) {
214	MachineOperand &OpV = MI.getOperand(i: OpIdx);
215	MachineOperand &OpMBB = MI.getOperand(i: OpIdx + `1`);
216	assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
217	if (OpMBB.getMBB() != &MBB)
218	continue;
219
220	// Replace the operand for unsplit successors
221	if (!IsEdgeSplit \|\| Succ != &UnsplitSucc) {
222	OpMBB.setMBB(&NewMBB);
223
224	// We have to continue scanning as there may be multiple entries in
225	// the PHI.
226	continue;
227	}
228
229	// When we have split the edge append a new successor.
230	MI.addOperand(MF, Op: OpV);
231	MI.addOperand(MF, Op: MachineOperand::CreateMBB(MBB: &NewMBB));
232	break;
233	}
234	}
235	}
236
237	return NewMBB;
238	}
239
/// Classification of how an instruction affects EFLAGS, as computed by
/// getClobberType():
///  - NoClobber:         the instruction does not define EFLAGS.
///  - EvitableClobber:   the EFLAGS def is dead and an NF variant exists, so
///                       the clobber can be avoided.
///  - InevitableClobber: any other def of EFLAGS.
enum EFLAGSClobber { NoClobber, EvitableClobber, InevitableClobber };
241
242	static EFLAGSClobber getClobberType(const MachineInstr &MI) {
243	const MachineOperand *FlagDef =
244	MI.findRegisterDefOperand(Reg: X86::EFLAGS, /TRI=/nullptr);
245	if (!FlagDef)
246	return NoClobber;
247
248	// For the instructions are ADDrm/ADDmr with relocation, we'll skip the
249	// optimization for replacing non-NF with NF. This is to keep backward
250	// compatiblity with old version of linkers without APX relocation type
251	// support on Linux OS.
252	bool IsWithReloc =
253	X86EnableAPXForRelocation ? false : isAddMemInstrWithRelocation(MI);
254
255	if (FlagDef->isDead() && X86::getNFVariant(Opc: MI.getOpcode()) && !IsWithReloc)
256	return EvitableClobber;
257
258	return InevitableClobber;
259	}
260
261	bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
262	LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
263	<< " **********\n");
264
265	Subtarget = &MF.getSubtarget<X86Subtarget>();
266	MRI = &MF.getRegInfo();
267	TII = Subtarget->getInstrInfo();
268	TRI = Subtarget->getRegisterInfo();
269	PromoteRC = &X86::GR8RegClass;
270
271	if (MF.empty())
272	// Nothing to do for a degenerate empty function...
273	return false;
274
275	if (none_of(Range: MRI->def_instructions(Reg: X86::EFLAGS), P: [](const MachineInstr &MI) {
276	return MI.getOpcode() == TargetOpcode::COPY;
277	}))
278	return false;
279
280	// We change the code, so we don't preserve the dominator tree anyway. If we
281	// got a valid MDT from the pass manager, use that, otherwise construct one
282	// now. This is an optimization that avoids unnecessary MDT construction for
283	// functions that have no flag copies.
284
285	auto MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
286	std::unique_ptr<MachineDominatorTree> OwnedMDT;
287	if (MDTWrapper) {
288	MDT = &MDTWrapper->getDomTree();
289	} else {
290	OwnedMDT = std::make_unique<MachineDominatorTree>(args&: MF);
291	MDT = OwnedMDT.get();
292	}
293
294	// Collect the copies in RPO so that when there are chains where a copy is in
295	// turn copied again we visit the first one first. This ensures we can find
296	// viable locations for testing the original EFLAGS that dominate all the
297	// uses across complex CFGs.
298	SmallSetVector<MachineInstr *, `4`> Copies;
299	ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
300	for (MachineBasicBlock *MBB : RPOT)
301	for (MachineInstr &MI : *MBB)
302	if (MI.getOpcode() == TargetOpcode::COPY &&
303	MI.getOperand(i: `0`).getReg() == X86::EFLAGS)
304	Copies.insert(X: &MI);
305
306	// Try to elminate the copys by transform the instructions between copy and
307	// copydef to the NF (no flags update) variants, e.g.
308	//
309	// %1:gr64 = COPY $eflags
310	// OP1 implicit-def dead $eflags
311	// $eflags = COPY %1
312	// OP2 cc, implicit $eflags
313	//
314	// ->
315	//
316	// OP1_NF
317	// OP2 implicit $eflags
318	if (Subtarget->hasNF()) {
319	SmallSetVector<MachineInstr *, `4`> RemovedCopies;
320	// CopyIIt may be invalidated by removing copies.
321	auto CopyIIt = Copies.begin(), CopyIEnd = Copies.end();
322	while (CopyIIt != CopyIEnd) {
323	auto NCopyIIt = std::next(x: CopyIIt);
324	SmallSetVector<MachineInstr *, `4`> EvitableClobbers;
325	MachineInstr CopyI = CopyIIt;
326	MachineOperand &VOp = CopyI->getOperand(i: `1`);
327	MachineInstr *CopyDefI = MRI->getVRegDef(Reg: VOp.getReg());
328	MachineBasicBlock *CopyIMBB = CopyI->getParent();
329	MachineBasicBlock *CopyDefIMBB = CopyDefI->getParent();
330	// Walk all basic blocks reachable in depth-first iteration on the inverse
331	// CFG from CopyIMBB to CopyDefIMBB. These blocks are all the blocks that
332	// may be executed between the execution of CopyDefIMBB and CopyIMBB. On
333	// all execution paths, instructions from CopyDefI to CopyI (exclusive)
334	// has to be NF-convertible if it clobbers flags.
335	for (auto BI = idf_begin(G: CopyIMBB), BE = idf_end(G: CopyDefIMBB); BI != BE;
336	++BI) {
337	MachineBasicBlock MBB = BI;
338	for (auto I = (MBB != CopyDefIMBB)
339	? MBB->begin()
340	: std::next(x: MachineBasicBlock::iterator (CopyDefI)),
341	E = (MBB != CopyIMBB) ? MBB->end()
342	: MachineBasicBlock::iterator (CopyI);
343	I != E; ++I) {
344	MachineInstr &MI = *I;
345	EFLAGSClobber ClobberType = getClobberType(MI);
346	if (ClobberType == NoClobber)
347	continue;
348
349	if (ClobberType == InevitableClobber)
350	goto ProcessNextCopyI;
351
352	assert(ClobberType == EvitableClobber && "unexpected workflow");
353	EvitableClobbers.insert(X: &MI);
354	}
355	}
356	// Covert evitable clobbers into NF variants and remove the copyies.
357	RemovedCopies.insert(X: CopyI);
358	CopyI->eraseFromParent();
359	if (MRI->use_nodbg_empty(RegNo: CopyDefI->getOperand(i: `0`).getReg())) {
360	RemovedCopies.insert(X: CopyDefI);
361	CopyDefI->eraseFromParent();
362	}
363	++NumCopiesEliminated;
364	for (auto *Clobber : EvitableClobbers) {
365	unsigned NewOpc = X86::getNFVariant(Opc: Clobber->getOpcode());
366	assert(NewOpc && "evitable clobber must have a NF variant");
367	Clobber->setDesc(TII->get(Opcode: NewOpc));
368	Clobber->removeOperand(
369	OpNo: Clobber->findRegisterDefOperand(Reg: X86::EFLAGS, /TRI=/nullptr)
370	->getOperandNo());
371	++NumNFsConvertedTo;
372	}
373	// Update liveins for basic blocks in the path
374	for (auto BI = idf_begin(G: CopyIMBB), BE = idf_end(G: CopyDefIMBB); BI != BE;
375	++BI)
376	if (*BI != CopyDefIMBB)
377	BI ->addLiveIn(PhysReg: X86::EFLAGS);
378	ProcessNextCopyI:
379	CopyIIt = NCopyIIt;
380	}
381	Copies.set_subtract(RemovedCopies);
382	}
383
384	// For the rest of copies that cannot be eliminated by NF transform, we use
385	// setcc to preserve the flags in GPR32 before OP1, and recheck its value
386	// before using the flags, e.g.
387	//
388	// %1:gr64 = COPY $eflags
389	// OP1 implicit-def dead $eflags
390	// $eflags = COPY %1
391	// OP2 cc, implicit $eflags
392	//
393	// ->
394	//
395	// %1:gr8 = SETCCr cc, implicit $eflags
396	// OP1 implicit-def dead $eflags
397	// TEST8rr %1, %1, implicit-def $eflags
398	// OP2 ne, implicit $eflags
399	for (MachineInstr *CopyI : Copies) {
400	MachineBasicBlock &MBB = *CopyI->getParent();
401
402	MachineOperand &VOp = CopyI->getOperand(i: `1`);
403	assert(VOp.isReg() &&
404	"The input to the copy for EFLAGS should always be a register!");
405	MachineInstr &CopyDefI = *MRI->getVRegDef(Reg: VOp.getReg());
406	if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
407	// FIXME: The big likely candidate here are PHI nodes. We could in theory
408	// handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
409	// enough that it is probably better to change every other part of LLVM
410	// to avoid creating them. The issue is that once we have PHIs we won't
411	// know which original EFLAGS value we need to capture with our setCCs
412	// below. The end result will be computing a complete set of setCCs that
413	// we might* want, computing them in every place where we copy out of*
414	// EFLAGS and then doing SSA formation on all of them to insert necessary
415	// PHI nodes and consume those here. Then hoping that somehow we DCE the
416	// unnecessary ones. This DCE seems very unlikely to be successful and so
417	// we will almost certainly end up with a glut of dead setCC
418	// instructions. Until we have a motivating test case and fail to avoid
419	// it by changing other parts of LLVM's lowering, we refuse to handle
420	// this complex case here.
421	LLVM_DEBUG(
422	dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
423	CopyDefI.dump());
424	report_fatal_error(
425	reason: "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
426	}
427
428	auto Cleanup = make_scope_exit(F: [&] {
429	// All uses of the EFLAGS copy are now rewritten, kill the copy into
430	// eflags and if dead the copy from.
431	CopyI->eraseFromParent();
432	if (MRI->use_empty(RegNo: CopyDefI.getOperand(i: `0`).getReg()))
433	CopyDefI.eraseFromParent();
434	++NumCopiesEliminated;
435	});
436
437	MachineOperand &DOp = CopyI->getOperand(i: `0`);
438	assert(DOp.isDef() && "Expected register def!");
439	assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
440	if (DOp.isDead())
441	continue;
442
443	MachineBasicBlock *TestMBB = CopyDefI.getParent();
444	auto TestPos = CopyDefI.getIterator();
445	DebugLoc TestLoc = CopyDefI.getDebugLoc();
446
447	LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());
448
449	// Walk up across live-in EFLAGS to find where they were actually def'ed.
450	//
451	// This copy's def may just be part of a region of blocks covered by
452	// a single def of EFLAGS and we want to find the top of that region where
453	// possible.
454	//
455	// This is essentially a search for a candidate* reaching definition*
456	// location. We don't need to ever find the actual reaching definition here,
457	// but we want to walk up the dominator tree to find the highest point which
458	// would be viable for such a definition.
459	auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
460	MachineBasicBlock::iterator End) {
461	// Scan backwards as we expect these to be relatively short and often find
462	// a clobber near the end.
463	return llvm::any_of(
464	Range: llvm::reverse(C: llvm::make_range(x: Begin, y: End)), P: [&](MachineInstr &MI) {
465	// Flag any instruction (other than the copy we are
466	// currently rewriting) that defs EFLAGS.
467	return &MI != CopyI &&
468	MI.findRegisterDefOperand(Reg: X86::EFLAGS, /TRI=/nullptr);
469	});
470	};
471	auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
472	MachineBasicBlock *EndMBB) {
473	assert(MDT->dominates(BeginMBB, EndMBB) &&
474	"Only support paths down the dominator tree!");
475	SmallPtrSet<MachineBasicBlock *, `4`> Visited;
476	SmallVector<MachineBasicBlock *, `4`> Worklist;
477	// We terminate at the beginning. No need to scan it.
478	Visited.insert(Ptr: BeginMBB);
479	Worklist.push_back(Elt: EndMBB);
480	do {
481	auto *MBB = Worklist.pop_back_val();
482	for (auto *PredMBB : MBB->predecessors()) {
483	if (!Visited.insert(Ptr: PredMBB).second)
484	continue;
485	if (HasEFLAGSClobber (PredMBB->begin(), PredMBB->end()))
486	return true;
487	// Enqueue this block to walk its predecessors.
488	Worklist.push_back(Elt: PredMBB);
489	}
490	} while (!Worklist.empty());
491	// No clobber found along a path from the begin to end.
492	return false;
493	};
494	while (TestMBB->isLiveIn(Reg: X86::EFLAGS) && !TestMBB->pred_empty() &&
495	!HasEFLAGSClobber (TestMBB->begin(), TestPos)) {
496	// Find the nearest common dominator of the predecessors, as
497	// that will be the best candidate to hoist into.
498	MachineBasicBlock *HoistMBB =
499	std::accumulate(first: std::next(x: TestMBB->pred_begin()), last: TestMBB->pred_end(),
500	init: *TestMBB->pred_begin(),
501	binary_op: [&](MachineBasicBlock LHS, MachineBasicBlock RHS) {
502	return MDT->findNearestCommonDominator(A: LHS, B: RHS);
503	});
504
505	// Now we need to scan all predecessors that may be reached along paths to
506	// the hoist block. A clobber anywhere in any of these blocks the hoist.
507	// Note that this even handles loops because we require no* clobbers.*
508	if (HasEFLAGSClobberPath (HoistMBB, TestMBB))
509	break;
510
511	// We also need the terminators to not sneakily clobber flags.
512	if (HasEFLAGSClobber (HoistMBB->getFirstTerminator()->getIterator(),
513	HoistMBB->instr_end()))
514	break;
515
516	// We found a viable location, hoist our test position to it.
517	TestMBB = HoistMBB;
518	TestPos = TestMBB->getFirstTerminator()->getIterator();
519	// Clear the debug location as it would just be confusing after hoisting.
520	TestLoc = DebugLoc ();
521	}
522	LLVM_DEBUG({
523	auto DefIt = llvm::find_if(
524	llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
525	[&](MachineInstr &MI) {
526	return MI.findRegisterDefOperand(X86::EFLAGS, /TRI=/nullptr);
527	});
528	if (DefIt.base() != TestMBB->instr_begin()) {
529	dbgs() << " Using EFLAGS defined by: ";
530	DefIt->dump();
531	} else {
532	dbgs() << " Using live-in flags for BB:\n";
533	TestMBB->dump();
534	}
535	});
536
537	// While rewriting uses, we buffer jumps and rewrite them in a second pass
538	// because doing so will perturb the CFG that we are walking to find the
539	// uses in the first place.
540	SmallVector<MachineInstr *, `4`> JmpIs;
541
542	// Gather the condition flags that have already been preserved in
543	// registers. We do this from scratch each time as we expect there to be
544	// very few of them and we expect to not revisit the same copy definition
545	// many times. If either of those change sufficiently we could build a map
546	// of these up front instead.
547	CondRegArray CondRegs = collectCondsInRegs(MBB&: *TestMBB, CopyDefI: TestPos);
548
549	// Collect the basic blocks we need to scan. Typically this will just be
550	// a single basic block but we may have to scan multiple blocks if the
551	// EFLAGS copy lives into successors.
552	SmallVector<MachineBasicBlock *, `2`> Blocks;
553	SmallPtrSet<MachineBasicBlock *, `2`> VisitedBlocks;
554	Blocks.push_back(Elt: &MBB);
555
556	do {
557	MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
558
559	// Track when if/when we find a kill of the flags in this block.
560	bool FlagsKilled = false;
561
562	// In most cases, we walk from the beginning to the end of the block. But
563	// when the block is the same block as the copy is from, we will visit it
564	// twice. The first time we start from the copy and go to the end. The
565	// second time we start from the beginning and go to the copy. This lets
566	// us handle copies inside of cycles.
567	// FIXME: This loop is super* confusing. This is at least in part*
568	// a symptom of all of this routine needing to be refactored into
569	// documentable components. Once done, there may be a better way to write
570	// this loop.
571	for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(Ptr: &UseMBB))
572	? std::next(x: CopyI->getIterator())
573	: UseMBB.instr_begin(),
574	MIE = UseMBB.instr_end();
575	MII != MIE;) {
576	MachineInstr &MI = *MII ++;
577	// If we are in the original copy block and encounter either the copy
578	// def or the copy itself, break so that we don't re-process any part of
579	// the block or process the instructions in the range that was copied
580	// over.
581	if (&MI == CopyI \|\| &MI == &CopyDefI) {
582	assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
583	"Should only encounter these on the second pass over the "
584	"original block.");
585	break;
586	}
587
588	MachineOperand *FlagUse =
589	MI.findRegisterUseOperand(Reg: X86::EFLAGS, /TRI=/nullptr);
590	FlagsKilled = MI.modifiesRegister(Reg: X86::EFLAGS, TRI);
591
592	if (!FlagUse && FlagsKilled)
593	break;
594	else if (!FlagUse)
595	continue;
596
597	LLVM_DEBUG(dbgs() << " Rewriting use: "; MI.dump());
598
599	// Check the kill flag before we rewrite as that may change it.
600	if (FlagUse->isKill())
601	FlagsKilled = true;
602
603	// Once we encounter a branch, the rest of the instructions must also be
604	// branches. We can't rewrite in place here, so we handle them below.
605	//
606	// Note that we don't have to handle tail calls here, even conditional
607	// tail calls, as those are not introduced into the X86 MI until post-RA
608	// branch folding or black placement. As a consequence, we get to deal
609	// with the simpler formulation of conditional branches followed by tail
610	// calls.
611	if (X86::getCondFromBranch(MI) != X86::COND_INVALID) {
612	auto JmpIt = MI.getIterator();
613	do {
614	JmpIs.push_back(Elt: &*JmpIt);
615	++JmpIt;
616	} while (JmpIt != UseMBB.instr_end() &&
617	X86::getCondFromBranch(MI: *JmpIt) != X86::COND_INVALID);
618	break;
619	}
620
621	// Otherwise we can just rewrite in-place.
622	unsigned Opc = MI.getOpcode();
623	if (Opc == TargetOpcode::COPY) {
624	// Just replace this copy with the original copy def.
625	MRI->replaceRegWith(FromReg: MI.getOperand(i: `0`).getReg(),
626	ToReg: CopyDefI.getOperand(i: `0`).getReg());
627	MI.eraseFromParent();
628	} else if (X86::isSETCC(Opcode: Opc) \|\| X86::isSETZUCC(Opcode: Opc)) {
629	rewriteSetCC(MBB&: *TestMBB, Pos: TestPos, Loc: TestLoc, MI, CondRegs);
630	} else if (isArithmeticOp(Opc)) {
631	rewriteArithmetic(MBB&: *TestMBB, Pos: TestPos, Loc: TestLoc, MI, CondRegs);
632	} else {
633	rewriteMI(MBB&: *TestMBB, Pos: TestPos, Loc: TestLoc, MI, CondRegs);
634	}
635
636	// If this was the last use of the flags, we're done.
637	if (FlagsKilled)
638	break;
639	}
640
641	// If the flags were killed, we're done with this block.
642	if (FlagsKilled)
643	continue;
644
645	// Otherwise we need to scan successors for ones where the flags live-in
646	// and queue those up for processing.
647	for (MachineBasicBlock *SuccMBB : UseMBB.successors())
648	if (SuccMBB->isLiveIn(Reg: X86::EFLAGS) &&
649	VisitedBlocks.insert(Ptr: SuccMBB).second) {
650	// We currently don't do any PHI insertion and so we require that the
651	// test basic block dominates all of the use basic blocks. Further, we
652	// can't have a cycle from the test block back to itself as that would
653	// create a cycle requiring a PHI to break it.
654	//
655	// We could in theory do PHI insertion here if it becomes useful by
656	// just taking undef values in along every edge that we don't trace
657	// this EFLAGS copy along. This isn't as bad as fully general PHI
658	// insertion, but still seems like a great deal of complexity.
659	//
660	// Because it is theoretically possible that some earlier MI pass or
661	// other lowering transformation could induce this to happen, we do
662	// a hard check even in non-debug builds here.
663	if (SuccMBB == TestMBB \|\| !MDT->dominates(A: TestMBB, B: SuccMBB)) {
664	LLVM_DEBUG({
665	dbgs()
666	<< "ERROR: Encountered use that is not dominated by our test "
667	"basic block! Rewriting this would require inserting PHI "
668	"nodes to track the flag state across the CFG.\n\nTest "
669	"block:\n";
670	TestMBB->dump();
671	dbgs() << "Use block:\n";
672	SuccMBB->dump();
673	});
674	report_fatal_error(
675	reason: "Cannot lower EFLAGS copy when original copy def "
676	"does not dominate all uses.");
677	}
678
679	Blocks.push_back(Elt: SuccMBB);
680
681	// After this, EFLAGS will be recreated before each use.
682	SuccMBB->removeLiveIn(Reg: X86::EFLAGS);
683	}
684	} while (!Blocks.empty());
685
686	// Now rewrite the jumps that use the flags. These we handle specially
687	// because if there are multiple jumps in a single basic block we'll have
688	// to do surgery on the CFG.
689	MachineBasicBlock LastJmpMBB = nullptr*;
690	for (MachineInstr *JmpI : JmpIs) {
691	// Past the first jump within a basic block we need to split the blocks
692	// apart.
693	if (JmpI->getParent() == LastJmpMBB)
694	splitBlock(MBB&: JmpI->getParent(), SplitI&: JmpI, TII: *TII);
695	else
696	LastJmpMBB = JmpI->getParent();
697
698	rewriteMI(MBB&: TestMBB, Pos: TestPos, Loc: TestLoc, MI&: JmpI, CondRegs);
699	}
700
701	// FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
702	// the copy's def operand is itself a kill.
703	}
704
705	#ifndef NDEBUG
706	for (MachineBasicBlock &MBB : MF)
707	for (MachineInstr &MI : MBB)
708	if (MI.getOpcode() == TargetOpcode::COPY &&
709	(MI.getOperand(`0`).getReg() == X86::EFLAGS \|\|
710	MI.getOperand(`1`).getReg() == X86::EFLAGS)) {
711	LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
712	MI.dump());
713	llvm_unreachable("Unlowered EFLAGS copy!");
714	}
715	#endif
716
717	return true;
718	}
719
720	/// Collect any conditions that have already been set in registers so that we
721	/// can re-use them rather than adding duplicates.
722	CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
723	MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) {
724	CondRegArray CondRegs = {};
725
726	// Scan backwards across the range of instructions with live EFLAGS.
727	for (MachineInstr &MI :
728	llvm::reverse(C: llvm::make_range(x: MBB.begin(), y: TestPos))) {
729	X86::CondCode Cond = X86::getCondFromSETCC(MI);
730	if (Cond != X86::COND_INVALID && !MI.mayStore() &&
731	MI.getOperand(i: `0`).isReg() && MI.getOperand(i: `0`).getReg().isVirtual()) {
732	assert(MI.getOperand(`0`).isDef() &&
733	"A non-storing SETcc should always define a register!");
734	CondRegs [Cond] = MI.getOperand(i: `0`).getReg();
735	}
736
737	// Stop scanning when we see the first definition of the EFLAGS as prior to
738	// this we would potentially capture the wrong flag state.
739	if (MI.findRegisterDefOperand(Reg: X86::EFLAGS, /TRI=/nullptr))
740	break;
741	}
742	return CondRegs;
743	}
744
745	Register X86FlagsCopyLoweringPass::promoteCondToReg(
746	MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
747	const DebugLoc &TestLoc, X86::CondCode Cond) {
748	Register Reg = MRI->createVirtualRegister(RegClass: PromoteRC);
749	auto SetI = BuildMI(BB&: TestMBB, I: TestPos, MIMD: TestLoc, MCID: TII->get(Opcode: X86::SETCCr), DestReg: Reg)
750	.addImm(Val: Cond);
751	(void)SetI;
752	LLVM_DEBUG(dbgs() << " save cond: "; SetI->dump());
753	++NumSetCCsInserted;
754	return Reg;
755	}
756
757	std::pair<Register, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
758	MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
759	const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
760	Register &CondReg = CondRegs [Cond];
761	Register &InvCondReg = CondRegs [X86::GetOppositeBranchCondition(CC: Cond)];
762	if (!CondReg && !InvCondReg)
763	CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);
764
765	if (CondReg)
766	return {CondReg, false};
767	else
768	return {InvCondReg, true};
769	}
770
771	void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
772	MachineBasicBlock::iterator Pos,
773	const DebugLoc &Loc, Register Reg) {
774	auto TestI =
775	BuildMI(BB&: MBB, I: Pos, MIMD: Loc, MCID: TII->get(Opcode: X86::TEST8rr)).addReg(RegNo: Reg).addReg(RegNo: Reg);
776	(void)TestI;
777	LLVM_DEBUG(dbgs() << " test cond: "; TestI->dump());
778	++NumTestsInserted;
779	}
780
781	void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &MBB,
782	MachineBasicBlock::iterator Pos,
783	const DebugLoc &Loc,
784	MachineInstr &MI,
785	CondRegArray &CondRegs) {
786	X86::CondCode Cond = X86::getCondFromSETCC(MI);
787	// Note that we can't usefully rewrite this to the inverse without complex
788	// analysis of the users of the setCC. Largely we rely on duplicates which
789	// could have been avoided already being avoided here.
790	Register &CondReg = CondRegs [Cond];
791	if (!CondReg)
792	CondReg = promoteCondToReg(TestMBB&: MBB, TestPos: Pos, TestLoc: Loc, Cond);
793
794	if (X86::isSETZUCC(Opcode: MI.getOpcode())) {
795	// SETZUCC is generated for register only for now.
796	assert(!MI.mayStore() && "Cannot handle memory variants");
797	assert(MI.getOperand(`0`).isReg() &&
798	"Cannot have a non-register defined operand to SETZUcc!");
799	Register OldReg = MI.getOperand(i: `0`).getReg();
800	// Drop Kill flags on the old register before replacing. CondReg may have
801	// a longer live range.
802	MRI->clearKillFlags(Reg: OldReg);
803	for (auto &Use : MRI->use_instructions(Reg: OldReg)) {
804	assert(Use.getOpcode() == X86::INSERT_SUBREG &&
805	"SETZUCC should be only used by INSERT_SUBREG");
806	Use.getOperand(i: `2`).setReg(CondReg);
807	// Recover MOV32r0 before INSERT_SUBREG, which removed by SETZUCC.
808	Register ZeroReg = MRI->createVirtualRegister(RegClass: &X86::GR32RegClass);
809	BuildMI(BB&: *Use.getParent(), I: &Use, MIMD: Use.getDebugLoc(), MCID: TII->get(Opcode: X86::MOV32r0),
810	DestReg: ZeroReg);
811	Use.getOperand(i: `1`).setReg(ZeroReg);
812	}
813	MI.eraseFromParent();
814	return;
815	}
816
817	// Rewriting a register def is trivial: we just replace the register and
818	// remove the setcc.
819	if (!MI.mayStore()) {
820	assert(MI.getOperand(`0`).isReg() &&
821	"Cannot have a non-register defined operand to SETcc!");
822	Register OldReg = MI.getOperand(i: `0`).getReg();
823	// Drop Kill flags on the old register before replacing. CondReg may have
824	// a longer live range.
825	MRI->clearKillFlags(Reg: OldReg);
826	MRI->replaceRegWith(FromReg: OldReg, ToReg: CondReg);
827	MI.eraseFromParent();
828	return;
829	}
830
831	// Otherwise, we need to emit a store.
832	auto MIB = BuildMI(BB&: *MI.getParent(), I: MI.getIterator(), MIMD: MI.getDebugLoc(),
833	MCID: TII->get(Opcode: X86::MOV8mr));
834	// Copy the address operands.
835	for (int i = `0`; i < X86::AddrNumOperands; ++i)
836	MIB.add(MO: MI.getOperand(i));
837
838	MIB.addReg(RegNo: CondReg);
839	MIB.setMemRefs(MI.memoperands());
840	MI.eraseFromParent();
841	}
842
843	void X86FlagsCopyLoweringPass::rewriteArithmetic(
844	MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
845	const DebugLoc &Loc, MachineInstr &MI, CondRegArray &CondRegs) {
846	// Arithmetic is either reading CF or OF.
847	X86::CondCode Cond = X86::COND_B; // CF == 1
848	// The addend to use to reset CF or OF when added to the flag value.
849	// Set up an addend that when one is added will need a carry due to not
850	// having a higher bit available.
851	int Addend = `255`;
852
853	// Now get a register that contains the value of the flag input to the
854	// arithmetic. We require exactly this flag to simplify the arithmetic
855	// required to materialize it back into the flag.
856	Register &CondReg = CondRegs [Cond];
857	if (!CondReg)
858	CondReg = promoteCondToReg(TestMBB&: MBB, TestPos: Pos, TestLoc: Loc, Cond);
859
860	// Insert an instruction that will set the flag back to the desired value.
861	Register TmpReg = MRI->createVirtualRegister(RegClass: PromoteRC);
862	auto AddI =
863	BuildMI(BB&: *MI.getParent(), I: MI.getIterator(), MIMD: MI.getDebugLoc(),
864	MCID: TII->get(Opcode: Subtarget->hasNDD() ? X86::ADD8ri_ND : X86::ADD8ri))
865	.addDef(RegNo: TmpReg, Flags: RegState::Dead)
866	.addReg(RegNo: CondReg)
867	.addImm(Val: Addend);
868	(void)AddI;
869	LLVM_DEBUG(dbgs() << " add cond: "; AddI->dump());
870	++NumAddsInserted;
871	MI.findRegisterUseOperand(Reg: X86::EFLAGS, /TRI=/nullptr)->setIsKill(true);
872	}
873
874	static X86::CondCode getImplicitCondFromMI(unsigned Opc) {
875	#define FROM_TO(A, B) \
876	case X86::CMOV##A##_Fp32: \
877	case X86::CMOV##A##_Fp64: \
878	case X86::CMOV##A##_Fp80: \
879	return X86::COND_##B;
880
881	switch (Opc) {
882	default:
883	return X86::COND_INVALID;
884	FROM_TO(B, B)
885	FROM_TO(E, E)
886	FROM_TO(P, P)
887	FROM_TO(BE, BE)
888	FROM_TO(NB, AE)
889	FROM_TO(NE, NE)
890	FROM_TO(NP, NP)
891	FROM_TO(NBE, A)
892	}
893	#undef FROM_TO
894	}
895
896	static unsigned getOpcodeWithCC(unsigned Opc, X86::CondCode CC) {
897	assert((CC == X86::COND_E \|\| CC == X86::COND_NE) && "Unexpected CC");
898	#define CASE(A) \
899	case X86::CMOVB_##A: \
900	case X86::CMOVE_##A: \
901	case X86::CMOVP_##A: \
902	case X86::CMOVBE_##A: \
903	case X86::CMOVNB_##A: \
904	case X86::CMOVNE_##A: \
905	case X86::CMOVNP_##A: \
906	case X86::CMOVNBE_##A: \
907	return (CC == X86::COND_E) ? X86::CMOVE_##A : X86::CMOVNE_##A;
908	switch (Opc) {
909	default:
910	llvm_unreachable("Unexpected opcode");
911	CASE(Fp32)
912	CASE(Fp64)
913	CASE(Fp80)
914	}
915	#undef CASE
916	}
917
918	void X86FlagsCopyLoweringPass::rewriteMI(MachineBasicBlock &MBB,
919	MachineBasicBlock::iterator Pos,
920	const DebugLoc &Loc, MachineInstr &MI,
921	CondRegArray &CondRegs) {
922	// First get the register containing this specific condition.
923	bool IsImplicitCC = false;
924	X86::CondCode CC = X86::getCondFromMI(MI);
925	if (CC == X86::COND_INVALID) {
926	CC = getImplicitCondFromMI(Opc: MI.getOpcode());
927	IsImplicitCC = true;
928	}
929	assert(CC != X86::COND_INVALID && "Unknown EFLAG user!");
930	Register CondReg;
931	bool Inverted;
932	std::tie(args&: CondReg, args&: Inverted) =
933	getCondOrInverseInReg(TestMBB&: MBB, TestPos: Pos, TestLoc: Loc, Cond: CC, CondRegs);
934
935	// Insert a direct test of the saved register.
936	insertTest(MBB&: *MI.getParent(), Pos: MI.getIterator(), Loc: MI.getDebugLoc(), Reg: CondReg);
937
938	// Rewrite the instruction to use the !ZF flag from the test, and then kill
939	// its use of the flags afterward.
940	X86::CondCode NewCC = Inverted ? X86::COND_E : X86::COND_NE;
941	if (IsImplicitCC)
942	MI.setDesc(TII->get(Opcode: getOpcodeWithCC(Opc: MI.getOpcode(), CC: NewCC)));
943	else
944	MI.getOperand(i: MI.getDesc().getNumOperands() - `1`).setImm(NewCC);
945
946	MI.findRegisterUseOperand(Reg: X86::EFLAGS, /TRI=/nullptr)->setIsKill(true);
947	LLVM_DEBUG(dbgs() << " fixed instruction: "; MI.dump());
948	}
949

Browse the source code of llvm_projects/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp