MachineCombiner.cpp source code [llvm_projects/llvm/lib/CodeGen/MachineCombiner.cpp]

1	//===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// The machine combiner pass uses machine trace metrics to ensure the combined
10	// instructions do not lengthen the critical path or the resource depth.
11	//===----------------------------------------------------------------------===//
12
13	#include "llvm/ADT/DenseMap.h"
14	#include "llvm/ADT/Statistic.h"
15	#include "llvm/Analysis/ProfileSummaryInfo.h"
16	#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
17	#include "llvm/CodeGen/MachineCombinerPattern.h"
18	#include "llvm/CodeGen/MachineDominators.h"
19	#include "llvm/CodeGen/MachineFunction.h"
20	#include "llvm/CodeGen/MachineFunctionPass.h"
21	#include "llvm/CodeGen/MachineLoopInfo.h"
22	#include "llvm/CodeGen/MachineRegisterInfo.h"
23	#include "llvm/CodeGen/MachineSizeOpts.h"
24	#include "llvm/CodeGen/MachineTraceMetrics.h"
25	#include "llvm/CodeGen/RegisterClassInfo.h"
26	#include "llvm/CodeGen/TargetInstrInfo.h"
27	#include "llvm/CodeGen/TargetRegisterInfo.h"
28	#include "llvm/CodeGen/TargetSchedule.h"
29	#include "llvm/CodeGen/TargetSubtargetInfo.h"
30	#include "llvm/InitializePasses.h"
31	#include "llvm/Support/CommandLine.h"
32	#include "llvm/Support/Debug.h"
33	#include "llvm/Support/raw_ostream.h"
34
35	using namespace llvm;
36
37	#define DEBUG_TYPE "machine-combiner"
38
39	STATISTIC(NumInstCombined, "Number of machineinst combined");
40
41	static cl::opt<unsigned>
42	inc_threshold("machine-combiner-inc-threshold", cl::Hidden,
43	cl::desc ("Incremental depth computation will be used for basic "
44	"blocks with more instructions."), cl::init(Val: `500`));
45
46	static cl::opt<bool> dump_intrs("machine-combiner-dump-subst-intrs", cl::Hidden,
47	cl::desc ("Dump all substituted intrs"),
48	cl::init(Val: false));
49
50	#ifdef EXPENSIVE_CHECKS
51	static cl::opt<bool> VerifyPatternOrder(
52	"machine-combiner-verify-pattern-order", cl::Hidden,
53	cl::desc(
54	"Verify that the generated patterns are ordered by increasing latency"),
55	cl::init(true));
56	#else
57	static cl::opt<bool> VerifyPatternOrder(
58	"machine-combiner-verify-pattern-order", cl::Hidden,
59	cl::desc (
60	"Verify that the generated patterns are ordered by increasing latency"),
61	cl::init(Val: false));
62	#endif
63
64	namespace {
65	class MachineCombiner : public MachineFunctionPass {
66	const TargetSubtargetInfo STI = nullptr*;
67	const TargetInstrInfo TII = nullptr*;
68	const TargetRegisterInfo TRI = nullptr*;
69	MCSchedModel SchedModel;
70	MachineRegisterInfo MRI = nullptr*;
71	MachineLoopInfo MLI = nullptr; // Current MachineLoopInfo*
72	MachineTraceMetrics Traces = nullptr*;
73	MachineTraceMetrics::Ensemble TraceEnsemble = nullptr*;
74	MachineBlockFrequencyInfo MBFI = nullptr*;
75	ProfileSummaryInfo PSI = nullptr*;
76	RegisterClassInfo RegClassInfo;
77
78	TargetSchedModel TSchedModel;
79
80	public:
81	static char ID;
82	MachineCombiner() : MachineFunctionPass (ID) {
83	initializeMachineCombinerPass(*PassRegistry::getPassRegistry());
84	}
85	void getAnalysisUsage(AnalysisUsage &AU) const override;
86	bool runOnMachineFunction(MachineFunction &MF) override;
87	StringRef getPassName() const override { return "Machine InstCombiner"; }
88
89	private:
90	bool combineInstructions(MachineBasicBlock *);
91	MachineInstr getOperandDef(const* MachineOperand &MO);
92	bool isTransientMI(const MachineInstr *MI);
93	unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
94	DenseMap<Register, unsigned> &InstrIdxForVirtReg,
95	MachineTraceMetrics::Trace BlockTrace,
96	const MachineBasicBlock &MBB);
97	unsigned getLatency(MachineInstr Root, MachineInstr NewRoot,
98	MachineTraceMetrics::Trace BlockTrace);
99	bool improvesCriticalPathLen(MachineBasicBlock MBB, MachineInstr Root,
100	MachineTraceMetrics::Trace BlockTrace,
101	SmallVectorImpl<MachineInstr *> &InsInstrs,
102	SmallVectorImpl<MachineInstr *> &DelInstrs,
103	DenseMap<Register, unsigned> &InstrIdxForVirtReg,
104	unsigned Pattern, bool SlackIsAccurate);
105	bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB,
106	SmallVectorImpl<MachineInstr *> &InsInstrs,
107	SmallVectorImpl<MachineInstr *> &DelInstrs,
108	unsigned Pattern);
109	bool preservesResourceLen(MachineBasicBlock *MBB,
110	MachineTraceMetrics::Trace BlockTrace,
111	SmallVectorImpl<MachineInstr *> &InsInstrs,
112	SmallVectorImpl<MachineInstr *> &DelInstrs);
113	void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs,
114	SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC);
115	std::pair<unsigned, unsigned>
116	getLatenciesForInstrSequences(MachineInstr &MI,
117	SmallVectorImpl<MachineInstr *> &InsInstrs,
118	SmallVectorImpl<MachineInstr *> &DelInstrs,
119	MachineTraceMetrics::Trace BlockTrace);
120
121	void verifyPatternOrder(MachineBasicBlock *MBB, MachineInstr &Root,
122	SmallVector<unsigned, `16`> &Patterns);
123	CombinerObjective getCombinerObjective(unsigned Pattern);
124	};
125	}
126
127	char MachineCombiner::ID = `0`;
128	char &llvm::MachineCombinerID = MachineCombiner::ID;
129
130	INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE,
131	"Machine InstCombiner", false, false)
132	INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
133	INITIALIZE_PASS_DEPENDENCY(MachineTraceMetricsWrapperPass)
134	INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner",
135	false, false)
136
137	void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
138	AU.setPreservesCFG();
139	AU.addPreserved<MachineDominatorTreeWrapperPass>();
140	AU.addRequired<MachineLoopInfoWrapperPass>();
141	AU.addPreserved<MachineLoopInfoWrapperPass>();
142	AU.addRequired<MachineTraceMetricsWrapperPass>();
143	AU.addPreserved<MachineTraceMetricsWrapperPass>();
144	AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
145	AU.addRequired<ProfileSummaryInfoWrapperPass>();
146	MachineFunctionPass::getAnalysisUsage(AU);
147	}
148
149	MachineInstr *
150	MachineCombiner::getOperandDef(const MachineOperand &MO) {
151	MachineInstr DefInstr = nullptr*;
152	// We need a virtual register definition.
153	if (MO.isReg() && MO.getReg().isVirtual())
154	DefInstr = MRI->getUniqueVRegDef(Reg: MO.getReg());
155	return DefInstr;
156	}
157
158	/// Return true if MI is unlikely to generate an actual target instruction.
159	bool MachineCombiner::isTransientMI(const MachineInstr *MI) {
160	if (!MI->isCopy())
161	return MI->isTransient();
162
163	// If MI is a COPY, check if its src and dst registers can be coalesced.
164	Register Dst = MI->getOperand(i: `0`).getReg();
165	Register Src = MI->getOperand(i: `1`).getReg();
166
167	if (!MI->isFullCopy()) {
168	// If src RC contains super registers of dst RC, it can also be coalesced.
169	if (MI->getOperand(i: `0`).getSubReg() \|\| Src.isPhysical() \|\| Dst.isPhysical())
170	return false;
171
172	auto SrcSub = MI->getOperand(i: `1`).getSubReg();
173	auto SrcRC = MRI->getRegClass(Reg: Src);
174	auto DstRC = MRI->getRegClass(Reg: Dst);
175	return TRI->getMatchingSuperRegClass(A: SrcRC, B: DstRC, Idx: SrcSub) != nullptr;
176	}
177
178	if (Src.isPhysical() && Dst.isPhysical())
179	return Src == Dst;
180
181	if (Src.isVirtual() && Dst.isVirtual()) {
182	auto SrcRC = MRI->getRegClass(Reg: Src);
183	auto DstRC = MRI->getRegClass(Reg: Dst);
184	return SrcRC->hasSuperClassEq(RC: DstRC) \|\| SrcRC->hasSubClassEq(RC: DstRC);
185	}
186
187	if (Src.isVirtual())
188	std::swap(a&: Src, b&: Dst);
189
190	// Now Src is physical register, Dst is virtual register.
191	auto DstRC = MRI->getRegClass(Reg: Dst);
192	return DstRC->contains(Reg: Src);
193	}
194
195	/// Computes depth of instructions in vector \InsInstr.
196	///
197	/// \param InsInstrs is a vector of machine instructions
198	/// \param InstrIdxForVirtReg is a dense map of virtual register to index
199	/// of defining machine instruction in \p InsInstrs
200	/// \param BlockTrace is a trace of machine instructions
201	///
202	/// \returns Depth of last instruction in \InsInstrs ("NewRoot")
203	unsigned
204	MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
205	DenseMap<Register, unsigned> &InstrIdxForVirtReg,
206	MachineTraceMetrics::Trace BlockTrace,
207	const MachineBasicBlock &MBB) {
208	SmallVector<unsigned, `16`> InstrDepth;
209	// For each instruction in the new sequence compute the depth based on the
210	// operands. Use the trace information when possible. For new operands which
211	// are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
212	for (auto InstrPtr : InsInstrs) { // for each Use*
213	unsigned IDepth = `0`;
214	for (const MachineOperand &MO : InstrPtr->all_uses()) {
215	// Check for virtual register operand.
216	if (!MO.getReg().isVirtual())
217	continue;
218	unsigned DepthOp = `0`;
219	unsigned LatencyOp = `0`;
220	DenseMap<Register, unsigned>::iterator II =
221	InstrIdxForVirtReg.find(Val: MO.getReg());
222	if (II != InstrIdxForVirtReg.end()) {
223	// Operand is new virtual register not in trace
224	assert(II->second < InstrDepth.size() && "Bad Index");
225	MachineInstr *DefInstr = InsInstrs [II ->second];
226	assert(DefInstr &&
227	"There must be a definition for a new virtual register");
228	DepthOp = InstrDepth [II ->second];
229	int DefIdx =
230	DefInstr->findRegisterDefOperandIdx(Reg: MO.getReg(), /TRI=/nullptr);
231	int UseIdx =
232	InstrPtr->findRegisterUseOperandIdx(Reg: MO.getReg(), /TRI=/nullptr);
233	LatencyOp = TSchedModel.computeOperandLatency(DefMI: DefInstr, DefOperIdx: DefIdx,
234	UseMI: InstrPtr, UseOperIdx: UseIdx);
235	} else {
236	MachineInstr *DefInstr = getOperandDef(MO);
237	if (DefInstr && (TII->getMachineCombinerTraceStrategy() !=
238	MachineTraceStrategy::TS_Local \|\|
239	DefInstr->getParent() == &MBB)) {
240	DepthOp = BlockTrace.getInstrCycles(MI: *DefInstr).Depth;
241	if (!isTransientMI(MI: DefInstr))
242	LatencyOp = TSchedModel.computeOperandLatency(
243	DefMI: DefInstr,
244	DefOperIdx: DefInstr->findRegisterDefOperandIdx(Reg: MO.getReg(),
245	/TRI=/nullptr),
246	UseMI: InstrPtr,
247	UseOperIdx: InstrPtr->findRegisterUseOperandIdx(Reg: MO.getReg(),
248	/TRI=/nullptr));
249	}
250	}
251	IDepth = std::max(a: IDepth, b: DepthOp + LatencyOp);
252	}
253	InstrDepth.push_back(Elt: IDepth);
254	}
255	unsigned NewRootIdx = InsInstrs.size() - `1`;
256	return InstrDepth [NewRootIdx];
257	}
258
259	/// Computes instruction latency as max of latency of defined operands.
260	///
261	/// \param Root is a machine instruction that could be replaced by NewRoot.
262	/// It is used to compute a more accurate latency information for NewRoot in
263	/// case there is a dependent instruction in the same trace (\p BlockTrace)
264	/// \param NewRoot is the instruction for which the latency is computed
265	/// \param BlockTrace is a trace of machine instructions
266	///
267	/// \returns Latency of \p NewRoot
268	unsigned MachineCombiner::getLatency(MachineInstr Root, MachineInstr NewRoot,
269	MachineTraceMetrics::Trace BlockTrace) {
270	// Check each definition in NewRoot and compute the latency
271	unsigned NewRootLatency = `0`;
272
273	for (const MachineOperand &MO : NewRoot->all_defs()) {
274	// Check for virtual register operand.
275	if (!MO.getReg().isVirtual())
276	continue;
277	// Get the first instruction that uses MO
278	MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(RegNo: MO.getReg());
279	RI ++;
280	if (RI == MRI->reg_end())
281	continue;
282	MachineInstr *UseMO = RI ->getParent();
283	unsigned LatencyOp = `0`;
284	if (UseMO && BlockTrace.isDepInTrace(DefMI: Root, UseMI: UseMO)) {
285	LatencyOp = TSchedModel.computeOperandLatency(
286	DefMI: NewRoot,
287	DefOperIdx: NewRoot->findRegisterDefOperandIdx(Reg: MO.getReg(), /TRI=/nullptr),
288	UseMI: UseMO,
289	UseOperIdx: UseMO->findRegisterUseOperandIdx(Reg: MO.getReg(), /TRI=/nullptr));
290	} else {
291	LatencyOp = TSchedModel.computeInstrLatency(MI: NewRoot);
292	}
293	NewRootLatency = std::max(a: NewRootLatency, b: LatencyOp);
294	}
295	return NewRootLatency;
296	}
297
298	CombinerObjective MachineCombiner::getCombinerObjective(unsigned Pattern) {
299	// TODO: If C++ ever gets a real enum class, make this part of the
300	// MachineCombinerPattern class.
301	switch (Pattern) {
302	case MachineCombinerPattern::REASSOC_AX_BY:
303	case MachineCombinerPattern::REASSOC_AX_YB:
304	case MachineCombinerPattern::REASSOC_XA_BY:
305	case MachineCombinerPattern::REASSOC_XA_YB:
306	return CombinerObjective::MustReduceDepth;
307	default:
308	return TII->getCombinerObjective(Pattern);
309	}
310	}
311
312	/// Estimate the latency of the new and original instruction sequence by summing
313	/// up the latencies of the inserted and deleted instructions. This assumes
314	/// that the inserted and deleted instructions are dependent instruction chains,
315	/// which might not hold in all cases.
316	std::pair<unsigned, unsigned> MachineCombiner::getLatenciesForInstrSequences(
317	MachineInstr &MI, SmallVectorImpl<MachineInstr *> &InsInstrs,
318	SmallVectorImpl<MachineInstr *> &DelInstrs,
319	MachineTraceMetrics::Trace BlockTrace) {
320	assert(!InsInstrs.empty() && "Only support sequences that insert instrs.");
321	unsigned NewRootLatency = `0`;
322	// NewRoot is the last instruction in the \p InsInstrs vector.
323	MachineInstr *NewRoot = InsInstrs.back();
324	for (unsigned i = `0`; i < InsInstrs.size() - `1`; i++)
325	NewRootLatency += TSchedModel.computeInstrLatency(MI: InsInstrs [i]);
326	NewRootLatency += getLatency(Root: &MI, NewRoot, BlockTrace);
327
328	unsigned RootLatency = `0`;
329	for (auto *I : DelInstrs)
330	RootLatency += TSchedModel.computeInstrLatency(MI: I);
331
332	return {NewRootLatency, RootLatency};
333	}
334
335	bool MachineCombiner::reduceRegisterPressure(
336	MachineInstr &Root, MachineBasicBlock *MBB,
337	SmallVectorImpl<MachineInstr *> &InsInstrs,
338	SmallVectorImpl<MachineInstr > &DelInstrs, unsigned* Pattern) {
339	// FIXME: for now, we don't do any check for the register pressure patterns.
340	// We treat them as always profitable. But we can do better if we make
341	// RegPressureTracker class be aware of TIE attribute. Then we can get an
342	// accurate compare of register pressure with DelInstrs or InsInstrs.
343	return true;
344	}
345
346	/// The DAGCombine code sequence ends in MI (Machine Instruction) Root.
347	/// The new code sequence ends in MI NewRoot. A necessary condition for the new
348	/// sequence to replace the old sequence is that it cannot lengthen the critical
349	/// path. The definition of "improve" may be restricted by specifying that the
350	/// new path improves the data dependency chain (MustReduceDepth).
351	bool MachineCombiner::improvesCriticalPathLen(
352	MachineBasicBlock MBB, MachineInstr Root,
353	MachineTraceMetrics::Trace BlockTrace,
354	SmallVectorImpl<MachineInstr *> &InsInstrs,
355	SmallVectorImpl<MachineInstr *> &DelInstrs,
356	DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned Pattern,
357	bool SlackIsAccurate) {
358	// Get depth and latency of NewRoot and Root.
359	unsigned NewRootDepth =
360	getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace, MBB: *MBB);
361	unsigned RootDepth = BlockTrace.getInstrCycles(MI: *Root).Depth;
362
363	LLVM_DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: "
364	<< NewRootDepth << "\tRootDepth: " << RootDepth);
365
366	// For a transform such as reassociation, the cost equation is
367	// conservatively calculated so that we must improve the depth (data
368	// dependency cycles) in the critical path to proceed with the transform.
369	// Being conservative also protects against inaccuracies in the underlying
370	// machine trace metrics and CPU models.
371	if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) {
372	LLVM_DEBUG(dbgs() << "\tIt MustReduceDepth ");
373	LLVM_DEBUG(NewRootDepth < RootDepth
374	? dbgs() << "\t and it does it\n"
375	: dbgs() << "\t but it does NOT do it\n");
376	return NewRootDepth < RootDepth;
377	}
378
379	// A more flexible cost calculation for the critical path includes the slack
380	// of the original code sequence. This may allow the transform to proceed
381	// even if the instruction depths (data dependency cycles) become worse.
382
383	// Account for the latency of the inserted and deleted instructions by
384	unsigned NewRootLatency, RootLatency;
385	if (TII->accumulateInstrSeqToRootLatency(Root&: *Root)) {
386	std::tie(args&: NewRootLatency, args&: RootLatency) =
387	getLatenciesForInstrSequences(MI&: *Root, InsInstrs, DelInstrs, BlockTrace);
388	} else {
389	NewRootLatency = TSchedModel.computeInstrLatency(MI: InsInstrs.back());
390	RootLatency = TSchedModel.computeInstrLatency(MI: Root);
391	}
392
393	unsigned RootSlack = BlockTrace.getInstrSlack(MI: *Root);
394	unsigned NewCycleCount = NewRootDepth + NewRootLatency;
395	unsigned OldCycleCount =
396	RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : `0`);
397	LLVM_DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency
398	<< "\tRootLatency: " << RootLatency << "\n\tRootSlack: "
399	<< RootSlack << " SlackIsAccurate=" << SlackIsAccurate
400	<< "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount
401	<< "\n\tRootDepth + RootLatency + RootSlack = "
402	<< OldCycleCount);
403	LLVM_DEBUG(NewCycleCount <= OldCycleCount
404	? dbgs() << "\n\t It IMPROVES PathLen because"
405	: dbgs() << "\n\t It DOES NOT improve PathLen because");
406	LLVM_DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount
407	<< ", OldCycleCount = " << OldCycleCount << "\n");
408
409	return NewCycleCount <= OldCycleCount;
410	}
411
412	/// helper routine to convert instructions into SC
413	void MachineCombiner::instr2instrSC(
414	SmallVectorImpl<MachineInstr *> &Instrs,
415	SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) {
416	for (auto *InstrPtr : Instrs) {
417	unsigned Opc = InstrPtr->getOpcode();
418	unsigned Idx = TII->get(Opcode: Opc).getSchedClass();
419	const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(SchedClassIdx: Idx);
420	InstrsSC.push_back(Elt: SC);
421	}
422	}
423
424	/// True when the new instructions do not increase resource length
425	bool MachineCombiner::preservesResourceLen(
426	MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace,
427	SmallVectorImpl<MachineInstr *> &InsInstrs,
428	SmallVectorImpl<MachineInstr *> &DelInstrs) {
429	if (!TSchedModel.hasInstrSchedModel())
430	return true;
431
432	// Compute current resource length
433
434	//ArrayRef<const MachineBasicBlock > MBBarr(MBB);*
435	SmallVector <const MachineBasicBlock *, `1`> MBBarr;
436	MBBarr.push_back(Elt: MBB);
437	unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(Extrablocks: MBBarr);
438
439	// Deal with SC rather than Instructions.
440	SmallVector<const MCSchedClassDesc *, `16`> InsInstrsSC;
441	SmallVector<const MCSchedClassDesc *, `16`> DelInstrsSC;
442
443	instr2instrSC(Instrs&: InsInstrs, InstrsSC&: InsInstrsSC);
444	instr2instrSC(Instrs&: DelInstrs, InstrsSC&: DelInstrsSC);
445
446	ArrayRef<const MCSchedClassDesc *> MSCInsArr{InsInstrsSC};
447	ArrayRef<const MCSchedClassDesc *> MSCDelArr{DelInstrsSC};
448
449	// Compute new resource length.
450	unsigned ResLenAfterCombine =
451	BlockTrace.getResourceLength(Extrablocks: MBBarr, ExtraInstrs: MSCInsArr, RemoveInstrs: MSCDelArr);
452
453	LLVM_DEBUG(dbgs() << "\t\tResource length before replacement: "
454	<< ResLenBeforeCombine
455	<< " and after: " << ResLenAfterCombine << "\n");
456	LLVM_DEBUG(
457	ResLenAfterCombine <=
458	ResLenBeforeCombine + TII->getExtendResourceLenLimit()
459	? dbgs() << "\t\t As result it IMPROVES/PRESERVES Resource Length\n"
460	: dbgs() << "\t\t As result it DOES NOT improve/preserve Resource "
461	"Length\n");
462
463	return ResLenAfterCombine <=
464	ResLenBeforeCombine + TII->getExtendResourceLenLimit();
465	}
466
467	/// Inserts InsInstrs and deletes DelInstrs. Incrementally updates instruction
468	/// depths if requested.
469	///
470	/// \param MBB basic block to insert instructions in
471	/// \param MI current machine instruction
472	/// \param InsInstrs new instructions to insert in \p MBB
473	/// \param DelInstrs instruction to delete from \p MBB
474	/// \param TraceEnsemble is a pointer to the machine trace information
475	/// \param RegUnits set of live registers, needed to compute instruction depths
476	/// \param TII is target instruction info, used to call target hook
477	/// \param Pattern is used to call target hook finalizeInsInstrs
478	/// \param IncrementalUpdate if true, compute instruction depths incrementally,
479	/// otherwise invalidate the trace
480	static void
481	insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
482	SmallVectorImpl<MachineInstr *> &InsInstrs,
483	SmallVectorImpl<MachineInstr *> &DelInstrs,
484	MachineTraceMetrics::Ensemble *TraceEnsemble,
485	SparseSet<LiveRegUnit> &RegUnits,
486	const TargetInstrInfo TII, unsigned* Pattern,
487	bool IncrementalUpdate) {
488	// If we want to fix up some placeholder for some target, do it now.
489	// We need this because in genAlternativeCodeSequence, we have not decided the
490	// better pattern InsInstrs or DelInstrs, so we don't want generate some
491	// sideeffect to the function. For example we need to delay the constant pool
492	// entry creation here after InsInstrs is selected as better pattern.
493	// Otherwise the constant pool entry created for InsInstrs will not be deleted
494	// even if InsInstrs is not the better pattern.
495	TII->finalizeInsInstrs(Root&: MI, Pattern, InsInstrs);
496
497	for (auto *InstrPtr : InsInstrs)
498	MBB->insert(I: (MachineBasicBlock::iterator)&MI, MI: InstrPtr);
499
500	for (auto *InstrPtr : DelInstrs) {
501	InstrPtr->eraseFromParent();
502	// Erase all LiveRegs defined by the removed instruction
503	for (auto *I = RegUnits.begin(); I != RegUnits.end();) {
504	if (I->MI == InstrPtr)
505	I = RegUnits.erase(I);
506	else
507	I++;
508	}
509	}
510
511	if (IncrementalUpdate)
512	for (auto *InstrPtr : InsInstrs)
513	TraceEnsemble->updateDepth(MBB, *InstrPtr, RegUnits);
514	else
515	TraceEnsemble->invalidate(MBB);
516
517	NumInstCombined ++;
518	}
519
520	// Check that the difference between original and new latency is decreasing for
521	// later patterns. This helps to discover sub-optimal pattern orderings.
522	void MachineCombiner::verifyPatternOrder(MachineBasicBlock *MBB,
523	MachineInstr &Root,
524	SmallVector<unsigned, `16`> &Patterns) {
525	long PrevLatencyDiff = std::numeric_limits<long>::max();
526	(void)PrevLatencyDiff; // Variable is used in assert only.
527	for (auto P : Patterns) {
528	SmallVector<MachineInstr *, `16`> InsInstrs;
529	SmallVector<MachineInstr *, `16`> DelInstrs;
530	DenseMap<Register, unsigned> InstrIdxForVirtReg;
531	TII->genAlternativeCodeSequence(Root, Pattern: P, InsInstrs, DelInstrs,
532	InstIdxForVirtReg&: InstrIdxForVirtReg);
533	// Found pattern, but did not generate alternative sequence.
534	// This can happen e.g. when an immediate could not be materialized
535	// in a single instruction.
536	if (InsInstrs.empty() \|\| !TSchedModel.hasInstrSchedModelOrItineraries())
537	continue;
538
539	unsigned NewRootLatency, RootLatency;
540	std::tie(args&: NewRootLatency, args&: RootLatency) = getLatenciesForInstrSequences(
541	MI&: Root, InsInstrs, DelInstrs, BlockTrace: TraceEnsemble->getTrace(MBB));
542	long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
543	assert(CurrentLatencyDiff <= PrevLatencyDiff &&
544	"Current pattern is better than previous pattern.");
545	PrevLatencyDiff = CurrentLatencyDiff;
546	}
547	}
548
549	/// Substitute a slow code sequence with a faster one by
550	/// evaluating instruction combining pattern.
551	/// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
552	/// combining based on machine trace metrics. Only combine a sequence of
553	/// instructions when this neither lengthens the critical path nor increases
554	/// resource pressure. When optimizing for codesize always combine when the new
555	/// sequence is shorter.
556	bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
557	bool Changed = false;
558	LLVM_DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
559
560	bool IncrementalUpdate = false;
561	auto BlockIter = MBB->begin();
562	decltype(BlockIter) LastUpdate;
563	// Check if the block is in a loop.
564	const MachineLoop *ML = MLI->getLoopFor(BB: MBB);
565	if (!TraceEnsemble)
566	TraceEnsemble = Traces->getEnsemble(TII->getMachineCombinerTraceStrategy());
567
568	SparseSet<LiveRegUnit> RegUnits;
569	RegUnits.setUniverse(TRI->getNumRegUnits());
570
571	bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
572
573	bool DoRegPressureReduce =
574	TII->shouldReduceRegisterPressure(MBB, RegClassInfo: &RegClassInfo);
575
576	while (BlockIter != MBB->end()) {
577	auto &MI = *BlockIter ++;
578	SmallVector<unsigned, `16`> Patterns;
579	// The motivating example is:
580	//
581	// MUL Other MUL_op1 MUL_op2 Other
582	// \ / \ \| /
583	// ADD/SUB => MADD/MSUB
584	// (=Root) (=NewRoot)
585
586	// The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is
587	// usually beneficial for code size it unfortunately can hurt performance
588	// when the ADD is on the critical path, but the MUL is not. With the
589	// substitution the MUL becomes part of the critical path (in form of the
590	// MADD) and can lengthen it on architectures where the MADD latency is
591	// longer than the ADD latency.
592	//
593	// For each instruction we check if it can be the root of a combiner
594	// pattern. Then for each pattern the new code sequence in form of MI is
595	// generated and evaluated. When the efficiency criteria (don't lengthen
596	// critical path, don't use more resources) is met the new sequence gets
597	// hooked up into the basic block before the old sequence is removed.
598	//
599	// The algorithm does not try to evaluate all patterns and pick the best.
600	// This is only an artificial restriction though. In practice there is
601	// mostly one pattern, and getMachineCombinerPatterns() can order patterns
602	// based on an internal cost heuristic. If
603	// machine-combiner-verify-pattern-order is enabled, all patterns are
604	// checked to ensure later patterns do not provide better latency savings.
605
606	if (!TII->getMachineCombinerPatterns(Root&: MI, Patterns, DoRegPressureReduce))
607	continue;
608
609	if (VerifyPatternOrder)
610	verifyPatternOrder(MBB, Root&: MI, Patterns);
611
612	for (const auto P : Patterns) {
613	SmallVector<MachineInstr *, `16`> InsInstrs;
614	SmallVector<MachineInstr *, `16`> DelInstrs;
615	DenseMap<Register, unsigned> InstrIdxForVirtReg;
616	TII->genAlternativeCodeSequence(Root&: MI, Pattern: P, InsInstrs, DelInstrs,
617	InstIdxForVirtReg&: InstrIdxForVirtReg);
618	// Found pattern, but did not generate alternative sequence.
619	// This can happen e.g. when an immediate could not be materialized
620	// in a single instruction.
621	if (InsInstrs.empty())
622	continue;
623
624	LLVM_DEBUG(if (dump_intrs) {
625	dbgs() << "\tFor the Pattern (" << (int)P
626	<< ") these instructions could be removed\n";
627	for (auto const *InstrPtr : DelInstrs)
628	InstrPtr->print(dbgs(), /IsStandalone/false, /SkipOpers/false,
629	/SkipDebugLoc/false, /AddNewLine/true, TII);
630	dbgs() << "\tThese instructions could replace the removed ones\n";
631	for (auto const *InstrPtr : InsInstrs)
632	InstrPtr->print(dbgs(), /IsStandalone/false, /SkipOpers/false,
633	/SkipDebugLoc/false, /AddNewLine/true, TII);
634	});
635
636	if (IncrementalUpdate && LastUpdate != BlockIter) {
637	// Update depths since the last incremental update.
638	TraceEnsemble->updateDepths(Start: LastUpdate, End: BlockIter, RegUnits);
639	LastUpdate = BlockIter;
640	}
641
642	if (DoRegPressureReduce &&
643	getCombinerObjective(Pattern: P) ==
644	CombinerObjective::MustReduceRegisterPressure) {
645	if (MBB->size() > inc_threshold) {
646	// Use incremental depth updates for basic blocks above threshold
647	IncrementalUpdate = true;
648	LastUpdate = BlockIter;
649	}
650	if (reduceRegisterPressure(Root&: MI, MBB, InsInstrs, DelInstrs, Pattern: P)) {
651	// Replace DelInstrs with InsInstrs.
652	insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
653	RegUnits, TII, Pattern: P, IncrementalUpdate);
654	Changed \|= true;
655
656	// Go back to previous instruction as it may have ILP reassociation
657	// opportunity.
658	BlockIter --;
659	break;
660	}
661	}
662
663	if (ML && TII->isThroughputPattern(Pattern: P)) {
664	LLVM_DEBUG(dbgs() << "\t Replacing due to throughput pattern in loop\n");
665	insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
666	RegUnits, TII, Pattern: P, IncrementalUpdate);
667	// Eagerly stop after the first pattern fires.
668	Changed = true;
669	break;
670	} else if (OptForSize && InsInstrs.size() < DelInstrs.size()) {
671	LLVM_DEBUG(dbgs() << "\t Replacing due to OptForSize ("
672	<< InsInstrs.size() << " < "
673	<< DelInstrs.size() << ")\n");
674	insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
675	RegUnits, TII, Pattern: P, IncrementalUpdate);
676	// Eagerly stop after the first pattern fires.
677	Changed = true;
678	break;
679	} else {
680	// For big basic blocks, we only compute the full trace the first time
681	// we hit this. We do not invalidate the trace, but instead update the
682	// instruction depths incrementally.
683	// NOTE: Only the instruction depths up to MI are accurate. All other
684	// trace information is not updated.
685	MachineTraceMetrics::Trace BlockTrace = TraceEnsemble->getTrace(MBB);
686	Traces->verifyAnalysis();
687	if (improvesCriticalPathLen(MBB, Root: &MI, BlockTrace, InsInstrs, DelInstrs,
688	InstrIdxForVirtReg, Pattern: P,
689	SlackIsAccurate: !IncrementalUpdate) &&
690	preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
691	if (MBB->size() > inc_threshold) {
692	// Use incremental depth updates for basic blocks above treshold
693	IncrementalUpdate = true;
694	LastUpdate = BlockIter;
695	}
696
697	insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
698	RegUnits, TII, Pattern: P, IncrementalUpdate);
699
700	// Eagerly stop after the first pattern fires.
701	Changed = true;
702	break;
703	}
704	// Cleanup instructions of the alternative code sequence. There is no
705	// use for them.
706	MachineFunction *MF = MBB->getParent();
707	for (auto *InstrPtr : InsInstrs)
708	MF->deleteMachineInstr(MI: InstrPtr);
709	}
710	InstrIdxForVirtReg.clear();
711	}
712	}
713
714	if (Changed && IncrementalUpdate)
715	Traces->invalidate(MBB);
716	return Changed;
717	}
718
719	bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
720	STI = &MF.getSubtarget();
721	TII = STI->getInstrInfo();
722	TRI = STI->getRegisterInfo();
723	SchedModel = STI->getSchedModel();
724	TSchedModel.init(TSInfo: STI);
725	MRI = &MF.getRegInfo();
726	MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
727	Traces = &getAnalysis<MachineTraceMetricsWrapperPass>().getMTM();
728	PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
729	MBFI = (PSI && PSI->hasProfileSummary()) ?
730	&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
731	nullptr;
732	TraceEnsemble = nullptr;
733	RegClassInfo.runOnMachineFunction(MF);
734
735	LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << `'\n'`);
736	if (!TII->useMachineCombiner()) {
737	LLVM_DEBUG(
738	dbgs()
739	<< " Skipping pass: Target does not support machine combiner\n");
740	return false;
741	}
742
743	bool Changed = false;
744
745	// Try to combine instructions.
746	for (auto &MBB : MF)
747	Changed \|= combineInstructions(MBB: &MBB);
748
749	return Changed;
750	}
751

Browse the source code of llvm_projects/llvm/lib/CodeGen/MachineCombiner.cpp