ShrinkWrap.cpp source code [llvm_projects/llvm/lib/CodeGen/ShrinkWrap.cpp]

1	//===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
// This pass looks for safe points where the prologue and epilogue can be
// inserted.
11	// The safe point for the prologue (resp. epilogue) is called Save
12	// (resp. Restore).
// A point is safe for prologue (resp. epilogue) if and only if
// 1) it dominates (resp. post-dominates) all the frame related operations and
// 2) between two executions of the Save (resp. Restore) point there is an
// execution of the Restore (resp. Save) point.
17	//
18	// For instance, the following points are safe:
19	// for (int i = 0; i < 10; ++i) {
20	// Save
21	// ...
22	// Restore
23	// }
24	// Indeed, the execution looks like Save -> Restore -> Save -> Restore ...
25	// And the following points are not:
26	// for (int i = 0; i < 10; ++i) {
27	// Save
28	// ...
29	// }
30	// for (int i = 0; i < 10; ++i) {
31	// ...
32	// Restore
33	// }
34	// Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore.
35	//
// This pass also ensures that the safe points are 3) cheaper than the regular
// entry and exit blocks.
38	//
39	// Property #1 is ensured via the use of MachineDominatorTree and
40	// MachinePostDominatorTree.
41	// Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both
42	// points must be in the same loop.
43	// Property #3 is ensured via the MachineBlockFrequencyInfo.
44	//
45	// If this pass found points matching all these properties, then
46	// MachineFrameInfo is updated with this information.
47	//
48	//===----------------------------------------------------------------------===//
49
50	#include "llvm/CodeGen/ShrinkWrap.h"
51	#include "llvm/ADT/BitVector.h"
52	#include "llvm/ADT/PostOrderIterator.h"
53	#include "llvm/ADT/SetVector.h"
54	#include "llvm/ADT/SmallVector.h"
55	#include "llvm/ADT/Statistic.h"
56	#include "llvm/Analysis/CFG.h"
57	#include "llvm/Analysis/ValueTracking.h"
58	#include "llvm/CodeGen/MachineBasicBlock.h"
59	#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
60	#include "llvm/CodeGen/MachineDominators.h"
61	#include "llvm/CodeGen/MachineFrameInfo.h"
62	#include "llvm/CodeGen/MachineFunction.h"
63	#include "llvm/CodeGen/MachineFunctionPass.h"
64	#include "llvm/CodeGen/MachineInstr.h"
65	#include "llvm/CodeGen/MachineLoopInfo.h"
66	#include "llvm/CodeGen/MachineOperand.h"
67	#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
68	#include "llvm/CodeGen/MachinePostDominators.h"
69	#include "llvm/CodeGen/RegisterClassInfo.h"
70	#include "llvm/CodeGen/RegisterScavenging.h"
71	#include "llvm/CodeGen/TargetFrameLowering.h"
72	#include "llvm/CodeGen/TargetInstrInfo.h"
73	#include "llvm/CodeGen/TargetLowering.h"
74	#include "llvm/CodeGen/TargetRegisterInfo.h"
75	#include "llvm/CodeGen/TargetSubtargetInfo.h"
76	#include "llvm/IR/Attributes.h"
77	#include "llvm/IR/Function.h"
78	#include "llvm/InitializePasses.h"
79	#include "llvm/MC/MCAsmInfo.h"
80	#include "llvm/Pass.h"
81	#include "llvm/Support/CommandLine.h"
82	#include "llvm/Support/Debug.h"
83	#include "llvm/Support/ErrorHandling.h"
84	#include "llvm/Support/raw_ostream.h"
85	#include "llvm/Target/TargetMachine.h"
86	#include <cassert>
87	#include <memory>
88
89	using namespace llvm;
90
91	#define DEBUG_TYPE "shrink-wrap"
92
93	STATISTIC(NumFunc, "Number of functions");
94	STATISTIC(NumCandidates, "Number of shrink-wrapping candidates");
95	STATISTIC(NumCandidatesDropped,
96	"Number of shrink-wrapping candidates dropped because of frequency");
97
98	static cl::opt<cl::boolOrDefault>
99	EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
100	cl::desc ("enable the shrink-wrapping pass"));
101	static cl::opt<bool> EnablePostShrinkWrapOpt(
102	"enable-shrink-wrap-region-split", cl::init(Val: true), cl::Hidden,
103	cl::desc ("enable splitting of the restore block if possible"));
104
105	namespace {
106
107	/// Class to determine where the safe point to insert the
108	/// prologue and epilogue are.
109	/// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the
110	/// shrink-wrapping term for prologue/epilogue placement, this pass
111	/// does not rely on expensive data-flow analysis. Instead we use the
112	/// dominance properties and loop information to decide which point
113	/// are safe for such insertion.
114	class ShrinkWrapImpl {
115	/// Hold callee-saved information.
116	RegisterClassInfo RCI;
117	MachineDominatorTree MDT = nullptr*;
118	MachinePostDominatorTree MPDT = nullptr*;
119
120	/// Current safe point found for the prologue.
121	/// The prologue will be inserted before the first instruction
122	/// in this basic block.
123	MachineBasicBlock Save = nullptr*;
124
125	/// Current safe point found for the epilogue.
126	/// The epilogue will be inserted before the first terminator instruction
127	/// in this basic block.
128	MachineBasicBlock Restore = nullptr*;
129
130	/// Hold the information of the basic block frequency.
131	/// Use to check the profitability of the new points.
132	MachineBlockFrequencyInfo MBFI = nullptr*;
133
134	/// Hold the loop information. Used to determine if Save and Restore
135	/// are in the same loop.
136	MachineLoopInfo MLI = nullptr*;
137
138	// Emit remarks.
139	MachineOptimizationRemarkEmitter ORE = nullptr*;
140
141	/// Frequency of the Entry block.
142	BlockFrequency EntryFreq;
143
144	/// Current opcode for frame setup.
145	unsigned FrameSetupOpcode = ~`0u`;
146
147	/// Current opcode for frame destroy.
148	unsigned FrameDestroyOpcode = ~`0u`;
149
150	/// Stack pointer register, used by llvm.{savestack,restorestack}
151	Register SP;
152
153	/// Entry block.
154	const MachineBasicBlock Entry = nullptr*;
155
156	using SetOfRegs = SmallSetVector<unsigned, `16`>;
157
158	/// Registers that need to be saved for the current function.
159	mutable SetOfRegs CurrentCSRs;
160
161	/// Current MachineFunction.
162	MachineFunction MachineFunc = nullptr*;
163
164	/// Is `true` for the block numbers where we assume possible stack accesses
165	/// or computation of stack-relative addresses on any CFG path including the
166	/// block itself. Is `false` for basic blocks where we can guarantee the
167	/// opposite. False positives won't lead to incorrect analysis results,
168	/// therefore this approach is fair.
169	BitVector StackAddressUsedBlockInfo;
170
171	/// Check if \p MI uses or defines a callee-saved register or
172	/// a frame index. If this is the case, this means \p MI must happen
173	/// after Save and before Restore.
174	bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
175	bool StackAddressUsed) const;
176
177	const SetOfRegs &getCurrentCSRs(RegScavenger RS) const* {
178	if (CurrentCSRs.empty()) {
179	BitVector SavedRegs;
180	const TargetFrameLowering *TFI =
181	MachineFunc->getSubtarget().getFrameLowering();
182
183	TFI->determineCalleeSaves(MF&: *MachineFunc, SavedRegs, RS);
184
185	for (int Reg = SavedRegs.find_first(); Reg != -`1`;
186	Reg = SavedRegs.find_next(Prev: Reg))
187	CurrentCSRs.insert(X: (unsigned)Reg);
188	}
189	return CurrentCSRs;
190	}
191
192	/// Update the Save and Restore points such that \p MBB is in
193	/// the region that is dominated by Save and post-dominated by Restore
194	/// and Save and Restore still match the safe point definition.
195	/// Such point may not exist and Save and/or Restore may be null after
196	/// this call.
197	void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS);
198
199	// Try to find safe point based on dominance and block frequency without
200	// any change in IR.
201	bool performShrinkWrapping(
202	const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
203	RegScavenger *RS);
204
205	/// This function tries to split the restore point if doing so can shrink the
206	/// save point further. \return True if restore point is split.
207	bool postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
208	RegScavenger *RS);
209
210	/// This function analyzes if the restore point can split to create a new
211	/// restore point. This function collects
212	/// 1. Any preds of current restore that are reachable by callee save/FI
213	/// blocks
214	/// - indicated by DirtyPreds
215	/// 2. Any preds of current restore that are not DirtyPreds - indicated by
216	/// CleanPreds
217	/// Both sets should be non-empty for considering restore point split.
218	bool checkIfRestoreSplittable(
219	const MachineBasicBlock *CurRestore,
220	const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
221	SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
222	SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
223	const TargetInstrInfo TII, RegScavenger RS);
224
225	/// Initialize the pass for \p MF.
226	void init(MachineFunction &MF) {
227	RCI.runOnMachineFunction(MF);
228	Save = nullptr;
229	Restore = nullptr;
230	EntryFreq = MBFI->getEntryFreq();
231	const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
232	const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
233	FrameSetupOpcode = TII.getCallFrameSetupOpcode();
234	FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
235	SP = Subtarget.getTargetLowering()->getStackPointerRegisterToSaveRestore();
236	Entry = &MF.front();
237	CurrentCSRs.clear();
238	MachineFunc = &MF;
239
240	++NumFunc;
241	}
242
243	/// Check whether or not Save and Restore points are still interesting for
244	/// shrink-wrapping.
245	bool ArePointsInteresting() const { return Save != Entry && Save && Restore; }
246
247	public:
248	ShrinkWrapImpl(MachineDominatorTree MDT, MachinePostDominatorTree MPDT,
249	MachineBlockFrequencyInfo MBFI, MachineLoopInfo MLI,
250	MachineOptimizationRemarkEmitter *ORE)
251	: MDT(MDT), MPDT(MPDT), MBFI(MBFI), MLI(MLI), ORE(ORE) {}
252
253	/// Check if shrink wrapping is enabled for this target and function.
254	static bool isShrinkWrapEnabled(const MachineFunction &MF);
255
256	bool run(MachineFunction &MF);
257	};
258
259	class ShrinkWrapLegacy : public MachineFunctionPass {
260	public:
261	static char ID;
262
263	ShrinkWrapLegacy() : MachineFunctionPass (ID) {
264	initializeShrinkWrapLegacyPass(*PassRegistry::getPassRegistry());
265	}
266
267	void getAnalysisUsage(AnalysisUsage &AU) const override {
268	AU.setPreservesAll();
269	AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
270	AU.addRequired<MachineDominatorTreeWrapperPass>();
271	AU.addRequired<MachinePostDominatorTreeWrapperPass>();
272	AU.addRequired<MachineLoopInfoWrapperPass>();
273	AU.addRequired<MachineOptimizationRemarkEmitterPass>();
274	MachineFunctionPass::getAnalysisUsage(AU);
275	}
276
277	MachineFunctionProperties getRequiredProperties() const override {
278	return MachineFunctionProperties ().setNoVRegs();
279	}
280
281	StringRef getPassName() const override { return "Shrink Wrapping analysis"; }
282
283	/// Perform the shrink-wrapping analysis and update
284	/// the MachineFrameInfo attached to \p MF with the results.
285	bool runOnMachineFunction(MachineFunction &MF) override;
286	};
287
288	} // end anonymous namespace
289
290	char ShrinkWrapLegacy::ID = `0`;
291
292	char &llvm::ShrinkWrapID = ShrinkWrapLegacy::ID;
293
294	INITIALIZE_PASS_BEGIN(ShrinkWrapLegacy, DEBUG_TYPE, "Shrink Wrap Pass", false,
295	false)
296	INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
297	INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
298	INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
299	INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
300	INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
301	INITIALIZE_PASS_END(ShrinkWrapLegacy, DEBUG_TYPE, "Shrink Wrap Pass", false,
302	false)
303
304	bool ShrinkWrapImpl::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
305	bool StackAddressUsed) const {
306	/// Check if \p Op is known to access an address not on the function's stack .
307	/// At the moment, accesses where the underlying object is a global, function
308	/// argument, or jump table are considered non-stack accesses. Note that the
309	/// caller's stack may get accessed when passing an argument via the stack,
310	/// but not the stack of the current function.
311	///
312	auto IsKnownNonStackPtr = [](MachineMemOperand *Op) {
313	if (Op->getValue()) {
314	const Value *UO = getUnderlyingObject(V: Op->getValue());
315	if (!UO)
316	return false;
317	if (auto *Arg = dyn_cast<Argument>(Val: UO))
318	return !Arg->hasPassPointeeByValueCopyAttr();
319	return isa<GlobalValue>(Val: UO);
320	}
321	if (const PseudoSourceValue *PSV = Op->getPseudoValue())
322	return PSV->isJumpTable();
323	return false;
324	};
325	// Load/store operations may access the stack indirectly when we previously
326	// computed an address to a stack location.
327	if (StackAddressUsed && MI.mayLoadOrStore() &&
328	(MI.isCall() \|\| MI.hasUnmodeledSideEffects() \|\| MI.memoperands_empty() \|\|
329	!all_of(Range: MI.memoperands(), P: IsKnownNonStackPtr)))
330	return true;
331
332	if (MI.getOpcode() == FrameSetupOpcode \|\|
333	MI.getOpcode() == FrameDestroyOpcode) {
334	LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << `'\n'`);
335	return true;
336	}
337	const MachineFunction *MF = MI.getParent()->getParent();
338	const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
339	for (const MachineOperand &MO : MI.operands()) {
340	bool UseOrDefCSR = false;
341	if (MO.isReg()) {
342	// Ignore instructions like DBG_VALUE which don't read/def the register.
343	if (!MO.isDef() && !MO.readsReg())
344	continue;
345	Register PhysReg = MO.getReg();
346	if (!PhysReg)
347	continue;
348	assert(PhysReg.isPhysical() && "Unallocated register?!");
349	// The stack pointer is not normally described as a callee-saved register
350	// in calling convention definitions, so we need to watch for it
351	// separately. An SP mentioned by a call instruction, we can ignore,
352	// though, as it's harmless and we do not want to effectively disable tail
353	// calls by forcing the restore point to post-dominate them.
354	// PPC's LR is also not normally described as a callee-saved register in
355	// calling convention definitions, so we need to watch for it, too. An LR
356	// mentioned implicitly by a return (or "branch to link register")
357	// instruction we can ignore, otherwise we may pessimize shrinkwrapping.
358	// PPC's Frame pointer (FP) is also not described as a callee-saved
359	// register. Until the FP is assigned a Physical Register PPC's FP needs
360	// to be checked separately.
361	UseOrDefCSR = (!MI.isCall() && PhysReg == SP) \|\|
362	RCI.getLastCalleeSavedAlias(PhysReg) \|\|
363	(!MI.isReturn() &&
364	TRI->isNonallocatableRegisterCalleeSave(Reg: PhysReg)) \|\|
365	TRI->isVirtualFrameRegister(Reg: PhysReg);
366	} else if (MO.isRegMask()) {
367	// Check if this regmask clobbers any of the CSRs.
368	for (unsigned Reg : getCurrentCSRs(RS)) {
369	if (MO.clobbersPhysReg(PhysReg: Reg)) {
370	UseOrDefCSR = true;
371	break;
372	}
373	}
374	}
375	// Skip FrameIndex operands in DBG_VALUE instructions.
376	if (UseOrDefCSR \|\| (MO.isFI() && !MI.isDebugValue())) {
377	LLVM_DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI("
378	<< MO.isFI() << "): " << MI << `'\n'`);
379	return true;
380	}
381	}
382	return false;
383	}
384
385	/// Helper function to find the immediate (post) dominator.
386	template <typename ListOfBBs, typename DominanceAnalysis>
387	static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
388	DominanceAnalysis &Dom, bool Strict = true) {
389	MachineBasicBlock *IDom = Dom.findNearestCommonDominator(iterator_range(BBs));
390	if (Strict && IDom == &Block)
391	return nullptr;
392	return IDom;
393	}
394
395	static bool isAnalyzableBB(const TargetInstrInfo &TII,
396	MachineBasicBlock &Entry) {
397	// Check if the block is analyzable.
398	MachineBasicBlock TBB = nullptr, FBB = nullptr;
399	SmallVector<MachineOperand, `4`> Cond;
400	return !TII.analyzeBranch(MBB&: Entry, TBB, FBB, Cond);
401	}
402
403	/// Determines if any predecessor of MBB is on the path from block that has use
404	/// or def of CSRs/FI to MBB.
405	/// ReachableByDirty: All blocks reachable from block that has use or def of
406	/// CSR/FI.
407	static bool
408	hasDirtyPred(const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
409	const MachineBasicBlock &MBB) {
410	for (const MachineBasicBlock *PredBB : MBB.predecessors())
411	if (ReachableByDirty.count(V: PredBB))
412	return true;
413	return false;
414	}
415
416	/// Derives the list of all the basic blocks reachable from MBB.
417	static void markAllReachable(DenseSet<const MachineBasicBlock *> &Visited,
418	const MachineBasicBlock &MBB) {
419	SmallVector<MachineBasicBlock *, `4`> Worklist(MBB.successors());
420	Visited.insert(V: &MBB);
421	while (!Worklist.empty()) {
422	MachineBasicBlock *SuccMBB = Worklist.pop_back_val();
423	if (!Visited.insert(V: SuccMBB).second)
424	continue;
425	Worklist.append(in_start: SuccMBB->succ_begin(), in_end: SuccMBB->succ_end());
426	}
427	}
428
429	/// Collect blocks reachable by use or def of CSRs/FI.
430	static void collectBlocksReachableByDirty(
431	const DenseSet<const MachineBasicBlock *> &DirtyBBs,
432	DenseSet<const MachineBasicBlock *> &ReachableByDirty) {
433	for (const MachineBasicBlock *MBB : DirtyBBs) {
434	if (ReachableByDirty.count(V: MBB))
435	continue;
436	// Mark all offsprings as reachable.
437	markAllReachable(Visited&: ReachableByDirty, MBB: *MBB);
438	}
439	}
440
441	/// \return true if there is a clean path from SavePoint to the original
442	/// Restore.
443	static bool
444	isSaveReachableThroughClean(const MachineBasicBlock *SavePoint,
445	ArrayRef<MachineBasicBlock *> CleanPreds) {
446	DenseSet<const MachineBasicBlock *> Visited;
447	SmallVector<MachineBasicBlock *, `4`> Worklist(CleanPreds);
448	while (!Worklist.empty()) {
449	MachineBasicBlock *CleanBB = Worklist.pop_back_val();
450	if (CleanBB == SavePoint)
451	return true;
452	if (!Visited.insert(V: CleanBB).second \|\| !CleanBB->pred_size())
453	continue;
454	Worklist.append(in_start: CleanBB->pred_begin(), in_end: CleanBB->pred_end());
455	}
456	return false;
457	}
458
459	/// This function updates the branches post restore point split.
460	///
461	/// Restore point has been split.
462	/// Old restore point: MBB
463	/// New restore point: NMBB
464	/// Any basic block(say BBToUpdate) which had a fallthrough to MBB
465	/// previously should
466	/// 1. Fallthrough to NMBB iff NMBB is inserted immediately above MBB in the
467	/// block layout OR
468	/// 2. Branch unconditionally to NMBB iff NMBB is inserted at any other place.
469	static void updateTerminator(MachineBasicBlock *BBToUpdate,
470	MachineBasicBlock *NMBB,
471	const TargetInstrInfo *TII) {
472	DebugLoc DL = BBToUpdate->findBranchDebugLoc();
473	// if NMBB isn't the new layout successor for BBToUpdate, insert unconditional
474	// branch to it
475	if (!BBToUpdate->isLayoutSuccessor(MBB: NMBB))
476	TII->insertUnconditionalBranch(MBB&: *BBToUpdate, DestBB: NMBB, DL);
477	}
478
479	/// This function splits the restore point and returns new restore point/BB.
480	///
481	/// DirtyPreds: Predessors of \p MBB that are ReachableByDirty
482	///
483	/// Decision has been made to split the restore point.
484	/// old restore point: \p MBB
485	/// new restore point: \p NMBB
486	/// This function makes the necessary block layout changes so that
487	/// 1. \p NMBB points to \p MBB unconditionally
488	/// 2. All dirtyPreds that previously pointed to \p MBB point to \p NMBB
489	static MachineBasicBlock *
490	tryToSplitRestore(MachineBasicBlock *MBB,
491	ArrayRef<MachineBasicBlock *> DirtyPreds,
492	const TargetInstrInfo *TII) {
493	MachineFunction *MF = MBB->getParent();
494
495	// get the list of DirtyPreds who have a fallthrough to MBB
496	// before the block layout change. This is just to ensure that if the NMBB is
497	// inserted after MBB, then we create unconditional branch from
498	// DirtyPred/CleanPred to NMBB
499	SmallPtrSet<MachineBasicBlock *, `8`> MBBFallthrough;
500	for (MachineBasicBlock *BB : DirtyPreds)
501	if (BB->getFallThrough(JumpToFallThrough: false) == MBB)
502	MBBFallthrough.insert(Ptr: BB);
503
504	MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
505	// Insert this block at the end of the function. Inserting in between may
506	// interfere with control flow optimizer decisions.
507	MF->insert(MBBI: MF->end(), MBB: NMBB);
508
509	for (const MachineBasicBlock::RegisterMaskPair &LI : MBB->liveins())
510	NMBB->addLiveIn(PhysReg: LI.PhysReg);
511
512	TII->insertUnconditionalBranch(MBB&: *NMBB, DestBB: MBB, DL: DebugLoc ());
513
514	// After splitting, all predecessors of the restore point should be dirty
515	// blocks.
516	for (MachineBasicBlock *SuccBB : DirtyPreds)
517	SuccBB->ReplaceUsesOfBlockWith(Old: MBB, New: NMBB);
518
519	NMBB->addSuccessor(Succ: MBB);
520
521	for (MachineBasicBlock *BBToUpdate : MBBFallthrough)
522	updateTerminator(BBToUpdate, NMBB, TII);
523
524	return NMBB;
525	}
526
527	/// This function undoes the restore point split done earlier.
528	///
529	/// DirtyPreds: All predecessors of \p NMBB that are ReachableByDirty.
530	///
531	/// Restore point was split and the change needs to be unrolled. Make necessary
532	/// changes to reset restore point from \p NMBB to \p MBB.
533	static void rollbackRestoreSplit(MachineFunction &MF, MachineBasicBlock *NMBB,
534	MachineBasicBlock *MBB,
535	ArrayRef<MachineBasicBlock *> DirtyPreds,
536	const TargetInstrInfo *TII) {
537	// For a BB, if NMBB is fallthrough in the current layout, then in the new
538	// layout a. BB should fallthrough to MBB OR b. BB should undconditionally
539	// branch to MBB
540	SmallPtrSet<MachineBasicBlock *, `8`> NMBBFallthrough;
541	for (MachineBasicBlock *BB : DirtyPreds)
542	if (BB->getFallThrough(JumpToFallThrough: false) == NMBB)
543	NMBBFallthrough.insert(Ptr: BB);
544
545	NMBB->removeSuccessor(Succ: MBB);
546	for (MachineBasicBlock *SuccBB : DirtyPreds)
547	SuccBB->ReplaceUsesOfBlockWith(Old: NMBB, New: MBB);
548
549	NMBB->erase(I: NMBB->begin(), E: NMBB->end());
550	NMBB->eraseFromParent();
551
552	for (MachineBasicBlock *BBToUpdate : NMBBFallthrough)
553	updateTerminator(BBToUpdate, NMBB: MBB, TII);
554	}
555
556	// A block is deemed fit for restore point split iff there exist
557	// 1. DirtyPreds - preds of CurRestore reachable from use or def of CSR/FI
558	// 2. CleanPreds - preds of CurRestore that arent DirtyPreds
559	bool ShrinkWrapImpl::checkIfRestoreSplittable(
560	const MachineBasicBlock *CurRestore,
561	const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
562	SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
563	SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
564	const TargetInstrInfo TII, RegScavenger RS) {
565	for (const MachineInstr &MI : *CurRestore)
566	if (useOrDefCSROrFI(MI, RS, /StackAddressUsed=/true))
567	return false;
568
569	for (MachineBasicBlock *PredBB : CurRestore->predecessors()) {
570	if (!isAnalyzableBB(TII: TII, Entry&: PredBB))
571	return false;
572
573	if (ReachableByDirty.count(V: PredBB))
574	DirtyPreds.push_back(Elt: PredBB);
575	else
576	CleanPreds.push_back(Elt: PredBB);
577	}
578
579	return !(CleanPreds.empty() \|\| DirtyPreds.empty());
580	}
581
582	bool ShrinkWrapImpl::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
583	RegScavenger *RS) {
584	if (!EnablePostShrinkWrapOpt)
585	return false;
586
587	MachineBasicBlock InitSave = nullptr*;
588	MachineBasicBlock InitRestore = nullptr*;
589
590	if (HasCandidate) {
591	InitSave = Save;
592	InitRestore = Restore;
593	} else {
594	InitRestore = nullptr;
595	InitSave = &MF.front();
596	for (MachineBasicBlock &MBB : MF) {
597	if (MBB.isEHFuncletEntry())
598	return false;
599	if (MBB.isReturnBlock()) {
600	// Do not support multiple restore points.
601	if (InitRestore)
602	return false;
603	InitRestore = &MBB;
604	}
605	}
606	}
607
608	if (!InitSave \|\| !InitRestore \|\| InitRestore == InitSave \|\|
609	!MDT->dominates(A: InitSave, B: InitRestore) \|\|
610	!MPDT->dominates(A: InitRestore, B: InitSave))
611	return false;
612
613	// Bail out of the optimization if any of the basic block is target of
614	// INLINEASM_BR instruction
615	for (MachineBasicBlock &MBB : MF)
616	if (MBB.isInlineAsmBrIndirectTarget())
617	return false;
618
619	DenseSet<const MachineBasicBlock *> DirtyBBs;
620	for (MachineBasicBlock &MBB : MF) {
621	if (MBB.isEHPad()) {
622	DirtyBBs.insert(V: &MBB);
623	continue;
624	}
625	for (const MachineInstr &MI : MBB)
626	if (useOrDefCSROrFI(MI, RS, /StackAddressUsed=/true)) {
627	DirtyBBs.insert(V: &MBB);
628	break;
629	}
630	}
631
632	// Find blocks reachable from the use or def of CSRs/FI.
633	DenseSet<const MachineBasicBlock *> ReachableByDirty;
634	collectBlocksReachableByDirty(DirtyBBs, ReachableByDirty);
635
636	const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
637	SmallVector<MachineBasicBlock *, `2`> DirtyPreds;
638	SmallVector<MachineBasicBlock *, `2`> CleanPreds;
639	if (!checkIfRestoreSplittable(CurRestore: InitRestore, ReachableByDirty, DirtyPreds,
640	CleanPreds, TII, RS))
641	return false;
642
643	// Trying to reach out to the new save point which dominates all dirty blocks.
644	MachineBasicBlock *NewSave =
645	FindIDom<>(Block&: *DirtyPreds.begin(), BBs: DirtyPreds, Dom&: MDT, Strict: false);
646
647	while (NewSave && (hasDirtyPred(ReachableByDirty, MBB: *NewSave) \|\|
648	EntryFreq < MBFI->getBlockFreq(MBB: NewSave) \|\|
649	/Entry freq has been observed more than a loop block in*
650	some cases/*
651	MLI->getLoopFor(BB: NewSave)))
652	NewSave = FindIDom<>(Block&: *NewSave->pred_begin(), BBs: NewSave->predecessors(), Dom&: MDT,
653	Strict: false);
654
655	const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
656	if (!NewSave \|\| NewSave == InitSave \|\|
657	isSaveReachableThroughClean(SavePoint: NewSave, CleanPreds) \|\|
658	!TFI->canUseAsPrologue(MBB: *NewSave))
659	return false;
660
661	// Now we know that splitting a restore point can isolate the restore point
662	// from clean blocks and doing so can shrink the save point.
663	MachineBasicBlock *NewRestore =
664	tryToSplitRestore(MBB: InitRestore, DirtyPreds, TII);
665
666	// Make sure if the new restore point is valid as an epilogue, depending on
667	// targets.
668	if (!TFI->canUseAsEpilogue(MBB: *NewRestore)) {
669	rollbackRestoreSplit(MF, NMBB: NewRestore, MBB: InitRestore, DirtyPreds, TII);
670	return false;
671	}
672
673	Save = NewSave;
674	Restore = NewRestore;
675
676	MDT->recalculate(Func&: MF);
677	MPDT->recalculate(Func&: MF);
678
679	assert((MDT->dominates(Save, Restore) && MPDT->dominates(Restore, Save)) &&
680	"Incorrect save or restore point due to dominance relations");
681	assert((!MLI->getLoopFor(Save) && !MLI->getLoopFor(Restore)) &&
682	"Unexpected save or restore point in a loop");
683	assert((EntryFreq >= MBFI->getBlockFreq(Save) &&
684	EntryFreq >= MBFI->getBlockFreq(Restore)) &&
685	"Incorrect save or restore point based on block frequency");
686	return true;
687	}
688
689	void ShrinkWrapImpl::updateSaveRestorePoints(MachineBasicBlock &MBB,
690	RegScavenger *RS) {
691	// Get rid of the easy cases first.
692	if (!Save)
693	Save = &MBB;
694	else
695	Save = MDT->findNearestCommonDominator(A: Save, B: &MBB);
696	assert(Save);
697
698	if (!Restore)
699	Restore = &MBB;
700	else if (MPDT->getNode(BB: &MBB)) // If the block is not in the post dom tree, it
701	// means the block never returns. If that's the
702	// case, we don't want to call
703	// `findNearestCommonDominator`, which will
704	// return `Restore`.
705	Restore = MPDT->findNearestCommonDominator(A: Restore, B: &MBB);
706	else
707	Restore = nullptr; // Abort, we can't find a restore point in this case.
708
709	// Make sure we would be able to insert the restore code before the
710	// terminator.
711	if (Restore == &MBB) {
712	for (const MachineInstr &Terminator : MBB.terminators()) {
713	if (!useOrDefCSROrFI(MI: Terminator, RS, /StackAddressUsed=/true))
714	continue;
715	// One of the terminator needs to happen before the restore point.
716	if (MBB.succ_empty()) {
717	Restore = nullptr; // Abort, we can't find a restore point in this case.
718	break;
719	}
720	// Look for a restore point that post-dominates all the successors.
721	// The immediate post-dominator is what we are looking for.
722	Restore = FindIDom<>(Block&: Restore, BBs: Restore->successors(), Dom&: MPDT);
723	break;
724	}
725	}
726
727	if (!Restore) {
728	LLVM_DEBUG(
729	dbgs() << "Restore point needs to be spanned on several blocks\n");
730	return;
731	}
732
733	// Make sure Save and Restore are suitable for shrink-wrapping:
734	// 1. all path from Save needs to lead to Restore before exiting.
735	// 2. all path to Restore needs to go through Save from Entry.
736	// We achieve that by making sure that:
737	// A. Save dominates Restore.
738	// B. Restore post-dominates Save.
739	// C. Save and Restore are in the same loop.
740	bool SaveDominatesRestore = false;
741	bool RestorePostDominatesSave = false;
742	while (Restore &&
743	(!(SaveDominatesRestore = MDT->dominates(A: Save, B: Restore)) \|\|
744	!(RestorePostDominatesSave = MPDT->dominates(A: Restore, B: Save)) \|\|
745	// Post-dominance is not enough in loops to ensure that all uses/defs
746	// are after the prologue and before the epilogue at runtime.
747	// E.g.,
748	// while(1) {
749	// Save
750	// Restore
751	// if (...)
752	// break;
753	// use/def CSRs
754	// }
755	// All the uses/defs of CSRs are dominated by Save and post-dominated
756	// by Restore. However, the CSRs uses are still reachable after
757	// Restore and before Save are executed.
758	//
759	// For now, just push the restore/save points outside of loops.
760	// FIXME: Refine the criteria to still find interesting cases
761	// for loops.
762	MLI->getLoopFor(BB: Save) \|\| MLI->getLoopFor(BB: Restore))) {
763	// Fix (A).
764	if (!SaveDominatesRestore) {
765	Save = MDT->findNearestCommonDominator(A: Save, B: Restore);
766	continue;
767	}
768	// Fix (B).
769	if (!RestorePostDominatesSave)
770	Restore = MPDT->findNearestCommonDominator(A: Restore, B: Save);
771
772	// Fix (C).
773	if (Restore && (MLI->getLoopFor(BB: Save) \|\| MLI->getLoopFor(BB: Restore))) {
774	if (MLI->getLoopDepth(BB: Save) > MLI->getLoopDepth(BB: Restore)) {
775	// Push Save outside of this loop if immediate dominator is different
776	// from save block. If immediate dominator is not different, bail out.
777	Save = FindIDom<>(Block&: Save, BBs: Save->predecessors(), Dom&: MDT);
778	if (!Save)
779	break;
780	} else {
781	// If the loop does not exit, there is no point in looking
782	// for a post-dominator outside the loop.
783	SmallVector<MachineBasicBlock*, `4`> ExitBlocks;
784	MLI->getLoopFor(BB: Restore)->getExitingBlocks(ExitingBlocks&: ExitBlocks);
785	// Push Restore outside of this loop.
786	// Look for the immediate post-dominator of the loop exits.
787	MachineBasicBlock *IPdom = Restore;
788	for (MachineBasicBlock *LoopExitBB: ExitBlocks) {
789	IPdom = FindIDom<>(Block&: IPdom, BBs: LoopExitBB->successors(), Dom&: MPDT);
790	if (!IPdom)
791	break;
792	}
793	// If the immediate post-dominator is not in a less nested loop,
794	// then we are stuck in a program with an infinite loop.
795	// In that case, we will not find a safe point, hence, bail out.
796	if (IPdom && MLI->getLoopDepth(BB: IPdom) < MLI->getLoopDepth(BB: Restore))
797	Restore = IPdom;
798	else {
799	Restore = nullptr;
800	break;
801	}
802	}
803	}
804	}
805	}
806
807	static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE,
808	StringRef RemarkName, StringRef RemarkMessage,
809	const DiagnosticLocation &Loc,
810	const MachineBasicBlock *MBB) {
811	ORE->emit(RemarkBuilder: [&]() {
812	return MachineOptimizationRemarkMissed (DEBUG_TYPE, RemarkName, Loc, MBB)
813	<< RemarkMessage;
814	});
815
816	LLVM_DEBUG(dbgs() << RemarkMessage << `'\n'`);
817	return false;
818	}
819
820	bool ShrinkWrapImpl::performShrinkWrapping(
821	const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
822	RegScavenger *RS) {
823	for (MachineBasicBlock *MBB : RPOT) {
824	LLVM_DEBUG(dbgs() << "Look into: " << printMBBReference(*MBB) << `'\n'`);
825
826	if (MBB->isEHFuncletEntry())
827	return giveUpWithRemarks(ORE, RemarkName: "UnsupportedEHFunclets",
828	RemarkMessage: "EH Funclets are not supported yet.",
829	Loc: MBB->front().getDebugLoc(), MBB);
830
831	if (MBB->isEHPad() \|\| MBB->isInlineAsmBrIndirectTarget()) {
832	// Push the prologue and epilogue outside of the region that may throw (or
833	// jump out via inlineasm_br), by making sure that all the landing pads
834	// are at least at the boundary of the save and restore points. The
835	// problem is that a basic block can jump out from the middle in these
836	// cases, which we do not handle.
837	updateSaveRestorePoints(MBB&: *MBB, RS);
838	if (!ArePointsInteresting()) {
839	LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n");
840	return false;
841	}
842	continue;
843	}
844
845	bool StackAddressUsed = false;
846	// Check if we found any stack accesses in the predecessors. We are not
847	// doing a full dataflow analysis here to keep things simple but just
848	// rely on a reverse portorder traversal (RPOT) to guarantee predecessors
849	// are already processed except for loops (and accept the conservative
850	// result for loops).
851	for (const MachineBasicBlock *Pred : MBB->predecessors()) {
852	if (StackAddressUsedBlockInfo.test(Idx: Pred->getNumber())) {
853	StackAddressUsed = true;
854	break;
855	}
856	}
857
858	for (const MachineInstr &MI : *MBB) {
859	if (useOrDefCSROrFI(MI, RS, StackAddressUsed)) {
860	// Save (resp. restore) point must dominate (resp. post dominate)
861	// MI. Look for the proper basic block for those.
862	updateSaveRestorePoints(MBB&: *MBB, RS);
863	// If we are at a point where we cannot improve the placement of
864	// save/restore instructions, just give up.
865	if (!ArePointsInteresting()) {
866	LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
867	return false;
868	}
869	// No need to look for other instructions, this basic block
870	// will already be part of the handled region.
871	StackAddressUsed = true;
872	break;
873	}
874	}
875	StackAddressUsedBlockInfo [MBB->getNumber()] = StackAddressUsed;
876	}
877	if (!ArePointsInteresting()) {
878	// If the points are not interesting at this point, then they must be null
879	// because it means we did not encounter any frame/CSR related code.
880	// Otherwise, we would have returned from the previous loop.
881	assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!");
882	LLVM_DEBUG(dbgs() << "Nothing to shrink-wrap\n");
883	return false;
884	}
885
886	LLVM_DEBUG(dbgs() << "\n Results \nFrequency of the Entry: "
887	<< EntryFreq.getFrequency() << `'\n'`);
888
889	const TargetFrameLowering *TFI =
890	MachineFunc->getSubtarget().getFrameLowering();
891	do {
892	LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
893	<< printMBBReference(*Save) << `' '`
894	<< printBlockFreq(MBFI, Save)
895	<< "\nRestore: " << printMBBReference(*Restore) << `' '`
896	<< printBlockFreq(MBFI, Restore) << `'\n'`);
897
898	bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
899	if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(MBB: Save)) &&
900	EntryFreq >= MBFI->getBlockFreq(MBB: Restore)) &&
901	((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(MBB: *Save)) &&
902	TFI->canUseAsEpilogue(MBB: *Restore)))
903	break;
904	LLVM_DEBUG(
905	dbgs() << "New points are too expensive or invalid for the target\n");
906	MachineBasicBlock *NewBB;
907	if (!IsSaveCheap \|\| !TargetCanUseSaveAsPrologue) {
908	Save = FindIDom<>(Block&: Save, BBs: Save->predecessors(), Dom&: MDT);
909	if (!Save)
910	break;
911	NewBB = Save;
912	} else {
913	// Restore is expensive.
914	Restore = FindIDom<>(Block&: Restore, BBs: Restore->successors(), Dom&: MPDT);
915	if (!Restore)
916	break;
917	NewBB = Restore;
918	}
919	updateSaveRestorePoints(MBB&: *NewBB, RS);
920	} while (Save && Restore);
921
922	if (!ArePointsInteresting()) {
923	++NumCandidatesDropped;
924	return false;
925	}
926	return true;
927	}
928
929	bool ShrinkWrapImpl::run(MachineFunction &MF) {
930	LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << `'\n'`);
931
932	init(MF);
933
934	ReversePostOrderTraversal<MachineBasicBlock > RPOT(&MF.begin());
935	if (containsIrreducibleCFG<MachineBasicBlock >(RPOTraversal&: RPOT, LI: MLI)) {
936	// If MF is irreducible, a block may be in a loop without
937	// MachineLoopInfo reporting it. I.e., we may use the
938	// post-dominance property in loops, which lead to incorrect
939	// results. Moreover, we may miss that the prologue and
940	// epilogue are not in the same loop, leading to unbalanced
941	// construction/deconstruction of the stack frame.
942	return giveUpWithRemarks(ORE, RemarkName: "UnsupportedIrreducibleCFG",
943	RemarkMessage: "Irreducible CFGs are not supported yet.",
944	Loc: MF.getFunction().getSubprogram(), MBB: &MF.front());
945	}
946
947	const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
948	std::unique_ptr<RegScavenger> RS(
949	TRI->requiresRegisterScavenging(MF) ? new RegScavenger () : nullptr);
950
951	bool Changed = false;
952
953	// Initially, conservatively assume that stack addresses can be used in each
954	// basic block and change the state only for those basic blocks for which we
955	// were able to prove the opposite.
956	StackAddressUsedBlockInfo.resize(N: MF.getNumBlockIDs(), t: true);
957	bool HasCandidate = performShrinkWrapping(RPOT, RS: RS.get());
958	StackAddressUsedBlockInfo.clear();
959	Changed = postShrinkWrapping(HasCandidate, MF, RS: RS.get());
960	if (!HasCandidate && !Changed)
961	return false;
962	if (!ArePointsInteresting())
963	return Changed;
964
965	LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
966	<< printMBBReference(*Save) << `' '`
967	<< "\nRestore: " << printMBBReference(*Restore) << `'\n'`);
968
969	MachineFrameInfo &MFI = MF.getFrameInfo();
970	MFI.setSavePoint(Save);
971	MFI.setRestorePoint(Restore);
972	++NumCandidates;
973	return Changed;
974	}
975
976	bool ShrinkWrapLegacy::runOnMachineFunction(MachineFunction &MF) {
977	if (skipFunction(F: MF.getFunction()) \|\| MF.empty() \|\|
978	!ShrinkWrapImpl::isShrinkWrapEnabled(MF))
979	return false;
980
981	MachineDominatorTree *MDT =
982	&getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
983	MachinePostDominatorTree *MPDT =
984	&getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
985	MachineBlockFrequencyInfo *MBFI =
986	&getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
987	MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
988	MachineOptimizationRemarkEmitter *ORE =
989	&getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
990
991	return ShrinkWrapImpl (MDT, MPDT, MBFI, MLI, ORE).run(MF);
992	}
993
994	PreservedAnalyses ShrinkWrapPass::run(MachineFunction &MF,
995	MachineFunctionAnalysisManager &MFAM) {
996	MFPropsModifier _(*this, MF);
997	if (MF.empty() \|\| !ShrinkWrapImpl::isShrinkWrapEnabled(MF))
998	return PreservedAnalyses::all();
999
1000	MachineDominatorTree &MDT = MFAM.getResult<MachineDominatorTreeAnalysis>(IR&: MF);
1001	MachinePostDominatorTree &MPDT =
1002	MFAM.getResult<MachinePostDominatorTreeAnalysis>(IR&: MF);
1003	MachineBlockFrequencyInfo &MBFI =
1004	MFAM.getResult<MachineBlockFrequencyAnalysis>(IR&: MF);
1005	MachineLoopInfo &MLI = MFAM.getResult<MachineLoopAnalysis>(IR&: MF);
1006	MachineOptimizationRemarkEmitter &ORE =
1007	MFAM.getResult<MachineOptimizationRemarkEmitterAnalysis>(IR&: MF);
1008
1009	ShrinkWrapImpl (&MDT, &MPDT, &MBFI, &MLI, &ORE).run(MF);
1010	return PreservedAnalyses::all();
1011	}
1012
1013	bool ShrinkWrapImpl::isShrinkWrapEnabled(const MachineFunction &MF) {
1014	const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
1015
1016	switch (EnableShrinkWrapOpt) {
1017	case cl::BOU_UNSET:
1018	return TFI->enableShrinkWrapping(MF) &&
1019	// Windows with CFI has some limitations that make it impossible
1020	// to use shrink-wrapping.
1021	!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
1022	// Sanitizers look at the value of the stack at the location
1023	// of the crash. Since a crash can happen anywhere, the
1024	// frame must be lowered before anything else happen for the
1025	// sanitizers to be able to get a correct stack frame.
1026	!(MF.getFunction().hasFnAttribute(Kind: Attribute::SanitizeAddress) \|\|
1027	MF.getFunction().hasFnAttribute(Kind: Attribute::SanitizeThread) \|\|
1028	MF.getFunction().hasFnAttribute(Kind: Attribute::SanitizeMemory) \|\|
1029	MF.getFunction().hasFnAttribute(Kind: Attribute::SanitizeType) \|\|
1030	MF.getFunction().hasFnAttribute(Kind: Attribute::SanitizeHWAddress));
1031	// If EnableShrinkWrap is set, it takes precedence on whatever the
1032	// target sets. The rational is that we assume we want to test
1033	// something related to shrink-wrapping.
1034	case cl::BOU_TRUE:
1035	return true;
1036	case cl::BOU_FALSE:
1037	return false;
1038	}
1039	llvm_unreachable("Invalid shrink-wrapping state");
1040	}
1041

Browse the source code of llvm_projects/llvm/lib/CodeGen/ShrinkWrap.cpp