AArch64CodeLayoutOpt.cpp source code [llvm_projects/llvm/lib/Target/AArch64/AArch64CodeLayoutOpt.cpp]

1	//===-- AArch64CodeLayoutOpt.cpp - Code Layout Optimizations --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This pass runs after instruction scheduling and employs code layout
10	// optimizations for certain patterns.
11	//
12	// Option -aarch64-code-layout-opt-enable selects instruction pairs to optimize:
13	// cmp-csel: Enable CMP/CMN-CSEL code layout optimization
14	// fcmp-fcsel: Enable FCMP-FCSEL code layout optimization
15	//
16	// The initial implementation induces function alignment when a supported
17	// pattern is detected, and possibly instruction-alignment when a pair would
18	// straddle cache-lines.
19	//===----------------------------------------------------------------------===//
20
21	#include "AArch64.h"
22	#include "AArch64InstrInfo.h"
23	#include "AArch64Subtarget.h"
24	#include "llvm/ADT/SmallVector.h"
25	#include "llvm/ADT/Statistic.h"
26	#include "llvm/CodeGen/MachineBasicBlock.h"
27	#include "llvm/CodeGen/MachineFunctionPass.h"
28	#include "llvm/Support/CommandLine.h"
29	#include "llvm/Support/Debug.h"
30	#include "llvm/Support/ErrorHandling.h"
31	#include "llvm/Support/MathExtras.h"
32
33	using namespace llvm;
34
/// Debug category of this file; DBG also prints it as the prefix of every
/// message.
#define DEBUG_TYPE "aarch64-code-layout-opt"

/// Human-readable pass name, returned by getPassName() and passed to
/// INITIALIZE_PASS.
#define AARCH64_CODE_LAYOUT_OPT_NAME "AArch64 Code Layout Optimization"

/// Streams its arguments to dbgs() inside LLVM_DEBUG, prefixed with
/// "<DEBUG_TYPE>: ".
#define DBG(...)                                                               \
  LLVM_DEBUG(dbgs() << DEBUG_TYPE ": " << __VA_ARGS__)
38
/// Instruction-pair patterns this pass can align. Used as the element type of
/// the EnableCodeAlignment cl::bits option, so every enumerator needs its own
/// distinct value.
enum CodeLayoutOpt {
  CmpCsel = 0,   // Align CMP/CMN-CSEL pairs
  FcmpFcsel = 1, // Align FCMP-FCSEL pairs
};
43
44	static cl::bits<CodeLayoutOpt> EnableCodeAlignment(
45	"aarch64-code-layout-opt-enable", cl::Hidden, cl::CommaSeparated,
46	cl::desc("Enable code alignment optimization for instruction pairs"),
47	cl::values(
48	clEnumValN(CmpCsel, "cmp-csel", "CMP/CMN-CSEL pair alignment (32-bit)"),
49	clEnumValN(FcmpFcsel, "fcmp-fcsel", "FCMP-FCSEL pair alignment")));
50
51	static cl::opt<unsigned> FunctionAlignBytes(
52	"aarch64-code-layout-opt-align-functions", cl::Hidden,
53	cl::desc("Function alignment in bytes for code layout optimization "
54	"(must be a power of 2)"),
55	cl::init(Val: `64`), cl::callback(CB: [](const unsigned &Val) {
56	if (!isPowerOf2_32(Value: Val))
57	report_fatal_error(
58	reason: "aarch64-code-layout-opt-align must be a power of 2");
59	}));
60
61	STATISTIC(NumFunctionsAligned,
62	"Number of functions with aligned (to 64-bytes by default)");
63	STATISTIC(NumCmpCselPairsDetected,
64	"Number of CMP/CMN-CSEL pairs detected for alignment");
65	STATISTIC(NumFcmpFcselPairsDetected,
66	"Number of FCMP-FCSEL pairs detected for alignment");
67
68	namespace {
69
70	class AArch64CodeLayoutOpt : public MachineFunctionPass {
71	public:
72	static char ID;
73	AArch64CodeLayoutOpt() : MachineFunctionPass (ID) {}
74	void getAnalysisUsage(AnalysisUsage &AU) const override;
75	bool runOnMachineFunction(MachineFunction &MF) override;
76	StringRef getPassName() const override {
77	return AARCH64_CODE_LAYOUT_OPT_NAME;
78	}
79
80	private:
81	const AArch64InstrInfo TII = nullptr*;
82
83	/// Align each fusible CMP/CMN-CSEL or FCMP-FCSEL pair in \p MBB by emitting
84	/// .p2align before the lead instruction (splitting the block if needed).
85	/// \returns true iff at least one pair was found and aligned.
86	bool alignLayoutSensitivePatterns(MachineBasicBlock *MBB);
87
88	/// Emit .p2align before MI. Splits the block if MI is not at its start.
89	void emitP2Align(MachineInstr &MI, Align DesiredAlign,
90	unsigned MaxSkipBytes = `4`);
91
92	bool optimizeForCodeLayout(MachineFunction &MF);
93	};
94
95	} // end anonymous namespace
96
97	char AArch64CodeLayoutOpt::ID = `0`;
98
99	INITIALIZE_PASS(AArch64CodeLayoutOpt, "aarch64-code-layout-opt",
100	AARCH64_CODE_LAYOUT_OPT_NAME, false, false)
101
102	void AArch64CodeLayoutOpt::getAnalysisUsage(AnalysisUsage &AU) const {
103	AU.setPreservesAll();
104	MachineFunctionPass::getAnalysisUsage(AU);
105	}
106
107	FunctionPass *llvm::createAArch64CodeLayoutOptPass() {
108	return new AArch64CodeLayoutOpt ();
109	}
110
111	/// \returns true iff Opc is a floating-point comparison (FCMP/FCMPE).
112	static bool isFloatingPointCompare(unsigned Opc) {
113	switch (Opc) {
114	case AArch64::FCMPSrr:
115	case AArch64::FCMPDrr:
116	case AArch64::FCMPESrr:
117	case AArch64::FCMPEDrr:
118	case AArch64::FCMPHrr:
119	case AArch64::FCMPEHrr:
120	return true;
121	default:
122	return false;
123	}
124	}
125
126	/// \returns true iff Opc is a floating-point conditional select (FCSEL).
127	static bool isFloatingPointConditionalSelect(unsigned Opc) {
128	switch (Opc) {
129	case AArch64::FCSELSrrr:
130	case AArch64::FCSELDrrr:
131	case AArch64::FCSELHrrr:
132	return true;
133	default:
134	return false;
135	}
136	}
137
138	/// \returns true if MI is a qualifying 32-bit CMP or CMN instruction.
139	/// CMP is encoded as SUBS with WZR destination, CMN as ADDS with WZR.
140	/// Only simple variants (no shifted/extended reg) qualify, and immediate
141	/// variants require no LSL shift and small immediates (<=15).
142	static bool isQualifyingIntCompare(const MachineInstr &MI) {
143	switch (MI.getOpcode()) {
144	case AArch64::SUBSWrr:
145	case AArch64::ADDSWrr:
146	return MI.definesRegister(Reg: AArch64::WZR, /TRI=/nullptr);
147	case AArch64::SUBSWri:
148	case AArch64::ADDSWri:
149	return MI.definesRegister(Reg: AArch64::WZR, /TRI=/nullptr) &&
150	MI.getOperand(i: `3`).getImm() == `0` && MI.getOperand(i: `2`).getImm() <= `15`;
151	case AArch64::SUBSWrs:
152	case AArch64::ADDSWrs:
153	return MI.definesRegister(Reg: AArch64::WZR, /TRI=/nullptr) &&
154	!AArch64InstrInfo::hasShiftedReg(MI);
155	case AArch64::SUBSWrx:
156	return MI.definesRegister(Reg: AArch64::WZR, /TRI=/nullptr) &&
157	!AArch64InstrInfo::hasExtendedReg(MI);
158	default:
159	return false;
160	}
161	}
162
163	bool AArch64CodeLayoutOpt::runOnMachineFunction(MachineFunction &MF) {
164	const Function &F = MF.getFunction();
165	// hasOptSize() returns true for both -Os and -Oz.
166	if (F.hasOptSize())
167	return false;
168
169	const auto *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
170	TII = Subtarget->getInstrInfo();
171
172	// Default: enable when the subtarget opts in via FeatureAlignCmpCSelPairs.
173	if (!EnableCodeAlignment.getBits() && Subtarget->hasAlignCmpCSelPairs()) {
174	if (Subtarget->hasFuseCmpCSel())
175	EnableCodeAlignment.addValue(V: CmpCsel);
176	if (Subtarget->hasFuseFCmpFCSel())
177	EnableCodeAlignment.addValue(V: FcmpFcsel);
178	}
179
180	if (!(EnableCodeAlignment.isSet(V: CmpCsel) && Subtarget->hasFuseCmpCSel()) &&
181	!(EnableCodeAlignment.isSet(V: FcmpFcsel) && Subtarget->hasFuseFCmpFCSel()))
182	return false;
183
184	return optimizeForCodeLayout(MF);
185	}
186
187	void AArch64CodeLayoutOpt::emitP2Align(MachineInstr &MI, Align DesiredAlign,
188	unsigned MaxSkipBytes) {
189	MachineBasicBlock *MBB = MI.getParent();
190
191	auto FirstReal =
192	skipDebugInstructionsForward(It: MBB->instr_begin(), End: MBB->instr_end());
193	if (&*FirstReal != &MI) {
194	auto PrevIt = prev_nodbg(It: MI.getIterator(), Begin: MBB->instr_begin());
195	MBB = MBB->splitAt(SplitInst&: PrevIt, /UpdateLiveIns=/*true);
196	}
197
198	MBB->setAlignment(DesiredAlign);
199	MBB->setMaxBytesForAlignment(MaxSkipBytes);
200	}
201
202	// Align each fusible CMP/CMN-CSEL or FCMP-FCSEL pair in MBB by emitting
203	// .p2align before the lead instruction (splitting the block if needed).
204	// A pair is: a qualifying lead instruction immediately followed by its
205	// consumer (CMP/CMN→CSEL or FCMP→FCSEL), with no intervening instructions.
206	// Returns true iff at least one pair was found and aligned.
207	bool AArch64CodeLayoutOpt::alignLayoutSensitivePatterns(
208	MachineBasicBlock *MBB) {
209	auto End = MBB->instr_end();
210	SmallVector<std::pair<MachineInstr , bool*>, `4`> Pairs;
211
212	for (auto &MI : instructionsWithoutDebug(It: MBB->begin(), End: MBB->end())) {
213	auto NextIt =
214	skipDebugInstructionsForward(It: std::next(x: MI.getIterator()), End);
215	if (NextIt == End)
216	break;
217
218	// --- CMP/CMN-CSEL detection ---
219	if (EnableCodeAlignment.isSet(V: CmpCsel) && isQualifyingIntCompare(MI) &&
220	NextIt ->getOpcode() == AArch64::CSELWr) {
221	Pairs.push_back(Elt: {&MI, true});
222	continue;
223	}
224
225	// --- FCMP-FCSEL detection ---
226	if (EnableCodeAlignment.isSet(V: FcmpFcsel) &&
227	isFloatingPointCompare(Opc: MI.getOpcode()) &&
228	isFloatingPointConditionalSelect(Opc: NextIt ->getOpcode())) {
229	Pairs.push_back(Elt: {&MI, false});
230	continue;
231	}
232	}
233
234	for (auto &[MI, IsCmpCsel] : Pairs) {
235	emitP2Align(MI&: *MI, DesiredAlign: Align (`64`));
236	DBG(".p2align 6, , 4 before " << *MI);
237	++(IsCmpCsel ? NumCmpCselPairsDetected : NumFcmpFcselPairsDetected);
238	}
239
240	return !Pairs.empty();
241	}
242
243	bool AArch64CodeLayoutOpt::optimizeForCodeLayout(MachineFunction &MF) {
244	DBG("optimizeForCodeLayout: " << MF.getName() << "\n");
245
246	bool Changed = false;
247	for (auto &MBB : MF)
248	Changed \|= alignLayoutSensitivePatterns(MBB: &MBB);
249
250	if (!Changed)
251	return false;
252
253	if (MF.getAlignment() < Align (FunctionAlignBytes)) {
254	MF.setAlignment(Align (FunctionAlignBytes));
255	++NumFunctionsAligned;
256	DBG("Set " << FunctionAlignBytes << "-byte alignment for function "
257	<< MF.getName() << "\n");
258	} else {
259	DBG("Function " << MF.getName() << " already has sufficient alignment\n");
260	}
261	return true;
262	}
263

Browse the source code of llvm_projects/llvm/lib/Target/AArch64/AArch64CodeLayoutOpt.cpp