1//===-- AArch64CodeLayoutOpt.cpp - Code Layout Optimizations --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass runs after instruction scheduling and employs code layout
10// optimizations for certain patterns.
11//
12// Option -aarch64-code-layout-opt-enable selects instruction pairs to optimize:
13// cmp-csel: Enable CMP/CMN-CSEL code layout optimization
14// fcmp-fcsel: Enable FCMP-FCSEL code layout optimization
15//
16// The initial implementation induces function alignment when a supported
17// pattern is detected, and possibly instruction-alignment when a pair would
18// straddle cache-lines.
19//===----------------------------------------------------------------------===//
20
21#include "AArch64.h"
22#include "AArch64InstrInfo.h"
23#include "AArch64Subtarget.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/ADT/Statistic.h"
26#include "llvm/CodeGen/MachineBasicBlock.h"
27#include "llvm/CodeGen/MachineFunctionPass.h"
28#include "llvm/Support/CommandLine.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/ErrorHandling.h"
31#include "llvm/Support/MathExtras.h"
32
33using namespace llvm;
34
// Tag for this pass: prefixes every DBG message below.
#define DEBUG_TYPE "aarch64-code-layout-opt"
// Stream a message, prefixed with DEBUG_TYPE and ": ", to dbgs() through
// LLVM_DEBUG. The argument is the tail of a `<<` chain.
#define DBG(...) LLVM_DEBUG(dbgs() << DEBUG_TYPE ": " << __VA_ARGS__)
// Human-readable pass name (returned by getPassName() and registered with
// INITIALIZE_PASS).
#define AARCH64_CODE_LAYOUT_OPT_NAME "AArch64 Code Layout Optimization"
38
namespace {

/// Instruction-pair kinds this pass can align.
///
/// The enumerators are used as values of a cl::bits option, so each one's
/// numeric value is a bit position in that option's mask; keep them small and
/// distinct. The type is file-local, hence the anonymous namespace (avoids
/// clashing with an unrelated global `CodeLayoutOpt` in another translation
/// unit).
enum CodeLayoutOpt {
  CmpCsel,   // Align CMP/CMN-CSEL pairs
  FcmpFcsel, // Align FCMP-FCSEL pairs
};

} // end anonymous namespace
43
44static cl::bits<CodeLayoutOpt> EnableCodeAlignment(
45 "aarch64-code-layout-opt-enable", cl::Hidden, cl::CommaSeparated,
46 cl::desc("Enable code alignment optimization for instruction pairs"),
47 cl::values(
48 clEnumValN(CmpCsel, "cmp-csel", "CMP/CMN-CSEL pair alignment (32-bit)"),
49 clEnumValN(FcmpFcsel, "fcmp-fcsel", "FCMP-FCSEL pair alignment")));
50
51static cl::opt<unsigned> FunctionAlignBytes(
52 "aarch64-code-layout-opt-align-functions", cl::Hidden,
53 cl::desc("Function alignment in bytes for code layout optimization "
54 "(must be a power of 2)"),
55 cl::init(Val: 64), cl::callback(CB: [](const unsigned &Val) {
56 if (!isPowerOf2_32(Value: Val))
57 report_fatal_error(
58 reason: "aarch64-code-layout-opt-align must be a power of 2");
59 }));
60
61STATISTIC(NumFunctionsAligned,
62 "Number of functions with aligned (to 64-bytes by default)");
63STATISTIC(NumCmpCselPairsDetected,
64 "Number of CMP/CMN-CSEL pairs detected for alignment");
65STATISTIC(NumFcmpFcselPairsDetected,
66 "Number of FCMP-FCSEL pairs detected for alignment");
67
68namespace {
69
70class AArch64CodeLayoutOpt : public MachineFunctionPass {
71public:
72 static char ID;
73 AArch64CodeLayoutOpt() : MachineFunctionPass(ID) {}
74 void getAnalysisUsage(AnalysisUsage &AU) const override;
75 bool runOnMachineFunction(MachineFunction &MF) override;
76 StringRef getPassName() const override {
77 return AARCH64_CODE_LAYOUT_OPT_NAME;
78 }
79
80private:
81 const AArch64InstrInfo *TII = nullptr;
82
83 /// Align each fusible CMP/CMN-CSEL or FCMP-FCSEL pair in \p MBB by emitting
84 /// .p2align before the lead instruction (splitting the block if needed).
85 /// \returns true iff at least one pair was found and aligned.
86 bool alignLayoutSensitivePatterns(MachineBasicBlock *MBB);
87
88 /// Emit .p2align before MI. Splits the block if MI is not at its start.
89 void emitP2Align(MachineInstr &MI, Align DesiredAlign,
90 unsigned MaxSkipBytes = 4);
91
92 bool optimizeForCodeLayout(MachineFunction &MF);
93};
94
95} // end anonymous namespace
96
97char AArch64CodeLayoutOpt::ID = 0;
98
99INITIALIZE_PASS(AArch64CodeLayoutOpt, "aarch64-code-layout-opt",
100 AARCH64_CODE_LAYOUT_OPT_NAME, false, false)
101
102void AArch64CodeLayoutOpt::getAnalysisUsage(AnalysisUsage &AU) const {
103 AU.setPreservesAll();
104 MachineFunctionPass::getAnalysisUsage(AU);
105}
106
107FunctionPass *llvm::createAArch64CodeLayoutOptPass() {
108 return new AArch64CodeLayoutOpt();
109}
110
111/// \returns true iff Opc is a floating-point comparison (FCMP/FCMPE).
112static bool isFloatingPointCompare(unsigned Opc) {
113 switch (Opc) {
114 case AArch64::FCMPSrr:
115 case AArch64::FCMPDrr:
116 case AArch64::FCMPESrr:
117 case AArch64::FCMPEDrr:
118 case AArch64::FCMPHrr:
119 case AArch64::FCMPEHrr:
120 return true;
121 default:
122 return false;
123 }
124}
125
126/// \returns true iff Opc is a floating-point conditional select (FCSEL).
127static bool isFloatingPointConditionalSelect(unsigned Opc) {
128 switch (Opc) {
129 case AArch64::FCSELSrrr:
130 case AArch64::FCSELDrrr:
131 case AArch64::FCSELHrrr:
132 return true;
133 default:
134 return false;
135 }
136}
137
138/// \returns true if MI is a qualifying 32-bit CMP or CMN instruction.
139/// CMP is encoded as SUBS with WZR destination, CMN as ADDS with WZR.
140/// Only simple variants (no shifted/extended reg) qualify, and immediate
141/// variants require no LSL shift and small immediates (<=15).
142static bool isQualifyingIntCompare(const MachineInstr &MI) {
143 switch (MI.getOpcode()) {
144 case AArch64::SUBSWrr:
145 case AArch64::ADDSWrr:
146 return MI.definesRegister(Reg: AArch64::WZR, /*TRI=*/nullptr);
147 case AArch64::SUBSWri:
148 case AArch64::ADDSWri:
149 return MI.definesRegister(Reg: AArch64::WZR, /*TRI=*/nullptr) &&
150 MI.getOperand(i: 3).getImm() == 0 && MI.getOperand(i: 2).getImm() <= 15;
151 case AArch64::SUBSWrs:
152 case AArch64::ADDSWrs:
153 return MI.definesRegister(Reg: AArch64::WZR, /*TRI=*/nullptr) &&
154 !AArch64InstrInfo::hasShiftedReg(MI);
155 case AArch64::SUBSWrx:
156 return MI.definesRegister(Reg: AArch64::WZR, /*TRI=*/nullptr) &&
157 !AArch64InstrInfo::hasExtendedReg(MI);
158 default:
159 return false;
160 }
161}
162
163bool AArch64CodeLayoutOpt::runOnMachineFunction(MachineFunction &MF) {
164 const Function &F = MF.getFunction();
165 // hasOptSize() returns true for both -Os and -Oz.
166 if (F.hasOptSize())
167 return false;
168
169 const auto *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
170 TII = Subtarget->getInstrInfo();
171
172 // Default: enable when the subtarget opts in via FeatureAlignCmpCSelPairs.
173 if (!EnableCodeAlignment.getBits() && Subtarget->hasAlignCmpCSelPairs()) {
174 if (Subtarget->hasFuseCmpCSel())
175 EnableCodeAlignment.addValue(V: CmpCsel);
176 if (Subtarget->hasFuseFCmpFCSel())
177 EnableCodeAlignment.addValue(V: FcmpFcsel);
178 }
179
180 if (!(EnableCodeAlignment.isSet(V: CmpCsel) && Subtarget->hasFuseCmpCSel()) &&
181 !(EnableCodeAlignment.isSet(V: FcmpFcsel) && Subtarget->hasFuseFCmpFCSel()))
182 return false;
183
184 return optimizeForCodeLayout(MF);
185}
186
187void AArch64CodeLayoutOpt::emitP2Align(MachineInstr &MI, Align DesiredAlign,
188 unsigned MaxSkipBytes) {
189 MachineBasicBlock *MBB = MI.getParent();
190
191 auto FirstReal =
192 skipDebugInstructionsForward(It: MBB->instr_begin(), End: MBB->instr_end());
193 if (&*FirstReal != &MI) {
194 auto PrevIt = prev_nodbg(It: MI.getIterator(), Begin: MBB->instr_begin());
195 MBB = MBB->splitAt(SplitInst&: *PrevIt, /*UpdateLiveIns=*/true);
196 }
197
198 MBB->setAlignment(DesiredAlign);
199 MBB->setMaxBytesForAlignment(MaxSkipBytes);
200}
201
202// Align each fusible CMP/CMN-CSEL or FCMP-FCSEL pair in MBB by emitting
203// .p2align before the lead instruction (splitting the block if needed).
204// A pair is: a qualifying lead instruction immediately followed by its
205// consumer (CMP/CMN→CSEL or FCMP→FCSEL), with no intervening instructions.
206// Returns true iff at least one pair was found and aligned.
207bool AArch64CodeLayoutOpt::alignLayoutSensitivePatterns(
208 MachineBasicBlock *MBB) {
209 auto End = MBB->instr_end();
210 SmallVector<std::pair<MachineInstr *, bool>, 4> Pairs;
211
212 for (auto &MI : instructionsWithoutDebug(It: MBB->begin(), End: MBB->end())) {
213 auto NextIt =
214 skipDebugInstructionsForward(It: std::next(x: MI.getIterator()), End);
215 if (NextIt == End)
216 break;
217
218 // --- CMP/CMN-CSEL detection ---
219 if (EnableCodeAlignment.isSet(V: CmpCsel) && isQualifyingIntCompare(MI) &&
220 NextIt->getOpcode() == AArch64::CSELWr) {
221 Pairs.push_back(Elt: {&MI, true});
222 continue;
223 }
224
225 // --- FCMP-FCSEL detection ---
226 if (EnableCodeAlignment.isSet(V: FcmpFcsel) &&
227 isFloatingPointCompare(Opc: MI.getOpcode()) &&
228 isFloatingPointConditionalSelect(Opc: NextIt->getOpcode())) {
229 Pairs.push_back(Elt: {&MI, false});
230 continue;
231 }
232 }
233
234 for (auto &[MI, IsCmpCsel] : Pairs) {
235 emitP2Align(MI&: *MI, DesiredAlign: Align(64));
236 DBG(".p2align 6, , 4 before " << *MI);
237 ++(IsCmpCsel ? NumCmpCselPairsDetected : NumFcmpFcselPairsDetected);
238 }
239
240 return !Pairs.empty();
241}
242
243bool AArch64CodeLayoutOpt::optimizeForCodeLayout(MachineFunction &MF) {
244 DBG("optimizeForCodeLayout: " << MF.getName() << "\n");
245
246 bool Changed = false;
247 for (auto &MBB : MF)
248 Changed |= alignLayoutSensitivePatterns(MBB: &MBB);
249
250 if (!Changed)
251 return false;
252
253 if (MF.getAlignment() < Align(FunctionAlignBytes)) {
254 MF.setAlignment(Align(FunctionAlignBytes));
255 ++NumFunctionsAligned;
256 DBG("Set " << FunctionAlignBytes << "-byte alignment for function "
257 << MF.getName() << "\n");
258 } else {
259 DBG("Function " << MF.getName() << " already has sufficient alignment\n");
260 }
261 return true;
262}
263