1//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The purpose of this pass is to employ a canonical code transformation so
10// that code compiled with slightly different IR passes can be diffed more
11// effectively than otherwise. This is done by renaming vregs in a given
12// LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13// move defs closer to their use inorder to reduce diffs caused by slightly
14// different schedules.
15//
16// Basic Usage:
17//
18// llc -o - -run-pass mir-canonicalizer example.mir
19//
20// Reorders instructions canonically.
21// Renames virtual register operands canonically.
22// Strips certain MIR artifacts (optionally).
23//
24//===----------------------------------------------------------------------===//
25
26#include "MIRVRegNamerUtils.h"
27#include "llvm/ADT/PostOrderIterator.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/CodeGen/MachineFunctionPass.h"
30#include "llvm/CodeGen/MachineRegisterInfo.h"
31#include "llvm/InitializePasses.h"
32#include "llvm/Pass.h"
33#include "llvm/Support/Debug.h"
34#include "llvm/Support/raw_ostream.h"
35
36using namespace llvm;
37
38#define DEBUG_TYPE "mir-canonicalizer"
39
40static cl::opt<unsigned>
41 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(Val: ~0u),
42 cl::value_desc("N"),
43 cl::desc("Function number to canonicalize."));
44
45namespace {
46
47class MIRCanonicalizer : public MachineFunctionPass {
48public:
49 static char ID;
50 MIRCanonicalizer() : MachineFunctionPass(ID) {}
51
52 StringRef getPassName() const override {
53 return "Rename register operands in a canonical ordering.";
54 }
55
56 void getAnalysisUsage(AnalysisUsage &AU) const override {
57 AU.setPreservesCFG();
58 MachineFunctionPass::getAnalysisUsage(AU);
59 }
60
61 bool runOnMachineFunction(MachineFunction &MF) override;
62};
63
64} // end anonymous namespace
65
66char MIRCanonicalizer::ID;
67
68char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
69
70INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
71 "Rename Register Operands Canonically", false, false)
72
73INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
74 "Rename Register Operands Canonically", false, false)
75
76static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
77 if (MF.empty())
78 return {};
79 ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
80 std::vector<MachineBasicBlock *> RPOList;
81 append_range(C&: RPOList, R&: RPOT);
82
83 return RPOList;
84}
85
86static bool
87rescheduleLexographically(std::vector<MachineInstr *> instructions,
88 MachineBasicBlock *MBB,
89 std::function<MachineBasicBlock::iterator()> getPos) {
90
91 bool Changed = false;
92 using StringInstrPair = std::pair<std::string, MachineInstr *>;
93 std::vector<StringInstrPair> StringInstrMap;
94
95 for (auto *II : instructions) {
96 std::string S;
97 raw_string_ostream OS(S);
98 II->print(OS);
99
100 // Trim the assignment, or start from the beginning in the case of a store.
101 const size_t i = S.find(c: '=');
102 StringInstrMap.push_back(x: {(i == std::string::npos) ? S : S.substr(pos: i), II});
103 }
104
105 llvm::sort(C&: StringInstrMap, Comp: llvm::less_first());
106
107 for (auto &II : StringInstrMap) {
108
109 LLVM_DEBUG({
110 dbgs() << "Splicing ";
111 II.second->dump();
112 dbgs() << " right before: ";
113 getPos()->dump();
114 });
115
116 Changed = true;
117 MBB->splice(Where: getPos(), Other: MBB, From: II.second);
118 }
119
120 return Changed;
121}
122
123static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
124 MachineBasicBlock *MBB) {
125
126 bool Changed = false;
127
128 // Calculates the distance of MI from the beginning of its parent BB.
129 auto getInstrIdx = [](const MachineInstr &MI) {
130 unsigned i = 0;
131 for (const auto &CurMI : *MI.getParent()) {
132 if (&CurMI == &MI)
133 return i;
134 i++;
135 }
136 return ~0U;
137 };
138
139 // Pre-Populate vector of instructions to reschedule so that we don't
140 // clobber the iterator.
141 std::vector<MachineInstr *> Instructions;
142 for (auto &MI : *MBB) {
143 Instructions.push_back(x: &MI);
144 }
145
146 std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
147 std::map<unsigned, MachineInstr *> MultiUserLookup;
148 unsigned UseToBringDefCloserToCount = 0;
149 std::vector<MachineInstr *> PseudoIdempotentInstructions;
150 std::vector<MCRegister> PhysRegDefs;
151 for (auto *II : Instructions) {
152 for (unsigned i = 1; i < II->getNumOperands(); i++) {
153 MachineOperand &MO = II->getOperand(i);
154 if (!MO.isReg())
155 continue;
156
157 if (MO.getReg().isVirtual())
158 continue;
159
160 if (!MO.isDef())
161 continue;
162
163 PhysRegDefs.push_back(x: MO.getReg());
164 }
165 }
166
167 for (auto *II : Instructions) {
168 if (II->getNumOperands() == 0)
169 continue;
170 if (II->mayLoadOrStore())
171 continue;
172
173 MachineOperand &MO = II->getOperand(i: 0);
174 if (!MO.isReg() || !MO.getReg().isVirtual())
175 continue;
176 if (!MO.isDef())
177 continue;
178
179 bool IsPseudoIdempotent = true;
180 for (unsigned i = 1; i < II->getNumOperands(); i++) {
181
182 if (II->getOperand(i).isImm()) {
183 continue;
184 }
185
186 if (II->getOperand(i).isReg()) {
187 if (!II->getOperand(i).getReg().isVirtual())
188 if (!llvm::is_contained(Range&: PhysRegDefs,
189 Element: II->getOperand(i).getReg().asMCReg())) {
190 continue;
191 }
192 }
193
194 IsPseudoIdempotent = false;
195 break;
196 }
197
198 if (IsPseudoIdempotent) {
199 PseudoIdempotentInstructions.push_back(x: II);
200 continue;
201 }
202
203 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
204
205 MachineInstr *Def = II;
206 unsigned Distance = ~0U;
207 MachineInstr *UseToBringDefCloserTo = nullptr;
208 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
209 for (auto &UO : MRI->use_nodbg_operands(Reg: MO.getReg())) {
210 MachineInstr *UseInst = UO.getParent();
211
212 const unsigned DefLoc = getInstrIdx(*Def);
213 const unsigned UseLoc = getInstrIdx(*UseInst);
214 const unsigned Delta = (UseLoc - DefLoc);
215
216 if (UseInst->getParent() != Def->getParent())
217 continue;
218 if (DefLoc >= UseLoc)
219 continue;
220
221 if (Delta < Distance) {
222 Distance = Delta;
223 UseToBringDefCloserTo = UseInst;
224 MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
225 }
226 }
227
228 const auto BBE = MBB->instr_end();
229 MachineBasicBlock::iterator DefI = BBE;
230 MachineBasicBlock::iterator UseI = BBE;
231
232 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
233
234 if (DefI != BBE && UseI != BBE)
235 break;
236
237 if (&*BBI == Def) {
238 DefI = BBI;
239 continue;
240 }
241
242 if (&*BBI == UseToBringDefCloserTo) {
243 UseI = BBI;
244 continue;
245 }
246 }
247
248 if (DefI == BBE || UseI == BBE)
249 continue;
250
251 LLVM_DEBUG({
252 dbgs() << "Splicing ";
253 DefI->dump();
254 dbgs() << " right before: ";
255 UseI->dump();
256 });
257
258 MultiUsers[UseToBringDefCloserTo].push_back(x: Def);
259 Changed = true;
260 MBB->splice(Where: UseI, Other: MBB, From: DefI);
261 }
262
263 // Sort the defs for users of multiple defs lexographically.
264 for (const auto &E : MultiUserLookup) {
265
266 auto UseI = llvm::find_if(Range: MBB->instrs(), P: [&](MachineInstr &MI) -> bool {
267 return &MI == E.second;
268 });
269
270 if (UseI == MBB->instr_end())
271 continue;
272
273 LLVM_DEBUG(
274 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.");
275 Changed |= rescheduleLexographically(
276 instructions: MultiUsers[E.second], MBB,
277 getPos: [&]() -> MachineBasicBlock::iterator { return UseI; });
278 }
279
280 PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
281 LLVM_DEBUG(dbgs() << "Rescheduling Idempotent Instructions Lexographically.");
282 Changed |= rescheduleLexographically(
283 instructions: PseudoIdempotentInstructions, MBB,
284 getPos: [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
285
286 return Changed;
287}
288
289static bool propagateLocalCopies(MachineBasicBlock *MBB) {
290 bool Changed = false;
291 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
292
293 std::vector<MachineInstr *> Copies;
294 for (MachineInstr &MI : MBB->instrs()) {
295 if (MI.isCopy())
296 Copies.push_back(x: &MI);
297 }
298
299 for (MachineInstr *MI : Copies) {
300
301 if (!MI->getOperand(i: 0).isReg())
302 continue;
303 if (!MI->getOperand(i: 1).isReg())
304 continue;
305
306 const Register Dst = MI->getOperand(i: 0).getReg();
307 const Register Src = MI->getOperand(i: 1).getReg();
308
309 if (!Dst.isVirtual())
310 continue;
311 if (!Src.isVirtual())
312 continue;
313 // Not folding COPY instructions if regbankselect has not set the RCs.
314 // Why are we only considering Register Classes? Because the verifier
315 // sometimes gets upset if the register classes don't match even if the
316 // types do. A future patch might add COPY folding for matching types in
317 // pre-registerbankselect code.
318 if (!MRI.getRegClassOrNull(Reg: Dst))
319 continue;
320 if (MRI.getRegClass(Reg: Dst) != MRI.getRegClass(Reg: Src))
321 continue;
322
323 std::vector<MachineOperand *> Uses;
324 for (MachineOperand &MO : MRI.use_operands(Reg: Dst))
325 Uses.push_back(x: &MO);
326 for (auto *MO : Uses)
327 MO->setReg(Src);
328
329 Changed = true;
330 MI->eraseFromParent();
331 }
332
333 return Changed;
334}
335
336static bool doDefKillClear(MachineBasicBlock *MBB) {
337 bool Changed = false;
338
339 for (auto &MI : *MBB) {
340 for (auto &MO : MI.operands()) {
341 if (!MO.isReg())
342 continue;
343 if (!MO.isDef() && MO.isKill()) {
344 Changed = true;
345 MO.setIsKill(false);
346 }
347
348 if (MO.isDef() && MO.isDead()) {
349 Changed = true;
350 MO.setIsDead(false);
351 }
352 }
353 }
354
355 return Changed;
356}
357
358static bool runOnBasicBlock(MachineBasicBlock *MBB,
359 unsigned BasicBlockNum, VRegRenamer &Renamer) {
360 LLVM_DEBUG({
361 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
362 dbgs() << "\n\n================================================\n\n";
363 });
364
365 bool Changed = false;
366
367 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n");
368
369 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
370 MBB->dump(););
371 Changed |= propagateLocalCopies(MBB);
372 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
373
374 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
375 unsigned IdempotentInstCount = 0;
376 Changed |= rescheduleCanonically(PseudoIdempotentInstCount&: IdempotentInstCount, MBB);
377 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
378
379 Changed |= Renamer.renameVRegs(MBB, BBNum: BasicBlockNum);
380
381 // TODO: Consider dropping this. Dropping kill defs is probably not
382 // semantically sound.
383 Changed |= doDefKillClear(MBB);
384
385 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
386 dbgs() << "\n");
387 LLVM_DEBUG(
388 dbgs() << "\n\n================================================\n\n");
389 return Changed;
390}
391
392bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
393
394 static unsigned functionNum = 0;
395 if (CanonicalizeFunctionNumber != ~0U) {
396 if (CanonicalizeFunctionNumber != functionNum++)
397 return false;
398 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
399 << "\n";);
400 }
401
402 // we need a valid vreg to create a vreg type for skipping all those
403 // stray vreg numbers so reach alignment/canonical vreg values.
404 std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
405
406 LLVM_DEBUG(
407 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
408 dbgs() << "\n\n================================================\n\n";
409 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
410 for (auto MBB
411 : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
412 << "\n\n================================================\n\n";);
413
414 unsigned BBNum = 0;
415 bool Changed = false;
416 MachineRegisterInfo &MRI = MF.getRegInfo();
417 VRegRenamer Renamer(MRI);
418 for (auto *MBB : RPOList)
419 Changed |= runOnBasicBlock(MBB, BasicBlockNum: BBNum++, Renamer);
420
421 return Changed;
422}
423