//===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs peephole optimizations to cleanup ugly code sequences at
// MachineInstruction layer.
//
// Currently, there are two optimizations implemented:
// - One pre-RA MachineSSA pass to eliminate type promotion sequences, those
//   zero extend 32-bit subregisters to 64-bit registers, if the compiler
//   could prove the subregisters is defined by 32-bit operations in which
//   case the upper half of the underlying 64-bit registers were zeroed
//   implicitly.
//
// - One post-RA PreEmit pass to do final cleanup on some redundant
//   instructions generated due to bad RA on subregister.
//===----------------------------------------------------------------------===//
22
23#include "BPF.h"
24#include "BPFInstrInfo.h"
25#include "BPFTargetMachine.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/CodeGen/LivePhysRegs.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineFunctionPass.h"
31#include "llvm/CodeGen/MachineInstrBuilder.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/Support/Debug.h"
34#include <set>
35
36using namespace llvm;
37
38#define DEBUG_TYPE "bpf-mi-zext-elim"
39
40static cl::opt<int> GotolAbsLowBound("gotol-abs-low-bound", cl::Hidden,
41 cl::init(INT16_MAX >> 1), cl::desc("Specify gotol lower bound"));
42
43STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");
44
45namespace {
46
47struct BPFMIPeephole : public MachineFunctionPass {
48
49 static char ID;
50 const BPFInstrInfo *TII;
51 MachineFunction *MF;
52 MachineRegisterInfo *MRI;
53
54 BPFMIPeephole() : MachineFunctionPass(ID) {}
55
56private:
57 // Initialize class variables.
58 void initialize(MachineFunction &MFParm);
59
60 bool isCopyFrom32Def(MachineInstr *CopyMI);
61 bool isInsnFrom32Def(MachineInstr *DefInsn);
62 bool isPhiFrom32Def(MachineInstr *MovMI);
63 bool isMovFrom32Def(MachineInstr *MovMI);
64 bool eliminateZExtSeq();
65 bool eliminateZExt();
66
67 std::set<MachineInstr *> PhiInsns;
68
69public:
70
71 // Main entry point for this pass.
72 bool runOnMachineFunction(MachineFunction &MF) override {
73 if (skipFunction(F: MF.getFunction()))
74 return false;
75
76 initialize(MFParm&: MF);
77
78 // First try to eliminate (zext, lshift, rshift) and then
79 // try to eliminate zext.
80 bool ZExtSeqExist, ZExtExist;
81 ZExtSeqExist = eliminateZExtSeq();
82 ZExtExist = eliminateZExt();
83 return ZExtSeqExist || ZExtExist;
84 }
85};
86
87// Initialize class variables.
88void BPFMIPeephole::initialize(MachineFunction &MFParm) {
89 MF = &MFParm;
90 MRI = &MF->getRegInfo();
91 TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
92 LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
93}
94
95bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
96{
97 MachineOperand &opnd = CopyMI->getOperand(i: 1);
98
99 if (!opnd.isReg())
100 return false;
101
102 // Return false if getting value from a 32bit physical register.
103 // Most likely, this physical register is aliased to
104 // function call return value or current function parameters.
105 Register Reg = opnd.getReg();
106 if (!Reg.isVirtual())
107 return false;
108
109 if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
110 return false;
111
112 MachineInstr *DefInsn = MRI->getVRegDef(Reg);
113 if (!isInsnFrom32Def(DefInsn))
114 return false;
115
116 return true;
117}
118
119bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
120{
121 for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
122 MachineOperand &opnd = PhiMI->getOperand(i);
123
124 if (!opnd.isReg())
125 return false;
126
127 MachineInstr *PhiDef = MRI->getVRegDef(Reg: opnd.getReg());
128 if (!PhiDef)
129 return false;
130 if (PhiDef->isPHI()) {
131 if (!PhiInsns.insert(x: PhiDef).second)
132 return false;
133 if (!isPhiFrom32Def(PhiMI: PhiDef))
134 return false;
135 }
136 if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(CopyMI: PhiDef))
137 return false;
138 }
139
140 return true;
141}
142
143// The \p DefInsn instruction defines a virtual register.
144bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
145{
146 if (!DefInsn)
147 return false;
148
149 if (DefInsn->isPHI()) {
150 if (!PhiInsns.insert(x: DefInsn).second)
151 return false;
152 if (!isPhiFrom32Def(PhiMI: DefInsn))
153 return false;
154 } else if (DefInsn->getOpcode() == BPF::COPY) {
155 if (!isCopyFrom32Def(CopyMI: DefInsn))
156 return false;
157 }
158
159 return true;
160}
161
162bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
163{
164 MachineInstr *DefInsn = MRI->getVRegDef(Reg: MovMI->getOperand(i: 1).getReg());
165
166 LLVM_DEBUG(dbgs() << " Def of Mov Src:");
167 LLVM_DEBUG(DefInsn->dump());
168
169 PhiInsns.clear();
170 if (!isInsnFrom32Def(DefInsn))
171 return false;
172
173 LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n");
174
175 return true;
176}
177
178bool BPFMIPeephole::eliminateZExtSeq() {
179 MachineInstr* ToErase = nullptr;
180 bool Eliminated = false;
181
182 for (MachineBasicBlock &MBB : *MF) {
183 for (MachineInstr &MI : MBB) {
184 // If the previous instruction was marked for elimination, remove it now.
185 if (ToErase) {
186 ToErase->eraseFromParent();
187 ToErase = nullptr;
188 }
189
190 // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
191 //
192 // MOV_32_64 rB, wA
193 // SLL_ri rB, rB, 32
194 // SRL_ri rB, rB, 32
195 if (MI.getOpcode() == BPF::SRL_ri &&
196 MI.getOperand(i: 2).getImm() == 32) {
197 Register DstReg = MI.getOperand(i: 0).getReg();
198 Register ShfReg = MI.getOperand(i: 1).getReg();
199 MachineInstr *SllMI = MRI->getVRegDef(Reg: ShfReg);
200
201 LLVM_DEBUG(dbgs() << "Starting SRL found:");
202 LLVM_DEBUG(MI.dump());
203
204 if (!SllMI ||
205 SllMI->isPHI() ||
206 SllMI->getOpcode() != BPF::SLL_ri ||
207 SllMI->getOperand(i: 2).getImm() != 32)
208 continue;
209
210 LLVM_DEBUG(dbgs() << " SLL found:");
211 LLVM_DEBUG(SllMI->dump());
212
213 MachineInstr *MovMI = MRI->getVRegDef(Reg: SllMI->getOperand(i: 1).getReg());
214 if (!MovMI ||
215 MovMI->isPHI() ||
216 MovMI->getOpcode() != BPF::MOV_32_64)
217 continue;
218
219 LLVM_DEBUG(dbgs() << " Type cast Mov found:");
220 LLVM_DEBUG(MovMI->dump());
221
222 Register SubReg = MovMI->getOperand(i: 1).getReg();
223 if (!isMovFrom32Def(MovMI)) {
224 LLVM_DEBUG(dbgs()
225 << " One ZExt elim sequence failed qualifying elim.\n");
226 continue;
227 }
228
229 BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: BPF::SUBREG_TO_REG), DestReg: DstReg)
230 .addImm(Val: 0).addReg(RegNo: SubReg).addImm(Val: BPF::sub_32);
231
232 SllMI->eraseFromParent();
233 MovMI->eraseFromParent();
234 // MI is the right shift, we can't erase it in it's own iteration.
235 // Mark it to ToErase, and erase in the next iteration.
236 ToErase = &MI;
237 ZExtElemNum++;
238 Eliminated = true;
239 }
240 }
241 }
242
243 return Eliminated;
244}
245
246bool BPFMIPeephole::eliminateZExt() {
247 MachineInstr* ToErase = nullptr;
248 bool Eliminated = false;
249
250 for (MachineBasicBlock &MBB : *MF) {
251 for (MachineInstr &MI : MBB) {
252 // If the previous instruction was marked for elimination, remove it now.
253 if (ToErase) {
254 ToErase->eraseFromParent();
255 ToErase = nullptr;
256 }
257
258 if (MI.getOpcode() != BPF::MOV_32_64)
259 continue;
260
261 // Eliminate MOV_32_64 if possible.
262 // MOV_32_64 rA, wB
263 //
264 // If wB has been zero extended, replace it with a SUBREG_TO_REG.
265 // This is to workaround BPF programs where pkt->{data, data_end}
266 // is encoded as u32, but actually the verifier populates them
267 // as 64bit pointer. The MOV_32_64 will zero out the top 32 bits.
268 LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");
269 LLVM_DEBUG(MI.dump());
270
271 if (!isMovFrom32Def(MovMI: &MI))
272 continue;
273
274 LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");
275
276 Register dst = MI.getOperand(i: 0).getReg();
277 Register src = MI.getOperand(i: 1).getReg();
278
279 // Build a SUBREG_TO_REG instruction.
280 BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: BPF::SUBREG_TO_REG), DestReg: dst)
281 .addImm(Val: 0).addReg(RegNo: src).addImm(Val: BPF::sub_32);
282
283 ToErase = &MI;
284 Eliminated = true;
285 }
286 }
287
288 return Eliminated;
289}
290
291} // end default namespace
292
293INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
294 "BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
295 false, false)
296
297char BPFMIPeephole::ID = 0;
298FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
299
300STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");
301
302namespace {
303
304struct BPFMIPreEmitPeephole : public MachineFunctionPass {
305
306 static char ID;
307 MachineFunction *MF;
308 const TargetRegisterInfo *TRI;
309 const BPFInstrInfo *TII;
310 bool SupportGotol;
311
312 BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {}
313
314private:
315 // Initialize class variables.
316 void initialize(MachineFunction &MFParm);
317
318 bool in16BitRange(int Num);
319 bool eliminateRedundantMov();
320 bool adjustBranch();
321 bool insertMissingCallerSavedSpills();
322 bool removeMayGotoZero();
323 bool addExitAfterUnreachable();
324
325public:
326
327 // Main entry point for this pass.
328 bool runOnMachineFunction(MachineFunction &MF) override {
329 if (skipFunction(F: MF.getFunction()))
330 return false;
331
332 initialize(MFParm&: MF);
333
334 bool Changed;
335 Changed = eliminateRedundantMov();
336 if (SupportGotol)
337 Changed = adjustBranch() || Changed;
338 Changed |= insertMissingCallerSavedSpills();
339 Changed |= removeMayGotoZero();
340 Changed |= addExitAfterUnreachable();
341 return Changed;
342 }
343};
344
345// Initialize class variables.
346void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
347 MF = &MFParm;
348 TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
349 TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
350 SupportGotol = MF->getSubtarget<BPFSubtarget>().hasGotol();
351 LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
352}
353
354bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
355 MachineInstr* ToErase = nullptr;
356 bool Eliminated = false;
357
358 for (MachineBasicBlock &MBB : *MF) {
359 for (MachineInstr &MI : MBB) {
360 // If the previous instruction was marked for elimination, remove it now.
361 if (ToErase) {
362 LLVM_DEBUG(dbgs() << " Redundant Mov Eliminated:");
363 LLVM_DEBUG(ToErase->dump());
364 ToErase->eraseFromParent();
365 ToErase = nullptr;
366 }
367
368 // Eliminate identical move:
369 //
370 // MOV rA, rA
371 //
372 // Note that we cannot remove
373 // MOV_32_64 rA, wA
374 // MOV_rr_32 wA, wA
375 // as these two instructions having side effects, zeroing out
376 // top 32 bits of rA.
377 unsigned Opcode = MI.getOpcode();
378 if (Opcode == BPF::MOV_rr) {
379 Register dst = MI.getOperand(i: 0).getReg();
380 Register src = MI.getOperand(i: 1).getReg();
381
382 if (dst != src)
383 continue;
384
385 ToErase = &MI;
386 RedundantMovElemNum++;
387 Eliminated = true;
388 }
389 }
390 }
391
392 return Eliminated;
393}
394
395bool BPFMIPreEmitPeephole::in16BitRange(int Num) {
396 // Well, the cut-off is not precisely at 16bit range since
397 // new codes are added during the transformation. So let us
398 // a little bit conservative.
399 return Num >= -GotolAbsLowBound && Num <= GotolAbsLowBound;
400}
401
402// Before cpu=v4, only 16bit branch target offset (-0x8000 to 0x7fff)
403// is supported for both unconditional (JMP) and condition (JEQ, JSGT,
404// etc.) branches. In certain cases, e.g., full unrolling, the branch
405// target offset might exceed 16bit range. If this happens, the llvm
406// will generate incorrect code as the offset is truncated to 16bit.
407//
408// To fix this rare case, a new insn JMPL is introduced. This new
409// insn supports supports 32bit branch target offset. The compiler
410// does not use this insn during insn selection. Rather, BPF backend
411// will estimate the branch target offset and do JMP -> JMPL and
412// JEQ -> JEQ + JMPL conversion if the estimated branch target offset
413// is beyond 16bit.
414bool BPFMIPreEmitPeephole::adjustBranch() {
415 bool Changed = false;
416 int CurrNumInsns = 0;
417 DenseMap<MachineBasicBlock *, int> SoFarNumInsns;
418 DenseMap<MachineBasicBlock *, MachineBasicBlock *> FollowThroughBB;
419 std::vector<MachineBasicBlock *> MBBs;
420
421 MachineBasicBlock *PrevBB = nullptr;
422 for (MachineBasicBlock &MBB : *MF) {
423 // MBB.size() is the number of insns in this basic block, including some
424 // debug info, e.g., DEBUG_VALUE, so we may over-count a little bit.
425 // Typically we have way more normal insns than DEBUG_VALUE insns.
426 // Also, if we indeed need to convert conditional branch like JEQ to
427 // JEQ + JMPL, we actually introduced some new insns like below.
428 CurrNumInsns += (int)MBB.size();
429 SoFarNumInsns[&MBB] = CurrNumInsns;
430 if (PrevBB != nullptr)
431 FollowThroughBB[PrevBB] = &MBB;
432 PrevBB = &MBB;
433 // A list of original BBs to make later traveral easier.
434 MBBs.push_back(x: &MBB);
435 }
436 FollowThroughBB[PrevBB] = nullptr;
437
438 for (unsigned i = 0; i < MBBs.size(); i++) {
439 // We have four cases here:
440 // (1). no terminator, simple follow through.
441 // (2). jmp to another bb.
442 // (3). conditional jmp to another bb or follow through.
443 // (4). conditional jmp followed by an unconditional jmp.
444 MachineInstr *CondJmp = nullptr, *UncondJmp = nullptr;
445
446 MachineBasicBlock *MBB = MBBs[i];
447 for (MachineInstr &Term : MBB->terminators()) {
448 if (Term.isConditionalBranch()) {
449 assert(CondJmp == nullptr);
450 CondJmp = &Term;
451 } else if (Term.isUnconditionalBranch()) {
452 assert(UncondJmp == nullptr);
453 UncondJmp = &Term;
454 }
455 }
456
457 // (1). no terminator, simple follow through.
458 if (!CondJmp && !UncondJmp)
459 continue;
460
461 MachineBasicBlock *CondTargetBB, *JmpBB;
462 CurrNumInsns = SoFarNumInsns[MBB];
463
464 // (2). jmp to another bb.
465 if (!CondJmp && UncondJmp) {
466 JmpBB = UncondJmp->getOperand(i: 0).getMBB();
467 if (in16BitRange(Num: SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns))
468 continue;
469
470 // replace this insn as a JMPL.
471 BuildMI(BB: MBB, MIMD: UncondJmp->getDebugLoc(), MCID: TII->get(Opcode: BPF::JMPL)).addMBB(MBB: JmpBB);
472 UncondJmp->eraseFromParent();
473 Changed = true;
474 continue;
475 }
476
477 const BasicBlock *TermBB = MBB->getBasicBlock();
478 int Dist;
479
480 // (3). conditional jmp to another bb or follow through.
481 if (!UncondJmp) {
482 CondTargetBB = CondJmp->getOperand(i: 2).getMBB();
483 MachineBasicBlock *FollowBB = FollowThroughBB[MBB];
484 Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
485 if (in16BitRange(Num: Dist))
486 continue;
487
488 // We have
489 // B2: ...
490 // if (cond) goto B5
491 // B3: ...
492 // where B2 -> B5 is beyond 16bit range.
493 //
494 // We do not have 32bit cond jmp insn. So we try to do
495 // the following.
496 // B2: ...
497 // if (cond) goto New_B1
498 // New_B0 goto B3
499 // New_B1: gotol B5
500 // B3: ...
501 // Basically two new basic blocks are created.
502 MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(BB: TermBB);
503 MachineBasicBlock *New_B1 = MF->CreateMachineBasicBlock(BB: TermBB);
504
505 // Insert New_B0 and New_B1 into function block list.
506 MachineFunction::iterator MBB_I = ++MBB->getIterator();
507 MF->insert(MBBI: MBB_I, MBB: New_B0);
508 MF->insert(MBBI: MBB_I, MBB: New_B1);
509
510 // replace B2 cond jump
511 if (CondJmp->getOperand(i: 1).isReg())
512 BuildMI(BB&: *MBB, I: MachineBasicBlock::iterator(*CondJmp), MIMD: CondJmp->getDebugLoc(), MCID: TII->get(Opcode: CondJmp->getOpcode()))
513 .addReg(RegNo: CondJmp->getOperand(i: 0).getReg())
514 .addReg(RegNo: CondJmp->getOperand(i: 1).getReg())
515 .addMBB(MBB: New_B1);
516 else
517 BuildMI(BB&: *MBB, I: MachineBasicBlock::iterator(*CondJmp), MIMD: CondJmp->getDebugLoc(), MCID: TII->get(Opcode: CondJmp->getOpcode()))
518 .addReg(RegNo: CondJmp->getOperand(i: 0).getReg())
519 .addImm(Val: CondJmp->getOperand(i: 1).getImm())
520 .addMBB(MBB: New_B1);
521
522 // it is possible that CondTargetBB and FollowBB are the same. But the
523 // above Dist checking should already filtered this case.
524 MBB->removeSuccessor(Succ: CondTargetBB);
525 MBB->removeSuccessor(Succ: FollowBB);
526 MBB->addSuccessor(Succ: New_B0);
527 MBB->addSuccessor(Succ: New_B1);
528
529 // Populate insns in New_B0 and New_B1.
530 BuildMI(BB: New_B0, MIMD: CondJmp->getDebugLoc(), MCID: TII->get(Opcode: BPF::JMP)).addMBB(MBB: FollowBB);
531 BuildMI(BB: New_B1, MIMD: CondJmp->getDebugLoc(), MCID: TII->get(Opcode: BPF::JMPL))
532 .addMBB(MBB: CondTargetBB);
533
534 New_B0->addSuccessor(Succ: FollowBB);
535 New_B1->addSuccessor(Succ: CondTargetBB);
536 CondJmp->eraseFromParent();
537 Changed = true;
538 continue;
539 }
540
541 // (4). conditional jmp followed by an unconditional jmp.
542 CondTargetBB = CondJmp->getOperand(i: 2).getMBB();
543 JmpBB = UncondJmp->getOperand(i: 0).getMBB();
544
545 // We have
546 // B2: ...
547 // if (cond) goto B5
548 // JMP B7
549 // B3: ...
550 //
551 // If only B2->B5 is out of 16bit range, we can do
552 // B2: ...
553 // if (cond) goto new_B
554 // JMP B7
555 // New_B: gotol B5
556 // B3: ...
557 //
558 // If only 'JMP B7' is out of 16bit range, we can replace
559 // 'JMP B7' with 'JMPL B7'.
560 //
561 // If both B2->B5 and 'JMP B7' is out of range, just do
562 // both the above transformations.
563 Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
564 if (!in16BitRange(Num: Dist)) {
565 MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(BB: TermBB);
566
567 // Insert New_B0 into function block list.
568 MF->insert(MBBI: ++MBB->getIterator(), MBB: New_B);
569
570 // replace B2 cond jump
571 if (CondJmp->getOperand(i: 1).isReg())
572 BuildMI(BB&: *MBB, I: MachineBasicBlock::iterator(*CondJmp), MIMD: CondJmp->getDebugLoc(), MCID: TII->get(Opcode: CondJmp->getOpcode()))
573 .addReg(RegNo: CondJmp->getOperand(i: 0).getReg())
574 .addReg(RegNo: CondJmp->getOperand(i: 1).getReg())
575 .addMBB(MBB: New_B);
576 else
577 BuildMI(BB&: *MBB, I: MachineBasicBlock::iterator(*CondJmp), MIMD: CondJmp->getDebugLoc(), MCID: TII->get(Opcode: CondJmp->getOpcode()))
578 .addReg(RegNo: CondJmp->getOperand(i: 0).getReg())
579 .addImm(Val: CondJmp->getOperand(i: 1).getImm())
580 .addMBB(MBB: New_B);
581
582 if (CondTargetBB != JmpBB)
583 MBB->removeSuccessor(Succ: CondTargetBB);
584 MBB->addSuccessor(Succ: New_B);
585
586 // Populate insn in New_B.
587 BuildMI(BB: New_B, MIMD: CondJmp->getDebugLoc(), MCID: TII->get(Opcode: BPF::JMPL)).addMBB(MBB: CondTargetBB);
588
589 New_B->addSuccessor(Succ: CondTargetBB);
590 CondJmp->eraseFromParent();
591 Changed = true;
592 }
593
594 if (!in16BitRange(Num: SoFarNumInsns[JmpBB] - CurrNumInsns)) {
595 BuildMI(BB: MBB, MIMD: UncondJmp->getDebugLoc(), MCID: TII->get(Opcode: BPF::JMPL)).addMBB(MBB: JmpBB);
596 UncondJmp->eraseFromParent();
597 Changed = true;
598 }
599 }
600
601 return Changed;
602}
603
604static const unsigned CallerSavedRegs[] = {BPF::R0, BPF::R1, BPF::R2,
605 BPF::R3, BPF::R4, BPF::R5};
606
607struct BPFFastCall {
608 MachineInstr *MI;
609 unsigned LiveCallerSavedRegs;
610};
611
612static void collectBPFFastCalls(const TargetRegisterInfo *TRI,
613 LivePhysRegs &LiveRegs, MachineBasicBlock &BB,
614 SmallVectorImpl<BPFFastCall> &Calls) {
615 LiveRegs.init(TRI: *TRI);
616 LiveRegs.addLiveOuts(MBB: BB);
617 Calls.clear();
618 for (MachineInstr &MI : llvm::reverse(C&: BB)) {
619 if (MI.isCall()) {
620 unsigned LiveCallerSavedRegs = 0;
621 for (MCRegister R : CallerSavedRegs) {
622 bool DoSpillFill = false;
623 for (MCPhysReg SR : TRI->subregs(Reg: R))
624 DoSpillFill |= !MI.definesRegister(Reg: SR, TRI) && LiveRegs.contains(Reg: SR);
625 if (!DoSpillFill)
626 continue;
627 LiveCallerSavedRegs |= 1 << R;
628 }
629 if (LiveCallerSavedRegs)
630 Calls.push_back(Elt: {.MI: &MI, .LiveCallerSavedRegs: LiveCallerSavedRegs});
631 }
632 LiveRegs.stepBackward(MI);
633 }
634}
635
636static int64_t computeMinFixedObjOffset(MachineFrameInfo &MFI,
637 unsigned SlotSize) {
638 int64_t MinFixedObjOffset = 0;
639 // Same logic as in X86FrameLowering::adjustFrameForMsvcCxxEh()
640 for (int I = MFI.getObjectIndexBegin(); I < MFI.getObjectIndexEnd(); ++I) {
641 if (MFI.isDeadObjectIndex(ObjectIdx: I))
642 continue;
643 MinFixedObjOffset = std::min(a: MinFixedObjOffset, b: MFI.getObjectOffset(ObjectIdx: I));
644 }
645 MinFixedObjOffset -=
646 (SlotSize + MinFixedObjOffset % SlotSize) & (SlotSize - 1);
647 return MinFixedObjOffset;
648}
649
650bool BPFMIPreEmitPeephole::insertMissingCallerSavedSpills() {
651 MachineFrameInfo &MFI = MF->getFrameInfo();
652 SmallVector<BPFFastCall, 8> Calls;
653 LivePhysRegs LiveRegs;
654 const unsigned SlotSize = 8;
655 int64_t MinFixedObjOffset = computeMinFixedObjOffset(MFI, SlotSize);
656 bool Changed = false;
657 for (MachineBasicBlock &BB : *MF) {
658 collectBPFFastCalls(TRI, LiveRegs, BB, Calls);
659 Changed |= !Calls.empty();
660 for (BPFFastCall &Call : Calls) {
661 int64_t CurOffset = MinFixedObjOffset;
662 for (MCRegister Reg : CallerSavedRegs) {
663 if (((1 << Reg) & Call.LiveCallerSavedRegs) == 0)
664 continue;
665 // Allocate stack object
666 CurOffset -= SlotSize;
667 MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: CurOffset);
668 // Generate spill
669 BuildMI(BB, I: Call.MI->getIterator(), MIMD: Call.MI->getDebugLoc(),
670 MCID: TII->get(Opcode: BPF::STD))
671 .addReg(RegNo: Reg, flags: RegState::Kill)
672 .addReg(RegNo: BPF::R10)
673 .addImm(Val: CurOffset);
674 // Generate fill
675 BuildMI(BB, I: ++Call.MI->getIterator(), MIMD: Call.MI->getDebugLoc(),
676 MCID: TII->get(Opcode: BPF::LDD))
677 .addReg(RegNo: Reg, flags: RegState::Define)
678 .addReg(RegNo: BPF::R10)
679 .addImm(Val: CurOffset);
680 }
681 }
682 }
683 return Changed;
684}
685
686bool BPFMIPreEmitPeephole::removeMayGotoZero() {
687 bool Changed = false;
688 MachineBasicBlock *Prev_MBB, *Curr_MBB = nullptr;
689
690 for (MachineBasicBlock &MBB : make_early_inc_range(Range: reverse(C&: *MF))) {
691 Prev_MBB = Curr_MBB;
692 Curr_MBB = &MBB;
693 if (Prev_MBB == nullptr || Curr_MBB->empty())
694 continue;
695
696 MachineInstr &MI = Curr_MBB->back();
697 if (MI.getOpcode() != TargetOpcode::INLINEASM_BR)
698 continue;
699
700 const char *AsmStr = MI.getOperand(i: 0).getSymbolName();
701 SmallVector<StringRef, 4> AsmPieces;
702 SplitString(Source: AsmStr, OutFragments&: AsmPieces, Delimiters: ";\n");
703
704 // Do not support multiple insns in one inline asm.
705 if (AsmPieces.size() != 1)
706 continue;
707
708 // The asm insn must be a may_goto insn.
709 SmallVector<StringRef, 4> AsmOpPieces;
710 SplitString(Source: AsmPieces[0], OutFragments&: AsmOpPieces, Delimiters: " ");
711 if (AsmOpPieces.size() != 2 || AsmOpPieces[0] != "may_goto")
712 continue;
713 // Enforce the format of 'may_goto <label>'.
714 if (AsmOpPieces[1] != "${0:l}" && AsmOpPieces[1] != "$0")
715 continue;
716
717 // Get the may_goto branch target.
718 MachineOperand &MO = MI.getOperand(i: InlineAsm::MIOp_FirstOperand + 1);
719 if (!MO.isMBB() || MO.getMBB() != Prev_MBB)
720 continue;
721
722 Changed = true;
723 if (Curr_MBB->begin() == MI) {
724 // Single 'may_goto' insn in the same basic block.
725 Curr_MBB->removeSuccessor(Succ: Prev_MBB);
726 for (MachineBasicBlock *Pred : Curr_MBB->predecessors())
727 Pred->replaceSuccessor(Old: Curr_MBB, New: Prev_MBB);
728 Curr_MBB->eraseFromParent();
729 Curr_MBB = Prev_MBB;
730 } else {
731 // Remove 'may_goto' insn.
732 MI.eraseFromParent();
733 }
734 }
735
736 return Changed;
737}
738
739// If the last insn in a funciton is 'JAL &bpf_unreachable', let us add an
740// 'exit' insn after that insn. This will ensure no fallthrough at the last
741// insn, making kernel verification easier.
742bool BPFMIPreEmitPeephole::addExitAfterUnreachable() {
743 MachineBasicBlock &MBB = MF->back();
744 MachineInstr &MI = MBB.back();
745 if (MI.getOpcode() != BPF::JAL || !MI.getOperand(i: 0).isGlobal() ||
746 MI.getOperand(i: 0).getGlobal()->getName() != BPF_TRAP)
747 return false;
748
749 BuildMI(BB: &MBB, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: BPF::RET));
750 return true;
751}
752
753} // end default namespace
754
755INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
756 "BPF PreEmit Peephole Optimization", false, false)
757
758char BPFMIPreEmitPeephole::ID = 0;
759FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
760{
761 return new BPFMIPreEmitPeephole();
762}
763