1//===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs peephole optimizations to cleanup ugly code sequences at
10// MachineInstruction layer.
11//
12// Currently, there are two optimizations implemented:
13// - One pre-RA MachineSSA pass to eliminate type promotion sequences, those
14// zero extend 32-bit subregisters to 64-bit registers, if the compiler
15// could prove the subregisters is defined by 32-bit operations in which
16// case the upper half of the underlying 64-bit registers were zeroed
17// implicitly.
18//
19// - One post-RA PreEmit pass to do final cleanup on some redundant
20// instructions generated due to bad RA on subregister.
21//===----------------------------------------------------------------------===//
22
23#include "BPF.h"
24#include "BPFInstrInfo.h"
25#include "BPFTargetMachine.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/CodeGen/LivePhysRegs.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineFunctionPass.h"
31#include "llvm/CodeGen/MachineInstrBuilder.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/Support/Debug.h"
34#include <set>
35
36using namespace llvm;
37
38#define DEBUG_TYPE "bpf-mi-zext-elim"
39
40static cl::opt<int> GotolAbsLowBound("gotol-abs-low-bound", cl::Hidden,
41 cl::init(INT16_MAX >> 1), cl::desc("Specify gotol lower bound"));
42
43STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");
44
45namespace {
46
47struct BPFMIPeephole : public MachineFunctionPass {
48
49 static char ID;
50 const BPFInstrInfo *TII;
51 MachineFunction *MF;
52 MachineRegisterInfo *MRI;
53
54 BPFMIPeephole() : MachineFunctionPass(ID) {}
55
56private:
57 // Initialize class variables.
58 void initialize(MachineFunction &MFParm);
59
60 bool isCopyFrom32Def(MachineInstr *CopyMI);
61 bool isInsnFrom32Def(MachineInstr *DefInsn);
62 bool isPhiFrom32Def(MachineInstr *MovMI);
63 bool isMovFrom32Def(MachineInstr *MovMI);
64 bool eliminateZExtSeq();
65 bool eliminateZExt();
66
67 std::set<MachineInstr *> PhiInsns;
68
69public:
70
71 // Main entry point for this pass.
72 bool runOnMachineFunction(MachineFunction &MF) override {
73 if (skipFunction(F: MF.getFunction()))
74 return false;
75
76 initialize(MFParm&: MF);
77
78 // First try to eliminate (zext, lshift, rshift) and then
79 // try to eliminate zext.
80 bool ZExtSeqExist, ZExtExist;
81 ZExtSeqExist = eliminateZExtSeq();
82 ZExtExist = eliminateZExt();
83 return ZExtSeqExist || ZExtExist;
84 }
85};
86
87// Initialize class variables.
88void BPFMIPeephole::initialize(MachineFunction &MFParm) {
89 MF = &MFParm;
90 MRI = &MF->getRegInfo();
91 TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
92 LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
93}
94
95bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
96{
97 MachineOperand &opnd = CopyMI->getOperand(i: 1);
98
99 if (!opnd.isReg())
100 return false;
101
102 // Return false if getting value from a 32bit physical register.
103 // Most likely, this physical register is aliased to
104 // function call return value or current function parameters.
105 Register Reg = opnd.getReg();
106 if (!Reg.isVirtual())
107 return false;
108
109 if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
110 return false;
111
112 MachineInstr *DefInsn = MRI->getVRegDef(Reg);
113 if (!isInsnFrom32Def(DefInsn))
114 return false;
115
116 return true;
117}
118
119bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
120{
121 for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
122 MachineOperand &opnd = PhiMI->getOperand(i);
123
124 if (!opnd.isReg())
125 return false;
126
127 MachineInstr *PhiDef = MRI->getVRegDef(Reg: opnd.getReg());
128 if (!PhiDef)
129 return false;
130 if (PhiDef->isPHI()) {
131 if (!PhiInsns.insert(x: PhiDef).second)
132 return false;
133 if (!isPhiFrom32Def(PhiMI: PhiDef))
134 return false;
135 }
136 if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(CopyMI: PhiDef))
137 return false;
138 }
139
140 return true;
141}
142
143// The \p DefInsn instruction defines a virtual register.
144bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
145{
146 if (!DefInsn)
147 return false;
148
149 if (DefInsn->isPHI()) {
150 if (!PhiInsns.insert(x: DefInsn).second)
151 return false;
152 if (!isPhiFrom32Def(PhiMI: DefInsn))
153 return false;
154 } else if (DefInsn->getOpcode() == BPF::COPY) {
155 if (!isCopyFrom32Def(CopyMI: DefInsn))
156 return false;
157 }
158
159 return true;
160}
161
162bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
163{
164 MachineInstr *DefInsn = MRI->getVRegDef(Reg: MovMI->getOperand(i: 1).getReg());
165
166 LLVM_DEBUG(dbgs() << " Def of Mov Src:");
167 LLVM_DEBUG(DefInsn->dump());
168
169 PhiInsns.clear();
170 if (!isInsnFrom32Def(DefInsn))
171 return false;
172
173 LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n");
174
175 return true;
176}
177
178bool BPFMIPeephole::eliminateZExtSeq() {
179 MachineInstr* ToErase = nullptr;
180 bool Eliminated = false;
181
182 for (MachineBasicBlock &MBB : *MF) {
183 for (MachineInstr &MI : MBB) {
184 // If the previous instruction was marked for elimination, remove it now.
185 if (ToErase) {
186 ToErase->eraseFromParent();
187 ToErase = nullptr;
188 }
189
190 // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
191 //
192 // MOV_32_64 rB, wA
193 // SLL_ri rB, rB, 32
194 // SRL_ri rB, rB, 32
195 if (MI.getOpcode() == BPF::SRL_ri &&
196 MI.getOperand(i: 2).getImm() == 32) {
197 Register DstReg = MI.getOperand(i: 0).getReg();
198 Register ShfReg = MI.getOperand(i: 1).getReg();
199 MachineInstr *SllMI = MRI->getVRegDef(Reg: ShfReg);
200
201 LLVM_DEBUG(dbgs() << "Starting SRL found:");
202 LLVM_DEBUG(MI.dump());
203
204 if (!SllMI ||
205 SllMI->isPHI() ||
206 SllMI->getOpcode() != BPF::SLL_ri ||
207 SllMI->getOperand(i: 2).getImm() != 32)
208 continue;
209
210 LLVM_DEBUG(dbgs() << " SLL found:");
211 LLVM_DEBUG(SllMI->dump());
212
213 MachineInstr *MovMI = MRI->getVRegDef(Reg: SllMI->getOperand(i: 1).getReg());
214 if (!MovMI ||
215 MovMI->isPHI() ||
216 MovMI->getOpcode() != BPF::MOV_32_64)
217 continue;
218
219 LLVM_DEBUG(dbgs() << " Type cast Mov found:");
220 LLVM_DEBUG(MovMI->dump());
221
222 Register SubReg = MovMI->getOperand(i: 1).getReg();
223 if (!isMovFrom32Def(MovMI)) {
224 LLVM_DEBUG(dbgs()
225 << " One ZExt elim sequence failed qualifying elim.\n");
226 continue;
227 }
228
229 BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: BPF::SUBREG_TO_REG), DestReg: DstReg)
230 .addReg(RegNo: SubReg)
231 .addImm(Val: BPF::sub_32);
232
233 SllMI->eraseFromParent();
234 MovMI->eraseFromParent();
235 // MI is the right shift, we can't erase it in it's own iteration.
236 // Mark it to ToErase, and erase in the next iteration.
237 ToErase = &MI;
238 ZExtElemNum++;
239 Eliminated = true;
240 }
241 }
242 }
243
244 return Eliminated;
245}
246
247bool BPFMIPeephole::eliminateZExt() {
248 MachineInstr* ToErase = nullptr;
249 bool Eliminated = false;
250
251 for (MachineBasicBlock &MBB : *MF) {
252 for (MachineInstr &MI : MBB) {
253 // If the previous instruction was marked for elimination, remove it now.
254 if (ToErase) {
255 ToErase->eraseFromParent();
256 ToErase = nullptr;
257 }
258
259 if (MI.getOpcode() != BPF::MOV_32_64)
260 continue;
261
262 // Eliminate MOV_32_64 if possible.
263 // MOV_32_64 rA, wB
264 //
265 // If wB has been zero extended, replace it with a SUBREG_TO_REG.
266 // This is to workaround BPF programs where pkt->{data, data_end}
267 // is encoded as u32, but actually the verifier populates them
268 // as 64bit pointer. The MOV_32_64 will zero out the top 32 bits.
269 LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");
270 LLVM_DEBUG(MI.dump());
271
272 if (!isMovFrom32Def(MovMI: &MI))
273 continue;
274
275 LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");
276
277 Register dst = MI.getOperand(i: 0).getReg();
278 Register src = MI.getOperand(i: 1).getReg();
279
280 // Build a SUBREG_TO_REG instruction.
281 BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: BPF::SUBREG_TO_REG), DestReg: dst)
282 .addReg(RegNo: src)
283 .addImm(Val: BPF::sub_32);
284
285 ToErase = &MI;
286 Eliminated = true;
287 }
288 }
289
290 return Eliminated;
291}
292
293} // end default namespace
294
295INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
296 "BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
297 false, false)
298
299char BPFMIPeephole::ID = 0;
300FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
301
302STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");
303
304namespace {
305
306struct BPFMIPreEmitPeephole : public MachineFunctionPass {
307
308 static char ID;
309 MachineFunction *MF;
310 const TargetRegisterInfo *TRI;
311 const BPFInstrInfo *TII;
312 bool SupportGotol;
313
314 BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {}
315
316private:
317 // Initialize class variables.
318 void initialize(MachineFunction &MFParm);
319
320 bool in16BitRange(int Num);
321 bool eliminateRedundantMov();
322 bool adjustBranch();
323 bool insertMissingCallerSavedSpills();
324 bool removeMayGotoZero();
325 bool addExitAfterUnreachable();
326 bool expandStackArgPseudos();
327
328public:
329
330 // Main entry point for this pass.
331 bool runOnMachineFunction(MachineFunction &MF) override {
332 initialize(MFParm&: MF);
333
334 bool Changed = expandStackArgPseudos();
335 if (skipFunction(F: MF.getFunction()))
336 return Changed;
337
338 Changed |= eliminateRedundantMov();
339 if (SupportGotol)
340 Changed |= adjustBranch();
341 Changed |= insertMissingCallerSavedSpills();
342 Changed |= removeMayGotoZero();
343 Changed |= addExitAfterUnreachable();
344 return Changed;
345 }
346};
347
348// Initialize class variables.
349void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
350 MF = &MFParm;
351 TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
352 TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
353 SupportGotol = MF->getSubtarget<BPFSubtarget>().hasGotol();
354 LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
355}
356
357bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
358 MachineInstr* ToErase = nullptr;
359 bool Eliminated = false;
360
361 for (MachineBasicBlock &MBB : *MF) {
362 for (MachineInstr &MI : MBB) {
363 // If the previous instruction was marked for elimination, remove it now.
364 if (ToErase) {
365 LLVM_DEBUG(dbgs() << " Redundant Mov Eliminated:");
366 LLVM_DEBUG(ToErase->dump());
367 ToErase->eraseFromParent();
368 ToErase = nullptr;
369 }
370
371 // Eliminate identical move:
372 //
373 // MOV rA, rA
374 //
375 // Note that we cannot remove
376 // MOV_32_64 rA, wA
377 // MOV_rr_32 wA, wA
378 // as these two instructions having side effects, zeroing out
379 // top 32 bits of rA.
380 unsigned Opcode = MI.getOpcode();
381 if (Opcode == BPF::MOV_rr) {
382 Register dst = MI.getOperand(i: 0).getReg();
383 Register src = MI.getOperand(i: 1).getReg();
384
385 if (dst != src)
386 continue;
387
388 ToErase = &MI;
389 RedundantMovElemNum++;
390 Eliminated = true;
391 }
392 }
393 }
394
395 return Eliminated;
396}
397
398bool BPFMIPreEmitPeephole::in16BitRange(int Num) {
399 // Well, the cut-off is not precisely at 16bit range since
400 // new codes are added during the transformation. So let us
401 // a little bit conservative.
402 return Num >= -GotolAbsLowBound && Num <= GotolAbsLowBound;
403}
404
405// Before cpu=v4, only 16bit branch target offset (-0x8000 to 0x7fff)
406// is supported for both unconditional (JMP) and condition (JEQ, JSGT,
407// etc.) branches. In certain cases, e.g., full unrolling, the branch
408// target offset might exceed 16bit range. If this happens, the llvm
409// will generate incorrect code as the offset is truncated to 16bit.
410//
411// To fix this rare case, a new insn JMPL is introduced. This new
412// insn supports supports 32bit branch target offset. The compiler
413// does not use this insn during insn selection. Rather, BPF backend
414// will estimate the branch target offset and do JMP -> JMPL and
415// JEQ -> JEQ + JMPL conversion if the estimated branch target offset
416// is beyond 16bit.
417bool BPFMIPreEmitPeephole::adjustBranch() {
418 bool Changed = false;
419 int CurrNumInsns = 0;
420 DenseMap<MachineBasicBlock *, int> SoFarNumInsns;
421 DenseMap<MachineBasicBlock *, MachineBasicBlock *> FollowThroughBB;
422 std::vector<MachineBasicBlock *> MBBs;
423
424 MachineBasicBlock *PrevBB = nullptr;
425 for (MachineBasicBlock &MBB : *MF) {
426 // MBB.size() is the number of insns in this basic block, including some
427 // debug info, e.g., DEBUG_VALUE, so we may over-count a little bit.
428 // Typically we have way more normal insns than DEBUG_VALUE insns.
429 // Also, if we indeed need to convert conditional branch like JEQ to
430 // JEQ + JMPL, we actually introduced some new insns like below.
431 CurrNumInsns += (int)MBB.size();
432 SoFarNumInsns[&MBB] = CurrNumInsns;
433 if (PrevBB != nullptr)
434 FollowThroughBB[PrevBB] = &MBB;
435 PrevBB = &MBB;
436 // A list of original BBs to make later traveral easier.
437 MBBs.push_back(x: &MBB);
438 }
439 FollowThroughBB[PrevBB] = nullptr;
440
441 for (unsigned i = 0; i < MBBs.size(); i++) {
442 // We have four cases here:
443 // (1). no terminator, simple follow through.
444 // (2). jmp to another bb.
445 // (3). conditional jmp to another bb or follow through.
446 // (4). conditional jmp followed by an unconditional jmp.
447 MachineInstr *CondJmp = nullptr, *UncondJmp = nullptr;
448
449 MachineBasicBlock *MBB = MBBs[i];
450 for (MachineInstr &Term : MBB->terminators()) {
451 if (Term.isConditionalBranch()) {
452 assert(CondJmp == nullptr);
453 CondJmp = &Term;
454 } else if (Term.isUnconditionalBranch()) {
455 assert(UncondJmp == nullptr);
456 UncondJmp = &Term;
457 }
458 }
459
460 // (1). no terminator, simple follow through.
461 if (!CondJmp && !UncondJmp)
462 continue;
463
464 MachineBasicBlock *CondTargetBB, *JmpBB;
465 CurrNumInsns = SoFarNumInsns[MBB];
466
467 // (2). jmp to another bb.
468 if (!CondJmp && UncondJmp) {
469 JmpBB = UncondJmp->getOperand(i: 0).getMBB();
470 if (in16BitRange(Num: SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns))
471 continue;
472
473 // replace this insn as a JMPL.
474 BuildMI(BB: MBB, MIMD: UncondJmp->getDebugLoc(), MCID: TII->get(Opcode: BPF::JMPL)).addMBB(MBB: JmpBB);
475 UncondJmp->eraseFromParent();
476 Changed = true;
477 continue;
478 }
479
480 const BasicBlock *TermBB = MBB->getBasicBlock();
481 int Dist;
482
483 // (3). conditional jmp to another bb or follow through.
484 if (!UncondJmp) {
485 CondTargetBB = CondJmp->getOperand(i: 2).getMBB();
486 MachineBasicBlock *FollowBB = FollowThroughBB[MBB];
487 Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
488 if (in16BitRange(Num: Dist))
489 continue;
490
491 // We have
492 // B2: ...
493 // if (cond) goto B5
494 // B3: ...
495 // where B2 -> B5 is beyond 16bit range.
496 //
497 // We do not have 32bit cond jmp insn. So we try to do
498 // the following.
499 // B2: ...
500 // if (cond) goto New_B1
501 // New_B0 goto B3
502 // New_B1: gotol B5
503 // B3: ...
504 // Basically two new basic blocks are created.
505 MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(BB: TermBB);
506 MachineBasicBlock *New_B1 = MF->CreateMachineBasicBlock(BB: TermBB);
507
508 // Insert New_B0 and New_B1 into function block list.
509 MachineFunction::iterator MBB_I = ++MBB->getIterator();
510 MF->insert(MBBI: MBB_I, MBB: New_B0);
511 MF->insert(MBBI: MBB_I, MBB: New_B1);
512
513 // replace B2 cond jump
514 if (CondJmp->getOperand(i: 1).isReg())
515 BuildMI(BB&: *MBB, I: MachineBasicBlock::iterator(*CondJmp), MIMD: CondJmp->getDebugLoc(), MCID: TII->get(Opcode: CondJmp->getOpcode()))
516 .addReg(RegNo: CondJmp->getOperand(i: 0).getReg())
517 .addReg(RegNo: CondJmp->getOperand(i: 1).getReg())
518 .addMBB(MBB: New_B1);
519 else
520 BuildMI(BB&: *MBB, I: MachineBasicBlock::iterator(*CondJmp), MIMD: CondJmp->getDebugLoc(), MCID: TII->get(Opcode: CondJmp->getOpcode()))
521 .addReg(RegNo: CondJmp->getOperand(i: 0).getReg())
522 .addImm(Val: CondJmp->getOperand(i: 1).getImm())
523 .addMBB(MBB: New_B1);
524
525 // it is possible that CondTargetBB and FollowBB are the same. But the
526 // above Dist checking should already filtered this case.
527 MBB->removeSuccessor(Succ: CondTargetBB);
528 MBB->removeSuccessor(Succ: FollowBB);
529 MBB->addSuccessor(Succ: New_B0);
530 MBB->addSuccessor(Succ: New_B1);
531
532 // Populate insns in New_B0 and New_B1.
533 BuildMI(BB: New_B0, MIMD: CondJmp->getDebugLoc(), MCID: TII->get(Opcode: BPF::JMP)).addMBB(MBB: FollowBB);
534 BuildMI(BB: New_B1, MIMD: CondJmp->getDebugLoc(), MCID: TII->get(Opcode: BPF::JMPL))
535 .addMBB(MBB: CondTargetBB);
536
537 New_B0->addSuccessor(Succ: FollowBB);
538 New_B1->addSuccessor(Succ: CondTargetBB);
539 CondJmp->eraseFromParent();
540 Changed = true;
541 continue;
542 }
543
544 // (4). conditional jmp followed by an unconditional jmp.
545 CondTargetBB = CondJmp->getOperand(i: 2).getMBB();
546 JmpBB = UncondJmp->getOperand(i: 0).getMBB();
547
548 // We have
549 // B2: ...
550 // if (cond) goto B5
551 // JMP B7
552 // B3: ...
553 //
554 // If only B2->B5 is out of 16bit range, we can do
555 // B2: ...
556 // if (cond) goto new_B
557 // JMP B7
558 // New_B: gotol B5
559 // B3: ...
560 //
561 // If only 'JMP B7' is out of 16bit range, we can replace
562 // 'JMP B7' with 'JMPL B7'.
563 //
564 // If both B2->B5 and 'JMP B7' is out of range, just do
565 // both the above transformations.
566 Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
567 if (!in16BitRange(Num: Dist)) {
568 MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(BB: TermBB);
569
570 // Insert New_B0 into function block list.
571 MF->insert(MBBI: ++MBB->getIterator(), MBB: New_B);
572
573 // replace B2 cond jump
574 if (CondJmp->getOperand(i: 1).isReg())
575 BuildMI(BB&: *MBB, I: MachineBasicBlock::iterator(*CondJmp), MIMD: CondJmp->getDebugLoc(), MCID: TII->get(Opcode: CondJmp->getOpcode()))
576 .addReg(RegNo: CondJmp->getOperand(i: 0).getReg())
577 .addReg(RegNo: CondJmp->getOperand(i: 1).getReg())
578 .addMBB(MBB: New_B);
579 else
580 BuildMI(BB&: *MBB, I: MachineBasicBlock::iterator(*CondJmp), MIMD: CondJmp->getDebugLoc(), MCID: TII->get(Opcode: CondJmp->getOpcode()))
581 .addReg(RegNo: CondJmp->getOperand(i: 0).getReg())
582 .addImm(Val: CondJmp->getOperand(i: 1).getImm())
583 .addMBB(MBB: New_B);
584
585 if (CondTargetBB != JmpBB)
586 MBB->removeSuccessor(Succ: CondTargetBB);
587 MBB->addSuccessor(Succ: New_B);
588
589 // Populate insn in New_B.
590 BuildMI(BB: New_B, MIMD: CondJmp->getDebugLoc(), MCID: TII->get(Opcode: BPF::JMPL)).addMBB(MBB: CondTargetBB);
591
592 New_B->addSuccessor(Succ: CondTargetBB);
593 CondJmp->eraseFromParent();
594 Changed = true;
595 }
596
597 if (!in16BitRange(Num: SoFarNumInsns[JmpBB] - CurrNumInsns)) {
598 BuildMI(BB: MBB, MIMD: UncondJmp->getDebugLoc(), MCID: TII->get(Opcode: BPF::JMPL)).addMBB(MBB: JmpBB);
599 UncondJmp->eraseFromParent();
600 Changed = true;
601 }
602 }
603
604 return Changed;
605}
606
607static const unsigned CallerSavedRegs[] = {BPF::R0, BPF::R1, BPF::R2,
608 BPF::R3, BPF::R4, BPF::R5};
609
610struct BPFFastCall {
611 MachineInstr *MI;
612 unsigned LiveCallerSavedRegs;
613};
614
615static void collectBPFFastCalls(const TargetRegisterInfo *TRI,
616 LivePhysRegs &LiveRegs, MachineBasicBlock &BB,
617 SmallVectorImpl<BPFFastCall> &Calls) {
618 LiveRegs.init(TRI: *TRI);
619 LiveRegs.addLiveOuts(MBB: BB);
620 Calls.clear();
621 for (MachineInstr &MI : llvm::reverse(C&: BB)) {
622 if (MI.isCall()) {
623 unsigned LiveCallerSavedRegs = 0;
624 for (MCRegister R : CallerSavedRegs) {
625 bool DoSpillFill = false;
626 for (MCPhysReg SR : TRI->subregs(Reg: R))
627 DoSpillFill |= !MI.definesRegister(Reg: SR, TRI) && LiveRegs.contains(Reg: SR);
628 if (!DoSpillFill)
629 continue;
630 LiveCallerSavedRegs |= 1 << R;
631 }
632 if (LiveCallerSavedRegs)
633 Calls.push_back(Elt: {.MI: &MI, .LiveCallerSavedRegs: LiveCallerSavedRegs});
634 }
635 LiveRegs.stepBackward(MI);
636 }
637}
638
639static int64_t computeMinFixedObjOffset(MachineFrameInfo &MFI,
640 unsigned SlotSize) {
641 int64_t MinFixedObjOffset = 0;
642 // Same logic as in X86FrameLowering::adjustFrameForMsvcCxxEh()
643 for (int I = MFI.getObjectIndexBegin(); I < MFI.getObjectIndexEnd(); ++I) {
644 if (MFI.isDeadObjectIndex(ObjectIdx: I))
645 continue;
646 MinFixedObjOffset = std::min(a: MinFixedObjOffset, b: MFI.getObjectOffset(ObjectIdx: I));
647 }
648 MinFixedObjOffset -=
649 (SlotSize + MinFixedObjOffset % SlotSize) & (SlotSize - 1);
650 return MinFixedObjOffset;
651}
652
653bool BPFMIPreEmitPeephole::insertMissingCallerSavedSpills() {
654 MachineFrameInfo &MFI = MF->getFrameInfo();
655 SmallVector<BPFFastCall, 8> Calls;
656 LivePhysRegs LiveRegs;
657 const unsigned SlotSize = 8;
658 int64_t MinFixedObjOffset = computeMinFixedObjOffset(MFI, SlotSize);
659 bool Changed = false;
660 for (MachineBasicBlock &BB : *MF) {
661 collectBPFFastCalls(TRI, LiveRegs, BB, Calls);
662 Changed |= !Calls.empty();
663 for (BPFFastCall &Call : Calls) {
664 int64_t CurOffset = MinFixedObjOffset;
665 for (MCRegister Reg : CallerSavedRegs) {
666 if (((1 << Reg) & Call.LiveCallerSavedRegs) == 0)
667 continue;
668 // Allocate stack object
669 CurOffset -= SlotSize;
670 MFI.CreateFixedSpillStackObject(Size: SlotSize, SPOffset: CurOffset);
671 // Generate spill
672 BuildMI(BB, I: Call.MI->getIterator(), MIMD: Call.MI->getDebugLoc(),
673 MCID: TII->get(Opcode: BPF::STD))
674 .addReg(RegNo: Reg, Flags: RegState::Kill)
675 .addReg(RegNo: BPF::R10)
676 .addImm(Val: CurOffset);
677 // Generate fill
678 BuildMI(BB, I: ++Call.MI->getIterator(), MIMD: Call.MI->getDebugLoc(),
679 MCID: TII->get(Opcode: BPF::LDD))
680 .addReg(RegNo: Reg, Flags: RegState::Define)
681 .addReg(RegNo: BPF::R10)
682 .addImm(Val: CurOffset);
683 }
684 }
685 }
686 return Changed;
687}
688
689bool BPFMIPreEmitPeephole::removeMayGotoZero() {
690 bool Changed = false;
691 MachineBasicBlock *Prev_MBB, *Curr_MBB = nullptr;
692
693 for (MachineBasicBlock &MBB : make_early_inc_range(Range: reverse(C&: *MF))) {
694 Prev_MBB = Curr_MBB;
695 Curr_MBB = &MBB;
696 if (Prev_MBB == nullptr || Curr_MBB->empty())
697 continue;
698
699 MachineInstr &MI = Curr_MBB->back();
700 if (MI.getOpcode() != TargetOpcode::INLINEASM_BR)
701 continue;
702
703 const char *AsmStr = MI.getOperand(i: 0).getSymbolName();
704 SmallVector<StringRef, 4> AsmPieces;
705 SplitString(Source: AsmStr, OutFragments&: AsmPieces, Delimiters: ";\n");
706
707 // Do not support multiple insns in one inline asm.
708 if (AsmPieces.size() != 1)
709 continue;
710
711 // The asm insn must be a may_goto insn.
712 SmallVector<StringRef, 4> AsmOpPieces;
713 SplitString(Source: AsmPieces[0], OutFragments&: AsmOpPieces, Delimiters: " ");
714 if (AsmOpPieces.size() != 2 || AsmOpPieces[0] != "may_goto")
715 continue;
716 // Enforce the format of 'may_goto <label>'.
717 if (AsmOpPieces[1] != "${0:l}" && AsmOpPieces[1] != "$0")
718 continue;
719
720 // Get the may_goto branch target.
721 MachineOperand &MO = MI.getOperand(i: InlineAsm::MIOp_FirstOperand + 1);
722 if (!MO.isMBB() || MO.getMBB() != Prev_MBB)
723 continue;
724
725 Changed = true;
726 if (Curr_MBB->begin() == MI) {
727 // Single 'may_goto' insn in the same basic block.
728 Curr_MBB->removeSuccessor(Succ: Prev_MBB);
729 for (MachineBasicBlock *Pred : Curr_MBB->predecessors())
730 Pred->replaceSuccessor(Old: Curr_MBB, New: Prev_MBB);
731 Curr_MBB->eraseFromParent();
732 Curr_MBB = Prev_MBB;
733 } else {
734 // Remove 'may_goto' insn.
735 MI.eraseFromParent();
736 }
737 }
738
739 return Changed;
740}
741
742// If the last insn in a funciton is 'JAL &bpf_unreachable', let us add an
743// 'exit' insn after that insn. This will ensure no fallthrough at the last
744// insn, making kernel verification easier.
745bool BPFMIPreEmitPeephole::addExitAfterUnreachable() {
746 MachineBasicBlock &MBB = MF->back();
747 MachineInstr &MI = MBB.back();
748 if (MI.getOpcode() != BPF::JAL || !MI.getOperand(i: 0).isGlobal() ||
749 MI.getOperand(i: 0).getGlobal()->getName() != BPF_TRAP)
750 return false;
751
752 BuildMI(BB: &MBB, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: BPF::RET));
753 return true;
754}
755
756bool BPFMIPreEmitPeephole::expandStackArgPseudos() {
757 bool Changed = false;
758
759 for (MachineBasicBlock &MBB : *MF) {
760 for (auto It = MBB.begin(), End = MBB.end(); It != End;) {
761 MachineInstr &MI = *It++;
762 DebugLoc DL = MI.getDebugLoc();
763
764 switch (MI.getOpcode()) {
765 default:
766 break;
767
768 case BPF::LOAD_STACK_ARG_PSEUDO: {
769 Register DstReg = MI.getOperand(i: 0).getReg();
770 int16_t Off = MI.getOperand(i: 1).getImm();
771
772 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: BPF::LDD), DestReg: DstReg)
773 .addReg(RegNo: BPF::R11)
774 .addImm(Val: Off);
775 MI.eraseFromParent();
776 Changed = true;
777 break;
778 }
779
780 case BPF::STORE_STACK_ARG_PSEUDO: {
781 int16_t Off = MI.getOperand(i: 0).getImm();
782 const MachineOperand &SrcMO = MI.getOperand(i: 1);
783 Register SrcReg = SrcMO.getReg();
784 bool IsKill = SrcMO.isKill();
785
786 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: BPF::STD))
787 .addReg(RegNo: SrcReg, Flags: getKillRegState(B: IsKill))
788 .addReg(RegNo: BPF::R11)
789 .addImm(Val: Off);
790 MI.eraseFromParent();
791 Changed = true;
792 break;
793 }
794
795 case BPF::STORE_STACK_ARG_IMM_PSEUDO: {
796 int16_t Off = MI.getOperand(i: 0).getImm();
797 int32_t Val = MI.getOperand(i: 1).getImm();
798
799 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: BPF::STD_imm))
800 .addImm(Val)
801 .addReg(RegNo: BPF::R11)
802 .addImm(Val: Off);
803 MI.eraseFromParent();
804 Changed = true;
805 break;
806 }
807 }
808 }
809 }
810
811 return Changed;
812}
813
814} // end default namespace
815
816INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
817 "BPF PreEmit Peephole Optimization", false, false)
818
819char BPFMIPreEmitPeephole::ID = 0;
820FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
821{
822 return new BPFMIPreEmitPeephole();
823}
824