1//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the pass that finds instructions that can be
10// re-written as LEA instructions in order to reduce pipeline delays.
11// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86.h"
16#include "X86InstrInfo.h"
17#include "X86Subtarget.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/Analysis/ProfileSummaryInfo.h"
20#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
21#include "llvm/CodeGen/MachineFunctionPass.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineSizeOpts.h"
24#include "llvm/CodeGen/Passes.h"
25#include "llvm/CodeGen/TargetSchedule.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/raw_ostream.h"
28using namespace llvm;
29
30#define FIXUPLEA_DESC "X86 LEA Fixup"
31#define FIXUPLEA_NAME "x86-fixup-leas"
32
33#define DEBUG_TYPE FIXUPLEA_NAME
34
35STATISTIC(NumLEAs, "Number of LEA instructions created");
36
37namespace {
38class FixupLEAsImpl {
39 enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
40
41 /// Given a machine register, look for the instruction
42 /// which writes it in the current basic block. If found,
43 /// try to replace it with an equivalent LEA instruction.
44 /// If replacement succeeds, then also process the newly created
45 /// instruction.
46 void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
47 MachineBasicBlock &MBB);
48
49 /// Given a memory access or LEA instruction
50 /// whose address mode uses a base and/or index register, look for
51 /// an opportunity to replace the instruction which sets the base or index
52 /// register with an equivalent LEA instruction.
53 void processInstruction(MachineBasicBlock::iterator &I,
54 MachineBasicBlock &MBB);
55
56 /// Given a LEA instruction which is unprofitable
57 /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
58 void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
59 MachineBasicBlock &MBB);
60
61 /// Given a LEA instruction which is unprofitable
62 /// on SNB+ try to replace it with other instructions.
63 /// According to Intel's Optimization Reference Manual:
64 /// " For LEA instructions with three source operands and some specific
65 /// situations, instruction latency has increased to 3 cycles, and must
66 /// dispatch via port 1:
67 /// - LEA that has all three source operands: base, index, and offset
68 /// - LEA that uses base and index registers where the base is EBP, RBP,
69 /// or R13
70 /// - LEA that uses RIP relative addressing mode
71 /// - LEA that uses 16-bit addressing mode "
72 /// This function currently handles the first 2 cases only.
73 void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
74 MachineBasicBlock &MBB, bool OptIncDec);
75
76 /// Look for LEAs that are really two address LEAs that we might be able to
77 /// turn into regular ADD instructions.
78 bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
79 MachineBasicBlock &MBB, bool OptIncDec,
80 bool UseLEAForSP) const;
81
82 /// Look for and transform the sequence
83 /// lea (reg1, reg2), reg3
84 /// sub reg3, reg4
85 /// to
86 /// sub reg1, reg4
87 /// sub reg2, reg4
88 /// It can also optimize the sequence lea/add similarly.
89 bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;
90
91 /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
92 /// the dest register of LEA instruction I.
93 MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
94 MachineBasicBlock &MBB) const;
95
96 /// Check instructions between LeaI and AluI (exclusively).
97 /// Set BaseIndexDef to true if base or index register from LeaI is defined.
98 /// Set AluDestRef to true if the dest register of AluI is used or defined.
99 /// *KilledBase is set to the killed base register usage.
100 /// *KilledIndex is set to the killed index register usage.
101 void checkRegUsage(MachineBasicBlock::iterator &LeaI,
102 MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
103 bool &AluDestRef, MachineOperand **KilledBase,
104 MachineOperand **KilledIndex) const;
105
106 /// Determine if an instruction references a machine register
107 /// and, if so, whether it reads or writes the register.
108 RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
109
110 /// Step backwards through a basic block, looking
111 /// for an instruction which writes a register within
112 /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
113 MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
114 MachineBasicBlock::iterator &I,
115 MachineBasicBlock &MBB);
116
117 /// if an instruction can be converted to an
118 /// equivalent LEA, insert the new instruction into the basic block
119 /// and return a pointer to it. Otherwise, return zero.
120 MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
121 MachineBasicBlock::iterator &MBBI) const;
122
123public:
124 FixupLEAsImpl(ProfileSummaryInfo *PSI, MachineBlockFrequencyInfo *MBFI)
125 : PSI(PSI), MBFI(MBFI) {}
126
127 /// Loop over all of the basic blocks,
128 /// replacing instructions by equivalent LEA instructions
129 /// if needed and when possible.
130 bool runOnMachineFunction(MachineFunction &MF);
131
132private:
133 TargetSchedModel TSM;
134 const X86InstrInfo *TII = nullptr;
135 const X86RegisterInfo *TRI = nullptr;
136 ProfileSummaryInfo *PSI;
137 MachineBlockFrequencyInfo *MBFI;
138};
139
140class FixupLEAsLegacy : public MachineFunctionPass {
141public:
142 static char ID;
143
144 StringRef getPassName() const override { return FIXUPLEA_DESC; }
145
146 FixupLEAsLegacy() : MachineFunctionPass(ID) {}
147
148 bool runOnMachineFunction(MachineFunction &MF) override;
149
150 // This pass runs after regalloc and doesn't support VReg operands.
151 MachineFunctionProperties getRequiredProperties() const override {
152 return MachineFunctionProperties().setNoVRegs();
153 }
154
155 void getAnalysisUsage(AnalysisUsage &AU) const override {
156 AU.addRequired<ProfileSummaryInfoWrapperPass>();
157 AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
158 MachineFunctionPass::getAnalysisUsage(AU);
159 }
160};
161}
162
163char FixupLEAsLegacy::ID = 0;
164
165INITIALIZE_PASS(FixupLEAsLegacy, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
166
167MachineInstr *
168FixupLEAsImpl::postRAConvertToLEA(MachineBasicBlock &MBB,
169 MachineBasicBlock::iterator &MBBI) const {
170 MachineInstr &MI = *MBBI;
171 switch (MI.getOpcode()) {
172 case X86::MOV32rr:
173 case X86::MOV64rr: {
174 const MachineOperand &Src = MI.getOperand(i: 1);
175 const MachineOperand &Dest = MI.getOperand(i: 0);
176 MachineInstr *NewMI =
177 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
178 MCID: TII->get(Opcode: MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
179 : X86::LEA64r))
180 .add(MO: Dest)
181 .add(MO: Src)
182 .addImm(Val: 1)
183 .addReg(RegNo: 0)
184 .addImm(Val: 0)
185 .addReg(RegNo: 0);
186 return NewMI;
187 }
188 }
189
190 if (!MI.isConvertibleTo3Addr())
191 return nullptr;
192
193 switch (MI.getOpcode()) {
194 default:
195 // Only convert instructions that we've verified are safe.
196 return nullptr;
197 case X86::ADD64ri32:
198 case X86::ADD64ri32_DB:
199 case X86::ADD32ri:
200 case X86::ADD32ri_DB:
201 if (!MI.getOperand(i: 2).isImm()) {
202 // convertToThreeAddress will call getImm()
203 // which requires isImm() to be true
204 return nullptr;
205 }
206 break;
207 case X86::SHL64ri:
208 case X86::SHL32ri:
209 case X86::INC64r:
210 case X86::INC32r:
211 case X86::DEC64r:
212 case X86::DEC32r:
213 case X86::ADD64rr:
214 case X86::ADD64rr_DB:
215 case X86::ADD32rr:
216 case X86::ADD32rr_DB:
217 // These instructions are all fine to convert.
218 break;
219 }
220 return TII->convertToThreeAddress(MI, LV: nullptr, LIS: nullptr);
221}
222
223FunctionPass *llvm::createX86FixupLEAsLegacyPass() {
224 return new FixupLEAsLegacy();
225}
226
227static bool isLEA(unsigned Opcode) {
228 return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
229 Opcode == X86::LEA64_32r;
230}
231
232bool FixupLEAsImpl::runOnMachineFunction(MachineFunction &MF) {
233 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
234 bool IsSlowLEA = ST.slowLEA();
235 bool IsSlow3OpsLEA = ST.slow3OpsLEA();
236 bool LEAUsesAG = ST.leaUsesAG();
237
238 bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
239 bool UseLEAForSP = ST.useLeaForSP();
240
241 TSM.init(TSInfo: &ST);
242 TII = ST.getInstrInfo();
243 TRI = ST.getRegisterInfo();
244
245 LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
246 for (MachineBasicBlock &MBB : MF) {
247 // First pass. Try to remove or optimize existing LEAs.
248 bool OptIncDecPerBB =
249 OptIncDec || llvm::shouldOptimizeForSize(MBB: &MBB, PSI, MBFI);
250 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
251 if (!isLEA(Opcode: I->getOpcode()))
252 continue;
253
254 if (optTwoAddrLEA(I, MBB, OptIncDec: OptIncDecPerBB, UseLEAForSP))
255 continue;
256
257 if (IsSlowLEA)
258 processInstructionForSlowLEA(I, MBB);
259 else if (IsSlow3OpsLEA)
260 processInstrForSlow3OpLEA(I, MBB, OptIncDec: OptIncDecPerBB);
261 }
262
263 // Second pass for creating LEAs. This may reverse some of the
264 // transformations above.
265 if (LEAUsesAG) {
266 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
267 processInstruction(I, MBB);
268 }
269 }
270
271 LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
272
273 return true;
274}
275
276FixupLEAsImpl::RegUsageState
277FixupLEAsImpl::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
278 RegUsageState RegUsage = RU_NotUsed;
279 MachineInstr &MI = *I;
280
281 for (const MachineOperand &MO : MI.operands()) {
282 if (MO.isReg() && MO.getReg() == p.getReg()) {
283 if (MO.isDef())
284 return RU_Write;
285 RegUsage = RU_Read;
286 }
287 }
288 return RegUsage;
289}
290
291/// getPreviousInstr - Given a reference to an instruction in a basic
292/// block, return a reference to the previous instruction in the block,
293/// wrapping around to the last instruction of the block if the block
294/// branches to itself.
295static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
296 MachineBasicBlock &MBB) {
297 if (I == MBB.begin()) {
298 if (MBB.isPredecessor(MBB: &MBB)) {
299 I = --MBB.end();
300 return true;
301 } else
302 return false;
303 }
304 --I;
305 return true;
306}
307
308MachineBasicBlock::iterator FixupLEAsImpl::searchBackwards(
309 MachineOperand &p, MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) {
310 int InstrDistance = 1;
311 MachineBasicBlock::iterator CurInst;
312 static const int INSTR_DISTANCE_THRESHOLD = 5;
313
314 CurInst = I;
315 bool Found;
316 Found = getPreviousInstr(I&: CurInst, MBB);
317 while (Found && I != CurInst) {
318 if (CurInst->isCall() || CurInst->isInlineAsm())
319 break;
320 if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
321 break; // too far back to make a difference
322 if (usesRegister(p, I: CurInst) == RU_Write) {
323 return CurInst;
324 }
325 InstrDistance += TSM.computeInstrLatency(MI: &*CurInst);
326 Found = getPreviousInstr(I&: CurInst, MBB);
327 }
328 return MachineBasicBlock::iterator();
329}
330
331static inline bool isInefficientLEAReg(Register Reg) {
332 return Reg == X86::EBP || Reg == X86::RBP ||
333 Reg == X86::R13D || Reg == X86::R13;
334}
335
336/// Returns true if this LEA uses base and index registers, and the base
337/// register is known to be inefficient for the subtarget.
338// TODO: use a variant scheduling class to model the latency profile
339// of LEA instructions, and implement this logic as a scheduling predicate.
340static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
341 const MachineOperand &Index) {
342 return Base.isReg() && isInefficientLEAReg(Reg: Base.getReg()) && Index.isReg() &&
343 Index.getReg().isValid();
344}
345
346static inline bool hasLEAOffset(const MachineOperand &Offset) {
347 return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal() ||
348 Offset.isBlockAddress();
349}
350
351static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
352 switch (LEAOpcode) {
353 default:
354 llvm_unreachable("Unexpected LEA instruction");
355 case X86::LEA32r:
356 case X86::LEA64_32r:
357 return X86::ADD32rr;
358 case X86::LEA64r:
359 return X86::ADD64rr;
360 }
361}
362
363static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
364 switch (LEAOpcode) {
365 default:
366 llvm_unreachable("Unexpected LEA instruction");
367 case X86::LEA32r:
368 case X86::LEA64_32r:
369 return X86::SUB32rr;
370 case X86::LEA64r:
371 return X86::SUB64rr;
372 }
373}
374
375static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
376 const MachineOperand &Offset) {
377 switch (LEAOpcode) {
378 default:
379 llvm_unreachable("Unexpected LEA instruction");
380 case X86::LEA32r:
381 case X86::LEA64_32r:
382 return X86::ADD32ri;
383 case X86::LEA64r:
384 return X86::ADD64ri32;
385 }
386}
387
388static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
389 switch (LEAOpcode) {
390 default:
391 llvm_unreachable("Unexpected LEA instruction");
392 case X86::LEA32r:
393 case X86::LEA64_32r:
394 return IsINC ? X86::INC32r : X86::DEC32r;
395 case X86::LEA64r:
396 return IsINC ? X86::INC64r : X86::DEC64r;
397 }
398}
399
400MachineBasicBlock::iterator
401FixupLEAsImpl::searchALUInst(MachineBasicBlock::iterator &I,
402 MachineBasicBlock &MBB) const {
403 const int InstrDistanceThreshold = 5;
404 int InstrDistance = 1;
405 MachineBasicBlock::iterator CurInst = std::next(x: I);
406
407 unsigned LEAOpcode = I->getOpcode();
408 unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
409 unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
410 Register DestReg = I->getOperand(i: 0).getReg();
411
412 while (CurInst != MBB.end()) {
413 if (CurInst->isCall() || CurInst->isInlineAsm())
414 break;
415 if (InstrDistance > InstrDistanceThreshold)
416 break;
417
418 // Check if the lea dest register is used in an add/sub instruction only.
419 for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
420 MachineOperand &Opnd = CurInst->getOperand(i: I);
421 if (Opnd.isReg()) {
422 if (Opnd.getReg() == DestReg) {
423 if (Opnd.isDef() || !Opnd.isKill())
424 return MachineBasicBlock::iterator();
425
426 unsigned AluOpcode = CurInst->getOpcode();
427 if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
428 return MachineBasicBlock::iterator();
429
430 MachineOperand &Opnd2 = CurInst->getOperand(i: 3 - I);
431 MachineOperand AluDest = CurInst->getOperand(i: 0);
432 if (Opnd2.getReg() != AluDest.getReg())
433 return MachineBasicBlock::iterator();
434
435 // X - (Y + Z) may generate different flags than (X - Y) - Z when
436 // there is overflow. So we can't change the alu instruction if the
437 // flags register is live.
438 if (!CurInst->registerDefIsDead(Reg: X86::EFLAGS, TRI))
439 return MachineBasicBlock::iterator();
440
441 return CurInst;
442 }
443 if (TRI->regsOverlap(RegA: DestReg, RegB: Opnd.getReg()))
444 return MachineBasicBlock::iterator();
445 }
446 }
447
448 InstrDistance++;
449 ++CurInst;
450 }
451 return MachineBasicBlock::iterator();
452}
453
454void FixupLEAsImpl::checkRegUsage(MachineBasicBlock::iterator &LeaI,
455 MachineBasicBlock::iterator &AluI,
456 bool &BaseIndexDef, bool &AluDestRef,
457 MachineOperand **KilledBase,
458 MachineOperand **KilledIndex) const {
459 BaseIndexDef = AluDestRef = false;
460 *KilledBase = *KilledIndex = nullptr;
461 Register BaseReg = LeaI->getOperand(i: 1 + X86::AddrBaseReg).getReg();
462 Register IndexReg = LeaI->getOperand(i: 1 + X86::AddrIndexReg).getReg();
463 Register AluDestReg = AluI->getOperand(i: 0).getReg();
464
465 for (MachineInstr &CurInst : llvm::make_range(x: std::next(x: LeaI), y: AluI)) {
466 for (MachineOperand &Opnd : CurInst.operands()) {
467 if (!Opnd.isReg())
468 continue;
469 Register Reg = Opnd.getReg();
470 if (TRI->regsOverlap(RegA: Reg, RegB: AluDestReg))
471 AluDestRef = true;
472 if (TRI->regsOverlap(RegA: Reg, RegB: BaseReg)) {
473 if (Opnd.isDef())
474 BaseIndexDef = true;
475 else if (Opnd.isKill())
476 *KilledBase = &Opnd;
477 }
478 if (TRI->regsOverlap(RegA: Reg, RegB: IndexReg)) {
479 if (Opnd.isDef())
480 BaseIndexDef = true;
481 else if (Opnd.isKill())
482 *KilledIndex = &Opnd;
483 }
484 }
485 }
486}
487
488bool FixupLEAsImpl::optLEAALU(MachineBasicBlock::iterator &I,
489 MachineBasicBlock &MBB) const {
490 // Look for an add/sub instruction which uses the result of lea.
491 MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
492 if (AluI == MachineBasicBlock::iterator())
493 return false;
494
495 // Check if there are any related register usage between lea and alu.
496 bool BaseIndexDef, AluDestRef;
497 MachineOperand *KilledBase, *KilledIndex;
498 checkRegUsage(LeaI&: I, AluI, BaseIndexDef, AluDestRef, KilledBase: &KilledBase, KilledIndex: &KilledIndex);
499
500 MachineBasicBlock::iterator InsertPos = AluI;
501 if (BaseIndexDef) {
502 if (AluDestRef)
503 return false;
504 InsertPos = I;
505 KilledBase = KilledIndex = nullptr;
506 }
507
508 // Check if there are same registers.
509 Register AluDestReg = AluI->getOperand(i: 0).getReg();
510 Register BaseReg = I->getOperand(i: 1 + X86::AddrBaseReg).getReg();
511 Register IndexReg = I->getOperand(i: 1 + X86::AddrIndexReg).getReg();
512 if (I->getOpcode() == X86::LEA64_32r) {
513 BaseReg = TRI->getSubReg(Reg: BaseReg, Idx: X86::sub_32bit);
514 IndexReg = TRI->getSubReg(Reg: IndexReg, Idx: X86::sub_32bit);
515 }
516 if (AluDestReg == IndexReg) {
517 if (BaseReg == IndexReg)
518 return false;
519 std::swap(a&: BaseReg, b&: IndexReg);
520 std::swap(a&: KilledBase, b&: KilledIndex);
521 }
522 if (BaseReg == IndexReg)
523 KilledBase = nullptr;
524
525 // Now it's safe to change instructions.
526 MachineInstr *NewMI1, *NewMI2;
527 unsigned NewOpcode = AluI->getOpcode();
528 NewMI1 = BuildMI(BB&: MBB, I: InsertPos, MIMD: AluI->getDebugLoc(), MCID: TII->get(Opcode: NewOpcode),
529 DestReg: AluDestReg)
530 .addReg(RegNo: AluDestReg, Flags: RegState::Kill)
531 .addReg(RegNo: BaseReg, Flags: getKillRegState(B: KilledBase));
532 NewMI1->addRegisterDead(Reg: X86::EFLAGS, RegInfo: TRI);
533 NewMI2 = BuildMI(BB&: MBB, I: InsertPos, MIMD: AluI->getDebugLoc(), MCID: TII->get(Opcode: NewOpcode),
534 DestReg: AluDestReg)
535 .addReg(RegNo: AluDestReg, Flags: RegState::Kill)
536 .addReg(RegNo: IndexReg, Flags: getKillRegState(B: KilledIndex));
537 NewMI2->addRegisterDead(Reg: X86::EFLAGS, RegInfo: TRI);
538
539 // Clear the old Kill flags.
540 if (KilledBase)
541 KilledBase->setIsKill(false);
542 if (KilledIndex)
543 KilledIndex->setIsKill(false);
544
545 MBB.getParent()->substituteDebugValuesForInst(Old: *AluI, New&: *NewMI2, MaxOperand: 1);
546 MBB.erase(I);
547 MBB.erase(I: AluI);
548 I = NewMI1;
549 return true;
550}
551
552bool FixupLEAsImpl::optTwoAddrLEA(MachineBasicBlock::iterator &I,
553 MachineBasicBlock &MBB, bool OptIncDec,
554 bool UseLEAForSP) const {
555 MachineInstr &MI = *I;
556
557 const MachineOperand &Base = MI.getOperand(i: 1 + X86::AddrBaseReg);
558 const MachineOperand &Scale = MI.getOperand(i: 1 + X86::AddrScaleAmt);
559 const MachineOperand &Index = MI.getOperand(i: 1 + X86::AddrIndexReg);
560 const MachineOperand &Disp = MI.getOperand(i: 1 + X86::AddrDisp);
561 const MachineOperand &Segment = MI.getOperand(i: 1 + X86::AddrSegmentReg);
562
563 if (Segment.getReg().isValid() || !Disp.isImm() || Scale.getImm() > 1 ||
564 MBB.computeRegisterLiveness(TRI, Reg: X86::EFLAGS, Before: I) !=
565 MachineBasicBlock::LQR_Dead)
566 return false;
567
568 Register DestReg = MI.getOperand(i: 0).getReg();
569 Register BaseReg = Base.getReg();
570 Register IndexReg = Index.getReg();
571
572 // Don't change stack adjustment LEAs.
573 if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
574 return false;
575
576 // LEA64_32 has 64-bit operands but 32-bit result.
577 if (MI.getOpcode() == X86::LEA64_32r) {
578 if (BaseReg)
579 BaseReg = TRI->getSubReg(Reg: BaseReg, Idx: X86::sub_32bit);
580 if (IndexReg)
581 IndexReg = TRI->getSubReg(Reg: IndexReg, Idx: X86::sub_32bit);
582 }
583
584 MachineInstr *NewMI = nullptr;
585
586 // Case 1.
587 // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
588 // which can be turned into add %reg2, %reg1
589 if (BaseReg.isValid() && IndexReg.isValid() && Disp.getImm() == 0 &&
590 (DestReg == BaseReg || DestReg == IndexReg)) {
591 unsigned NewOpcode = getADDrrFromLEA(LEAOpcode: MI.getOpcode());
592 if (DestReg != BaseReg)
593 std::swap(a&: BaseReg, b&: IndexReg);
594
595 if (MI.getOpcode() == X86::LEA64_32r) {
596 // TODO: Do we need the super register implicit use?
597 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpcode), DestReg)
598 .addReg(RegNo: BaseReg).addReg(RegNo: IndexReg)
599 .addReg(RegNo: Base.getReg(), Flags: RegState::Implicit)
600 .addReg(RegNo: Index.getReg(), Flags: RegState::Implicit);
601 } else {
602 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpcode), DestReg)
603 .addReg(RegNo: BaseReg).addReg(RegNo: IndexReg);
604 }
605 } else if (DestReg == BaseReg && !IndexReg) {
606 // Case 2.
607 // This is an LEA with only a base register and a displacement,
608 // We can use ADDri or INC/DEC.
609
610 // Does this LEA have one these forms:
611 // lea %reg, 1(%reg)
612 // lea %reg, -1(%reg)
613 if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
614 bool IsINC = Disp.getImm() == 1;
615 unsigned NewOpcode = getINCDECFromLEA(LEAOpcode: MI.getOpcode(), IsINC);
616
617 if (MI.getOpcode() == X86::LEA64_32r) {
618 // TODO: Do we need the super register implicit use?
619 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpcode), DestReg)
620 .addReg(RegNo: BaseReg).addReg(RegNo: Base.getReg(), Flags: RegState::Implicit);
621 } else {
622 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpcode), DestReg)
623 .addReg(RegNo: BaseReg);
624 }
625 } else {
626 unsigned NewOpcode = getADDriFromLEA(LEAOpcode: MI.getOpcode(), Offset: Disp);
627 if (MI.getOpcode() == X86::LEA64_32r) {
628 // TODO: Do we need the super register implicit use?
629 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpcode), DestReg)
630 .addReg(RegNo: BaseReg).addImm(Val: Disp.getImm())
631 .addReg(RegNo: Base.getReg(), Flags: RegState::Implicit);
632 } else {
633 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpcode), DestReg)
634 .addReg(RegNo: BaseReg).addImm(Val: Disp.getImm());
635 }
636 }
637 } else if (BaseReg.isValid() && IndexReg.isValid() && Disp.getImm() == 0) {
638 // Case 3.
639 // Look for and transform the sequence
640 // lea (reg1, reg2), reg3
641 // sub reg3, reg4
642 return optLEAALU(I, MBB);
643 } else
644 return false;
645
646 MBB.getParent()->substituteDebugValuesForInst(Old: *I, New&: *NewMI, MaxOperand: 1);
647 MBB.erase(I);
648 I = NewMI;
649 return true;
650}
651
652void FixupLEAsImpl::processInstruction(MachineBasicBlock::iterator &I,
653 MachineBasicBlock &MBB) {
654 // Process a load, store, or LEA instruction.
655 MachineInstr &MI = *I;
656 const MCInstrDesc &Desc = MI.getDesc();
657 int AddrOffset = X86II::getMemoryOperandNo(TSFlags: Desc.TSFlags);
658 if (AddrOffset >= 0) {
659 AddrOffset += X86II::getOperandBias(Desc);
660 MachineOperand &p = MI.getOperand(i: AddrOffset + X86::AddrBaseReg);
661 if (p.isReg() && p.getReg() != X86::ESP) {
662 seekLEAFixup(p, I, MBB);
663 }
664 MachineOperand &q = MI.getOperand(i: AddrOffset + X86::AddrIndexReg);
665 if (q.isReg() && q.getReg() != X86::ESP) {
666 seekLEAFixup(p&: q, I, MBB);
667 }
668 }
669}
670
671void FixupLEAsImpl::seekLEAFixup(MachineOperand &p,
672 MachineBasicBlock::iterator &I,
673 MachineBasicBlock &MBB) {
674 MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
675 if (MBI != MachineBasicBlock::iterator()) {
676 MachineInstr *NewMI = postRAConvertToLEA(MBB, MBBI&: MBI);
677 if (NewMI) {
678 ++NumLEAs;
679 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
680 // now to replace with an equivalent LEA...
681 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
682 MBB.getParent()->substituteDebugValuesForInst(Old: *MBI, New&: *NewMI, MaxOperand: 1);
683 MBB.erase(I: MBI);
684 MachineBasicBlock::iterator J =
685 static_cast<MachineBasicBlock::iterator>(NewMI);
686 processInstruction(I&: J, MBB);
687 }
688 }
689}
690
691void FixupLEAsImpl::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
692 MachineBasicBlock &MBB) {
693 MachineInstr &MI = *I;
694 const unsigned Opcode = MI.getOpcode();
695
696 const MachineOperand &Dst = MI.getOperand(i: 0);
697 const MachineOperand &Base = MI.getOperand(i: 1 + X86::AddrBaseReg);
698 const MachineOperand &Scale = MI.getOperand(i: 1 + X86::AddrScaleAmt);
699 const MachineOperand &Index = MI.getOperand(i: 1 + X86::AddrIndexReg);
700 const MachineOperand &Offset = MI.getOperand(i: 1 + X86::AddrDisp);
701 const MachineOperand &Segment = MI.getOperand(i: 1 + X86::AddrSegmentReg);
702
703 if (Segment.getReg().isValid() || !Offset.isImm() ||
704 MBB.computeRegisterLiveness(TRI, Reg: X86::EFLAGS, Before: I, Neighborhood: 4) !=
705 MachineBasicBlock::LQR_Dead)
706 return;
707 const Register DstR = Dst.getReg();
708 const Register SrcR1 = Base.getReg();
709 const Register SrcR2 = Index.getReg();
710 if ((!SrcR1 || SrcR1 != DstR) && (!SrcR2 || SrcR2 != DstR))
711 return;
712 if (Scale.getImm() > 1)
713 return;
714 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
715 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
716 MachineInstr *NewMI = nullptr;
717 // Make ADD instruction for two registers writing to LEA's destination
718 if (SrcR1 && SrcR2) {
719 const MCInstrDesc &ADDrr = TII->get(Opcode: getADDrrFromLEA(LEAOpcode: Opcode));
720 const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
721 NewMI =
722 BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: ADDrr, DestReg: DstR).addReg(RegNo: DstR).add(MO: Src);
723 LLVM_DEBUG(NewMI->dump(););
724 }
725 // Make ADD instruction for immediate
726 if (Offset.getImm() != 0) {
727 const MCInstrDesc &ADDri =
728 TII->get(Opcode: getADDriFromLEA(LEAOpcode: Opcode, Offset));
729 const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
730 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: ADDri, DestReg: DstR)
731 .add(MO: SrcR)
732 .addImm(Val: Offset.getImm());
733 LLVM_DEBUG(NewMI->dump(););
734 }
735 if (NewMI) {
736 MBB.getParent()->substituteDebugValuesForInst(Old: *I, New&: *NewMI, MaxOperand: 1);
737 MBB.erase(I);
738 I = NewMI;
739 }
740}
741
742void FixupLEAsImpl::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
743 MachineBasicBlock &MBB,
744 bool OptIncDec) {
745 MachineInstr &MI = *I;
746 const unsigned LEAOpcode = MI.getOpcode();
747
748 const MachineOperand &Dest = MI.getOperand(i: 0);
749 const MachineOperand &Base = MI.getOperand(i: 1 + X86::AddrBaseReg);
750 const MachineOperand &Scale = MI.getOperand(i: 1 + X86::AddrScaleAmt);
751 const MachineOperand &Index = MI.getOperand(i: 1 + X86::AddrIndexReg);
752 const MachineOperand &Offset = MI.getOperand(i: 1 + X86::AddrDisp);
753 const MachineOperand &Segment = MI.getOperand(i: 1 + X86::AddrSegmentReg);
754
755 if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
756 MBB.computeRegisterLiveness(TRI, Reg: X86::EFLAGS, Before: I, Neighborhood: 4) !=
757 MachineBasicBlock::LQR_Dead ||
758 Segment.getReg().isValid())
759 return;
760
761 Register DestReg = Dest.getReg();
762 Register BaseReg = Base.getReg();
763 Register IndexReg = Index.getReg();
764
765 if (MI.getOpcode() == X86::LEA64_32r) {
766 if (BaseReg)
767 BaseReg = TRI->getSubReg(Reg: BaseReg, Idx: X86::sub_32bit);
768 if (IndexReg)
769 IndexReg = TRI->getSubReg(Reg: IndexReg, Idx: X86::sub_32bit);
770 }
771
772 bool IsScale1 = Scale.getImm() == 1;
773 bool IsInefficientBase = isInefficientLEAReg(Reg: BaseReg);
774 bool IsInefficientIndex = isInefficientLEAReg(Reg: IndexReg);
775
776 // Skip these cases since it takes more than 2 instructions
777 // to replace the LEA instruction.
778 if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
779 return;
780
781 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
782 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
783
784 MachineInstr *NewMI = nullptr;
785 bool BaseOrIndexIsDst = DestReg == BaseReg || DestReg == IndexReg;
786 // First try and remove the base while sticking with LEA iff base == index and
787 // scale == 1. We can handle:
788 // 1. lea D(%base,%index,1) -> lea D(,%index,2)
789 // 2. lea D(%r13/%rbp,%index) -> lea D(,%index,2)
790 // Only do this if the LEA would otherwise be split into 2-instruction
791 // (either it has a an Offset or neither base nor index are dst)
792 if (IsScale1 && BaseReg == IndexReg &&
793 (hasLEAOffset(Offset) || (IsInefficientBase && !BaseOrIndexIsDst))) {
794 NewMI = BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: LEAOpcode))
795 .add(MO: Dest)
796 .addReg(RegNo: 0)
797 .addImm(Val: 2)
798 .add(MO: Index)
799 .add(MO: Offset)
800 .add(MO: Segment);
801 LLVM_DEBUG(NewMI->dump(););
802
803 MBB.getParent()->substituteDebugValuesForInst(Old: *I, New&: *NewMI, MaxOperand: 1);
804 MBB.erase(I);
805 I = NewMI;
806 return;
807 } else if (IsScale1 && BaseOrIndexIsDst) {
808 // Try to replace LEA with one or two (for the 3-op LEA case)
809 // add instructions:
810 // 1.lea (%base,%index,1), %base => add %index,%base
811 // 2.lea (%base,%index,1), %index => add %base,%index
812
813 unsigned NewOpc = getADDrrFromLEA(LEAOpcode: MI.getOpcode());
814 if (DestReg != BaseReg)
815 std::swap(a&: BaseReg, b&: IndexReg);
816
817 if (MI.getOpcode() == X86::LEA64_32r) {
818 // TODO: Do we need the super register implicit use?
819 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpc), DestReg)
820 .addReg(RegNo: BaseReg)
821 .addReg(RegNo: IndexReg)
822 .addReg(RegNo: Base.getReg(), Flags: RegState::Implicit)
823 .addReg(RegNo: Index.getReg(), Flags: RegState::Implicit);
824 } else {
825 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpc), DestReg)
826 .addReg(RegNo: BaseReg)
827 .addReg(RegNo: IndexReg);
828 }
829 } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
830 // If the base is inefficient try switching the index and base operands,
831 // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
832 // lea offset(%base,%index,scale),%dst =>
833 // lea (%base,%index,scale); add offset,%dst
834 NewMI = BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: LEAOpcode))
835 .add(MO: Dest)
836 .add(MO: IsInefficientBase ? Index : Base)
837 .add(MO: Scale)
838 .add(MO: IsInefficientBase ? Base : Index)
839 .addImm(Val: 0)
840 .add(MO: Segment);
841 LLVM_DEBUG(NewMI->dump(););
842 }
843
844 // If either replacement succeeded above, add the offset if needed, then
845 // replace the instruction.
846 if (NewMI) {
847 // Create ADD instruction for the Offset in case of 3-Ops LEA.
848 if (hasLEAOffset(Offset)) {
849 if (OptIncDec && Offset.isImm() &&
850 (Offset.getImm() == 1 || Offset.getImm() == -1)) {
851 unsigned NewOpc =
852 getINCDECFromLEA(LEAOpcode: MI.getOpcode(), IsINC: Offset.getImm() == 1);
853 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpc), DestReg)
854 .addReg(RegNo: DestReg);
855 LLVM_DEBUG(NewMI->dump(););
856 } else {
857 unsigned NewOpc = getADDriFromLEA(LEAOpcode: MI.getOpcode(), Offset);
858 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpc), DestReg)
859 .addReg(RegNo: DestReg)
860 .add(MO: Offset);
861 LLVM_DEBUG(NewMI->dump(););
862 }
863 }
864
865 MBB.getParent()->substituteDebugValuesForInst(Old: *I, New&: *NewMI, MaxOperand: 1);
866 MBB.erase(I);
867 I = NewMI;
868 return;
869 }
870
871 // Handle the rest of the cases with inefficient base register:
872 assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
873 assert(IsInefficientBase && "efficient base should be handled already!");
874
875 // FIXME: Handle LEA64_32r.
876 if (LEAOpcode == X86::LEA64_32r)
877 return;
878
879 // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
880 if (IsScale1 && !hasLEAOffset(Offset)) {
881 bool BIK = Base.isKill() && BaseReg != IndexReg;
882 TII->copyPhysReg(MBB, MI, DL: MI.getDebugLoc(), DestReg, SrcReg: BaseReg, KillSrc: BIK);
883 LLVM_DEBUG(MI.getPrevNode()->dump(););
884
885 unsigned NewOpc = getADDrrFromLEA(LEAOpcode: MI.getOpcode());
886 NewMI = BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpc), DestReg)
887 .addReg(RegNo: DestReg)
888 .add(MO: Index);
889 LLVM_DEBUG(NewMI->dump(););
890
891 MBB.getParent()->substituteDebugValuesForInst(Old: *I, New&: *NewMI, MaxOperand: 1);
892 MBB.erase(I);
893 I = NewMI;
894 return;
895 }
896
897 // lea offset(%base,%index,scale), %dst =>
898 // lea offset( ,%index,scale), %dst; add %base,%dst
899 NewMI = BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: LEAOpcode))
900 .add(MO: Dest)
901 .addReg(RegNo: 0)
902 .add(MO: Scale)
903 .add(MO: Index)
904 .add(MO: Offset)
905 .add(MO: Segment);
906 LLVM_DEBUG(NewMI->dump(););
907
908 unsigned NewOpc = getADDrrFromLEA(LEAOpcode: MI.getOpcode());
909 NewMI = BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpc), DestReg)
910 .addReg(RegNo: DestReg)
911 .add(MO: Base);
912 LLVM_DEBUG(NewMI->dump(););
913
914 MBB.getParent()->substituteDebugValuesForInst(Old: *I, New&: *NewMI, MaxOperand: 1);
915 MBB.erase(I);
916 I = NewMI;
917}
918
919bool FixupLEAsLegacy::runOnMachineFunction(MachineFunction &MF) {
920 if (skipFunction(F: MF.getFunction()))
921 return false;
922
923 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
924 auto *MBFI = (PSI && PSI->hasProfileSummary())
925 ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
926 : nullptr;
927 FixupLEAsImpl PassImpl(PSI, MBFI);
928 return PassImpl.runOnMachineFunction(MF);
929}
930
931PreservedAnalyses X86FixupLEAsPass::run(MachineFunction &MF,
932 MachineFunctionAnalysisManager &MFAM) {
933 ProfileSummaryInfo *PSI =
934 MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(IR&: MF)
935 .getCachedResult<ProfileSummaryAnalysis>(
936 IR&: *MF.getFunction().getParent());
937 if (!PSI)
938 report_fatal_error(reason: "x86-fixup-leas requires ProfileSummaryAnalysis", gen_crash_diag: false);
939 MachineBlockFrequencyInfo *MBFI =
940 &MFAM.getResult<MachineBlockFrequencyAnalysis>(IR&: MF);
941
942 FixupLEAsImpl PassImpl(PSI, MBFI);
943 bool Changed = PassImpl.runOnMachineFunction(MF);
944 if (!Changed)
945 return PreservedAnalyses::all();
946 PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses();
947 PA.preserveSet<CFGAnalyses>();
948 return PA;
949}
950