//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
//
//===----------------------------------------------------------------------===//
14
15#include "X86.h"
16#include "X86InstrInfo.h"
17#include "X86Subtarget.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/Analysis/ProfileSummaryInfo.h"
20#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
21#include "llvm/CodeGen/MachineFunctionPass.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineSizeOpts.h"
24#include "llvm/CodeGen/Passes.h"
25#include "llvm/CodeGen/TargetSchedule.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/raw_ostream.h"
28using namespace llvm;
29
30#define FIXUPLEA_DESC "X86 LEA Fixup"
31#define FIXUPLEA_NAME "x86-fixup-LEAs"
32
33#define DEBUG_TYPE FIXUPLEA_NAME
34
35STATISTIC(NumLEAs, "Number of LEA instructions created");
36
37namespace {
38class FixupLEAPass : public MachineFunctionPass {
39 enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
40
41 /// Given a machine register, look for the instruction
42 /// which writes it in the current basic block. If found,
43 /// try to replace it with an equivalent LEA instruction.
44 /// If replacement succeeds, then also process the newly created
45 /// instruction.
46 void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
47 MachineBasicBlock &MBB);
48
49 /// Given a memory access or LEA instruction
50 /// whose address mode uses a base and/or index register, look for
51 /// an opportunity to replace the instruction which sets the base or index
52 /// register with an equivalent LEA instruction.
53 void processInstruction(MachineBasicBlock::iterator &I,
54 MachineBasicBlock &MBB);
55
56 /// Given a LEA instruction which is unprofitable
57 /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
58 void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
59 MachineBasicBlock &MBB);
60
61 /// Given a LEA instruction which is unprofitable
62 /// on SNB+ try to replace it with other instructions.
63 /// According to Intel's Optimization Reference Manual:
64 /// " For LEA instructions with three source operands and some specific
65 /// situations, instruction latency has increased to 3 cycles, and must
66 /// dispatch via port 1:
67 /// - LEA that has all three source operands: base, index, and offset
68 /// - LEA that uses base and index registers where the base is EBP, RBP,
69 /// or R13
70 /// - LEA that uses RIP relative addressing mode
71 /// - LEA that uses 16-bit addressing mode "
72 /// This function currently handles the first 2 cases only.
73 void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
74 MachineBasicBlock &MBB, bool OptIncDec);
75
76 /// Look for LEAs that are really two address LEAs that we might be able to
77 /// turn into regular ADD instructions.
78 bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
79 MachineBasicBlock &MBB, bool OptIncDec,
80 bool UseLEAForSP) const;
81
82 /// Look for and transform the sequence
83 /// lea (reg1, reg2), reg3
84 /// sub reg3, reg4
85 /// to
86 /// sub reg1, reg4
87 /// sub reg2, reg4
88 /// It can also optimize the sequence lea/add similarly.
89 bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;
90
91 /// Step forwards in MBB, looking for an ADD/SUB instruction which uses
92 /// the dest register of LEA instruction I.
93 MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,
94 MachineBasicBlock &MBB) const;
95
96 /// Check instructions between LeaI and AluI (exclusively).
97 /// Set BaseIndexDef to true if base or index register from LeaI is defined.
98 /// Set AluDestRef to true if the dest register of AluI is used or defined.
99 /// *KilledBase is set to the killed base register usage.
100 /// *KilledIndex is set to the killed index register usage.
101 void checkRegUsage(MachineBasicBlock::iterator &LeaI,
102 MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,
103 bool &AluDestRef, MachineOperand **KilledBase,
104 MachineOperand **KilledIndex) const;
105
106 /// Determine if an instruction references a machine register
107 /// and, if so, whether it reads or writes the register.
108 RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
109
110 /// Step backwards through a basic block, looking
111 /// for an instruction which writes a register within
112 /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
113 MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
114 MachineBasicBlock::iterator &I,
115 MachineBasicBlock &MBB);
116
117 /// if an instruction can be converted to an
118 /// equivalent LEA, insert the new instruction into the basic block
119 /// and return a pointer to it. Otherwise, return zero.
120 MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
121 MachineBasicBlock::iterator &MBBI) const;
122
123public:
124 static char ID;
125
126 StringRef getPassName() const override { return FIXUPLEA_DESC; }
127
128 FixupLEAPass() : MachineFunctionPass(ID) { }
129
130 /// Loop over all of the basic blocks,
131 /// replacing instructions by equivalent LEA instructions
132 /// if needed and when possible.
133 bool runOnMachineFunction(MachineFunction &MF) override;
134
135 // This pass runs after regalloc and doesn't support VReg operands.
136 MachineFunctionProperties getRequiredProperties() const override {
137 return MachineFunctionProperties().set(
138 MachineFunctionProperties::Property::NoVRegs);
139 }
140
141 void getAnalysisUsage(AnalysisUsage &AU) const override {
142 AU.addRequired<ProfileSummaryInfoWrapperPass>();
143 AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
144 MachineFunctionPass::getAnalysisUsage(AU);
145 }
146
147private:
148 TargetSchedModel TSM;
149 const X86InstrInfo *TII = nullptr;
150 const X86RegisterInfo *TRI = nullptr;
151};
152}
153
154char FixupLEAPass::ID = 0;
155
156INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
157
158MachineInstr *
159FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
160 MachineBasicBlock::iterator &MBBI) const {
161 MachineInstr &MI = *MBBI;
162 switch (MI.getOpcode()) {
163 case X86::MOV32rr:
164 case X86::MOV64rr: {
165 const MachineOperand &Src = MI.getOperand(i: 1);
166 const MachineOperand &Dest = MI.getOperand(i: 0);
167 MachineInstr *NewMI =
168 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
169 MCID: TII->get(Opcode: MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
170 : X86::LEA64r))
171 .add(MO: Dest)
172 .add(MO: Src)
173 .addImm(Val: 1)
174 .addReg(RegNo: 0)
175 .addImm(Val: 0)
176 .addReg(RegNo: 0);
177 return NewMI;
178 }
179 }
180
181 if (!MI.isConvertibleTo3Addr())
182 return nullptr;
183
184 switch (MI.getOpcode()) {
185 default:
186 // Only convert instructions that we've verified are safe.
187 return nullptr;
188 case X86::ADD64ri32:
189 case X86::ADD64ri32_DB:
190 case X86::ADD32ri:
191 case X86::ADD32ri_DB:
192 if (!MI.getOperand(i: 2).isImm()) {
193 // convertToThreeAddress will call getImm()
194 // which requires isImm() to be true
195 return nullptr;
196 }
197 break;
198 case X86::SHL64ri:
199 case X86::SHL32ri:
200 case X86::INC64r:
201 case X86::INC32r:
202 case X86::DEC64r:
203 case X86::DEC32r:
204 case X86::ADD64rr:
205 case X86::ADD64rr_DB:
206 case X86::ADD32rr:
207 case X86::ADD32rr_DB:
208 // These instructions are all fine to convert.
209 break;
210 }
211 return TII->convertToThreeAddress(MI, LV: nullptr, LIS: nullptr);
212}
213
214FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
215
216static bool isLEA(unsigned Opcode) {
217 return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
218 Opcode == X86::LEA64_32r;
219}
220
221bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
222 if (skipFunction(F: MF.getFunction()))
223 return false;
224
225 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
226 bool IsSlowLEA = ST.slowLEA();
227 bool IsSlow3OpsLEA = ST.slow3OpsLEA();
228 bool LEAUsesAG = ST.leaUsesAG();
229
230 bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
231 bool UseLEAForSP = ST.useLeaForSP();
232
233 TSM.init(TSInfo: &ST);
234 TII = ST.getInstrInfo();
235 TRI = ST.getRegisterInfo();
236 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
237 auto *MBFI = (PSI && PSI->hasProfileSummary())
238 ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
239 : nullptr;
240
241 LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
242 for (MachineBasicBlock &MBB : MF) {
243 // First pass. Try to remove or optimize existing LEAs.
244 bool OptIncDecPerBB =
245 OptIncDec || llvm::shouldOptimizeForSize(MBB: &MBB, PSI, MBFI);
246 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
247 if (!isLEA(Opcode: I->getOpcode()))
248 continue;
249
250 if (optTwoAddrLEA(I, MBB, OptIncDec: OptIncDecPerBB, UseLEAForSP))
251 continue;
252
253 if (IsSlowLEA)
254 processInstructionForSlowLEA(I, MBB);
255 else if (IsSlow3OpsLEA)
256 processInstrForSlow3OpLEA(I, MBB, OptIncDec: OptIncDecPerBB);
257 }
258
259 // Second pass for creating LEAs. This may reverse some of the
260 // transformations above.
261 if (LEAUsesAG) {
262 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
263 processInstruction(I, MBB);
264 }
265 }
266
267 LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
268
269 return true;
270}
271
272FixupLEAPass::RegUsageState
273FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
274 RegUsageState RegUsage = RU_NotUsed;
275 MachineInstr &MI = *I;
276
277 for (const MachineOperand &MO : MI.operands()) {
278 if (MO.isReg() && MO.getReg() == p.getReg()) {
279 if (MO.isDef())
280 return RU_Write;
281 RegUsage = RU_Read;
282 }
283 }
284 return RegUsage;
285}
286
287/// getPreviousInstr - Given a reference to an instruction in a basic
288/// block, return a reference to the previous instruction in the block,
289/// wrapping around to the last instruction of the block if the block
290/// branches to itself.
291static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
292 MachineBasicBlock &MBB) {
293 if (I == MBB.begin()) {
294 if (MBB.isPredecessor(MBB: &MBB)) {
295 I = --MBB.end();
296 return true;
297 } else
298 return false;
299 }
300 --I;
301 return true;
302}
303
304MachineBasicBlock::iterator
305FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
306 MachineBasicBlock &MBB) {
307 int InstrDistance = 1;
308 MachineBasicBlock::iterator CurInst;
309 static const int INSTR_DISTANCE_THRESHOLD = 5;
310
311 CurInst = I;
312 bool Found;
313 Found = getPreviousInstr(I&: CurInst, MBB);
314 while (Found && I != CurInst) {
315 if (CurInst->isCall() || CurInst->isInlineAsm())
316 break;
317 if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
318 break; // too far back to make a difference
319 if (usesRegister(p, I: CurInst) == RU_Write) {
320 return CurInst;
321 }
322 InstrDistance += TSM.computeInstrLatency(MI: &*CurInst);
323 Found = getPreviousInstr(I&: CurInst, MBB);
324 }
325 return MachineBasicBlock::iterator();
326}
327
328static inline bool isInefficientLEAReg(unsigned Reg) {
329 return Reg == X86::EBP || Reg == X86::RBP ||
330 Reg == X86::R13D || Reg == X86::R13;
331}
332
333/// Returns true if this LEA uses base and index registers, and the base
334/// register is known to be inefficient for the subtarget.
335// TODO: use a variant scheduling class to model the latency profile
336// of LEA instructions, and implement this logic as a scheduling predicate.
337static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
338 const MachineOperand &Index) {
339 return Base.isReg() && isInefficientLEAReg(Reg: Base.getReg()) && Index.isReg() &&
340 Index.getReg() != X86::NoRegister;
341}
342
343static inline bool hasLEAOffset(const MachineOperand &Offset) {
344 return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal() ||
345 Offset.isBlockAddress();
346}
347
348static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
349 switch (LEAOpcode) {
350 default:
351 llvm_unreachable("Unexpected LEA instruction");
352 case X86::LEA32r:
353 case X86::LEA64_32r:
354 return X86::ADD32rr;
355 case X86::LEA64r:
356 return X86::ADD64rr;
357 }
358}
359
360static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
361 switch (LEAOpcode) {
362 default:
363 llvm_unreachable("Unexpected LEA instruction");
364 case X86::LEA32r:
365 case X86::LEA64_32r:
366 return X86::SUB32rr;
367 case X86::LEA64r:
368 return X86::SUB64rr;
369 }
370}
371
372static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
373 const MachineOperand &Offset) {
374 switch (LEAOpcode) {
375 default:
376 llvm_unreachable("Unexpected LEA instruction");
377 case X86::LEA32r:
378 case X86::LEA64_32r:
379 return X86::ADD32ri;
380 case X86::LEA64r:
381 return X86::ADD64ri32;
382 }
383}
384
385static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
386 switch (LEAOpcode) {
387 default:
388 llvm_unreachable("Unexpected LEA instruction");
389 case X86::LEA32r:
390 case X86::LEA64_32r:
391 return IsINC ? X86::INC32r : X86::DEC32r;
392 case X86::LEA64r:
393 return IsINC ? X86::INC64r : X86::DEC64r;
394 }
395}
396
397MachineBasicBlock::iterator
398FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,
399 MachineBasicBlock &MBB) const {
400 const int InstrDistanceThreshold = 5;
401 int InstrDistance = 1;
402 MachineBasicBlock::iterator CurInst = std::next(x: I);
403
404 unsigned LEAOpcode = I->getOpcode();
405 unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);
406 unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);
407 Register DestReg = I->getOperand(i: 0).getReg();
408
409 while (CurInst != MBB.end()) {
410 if (CurInst->isCall() || CurInst->isInlineAsm())
411 break;
412 if (InstrDistance > InstrDistanceThreshold)
413 break;
414
415 // Check if the lea dest register is used in an add/sub instruction only.
416 for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
417 MachineOperand &Opnd = CurInst->getOperand(i: I);
418 if (Opnd.isReg()) {
419 if (Opnd.getReg() == DestReg) {
420 if (Opnd.isDef() || !Opnd.isKill())
421 return MachineBasicBlock::iterator();
422
423 unsigned AluOpcode = CurInst->getOpcode();
424 if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)
425 return MachineBasicBlock::iterator();
426
427 MachineOperand &Opnd2 = CurInst->getOperand(i: 3 - I);
428 MachineOperand AluDest = CurInst->getOperand(i: 0);
429 if (Opnd2.getReg() != AluDest.getReg())
430 return MachineBasicBlock::iterator();
431
432 // X - (Y + Z) may generate different flags than (X - Y) - Z when
433 // there is overflow. So we can't change the alu instruction if the
434 // flags register is live.
435 if (!CurInst->registerDefIsDead(Reg: X86::EFLAGS, TRI))
436 return MachineBasicBlock::iterator();
437
438 return CurInst;
439 }
440 if (TRI->regsOverlap(RegA: DestReg, RegB: Opnd.getReg()))
441 return MachineBasicBlock::iterator();
442 }
443 }
444
445 InstrDistance++;
446 ++CurInst;
447 }
448 return MachineBasicBlock::iterator();
449}
450
451void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
452 MachineBasicBlock::iterator &AluI,
453 bool &BaseIndexDef, bool &AluDestRef,
454 MachineOperand **KilledBase,
455 MachineOperand **KilledIndex) const {
456 BaseIndexDef = AluDestRef = false;
457 *KilledBase = *KilledIndex = nullptr;
458 Register BaseReg = LeaI->getOperand(i: 1 + X86::AddrBaseReg).getReg();
459 Register IndexReg = LeaI->getOperand(i: 1 + X86::AddrIndexReg).getReg();
460 Register AluDestReg = AluI->getOperand(i: 0).getReg();
461
462 for (MachineInstr &CurInst : llvm::make_range(x: std::next(x: LeaI), y: AluI)) {
463 for (MachineOperand &Opnd : CurInst.operands()) {
464 if (!Opnd.isReg())
465 continue;
466 Register Reg = Opnd.getReg();
467 if (TRI->regsOverlap(RegA: Reg, RegB: AluDestReg))
468 AluDestRef = true;
469 if (TRI->regsOverlap(RegA: Reg, RegB: BaseReg)) {
470 if (Opnd.isDef())
471 BaseIndexDef = true;
472 else if (Opnd.isKill())
473 *KilledBase = &Opnd;
474 }
475 if (TRI->regsOverlap(RegA: Reg, RegB: IndexReg)) {
476 if (Opnd.isDef())
477 BaseIndexDef = true;
478 else if (Opnd.isKill())
479 *KilledIndex = &Opnd;
480 }
481 }
482 }
483}
484
485bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,
486 MachineBasicBlock &MBB) const {
487 // Look for an add/sub instruction which uses the result of lea.
488 MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);
489 if (AluI == MachineBasicBlock::iterator())
490 return false;
491
492 // Check if there are any related register usage between lea and alu.
493 bool BaseIndexDef, AluDestRef;
494 MachineOperand *KilledBase, *KilledIndex;
495 checkRegUsage(LeaI&: I, AluI, BaseIndexDef, AluDestRef, KilledBase: &KilledBase, KilledIndex: &KilledIndex);
496
497 MachineBasicBlock::iterator InsertPos = AluI;
498 if (BaseIndexDef) {
499 if (AluDestRef)
500 return false;
501 InsertPos = I;
502 KilledBase = KilledIndex = nullptr;
503 }
504
505 // Check if there are same registers.
506 Register AluDestReg = AluI->getOperand(i: 0).getReg();
507 Register BaseReg = I->getOperand(i: 1 + X86::AddrBaseReg).getReg();
508 Register IndexReg = I->getOperand(i: 1 + X86::AddrIndexReg).getReg();
509 if (I->getOpcode() == X86::LEA64_32r) {
510 BaseReg = TRI->getSubReg(Reg: BaseReg, Idx: X86::sub_32bit);
511 IndexReg = TRI->getSubReg(Reg: IndexReg, Idx: X86::sub_32bit);
512 }
513 if (AluDestReg == IndexReg) {
514 if (BaseReg == IndexReg)
515 return false;
516 std::swap(a&: BaseReg, b&: IndexReg);
517 std::swap(a&: KilledBase, b&: KilledIndex);
518 }
519 if (BaseReg == IndexReg)
520 KilledBase = nullptr;
521
522 // Now it's safe to change instructions.
523 MachineInstr *NewMI1, *NewMI2;
524 unsigned NewOpcode = AluI->getOpcode();
525 NewMI1 = BuildMI(BB&: MBB, I: InsertPos, MIMD: AluI->getDebugLoc(), MCID: TII->get(Opcode: NewOpcode),
526 DestReg: AluDestReg)
527 .addReg(RegNo: AluDestReg, flags: RegState::Kill)
528 .addReg(RegNo: BaseReg, flags: KilledBase ? RegState::Kill : 0);
529 NewMI1->addRegisterDead(Reg: X86::EFLAGS, RegInfo: TRI);
530 NewMI2 = BuildMI(BB&: MBB, I: InsertPos, MIMD: AluI->getDebugLoc(), MCID: TII->get(Opcode: NewOpcode),
531 DestReg: AluDestReg)
532 .addReg(RegNo: AluDestReg, flags: RegState::Kill)
533 .addReg(RegNo: IndexReg, flags: KilledIndex ? RegState::Kill : 0);
534 NewMI2->addRegisterDead(Reg: X86::EFLAGS, RegInfo: TRI);
535
536 // Clear the old Kill flags.
537 if (KilledBase)
538 KilledBase->setIsKill(false);
539 if (KilledIndex)
540 KilledIndex->setIsKill(false);
541
542 MBB.getParent()->substituteDebugValuesForInst(Old: *AluI, New&: *NewMI2, MaxOperand: 1);
543 MBB.erase(I);
544 MBB.erase(I: AluI);
545 I = NewMI1;
546 return true;
547}
548
549bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
550 MachineBasicBlock &MBB, bool OptIncDec,
551 bool UseLEAForSP) const {
552 MachineInstr &MI = *I;
553
554 const MachineOperand &Base = MI.getOperand(i: 1 + X86::AddrBaseReg);
555 const MachineOperand &Scale = MI.getOperand(i: 1 + X86::AddrScaleAmt);
556 const MachineOperand &Index = MI.getOperand(i: 1 + X86::AddrIndexReg);
557 const MachineOperand &Disp = MI.getOperand(i: 1 + X86::AddrDisp);
558 const MachineOperand &Segment = MI.getOperand(i: 1 + X86::AddrSegmentReg);
559
560 if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
561 MBB.computeRegisterLiveness(TRI, Reg: X86::EFLAGS, Before: I) !=
562 MachineBasicBlock::LQR_Dead)
563 return false;
564
565 Register DestReg = MI.getOperand(i: 0).getReg();
566 Register BaseReg = Base.getReg();
567 Register IndexReg = Index.getReg();
568
569 // Don't change stack adjustment LEAs.
570 if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
571 return false;
572
573 // LEA64_32 has 64-bit operands but 32-bit result.
574 if (MI.getOpcode() == X86::LEA64_32r) {
575 if (BaseReg != 0)
576 BaseReg = TRI->getSubReg(Reg: BaseReg, Idx: X86::sub_32bit);
577 if (IndexReg != 0)
578 IndexReg = TRI->getSubReg(Reg: IndexReg, Idx: X86::sub_32bit);
579 }
580
581 MachineInstr *NewMI = nullptr;
582
583 // Case 1.
584 // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
585 // which can be turned into add %reg2, %reg1
586 if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
587 (DestReg == BaseReg || DestReg == IndexReg)) {
588 unsigned NewOpcode = getADDrrFromLEA(LEAOpcode: MI.getOpcode());
589 if (DestReg != BaseReg)
590 std::swap(a&: BaseReg, b&: IndexReg);
591
592 if (MI.getOpcode() == X86::LEA64_32r) {
593 // TODO: Do we need the super register implicit use?
594 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpcode), DestReg)
595 .addReg(RegNo: BaseReg).addReg(RegNo: IndexReg)
596 .addReg(RegNo: Base.getReg(), flags: RegState::Implicit)
597 .addReg(RegNo: Index.getReg(), flags: RegState::Implicit);
598 } else {
599 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpcode), DestReg)
600 .addReg(RegNo: BaseReg).addReg(RegNo: IndexReg);
601 }
602 } else if (DestReg == BaseReg && IndexReg == 0) {
603 // Case 2.
604 // This is an LEA with only a base register and a displacement,
605 // We can use ADDri or INC/DEC.
606
607 // Does this LEA have one these forms:
608 // lea %reg, 1(%reg)
609 // lea %reg, -1(%reg)
610 if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
611 bool IsINC = Disp.getImm() == 1;
612 unsigned NewOpcode = getINCDECFromLEA(LEAOpcode: MI.getOpcode(), IsINC);
613
614 if (MI.getOpcode() == X86::LEA64_32r) {
615 // TODO: Do we need the super register implicit use?
616 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpcode), DestReg)
617 .addReg(RegNo: BaseReg).addReg(RegNo: Base.getReg(), flags: RegState::Implicit);
618 } else {
619 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpcode), DestReg)
620 .addReg(RegNo: BaseReg);
621 }
622 } else {
623 unsigned NewOpcode = getADDriFromLEA(LEAOpcode: MI.getOpcode(), Offset: Disp);
624 if (MI.getOpcode() == X86::LEA64_32r) {
625 // TODO: Do we need the super register implicit use?
626 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpcode), DestReg)
627 .addReg(RegNo: BaseReg).addImm(Val: Disp.getImm())
628 .addReg(RegNo: Base.getReg(), flags: RegState::Implicit);
629 } else {
630 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpcode), DestReg)
631 .addReg(RegNo: BaseReg).addImm(Val: Disp.getImm());
632 }
633 }
634 } else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {
635 // Case 3.
636 // Look for and transform the sequence
637 // lea (reg1, reg2), reg3
638 // sub reg3, reg4
639 return optLEAALU(I, MBB);
640 } else
641 return false;
642
643 MBB.getParent()->substituteDebugValuesForInst(Old: *I, New&: *NewMI, MaxOperand: 1);
644 MBB.erase(I);
645 I = NewMI;
646 return true;
647}
648
649void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
650 MachineBasicBlock &MBB) {
651 // Process a load, store, or LEA instruction.
652 MachineInstr &MI = *I;
653 const MCInstrDesc &Desc = MI.getDesc();
654 int AddrOffset = X86II::getMemoryOperandNo(TSFlags: Desc.TSFlags);
655 if (AddrOffset >= 0) {
656 AddrOffset += X86II::getOperandBias(Desc);
657 MachineOperand &p = MI.getOperand(i: AddrOffset + X86::AddrBaseReg);
658 if (p.isReg() && p.getReg() != X86::ESP) {
659 seekLEAFixup(p, I, MBB);
660 }
661 MachineOperand &q = MI.getOperand(i: AddrOffset + X86::AddrIndexReg);
662 if (q.isReg() && q.getReg() != X86::ESP) {
663 seekLEAFixup(p&: q, I, MBB);
664 }
665 }
666}
667
668void FixupLEAPass::seekLEAFixup(MachineOperand &p,
669 MachineBasicBlock::iterator &I,
670 MachineBasicBlock &MBB) {
671 MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
672 if (MBI != MachineBasicBlock::iterator()) {
673 MachineInstr *NewMI = postRAConvertToLEA(MBB, MBBI&: MBI);
674 if (NewMI) {
675 ++NumLEAs;
676 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
677 // now to replace with an equivalent LEA...
678 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
679 MBB.getParent()->substituteDebugValuesForInst(Old: *MBI, New&: *NewMI, MaxOperand: 1);
680 MBB.erase(I: MBI);
681 MachineBasicBlock::iterator J =
682 static_cast<MachineBasicBlock::iterator>(NewMI);
683 processInstruction(I&: J, MBB);
684 }
685 }
686}
687
688void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
689 MachineBasicBlock &MBB) {
690 MachineInstr &MI = *I;
691 const unsigned Opcode = MI.getOpcode();
692
693 const MachineOperand &Dst = MI.getOperand(i: 0);
694 const MachineOperand &Base = MI.getOperand(i: 1 + X86::AddrBaseReg);
695 const MachineOperand &Scale = MI.getOperand(i: 1 + X86::AddrScaleAmt);
696 const MachineOperand &Index = MI.getOperand(i: 1 + X86::AddrIndexReg);
697 const MachineOperand &Offset = MI.getOperand(i: 1 + X86::AddrDisp);
698 const MachineOperand &Segment = MI.getOperand(i: 1 + X86::AddrSegmentReg);
699
700 if (Segment.getReg() != 0 || !Offset.isImm() ||
701 MBB.computeRegisterLiveness(TRI, Reg: X86::EFLAGS, Before: I, Neighborhood: 4) !=
702 MachineBasicBlock::LQR_Dead)
703 return;
704 const Register DstR = Dst.getReg();
705 const Register SrcR1 = Base.getReg();
706 const Register SrcR2 = Index.getReg();
707 if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
708 return;
709 if (Scale.getImm() > 1)
710 return;
711 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
712 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
713 MachineInstr *NewMI = nullptr;
714 // Make ADD instruction for two registers writing to LEA's destination
715 if (SrcR1 != 0 && SrcR2 != 0) {
716 const MCInstrDesc &ADDrr = TII->get(Opcode: getADDrrFromLEA(LEAOpcode: Opcode));
717 const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
718 NewMI =
719 BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: ADDrr, DestReg: DstR).addReg(RegNo: DstR).add(MO: Src);
720 LLVM_DEBUG(NewMI->dump(););
721 }
722 // Make ADD instruction for immediate
723 if (Offset.getImm() != 0) {
724 const MCInstrDesc &ADDri =
725 TII->get(Opcode: getADDriFromLEA(LEAOpcode: Opcode, Offset));
726 const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
727 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: ADDri, DestReg: DstR)
728 .add(MO: SrcR)
729 .addImm(Val: Offset.getImm());
730 LLVM_DEBUG(NewMI->dump(););
731 }
732 if (NewMI) {
733 MBB.getParent()->substituteDebugValuesForInst(Old: *I, New&: *NewMI, MaxOperand: 1);
734 MBB.erase(I);
735 I = NewMI;
736 }
737}
738
739void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
740 MachineBasicBlock &MBB,
741 bool OptIncDec) {
742 MachineInstr &MI = *I;
743 const unsigned LEAOpcode = MI.getOpcode();
744
745 const MachineOperand &Dest = MI.getOperand(i: 0);
746 const MachineOperand &Base = MI.getOperand(i: 1 + X86::AddrBaseReg);
747 const MachineOperand &Scale = MI.getOperand(i: 1 + X86::AddrScaleAmt);
748 const MachineOperand &Index = MI.getOperand(i: 1 + X86::AddrIndexReg);
749 const MachineOperand &Offset = MI.getOperand(i: 1 + X86::AddrDisp);
750 const MachineOperand &Segment = MI.getOperand(i: 1 + X86::AddrSegmentReg);
751
752 if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
753 MBB.computeRegisterLiveness(TRI, Reg: X86::EFLAGS, Before: I, Neighborhood: 4) !=
754 MachineBasicBlock::LQR_Dead ||
755 Segment.getReg() != X86::NoRegister)
756 return;
757
758 Register DestReg = Dest.getReg();
759 Register BaseReg = Base.getReg();
760 Register IndexReg = Index.getReg();
761
762 if (MI.getOpcode() == X86::LEA64_32r) {
763 if (BaseReg != 0)
764 BaseReg = TRI->getSubReg(Reg: BaseReg, Idx: X86::sub_32bit);
765 if (IndexReg != 0)
766 IndexReg = TRI->getSubReg(Reg: IndexReg, Idx: X86::sub_32bit);
767 }
768
769 bool IsScale1 = Scale.getImm() == 1;
770 bool IsInefficientBase = isInefficientLEAReg(Reg: BaseReg);
771 bool IsInefficientIndex = isInefficientLEAReg(Reg: IndexReg);
772
773 // Skip these cases since it takes more than 2 instructions
774 // to replace the LEA instruction.
775 if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
776 return;
777
778 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
779 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
780
781 MachineInstr *NewMI = nullptr;
782 bool BaseOrIndexIsDst = DestReg == BaseReg || DestReg == IndexReg;
783 // First try and remove the base while sticking with LEA iff base == index and
784 // scale == 1. We can handle:
785 // 1. lea D(%base,%index,1) -> lea D(,%index,2)
786 // 2. lea D(%r13/%rbp,%index) -> lea D(,%index,2)
787 // Only do this if the LEA would otherwise be split into 2-instruction
788 // (either it has a an Offset or neither base nor index are dst)
789 if (IsScale1 && BaseReg == IndexReg &&
790 (hasLEAOffset(Offset) || (IsInefficientBase && !BaseOrIndexIsDst))) {
791 NewMI = BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: LEAOpcode))
792 .add(MO: Dest)
793 .addReg(RegNo: 0)
794 .addImm(Val: 2)
795 .add(MO: Index)
796 .add(MO: Offset)
797 .add(MO: Segment);
798 LLVM_DEBUG(NewMI->dump(););
799
800 MBB.getParent()->substituteDebugValuesForInst(Old: *I, New&: *NewMI, MaxOperand: 1);
801 MBB.erase(I);
802 I = NewMI;
803 return;
804 } else if (IsScale1 && BaseOrIndexIsDst) {
805 // Try to replace LEA with one or two (for the 3-op LEA case)
806 // add instructions:
807 // 1.lea (%base,%index,1), %base => add %index,%base
808 // 2.lea (%base,%index,1), %index => add %base,%index
809
810 unsigned NewOpc = getADDrrFromLEA(LEAOpcode: MI.getOpcode());
811 if (DestReg != BaseReg)
812 std::swap(a&: BaseReg, b&: IndexReg);
813
814 if (MI.getOpcode() == X86::LEA64_32r) {
815 // TODO: Do we need the super register implicit use?
816 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpc), DestReg)
817 .addReg(RegNo: BaseReg)
818 .addReg(RegNo: IndexReg)
819 .addReg(RegNo: Base.getReg(), flags: RegState::Implicit)
820 .addReg(RegNo: Index.getReg(), flags: RegState::Implicit);
821 } else {
822 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpc), DestReg)
823 .addReg(RegNo: BaseReg)
824 .addReg(RegNo: IndexReg);
825 }
826 } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
827 // If the base is inefficient try switching the index and base operands,
828 // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
829 // lea offset(%base,%index,scale),%dst =>
830 // lea (%base,%index,scale); add offset,%dst
831 NewMI = BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: LEAOpcode))
832 .add(MO: Dest)
833 .add(MO: IsInefficientBase ? Index : Base)
834 .add(MO: Scale)
835 .add(MO: IsInefficientBase ? Base : Index)
836 .addImm(Val: 0)
837 .add(MO: Segment);
838 LLVM_DEBUG(NewMI->dump(););
839 }
840
841 // If either replacement succeeded above, add the offset if needed, then
842 // replace the instruction.
843 if (NewMI) {
844 // Create ADD instruction for the Offset in case of 3-Ops LEA.
845 if (hasLEAOffset(Offset)) {
846 if (OptIncDec && Offset.isImm() &&
847 (Offset.getImm() == 1 || Offset.getImm() == -1)) {
848 unsigned NewOpc =
849 getINCDECFromLEA(LEAOpcode: MI.getOpcode(), IsINC: Offset.getImm() == 1);
850 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpc), DestReg)
851 .addReg(RegNo: DestReg);
852 LLVM_DEBUG(NewMI->dump(););
853 } else {
854 unsigned NewOpc = getADDriFromLEA(LEAOpcode: MI.getOpcode(), Offset);
855 NewMI = BuildMI(BB&: MBB, I, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpc), DestReg)
856 .addReg(RegNo: DestReg)
857 .add(MO: Offset);
858 LLVM_DEBUG(NewMI->dump(););
859 }
860 }
861
862 MBB.getParent()->substituteDebugValuesForInst(Old: *I, New&: *NewMI, MaxOperand: 1);
863 MBB.erase(I);
864 I = NewMI;
865 return;
866 }
867
868 // Handle the rest of the cases with inefficient base register:
869 assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
870 assert(IsInefficientBase && "efficient base should be handled already!");
871
872 // FIXME: Handle LEA64_32r.
873 if (LEAOpcode == X86::LEA64_32r)
874 return;
875
876 // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
877 if (IsScale1 && !hasLEAOffset(Offset)) {
878 bool BIK = Base.isKill() && BaseReg != IndexReg;
879 TII->copyPhysReg(MBB, MI, DL: MI.getDebugLoc(), DestReg, SrcReg: BaseReg, KillSrc: BIK);
880 LLVM_DEBUG(MI.getPrevNode()->dump(););
881
882 unsigned NewOpc = getADDrrFromLEA(LEAOpcode: MI.getOpcode());
883 NewMI = BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpc), DestReg)
884 .addReg(RegNo: DestReg)
885 .add(MO: Index);
886 LLVM_DEBUG(NewMI->dump(););
887
888 MBB.getParent()->substituteDebugValuesForInst(Old: *I, New&: *NewMI, MaxOperand: 1);
889 MBB.erase(I);
890 I = NewMI;
891 return;
892 }
893
894 // lea offset(%base,%index,scale), %dst =>
895 // lea offset( ,%index,scale), %dst; add %base,%dst
896 NewMI = BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: LEAOpcode))
897 .add(MO: Dest)
898 .addReg(RegNo: 0)
899 .add(MO: Scale)
900 .add(MO: Index)
901 .add(MO: Offset)
902 .add(MO: Segment);
903 LLVM_DEBUG(NewMI->dump(););
904
905 unsigned NewOpc = getADDrrFromLEA(LEAOpcode: MI.getOpcode());
906 NewMI = BuildMI(BB&: MBB, I&: MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: NewOpc), DestReg)
907 .addReg(RegNo: DestReg)
908 .add(MO: Base);
909 LLVM_DEBUG(NewMI->dump(););
910
911 MBB.getParent()->substituteDebugValuesForInst(Old: *I, New&: *NewMI, MaxOperand: 1);
912 MBB.erase(I);
913 I = NewMI;
914}
915