1 | //===- X86OptimizeLEAs.cpp - optimize usage of LEA instructions -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the pass that performs some optimizations with LEA |
10 | // instructions in order to improve performance and code size. |
11 | // Currently, it does two things: |
12 | // 1) If there are two LEA instructions calculating addresses which only differ |
13 | // by displacement inside a basic block, one of them is removed. |
14 | // 2) Address calculations in load and store instructions are replaced by |
15 | // existing LEA def registers where possible. |
16 | // |
17 | //===----------------------------------------------------------------------===// |
18 | |
19 | #include "MCTargetDesc/X86BaseInfo.h" |
20 | #include "X86.h" |
21 | #include "X86InstrInfo.h" |
22 | #include "X86Subtarget.h" |
23 | #include "llvm/ADT/DenseMap.h" |
24 | #include "llvm/ADT/DenseMapInfo.h" |
25 | #include "llvm/ADT/Hashing.h" |
26 | #include "llvm/ADT/SmallVector.h" |
27 | #include "llvm/ADT/Statistic.h" |
28 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
29 | #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" |
30 | #include "llvm/CodeGen/MachineBasicBlock.h" |
31 | #include "llvm/CodeGen/MachineFunction.h" |
32 | #include "llvm/CodeGen/MachineFunctionPass.h" |
33 | #include "llvm/CodeGen/MachineInstr.h" |
34 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
35 | #include "llvm/CodeGen/MachineOperand.h" |
36 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
37 | #include "llvm/CodeGen/MachineSizeOpts.h" |
38 | #include "llvm/CodeGen/TargetOpcodes.h" |
39 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
40 | #include "llvm/IR/DebugInfoMetadata.h" |
41 | #include "llvm/IR/DebugLoc.h" |
42 | #include "llvm/IR/Function.h" |
43 | #include "llvm/MC/MCInstrDesc.h" |
44 | #include "llvm/Support/CommandLine.h" |
45 | #include "llvm/Support/Debug.h" |
46 | #include "llvm/Support/ErrorHandling.h" |
47 | #include "llvm/Support/MathExtras.h" |
48 | #include "llvm/Support/raw_ostream.h" |
49 | #include <cassert> |
50 | #include <cstdint> |
51 | #include <iterator> |
52 | |
53 | using namespace llvm; |
54 | |
55 | #define DEBUG_TYPE "x86-optimize-LEAs" |
56 | |
57 | static cl::opt<bool> |
58 | DisableX86LEAOpt("disable-x86-lea-opt" , cl::Hidden, |
59 | cl::desc("X86: Disable LEA optimizations." ), |
60 | cl::init(Val: false)); |
61 | |
62 | STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions" ); |
63 | STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed" ); |
64 | |
65 | /// Returns true if two machine operands are identical and they are not |
66 | /// physical registers. |
67 | static inline bool isIdenticalOp(const MachineOperand &MO1, |
68 | const MachineOperand &MO2); |
69 | |
70 | /// Returns true if two address displacement operands are of the same |
71 | /// type and use the same symbol/index/address regardless of the offset. |
72 | static bool isSimilarDispOp(const MachineOperand &MO1, |
73 | const MachineOperand &MO2); |
74 | |
75 | /// Returns true if the instruction is LEA. |
76 | static inline bool isLEA(const MachineInstr &MI); |
77 | |
78 | namespace { |
79 | |
80 | /// A key based on instruction's memory operands. |
81 | class MemOpKey { |
82 | public: |
83 | MemOpKey(const MachineOperand *Base, const MachineOperand *Scale, |
84 | const MachineOperand *Index, const MachineOperand *Segment, |
85 | const MachineOperand *Disp) |
86 | : Disp(Disp) { |
87 | Operands[0] = Base; |
88 | Operands[1] = Scale; |
89 | Operands[2] = Index; |
90 | Operands[3] = Segment; |
91 | } |
92 | |
93 | bool operator==(const MemOpKey &Other) const { |
94 | // Addresses' bases, scales, indices and segments must be identical. |
95 | for (int i = 0; i < 4; ++i) |
96 | if (!isIdenticalOp(MO1: *Operands[i], MO2: *Other.Operands[i])) |
97 | return false; |
98 | |
99 | // Addresses' displacements don't have to be exactly the same. It only |
100 | // matters that they use the same symbol/index/address. Immediates' or |
101 | // offsets' differences will be taken care of during instruction |
102 | // substitution. |
103 | return isSimilarDispOp(MO1: *Disp, MO2: *Other.Disp); |
104 | } |
105 | |
106 | // Address' base, scale, index and segment operands. |
107 | const MachineOperand *Operands[4]; |
108 | |
109 | // Address' displacement operand. |
110 | const MachineOperand *Disp; |
111 | }; |
112 | |
113 | } // end anonymous namespace |
114 | |
115 | namespace llvm { |
116 | |
117 | /// Provide DenseMapInfo for MemOpKey. |
118 | template <> struct DenseMapInfo<MemOpKey> { |
119 | using PtrInfo = DenseMapInfo<const MachineOperand *>; |
120 | |
121 | static inline MemOpKey getEmptyKey() { |
122 | return MemOpKey(PtrInfo::getEmptyKey(), PtrInfo::getEmptyKey(), |
123 | PtrInfo::getEmptyKey(), PtrInfo::getEmptyKey(), |
124 | PtrInfo::getEmptyKey()); |
125 | } |
126 | |
127 | static inline MemOpKey getTombstoneKey() { |
128 | return MemOpKey(PtrInfo::getTombstoneKey(), PtrInfo::getTombstoneKey(), |
129 | PtrInfo::getTombstoneKey(), PtrInfo::getTombstoneKey(), |
130 | PtrInfo::getTombstoneKey()); |
131 | } |
132 | |
133 | static unsigned getHashValue(const MemOpKey &Val) { |
134 | // Checking any field of MemOpKey is enough to determine if the key is |
135 | // empty or tombstone. |
136 | assert(Val.Disp != PtrInfo::getEmptyKey() && "Cannot hash the empty key" ); |
137 | assert(Val.Disp != PtrInfo::getTombstoneKey() && |
138 | "Cannot hash the tombstone key" ); |
139 | |
140 | hash_code Hash = hash_combine(args: *Val.Operands[0], args: *Val.Operands[1], |
141 | args: *Val.Operands[2], args: *Val.Operands[3]); |
142 | |
143 | // If the address displacement is an immediate, it should not affect the |
144 | // hash so that memory operands which differ only be immediate displacement |
145 | // would have the same hash. If the address displacement is something else, |
146 | // we should reflect symbol/index/address in the hash. |
147 | switch (Val.Disp->getType()) { |
148 | case MachineOperand::MO_Immediate: |
149 | break; |
150 | case MachineOperand::MO_ConstantPoolIndex: |
151 | case MachineOperand::MO_JumpTableIndex: |
152 | Hash = hash_combine(args: Hash, args: Val.Disp->getIndex()); |
153 | break; |
154 | case MachineOperand::MO_ExternalSymbol: |
155 | Hash = hash_combine(args: Hash, args: Val.Disp->getSymbolName()); |
156 | break; |
157 | case MachineOperand::MO_GlobalAddress: |
158 | Hash = hash_combine(args: Hash, args: Val.Disp->getGlobal()); |
159 | break; |
160 | case MachineOperand::MO_BlockAddress: |
161 | Hash = hash_combine(args: Hash, args: Val.Disp->getBlockAddress()); |
162 | break; |
163 | case MachineOperand::MO_MCSymbol: |
164 | Hash = hash_combine(args: Hash, args: Val.Disp->getMCSymbol()); |
165 | break; |
166 | case MachineOperand::MO_MachineBasicBlock: |
167 | Hash = hash_combine(args: Hash, args: Val.Disp->getMBB()); |
168 | break; |
169 | default: |
170 | llvm_unreachable("Invalid address displacement operand" ); |
171 | } |
172 | |
173 | return (unsigned)Hash; |
174 | } |
175 | |
176 | static bool isEqual(const MemOpKey &LHS, const MemOpKey &RHS) { |
177 | // Checking any field of MemOpKey is enough to determine if the key is |
178 | // empty or tombstone. |
179 | if (RHS.Disp == PtrInfo::getEmptyKey()) |
180 | return LHS.Disp == PtrInfo::getEmptyKey(); |
181 | if (RHS.Disp == PtrInfo::getTombstoneKey()) |
182 | return LHS.Disp == PtrInfo::getTombstoneKey(); |
183 | return LHS == RHS; |
184 | } |
185 | }; |
186 | |
187 | } // end namespace llvm |
188 | |
189 | /// Returns a hash table key based on memory operands of \p MI. The |
190 | /// number of the first memory operand of \p MI is specified through \p N. |
191 | static inline MemOpKey getMemOpKey(const MachineInstr &MI, unsigned N) { |
192 | assert((isLEA(MI) || MI.mayLoadOrStore()) && |
193 | "The instruction must be a LEA, a load or a store" ); |
194 | return MemOpKey(&MI.getOperand(i: N + X86::AddrBaseReg), |
195 | &MI.getOperand(i: N + X86::AddrScaleAmt), |
196 | &MI.getOperand(i: N + X86::AddrIndexReg), |
197 | &MI.getOperand(i: N + X86::AddrSegmentReg), |
198 | &MI.getOperand(i: N + X86::AddrDisp)); |
199 | } |
200 | |
201 | static inline bool isIdenticalOp(const MachineOperand &MO1, |
202 | const MachineOperand &MO2) { |
203 | return MO1.isIdenticalTo(Other: MO2) && (!MO1.isReg() || !MO1.getReg().isPhysical()); |
204 | } |
205 | |
206 | #ifndef NDEBUG |
207 | static bool isValidDispOp(const MachineOperand &MO) { |
208 | return MO.isImm() || MO.isCPI() || MO.isJTI() || MO.isSymbol() || |
209 | MO.isGlobal() || MO.isBlockAddress() || MO.isMCSymbol() || MO.isMBB(); |
210 | } |
211 | #endif |
212 | |
213 | static bool isSimilarDispOp(const MachineOperand &MO1, |
214 | const MachineOperand &MO2) { |
215 | assert(isValidDispOp(MO1) && isValidDispOp(MO2) && |
216 | "Address displacement operand is not valid" ); |
217 | return (MO1.isImm() && MO2.isImm()) || |
218 | (MO1.isCPI() && MO2.isCPI() && MO1.getIndex() == MO2.getIndex()) || |
219 | (MO1.isJTI() && MO2.isJTI() && MO1.getIndex() == MO2.getIndex()) || |
220 | (MO1.isSymbol() && MO2.isSymbol() && |
221 | MO1.getSymbolName() == MO2.getSymbolName()) || |
222 | (MO1.isGlobal() && MO2.isGlobal() && |
223 | MO1.getGlobal() == MO2.getGlobal()) || |
224 | (MO1.isBlockAddress() && MO2.isBlockAddress() && |
225 | MO1.getBlockAddress() == MO2.getBlockAddress()) || |
226 | (MO1.isMCSymbol() && MO2.isMCSymbol() && |
227 | MO1.getMCSymbol() == MO2.getMCSymbol()) || |
228 | (MO1.isMBB() && MO2.isMBB() && MO1.getMBB() == MO2.getMBB()); |
229 | } |
230 | |
231 | static inline bool isLEA(const MachineInstr &MI) { |
232 | unsigned Opcode = MI.getOpcode(); |
233 | return Opcode == X86::LEA16r || Opcode == X86::LEA32r || |
234 | Opcode == X86::LEA64r || Opcode == X86::LEA64_32r; |
235 | } |
236 | |
237 | namespace { |
238 | |
239 | class X86OptimizeLEAPass : public MachineFunctionPass { |
240 | public: |
241 | X86OptimizeLEAPass() : MachineFunctionPass(ID) {} |
242 | |
243 | StringRef getPassName() const override { return "X86 LEA Optimize" ; } |
244 | |
245 | /// Loop over all of the basic blocks, replacing address |
246 | /// calculations in load and store instructions, if it's already |
247 | /// been calculated by LEA. Also, remove redundant LEAs. |
248 | bool runOnMachineFunction(MachineFunction &MF) override; |
249 | |
250 | static char ID; |
251 | |
252 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
253 | AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
254 | AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); |
255 | MachineFunctionPass::getAnalysisUsage(AU); |
256 | } |
257 | |
258 | private: |
259 | using MemOpMap = DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>>; |
260 | |
261 | /// Returns a distance between two instructions inside one basic block. |
262 | /// Negative result means, that instructions occur in reverse order. |
263 | int calcInstrDist(const MachineInstr &First, const MachineInstr &Last); |
264 | |
265 | /// Choose the best \p LEA instruction from the \p List to replace |
266 | /// address calculation in \p MI instruction. Return the address displacement |
267 | /// and the distance between \p MI and the chosen \p BestLEA in |
268 | /// \p AddrDispShift and \p Dist. |
269 | bool chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List, |
270 | const MachineInstr &MI, MachineInstr *&BestLEA, |
271 | int64_t &AddrDispShift, int &Dist); |
272 | |
273 | /// Returns the difference between addresses' displacements of \p MI1 |
274 | /// and \p MI2. The numbers of the first memory operands for the instructions |
275 | /// are specified through \p N1 and \p N2. |
276 | int64_t getAddrDispShift(const MachineInstr &MI1, unsigned N1, |
277 | const MachineInstr &MI2, unsigned N2) const; |
278 | |
279 | /// Returns true if the \p Last LEA instruction can be replaced by the |
280 | /// \p First. The difference between displacements of the addresses calculated |
281 | /// by these LEAs is returned in \p AddrDispShift. It'll be used for proper |
282 | /// replacement of the \p Last LEA's uses with the \p First's def register. |
283 | bool isReplaceable(const MachineInstr &First, const MachineInstr &Last, |
284 | int64_t &AddrDispShift) const; |
285 | |
286 | /// Find all LEA instructions in the basic block. Also, assign position |
287 | /// numbers to all instructions in the basic block to speed up calculation of |
288 | /// distance between them. |
289 | void findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs); |
290 | |
291 | /// Removes redundant address calculations. |
292 | bool removeRedundantAddrCalc(MemOpMap &LEAs); |
293 | |
294 | /// Replace debug value MI with a new debug value instruction using register |
295 | /// VReg with an appropriate offset and DIExpression to incorporate the |
296 | /// address displacement AddrDispShift. Return new debug value instruction. |
297 | MachineInstr *replaceDebugValue(MachineInstr &MI, unsigned OldReg, |
298 | unsigned NewReg, int64_t AddrDispShift); |
299 | |
300 | /// Removes LEAs which calculate similar addresses. |
301 | bool removeRedundantLEAs(MemOpMap &LEAs); |
302 | |
303 | DenseMap<const MachineInstr *, unsigned> InstrPos; |
304 | |
305 | MachineRegisterInfo *MRI = nullptr; |
306 | const X86InstrInfo *TII = nullptr; |
307 | const X86RegisterInfo *TRI = nullptr; |
308 | }; |
309 | |
310 | } // end anonymous namespace |
311 | |
312 | char X86OptimizeLEAPass::ID = 0; |
313 | |
314 | FunctionPass *llvm::createX86OptimizeLEAs() { return new X86OptimizeLEAPass(); } |
315 | INITIALIZE_PASS(X86OptimizeLEAPass, DEBUG_TYPE, "X86 optimize LEA pass" , false, |
316 | false) |
317 | |
318 | int X86OptimizeLEAPass::calcInstrDist(const MachineInstr &First, |
319 | const MachineInstr &Last) { |
320 | // Both instructions must be in the same basic block and they must be |
321 | // presented in InstrPos. |
322 | assert(Last.getParent() == First.getParent() && |
323 | "Instructions are in different basic blocks" ); |
324 | assert(InstrPos.contains(&First) && InstrPos.contains(&Last) && |
325 | "Instructions' positions are undefined" ); |
326 | |
327 | return InstrPos[&Last] - InstrPos[&First]; |
328 | } |
329 | |
330 | // Find the best LEA instruction in the List to replace address recalculation in |
331 | // MI. Such LEA must meet these requirements: |
332 | // 1) The address calculated by the LEA differs only by the displacement from |
333 | // the address used in MI. |
334 | // 2) The register class of the definition of the LEA is compatible with the |
335 | // register class of the address base register of MI. |
336 | // 3) Displacement of the new memory operand should fit in 1 byte if possible. |
337 | // 4) The LEA should be as close to MI as possible, and prior to it if |
338 | // possible. |
339 | bool X86OptimizeLEAPass::chooseBestLEA( |
340 | const SmallVectorImpl<MachineInstr *> &List, const MachineInstr &MI, |
341 | MachineInstr *&BestLEA, int64_t &AddrDispShift, int &Dist) { |
342 | const MachineFunction *MF = MI.getParent()->getParent(); |
343 | const MCInstrDesc &Desc = MI.getDesc(); |
344 | int MemOpNo = X86II::getMemoryOperandNo(TSFlags: Desc.TSFlags) + |
345 | X86II::getOperandBias(Desc); |
346 | |
347 | BestLEA = nullptr; |
348 | |
349 | // Loop over all LEA instructions. |
350 | for (auto *DefMI : List) { |
351 | // Get new address displacement. |
352 | int64_t AddrDispShiftTemp = getAddrDispShift(MI1: MI, N1: MemOpNo, MI2: *DefMI, N2: 1); |
353 | |
354 | // Make sure address displacement fits 4 bytes. |
355 | if (!isInt<32>(x: AddrDispShiftTemp)) |
356 | continue; |
357 | |
358 | // Check that LEA def register can be used as MI address base. Some |
359 | // instructions can use a limited set of registers as address base, for |
360 | // example MOV8mr_NOREX. We could constrain the register class of the LEA |
361 | // def to suit MI, however since this case is very rare and hard to |
362 | // reproduce in a test it's just more reliable to skip the LEA. |
363 | if (TII->getRegClass(MCID: Desc, OpNum: MemOpNo + X86::AddrBaseReg, TRI, MF: *MF) != |
364 | MRI->getRegClass(Reg: DefMI->getOperand(i: 0).getReg())) |
365 | continue; |
366 | |
367 | // Choose the closest LEA instruction from the list, prior to MI if |
368 | // possible. Note that we took into account resulting address displacement |
369 | // as well. Also note that the list is sorted by the order in which the LEAs |
370 | // occur, so the break condition is pretty simple. |
371 | int DistTemp = calcInstrDist(First: *DefMI, Last: MI); |
372 | assert(DistTemp != 0 && |
373 | "The distance between two different instructions cannot be zero" ); |
374 | if (DistTemp > 0 || BestLEA == nullptr) { |
375 | // Do not update return LEA, if the current one provides a displacement |
376 | // which fits in 1 byte, while the new candidate does not. |
377 | if (BestLEA != nullptr && !isInt<8>(x: AddrDispShiftTemp) && |
378 | isInt<8>(x: AddrDispShift)) |
379 | continue; |
380 | |
381 | BestLEA = DefMI; |
382 | AddrDispShift = AddrDispShiftTemp; |
383 | Dist = DistTemp; |
384 | } |
385 | |
386 | // FIXME: Maybe we should not always stop at the first LEA after MI. |
387 | if (DistTemp < 0) |
388 | break; |
389 | } |
390 | |
391 | return BestLEA != nullptr; |
392 | } |
393 | |
394 | // Get the difference between the addresses' displacements of the two |
395 | // instructions \p MI1 and \p MI2. The numbers of the first memory operands are |
396 | // passed through \p N1 and \p N2. |
397 | int64_t X86OptimizeLEAPass::getAddrDispShift(const MachineInstr &MI1, |
398 | unsigned N1, |
399 | const MachineInstr &MI2, |
400 | unsigned N2) const { |
401 | const MachineOperand &Op1 = MI1.getOperand(i: N1 + X86::AddrDisp); |
402 | const MachineOperand &Op2 = MI2.getOperand(i: N2 + X86::AddrDisp); |
403 | |
404 | assert(isSimilarDispOp(Op1, Op2) && |
405 | "Address displacement operands are not compatible" ); |
406 | |
407 | // After the assert above we can be sure that both operands are of the same |
408 | // valid type and use the same symbol/index/address, thus displacement shift |
409 | // calculation is rather simple. |
410 | if (Op1.isJTI()) |
411 | return 0; |
412 | return Op1.isImm() ? Op1.getImm() - Op2.getImm() |
413 | : Op1.getOffset() - Op2.getOffset(); |
414 | } |
415 | |
416 | // Check that the Last LEA can be replaced by the First LEA. To be so, |
417 | // these requirements must be met: |
418 | // 1) Addresses calculated by LEAs differ only by displacement. |
419 | // 2) Def registers of LEAs belong to the same class. |
420 | // 3) All uses of the Last LEA def register are replaceable, thus the |
421 | // register is used only as address base. |
422 | bool X86OptimizeLEAPass::isReplaceable(const MachineInstr &First, |
423 | const MachineInstr &Last, |
424 | int64_t &AddrDispShift) const { |
425 | assert(isLEA(First) && isLEA(Last) && |
426 | "The function works only with LEA instructions" ); |
427 | |
428 | // Make sure that LEA def registers belong to the same class. There may be |
429 | // instructions (like MOV8mr_NOREX) which allow a limited set of registers to |
430 | // be used as their operands, so we must be sure that replacing one LEA |
431 | // with another won't lead to putting a wrong register in the instruction. |
432 | if (MRI->getRegClass(Reg: First.getOperand(i: 0).getReg()) != |
433 | MRI->getRegClass(Reg: Last.getOperand(i: 0).getReg())) |
434 | return false; |
435 | |
436 | // Get new address displacement. |
437 | AddrDispShift = getAddrDispShift(MI1: Last, N1: 1, MI2: First, N2: 1); |
438 | |
439 | // Loop over all uses of the Last LEA to check that its def register is |
440 | // used only as address base for memory accesses. If so, it can be |
441 | // replaced, otherwise - no. |
442 | for (auto &MO : MRI->use_nodbg_operands(Reg: Last.getOperand(i: 0).getReg())) { |
443 | MachineInstr &MI = *MO.getParent(); |
444 | |
445 | // Get the number of the first memory operand. |
446 | const MCInstrDesc &Desc = MI.getDesc(); |
447 | int MemOpNo = X86II::getMemoryOperandNo(TSFlags: Desc.TSFlags); |
448 | |
449 | // If the use instruction has no memory operand - the LEA is not |
450 | // replaceable. |
451 | if (MemOpNo < 0) |
452 | return false; |
453 | |
454 | MemOpNo += X86II::getOperandBias(Desc); |
455 | |
456 | // If the address base of the use instruction is not the LEA def register - |
457 | // the LEA is not replaceable. |
458 | if (!isIdenticalOp(MO1: MI.getOperand(i: MemOpNo + X86::AddrBaseReg), MO2: MO)) |
459 | return false; |
460 | |
461 | // If the LEA def register is used as any other operand of the use |
462 | // instruction - the LEA is not replaceable. |
463 | for (unsigned i = 0; i < MI.getNumOperands(); i++) |
464 | if (i != (unsigned)(MemOpNo + X86::AddrBaseReg) && |
465 | isIdenticalOp(MO1: MI.getOperand(i), MO2: MO)) |
466 | return false; |
467 | |
468 | // Check that the new address displacement will fit 4 bytes. |
469 | if (MI.getOperand(i: MemOpNo + X86::AddrDisp).isImm() && |
470 | !isInt<32>(x: MI.getOperand(i: MemOpNo + X86::AddrDisp).getImm() + |
471 | AddrDispShift)) |
472 | return false; |
473 | } |
474 | |
475 | return true; |
476 | } |
477 | |
478 | void X86OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, |
479 | MemOpMap &LEAs) { |
480 | unsigned Pos = 0; |
481 | for (auto &MI : MBB) { |
482 | // Assign the position number to the instruction. Note that we are going to |
483 | // move some instructions during the optimization however there will never |
484 | // be a need to move two instructions before any selected instruction. So to |
485 | // avoid multiple positions' updates during moves we just increase position |
486 | // counter by two leaving a free space for instructions which will be moved. |
487 | InstrPos[&MI] = Pos += 2; |
488 | |
489 | if (isLEA(MI)) |
490 | LEAs[getMemOpKey(MI, N: 1)].push_back(Elt: const_cast<MachineInstr *>(&MI)); |
491 | } |
492 | } |
493 | |
494 | // Try to find load and store instructions which recalculate addresses already |
495 | // calculated by some LEA and replace their memory operands with its def |
496 | // register. |
497 | bool X86OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) { |
498 | bool Changed = false; |
499 | |
500 | assert(!LEAs.empty()); |
501 | MachineBasicBlock *MBB = (*LEAs.begin()->second.begin())->getParent(); |
502 | |
503 | // Process all instructions in basic block. |
504 | for (MachineInstr &MI : llvm::make_early_inc_range(Range&: *MBB)) { |
505 | // Instruction must be load or store. |
506 | if (!MI.mayLoadOrStore()) |
507 | continue; |
508 | |
509 | // Get the number of the first memory operand. |
510 | const MCInstrDesc &Desc = MI.getDesc(); |
511 | int MemOpNo = X86II::getMemoryOperandNo(TSFlags: Desc.TSFlags); |
512 | |
513 | // If instruction has no memory operand - skip it. |
514 | if (MemOpNo < 0) |
515 | continue; |
516 | |
517 | MemOpNo += X86II::getOperandBias(Desc); |
518 | |
519 | // Do not call chooseBestLEA if there was no matching LEA |
520 | auto Insns = LEAs.find(Val: getMemOpKey(MI, N: MemOpNo)); |
521 | if (Insns == LEAs.end()) |
522 | continue; |
523 | |
524 | // Get the best LEA instruction to replace address calculation. |
525 | MachineInstr *DefMI; |
526 | int64_t AddrDispShift; |
527 | int Dist; |
528 | if (!chooseBestLEA(List: Insns->second, MI, BestLEA&: DefMI, AddrDispShift, Dist)) |
529 | continue; |
530 | |
531 | // If LEA occurs before current instruction, we can freely replace |
532 | // the instruction. If LEA occurs after, we can lift LEA above the |
533 | // instruction and this way to be able to replace it. Since LEA and the |
534 | // instruction have similar memory operands (thus, the same def |
535 | // instructions for these operands), we can always do that, without |
536 | // worries of using registers before their defs. |
537 | if (Dist < 0) { |
538 | DefMI->removeFromParent(); |
539 | MBB->insert(I: MachineBasicBlock::iterator(&MI), MI: DefMI); |
540 | InstrPos[DefMI] = InstrPos[&MI] - 1; |
541 | |
542 | // Make sure the instructions' position numbers are sane. |
543 | assert(((InstrPos[DefMI] == 1 && |
544 | MachineBasicBlock::iterator(DefMI) == MBB->begin()) || |
545 | InstrPos[DefMI] > |
546 | InstrPos[&*std::prev(MachineBasicBlock::iterator(DefMI))]) && |
547 | "Instruction positioning is broken" ); |
548 | } |
549 | |
550 | // Since we can possibly extend register lifetime, clear kill flags. |
551 | MRI->clearKillFlags(Reg: DefMI->getOperand(i: 0).getReg()); |
552 | |
553 | ++NumSubstLEAs; |
554 | LLVM_DEBUG(dbgs() << "OptimizeLEAs: Candidate to replace: " ; MI.dump();); |
555 | |
556 | // Change instruction operands. |
557 | MI.getOperand(i: MemOpNo + X86::AddrBaseReg) |
558 | .ChangeToRegister(Reg: DefMI->getOperand(i: 0).getReg(), isDef: false); |
559 | MI.getOperand(i: MemOpNo + X86::AddrScaleAmt).ChangeToImmediate(ImmVal: 1); |
560 | MI.getOperand(i: MemOpNo + X86::AddrIndexReg) |
561 | .ChangeToRegister(Reg: X86::NoRegister, isDef: false); |
562 | MI.getOperand(i: MemOpNo + X86::AddrDisp).ChangeToImmediate(ImmVal: AddrDispShift); |
563 | MI.getOperand(i: MemOpNo + X86::AddrSegmentReg) |
564 | .ChangeToRegister(Reg: X86::NoRegister, isDef: false); |
565 | |
566 | LLVM_DEBUG(dbgs() << "OptimizeLEAs: Replaced by: " ; MI.dump();); |
567 | |
568 | Changed = true; |
569 | } |
570 | |
571 | return Changed; |
572 | } |
573 | |
574 | MachineInstr *X86OptimizeLEAPass::replaceDebugValue(MachineInstr &MI, |
575 | unsigned OldReg, |
576 | unsigned NewReg, |
577 | int64_t AddrDispShift) { |
578 | const DIExpression *Expr = MI.getDebugExpression(); |
579 | if (AddrDispShift != 0) { |
580 | if (MI.isNonListDebugValue()) { |
581 | Expr = |
582 | DIExpression::prepend(Expr, Flags: DIExpression::StackValue, Offset: AddrDispShift); |
583 | } else { |
584 | // Update the Expression, appending an offset of `AddrDispShift` to the |
585 | // Op corresponding to `OldReg`. |
586 | SmallVector<uint64_t, 3> Ops; |
587 | DIExpression::appendOffset(Ops, Offset: AddrDispShift); |
588 | for (MachineOperand &Op : MI.getDebugOperandsForReg(Reg: OldReg)) { |
589 | unsigned OpIdx = MI.getDebugOperandIndex(Op: &Op); |
590 | Expr = DIExpression::appendOpsToArg(Expr, Ops, ArgNo: OpIdx); |
591 | } |
592 | } |
593 | } |
594 | |
595 | // Replace DBG_VALUE instruction with modified version. |
596 | MachineBasicBlock *MBB = MI.getParent(); |
597 | DebugLoc DL = MI.getDebugLoc(); |
598 | bool IsIndirect = MI.isIndirectDebugValue(); |
599 | const MDNode *Var = MI.getDebugVariable(); |
600 | unsigned Opcode = MI.isNonListDebugValue() ? TargetOpcode::DBG_VALUE |
601 | : TargetOpcode::DBG_VALUE_LIST; |
602 | if (IsIndirect) |
603 | assert(MI.getDebugOffset().getImm() == 0 && |
604 | "DBG_VALUE with nonzero offset" ); |
605 | SmallVector<MachineOperand, 4> NewOps; |
606 | // If we encounter an operand using the old register, replace it with an |
607 | // operand that uses the new register; otherwise keep the old operand. |
608 | auto replaceOldReg = [OldReg, NewReg](const MachineOperand &Op) { |
609 | if (Op.isReg() && Op.getReg() == OldReg) |
610 | return MachineOperand::CreateReg(Reg: NewReg, isDef: false, isImp: false, isKill: false, isDead: false, |
611 | isUndef: false, isEarlyClobber: false, SubReg: false, isDebug: false, isInternalRead: false, |
612 | /*IsRenamable*/ isRenamable: true); |
613 | return Op; |
614 | }; |
615 | for (const MachineOperand &Op : MI.debug_operands()) |
616 | NewOps.push_back(Elt: replaceOldReg(Op)); |
617 | return BuildMI(BB&: *MBB, I: MBB->erase(I: &MI), DL, MCID: TII->get(Opcode), IsIndirect, |
618 | MOs: NewOps, Variable: Var, Expr); |
619 | } |
620 | |
621 | // Try to find similar LEAs in the list and replace one with another. |
622 | bool X86OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { |
623 | bool Changed = false; |
624 | |
625 | // Loop over all entries in the table. |
626 | for (auto &E : LEAs) { |
627 | auto &List = E.second; |
628 | |
629 | // Loop over all LEA pairs. |
630 | auto I1 = List.begin(); |
631 | while (I1 != List.end()) { |
632 | MachineInstr &First = **I1; |
633 | auto I2 = std::next(x: I1); |
634 | while (I2 != List.end()) { |
635 | MachineInstr &Last = **I2; |
636 | int64_t AddrDispShift; |
637 | |
638 | // LEAs should be in occurrence order in the list, so we can freely |
639 | // replace later LEAs with earlier ones. |
640 | assert(calcInstrDist(First, Last) > 0 && |
641 | "LEAs must be in occurrence order in the list" ); |
642 | |
643 | // Check that the Last LEA instruction can be replaced by the First. |
644 | if (!isReplaceable(First, Last, AddrDispShift)) { |
645 | ++I2; |
646 | continue; |
647 | } |
648 | |
649 | // Loop over all uses of the Last LEA and update their operands. Note |
650 | // that the correctness of this has already been checked in the |
651 | // isReplaceable function. |
652 | Register FirstVReg = First.getOperand(i: 0).getReg(); |
653 | Register LastVReg = Last.getOperand(i: 0).getReg(); |
654 | // We use MRI->use_empty here instead of the combination of |
655 | // llvm::make_early_inc_range and MRI->use_operands because we could |
656 | // replace two or more uses in a debug instruction in one iteration, and |
657 | // that would deeply confuse llvm::make_early_inc_range. |
658 | while (!MRI->use_empty(RegNo: LastVReg)) { |
659 | MachineOperand &MO = *MRI->use_begin(RegNo: LastVReg); |
660 | MachineInstr &MI = *MO.getParent(); |
661 | |
662 | if (MI.isDebugValue()) { |
663 | // Replace DBG_VALUE instruction with modified version using the |
664 | // register from the replacing LEA and the address displacement |
665 | // between the LEA instructions. |
666 | replaceDebugValue(MI, OldReg: LastVReg, NewReg: FirstVReg, AddrDispShift); |
667 | continue; |
668 | } |
669 | |
670 | // Get the number of the first memory operand. |
671 | const MCInstrDesc &Desc = MI.getDesc(); |
672 | int MemOpNo = |
673 | X86II::getMemoryOperandNo(TSFlags: Desc.TSFlags) + |
674 | X86II::getOperandBias(Desc); |
675 | |
676 | // Update address base. |
677 | MO.setReg(FirstVReg); |
678 | |
679 | // Update address disp. |
680 | MachineOperand &Op = MI.getOperand(i: MemOpNo + X86::AddrDisp); |
681 | if (Op.isImm()) |
682 | Op.setImm(Op.getImm() + AddrDispShift); |
683 | else if (!Op.isJTI()) |
684 | Op.setOffset(Op.getOffset() + AddrDispShift); |
685 | } |
686 | |
687 | // Since we can possibly extend register lifetime, clear kill flags. |
688 | MRI->clearKillFlags(Reg: FirstVReg); |
689 | |
690 | ++NumRedundantLEAs; |
691 | LLVM_DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: " ; |
692 | Last.dump();); |
693 | |
694 | // By this moment, all of the Last LEA's uses must be replaced. So we |
695 | // can freely remove it. |
696 | assert(MRI->use_empty(LastVReg) && |
697 | "The LEA's def register must have no uses" ); |
698 | Last.eraseFromParent(); |
699 | |
700 | // Erase removed LEA from the list. |
701 | I2 = List.erase(CI: I2); |
702 | |
703 | Changed = true; |
704 | } |
705 | ++I1; |
706 | } |
707 | } |
708 | |
709 | return Changed; |
710 | } |
711 | |
712 | bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { |
713 | bool Changed = false; |
714 | |
715 | if (DisableX86LEAOpt || skipFunction(F: MF.getFunction())) |
716 | return false; |
717 | |
718 | MRI = &MF.getRegInfo(); |
719 | TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); |
720 | TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo(); |
721 | auto *PSI = |
722 | &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
723 | auto *MBFI = (PSI && PSI->hasProfileSummary()) ? |
724 | &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() : |
725 | nullptr; |
726 | |
727 | // Process all basic blocks. |
728 | for (auto &MBB : MF) { |
729 | MemOpMap LEAs; |
730 | InstrPos.clear(); |
731 | |
732 | // Find all LEA instructions in basic block. |
733 | findLEAs(MBB, LEAs); |
734 | |
735 | // If current basic block has no LEAs, move on to the next one. |
736 | if (LEAs.empty()) |
737 | continue; |
738 | |
739 | // Remove redundant LEA instructions. |
740 | Changed |= removeRedundantLEAs(LEAs); |
741 | |
742 | // Remove redundant address calculations. Do it only for -Os/-Oz since only |
743 | // a code size gain is expected from this part of the pass. |
744 | bool OptForSize = MF.getFunction().hasOptSize() || |
745 | llvm::shouldOptimizeForSize(MBB: &MBB, PSI, MBFI); |
746 | if (OptForSize) |
747 | Changed |= removeRedundantAddrCalc(LEAs); |
748 | } |
749 | |
750 | return Changed; |
751 | } |
752 | |