1 | //===-- LanaiMemAluCombiner.cpp - Pass to combine memory & ALU operations -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // Simple pass to combine memory and ALU operations |
9 | // |
10 | // The Lanai ISA supports instructions where a load/store modifies the base |
11 | // register used in the load/store operation. This pass finds suitable |
12 | // load/store and ALU instructions and combines them into one instruction. |
13 | // |
14 | // For example, |
15 | // ld [ %r6 -- ], %r12 |
16 | // is a supported instruction that is not currently generated by the instruction |
17 | // selection pass of this backend. This pass generates these instructions by |
18 | // merging |
19 | // add %r6, -4, %r6 |
20 | // followed by |
21 | // ld [ %r6 ], %r12 |
22 | // in the same machine basic block into one machine instruction. |
23 | //===----------------------------------------------------------------------===// |
24 | |
25 | #include "Lanai.h" |
26 | #include "LanaiAluCode.h" |
27 | #include "LanaiTargetMachine.h" |
28 | #include "llvm/ADT/Statistic.h" |
29 | #include "llvm/CodeGen/MachineFunctionPass.h" |
30 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
31 | #include "llvm/CodeGen/RegisterScavenging.h" |
32 | #include "llvm/CodeGen/TargetInstrInfo.h" |
33 | #include "llvm/Support/CommandLine.h" |
34 | using namespace llvm; |
35 | |
36 | #define GET_INSTRMAP_INFO |
37 | #include "LanaiGenInstrInfo.inc" |
38 | |
39 | #define DEBUG_TYPE "lanai-mem-alu-combiner" |
40 | |
41 | STATISTIC(NumLdStAluCombined, "Number of memory and ALU instructions combined" ); |
42 | |
43 | static llvm::cl::opt<bool> DisableMemAluCombiner( |
44 | "disable-lanai-mem-alu-combiner" , llvm::cl::init(Val: false), |
45 | llvm::cl::desc("Do not combine ALU and memory operators" ), |
46 | llvm::cl::Hidden); |
47 | |
48 | namespace { |
49 | typedef MachineBasicBlock::iterator MbbIterator; |
50 | typedef MachineFunction::iterator MfIterator; |
51 | |
52 | class LanaiMemAluCombiner : public MachineFunctionPass { |
53 | public: |
54 | static char ID; |
55 | explicit LanaiMemAluCombiner() : MachineFunctionPass(ID) {} |
56 | |
57 | StringRef getPassName() const override { |
58 | return "Lanai load / store optimization pass" ; |
59 | } |
60 | |
61 | bool runOnMachineFunction(MachineFunction &F) override; |
62 | |
63 | MachineFunctionProperties getRequiredProperties() const override { |
64 | return MachineFunctionProperties().setNoVRegs(); |
65 | } |
66 | |
67 | private: |
68 | MbbIterator findClosestSuitableAluInstr(MachineBasicBlock *BB, |
69 | const MbbIterator &MemInstr, |
70 | bool Decrement); |
71 | void insertMergedInstruction(MachineBasicBlock *BB, |
72 | const MbbIterator &MemInstr, |
73 | const MbbIterator &AluInstr, bool Before); |
74 | bool combineMemAluInBasicBlock(MachineBasicBlock *BB); |
75 | |
76 | // Target machine description which we query for register names, data |
77 | // layout, etc. |
78 | const TargetInstrInfo *TII; |
79 | }; |
80 | } // namespace |
81 | |
82 | char LanaiMemAluCombiner::ID = 0; |
83 | |
84 | INITIALIZE_PASS(LanaiMemAluCombiner, DEBUG_TYPE, |
85 | "Lanai memory ALU combiner pass" , false, false) |
86 | |
87 | namespace { |
88 | bool isSpls(uint16_t Opcode) { return Lanai::splsIdempotent(Opcode) == Opcode; } |
89 | |
90 | // Determine the opcode for the merged instruction created by considering the |
91 | // old memory operation's opcode and whether the merged opcode will have an |
92 | // immediate offset. |
93 | unsigned mergedOpcode(unsigned OldOpcode, bool ImmediateOffset) { |
94 | switch (OldOpcode) { |
95 | case Lanai::LDW_RI: |
96 | case Lanai::LDW_RR: |
97 | if (ImmediateOffset) |
98 | return Lanai::LDW_RI; |
99 | return Lanai::LDW_RR; |
100 | case Lanai::LDHs_RI: |
101 | case Lanai::LDHs_RR: |
102 | if (ImmediateOffset) |
103 | return Lanai::LDHs_RI; |
104 | return Lanai::LDHs_RR; |
105 | case Lanai::LDHz_RI: |
106 | case Lanai::LDHz_RR: |
107 | if (ImmediateOffset) |
108 | return Lanai::LDHz_RI; |
109 | return Lanai::LDHz_RR; |
110 | case Lanai::LDBs_RI: |
111 | case Lanai::LDBs_RR: |
112 | if (ImmediateOffset) |
113 | return Lanai::LDBs_RI; |
114 | return Lanai::LDBs_RR; |
115 | case Lanai::LDBz_RI: |
116 | case Lanai::LDBz_RR: |
117 | if (ImmediateOffset) |
118 | return Lanai::LDBz_RI; |
119 | return Lanai::LDBz_RR; |
120 | case Lanai::SW_RI: |
121 | case Lanai::SW_RR: |
122 | if (ImmediateOffset) |
123 | return Lanai::SW_RI; |
124 | return Lanai::SW_RR; |
125 | case Lanai::STB_RI: |
126 | case Lanai::STB_RR: |
127 | if (ImmediateOffset) |
128 | return Lanai::STB_RI; |
129 | return Lanai::STB_RR; |
130 | case Lanai::STH_RI: |
131 | case Lanai::STH_RR: |
132 | if (ImmediateOffset) |
133 | return Lanai::STH_RI; |
134 | return Lanai::STH_RR; |
135 | default: |
136 | return 0; |
137 | } |
138 | } |
139 | |
140 | // Check if the machine instruction has non-volatile memory operands of the type |
141 | // supported for combining with ALU instructions. |
142 | bool isNonVolatileMemoryOp(const MachineInstr &MI) { |
143 | if (!MI.hasOneMemOperand()) |
144 | return false; |
145 | |
146 | // Determine if the machine instruction is a supported memory operation by |
147 | // testing if the computed merge opcode is a valid memory operation opcode. |
148 | if (mergedOpcode(OldOpcode: MI.getOpcode(), ImmediateOffset: false) == 0) |
149 | return false; |
150 | |
151 | const MachineMemOperand *MemOperand = *MI.memoperands_begin(); |
152 | |
153 | // Don't move volatile memory accesses |
154 | // TODO: unclear if we need to be as conservative about atomics |
155 | if (MemOperand->isVolatile() || MemOperand->isAtomic()) |
156 | return false; |
157 | |
158 | return true; |
159 | } |
160 | |
161 | // Test to see if two machine operands are of the same type. This test is less |
162 | // strict than the MachineOperand::isIdenticalTo function. |
163 | bool isSameOperand(const MachineOperand &Op1, const MachineOperand &Op2) { |
164 | if (Op1.getType() != Op2.getType()) |
165 | return false; |
166 | |
167 | switch (Op1.getType()) { |
168 | case MachineOperand::MO_Register: |
169 | return Op1.getReg() == Op2.getReg(); |
170 | case MachineOperand::MO_Immediate: |
171 | return Op1.getImm() == Op2.getImm(); |
172 | default: |
173 | return false; |
174 | } |
175 | } |
176 | |
177 | bool isZeroOperand(const MachineOperand &Op) { |
178 | return ((Op.isReg() && Op.getReg() == Lanai::R0) || |
179 | (Op.isImm() && Op.getImm() == 0)); |
180 | } |
181 | |
182 | // Determines whether a register is used by an instruction. |
183 | bool InstrUsesReg(const MbbIterator &Instr, const MachineOperand *Reg) { |
184 | for (MachineInstr::const_mop_iterator Mop = Instr->operands_begin(); |
185 | Mop != Instr->operands_end(); ++Mop) { |
186 | if (isSameOperand(Op1: *Mop, Op2: *Reg)) |
187 | return true; |
188 | } |
189 | return false; |
190 | } |
191 | |
192 | // Converts between machine opcode and AluCode. |
193 | // Flag using/modifying ALU operations should not be considered for merging and |
194 | // are omitted from this list. |
195 | LPAC::AluCode mergedAluCode(unsigned AluOpcode) { |
196 | switch (AluOpcode) { |
197 | case Lanai::ADD_I_LO: |
198 | case Lanai::ADD_R: |
199 | return LPAC::ADD; |
200 | case Lanai::SUB_I_LO: |
201 | case Lanai::SUB_R: |
202 | return LPAC::SUB; |
203 | case Lanai::AND_I_LO: |
204 | case Lanai::AND_R: |
205 | return LPAC::AND; |
206 | case Lanai::OR_I_LO: |
207 | case Lanai::OR_R: |
208 | return LPAC::OR; |
209 | case Lanai::XOR_I_LO: |
210 | case Lanai::XOR_R: |
211 | return LPAC::XOR; |
212 | case Lanai::SHL_R: |
213 | return LPAC::SHL; |
214 | case Lanai::SRL_R: |
215 | return LPAC::SRL; |
216 | case Lanai::SRA_R: |
217 | return LPAC::SRA; |
218 | case Lanai::SA_I: |
219 | case Lanai::SL_I: |
220 | default: |
221 | return LPAC::UNKNOWN; |
222 | } |
223 | } |
224 | |
225 | // Insert a new combined memory and ALU operation instruction. |
226 | // |
227 | // This function builds a new machine instruction using the MachineInstrBuilder |
228 | // class and inserts it before the memory instruction. |
229 | void LanaiMemAluCombiner::insertMergedInstruction(MachineBasicBlock *BB, |
230 | const MbbIterator &MemInstr, |
231 | const MbbIterator &AluInstr, |
232 | bool Before) { |
233 | // Insert new combined load/store + alu operation |
234 | MachineOperand Dest = MemInstr->getOperand(i: 0); |
235 | MachineOperand Base = MemInstr->getOperand(i: 1); |
236 | MachineOperand MemOffset = MemInstr->getOperand(i: 2); |
237 | MachineOperand AluOffset = AluInstr->getOperand(i: 2); |
238 | |
239 | // Abort if ALU offset is not a register or immediate |
240 | assert((AluOffset.isReg() || AluOffset.isImm()) && |
241 | "Unsupported operand type in merge" ); |
242 | |
243 | // Determined merged instructions opcode and ALU code |
244 | LPAC::AluCode AluOpcode = mergedAluCode(AluOpcode: AluInstr->getOpcode()); |
245 | unsigned NewOpc = mergedOpcode(OldOpcode: MemInstr->getOpcode(), ImmediateOffset: AluOffset.isImm()); |
246 | |
247 | assert(AluOpcode != LPAC::UNKNOWN && "Unknown ALU code in merging" ); |
248 | assert(NewOpc != 0 && "Unknown merged node opcode" ); |
249 | |
250 | // Build and insert new machine instruction |
251 | MachineInstrBuilder InstrBuilder = |
252 | BuildMI(BB&: *BB, I: MemInstr, MIMD: MemInstr->getDebugLoc(), MCID: TII->get(Opcode: NewOpc)); |
253 | InstrBuilder.addReg(RegNo: Dest.getReg(), flags: getDefRegState(B: true)); |
254 | InstrBuilder.addReg(RegNo: Base.getReg(), flags: getKillRegState(B: true)); |
255 | |
256 | // Add offset to machine instruction |
257 | if (AluOffset.isReg()) |
258 | InstrBuilder.addReg(RegNo: AluOffset.getReg()); |
259 | else if (AluOffset.isImm()) |
260 | InstrBuilder.addImm(Val: AluOffset.getImm()); |
261 | else |
262 | llvm_unreachable("Unsupported ld/st ALU merge." ); |
263 | |
264 | // Create a pre-op if the ALU operation preceded the memory operation or the |
265 | // MemOffset is non-zero (i.e. the memory value should be adjusted before |
266 | // accessing it), else create a post-op. |
267 | if (Before || !isZeroOperand(Op: MemOffset)) |
268 | InstrBuilder.addImm(Val: LPAC::makePreOp(AluOp: AluOpcode)); |
269 | else |
270 | InstrBuilder.addImm(Val: LPAC::makePostOp(AluOp: AluOpcode)); |
271 | |
272 | // Transfer memory operands. |
273 | InstrBuilder.setMemRefs(MemInstr->memoperands()); |
274 | } |
275 | |
276 | // Function determines if ALU operation (in alu_iter) can be combined with |
277 | // a load/store with base and offset. |
278 | bool isSuitableAluInstr(bool IsSpls, const MbbIterator &AluIter, |
279 | const MachineOperand &Base, |
280 | const MachineOperand &Offset) { |
281 | // ALU operations have 3 operands |
282 | if (AluIter->getNumOperands() != 3) |
283 | return false; |
284 | |
285 | MachineOperand &Dest = AluIter->getOperand(i: 0); |
286 | MachineOperand &Op1 = AluIter->getOperand(i: 1); |
287 | MachineOperand &Op2 = AluIter->getOperand(i: 2); |
288 | |
289 | // Only match instructions using the base register as destination and with the |
290 | // base and first operand equal |
291 | if (!isSameOperand(Op1: Dest, Op2: Base) || !isSameOperand(Op1: Dest, Op2: Op1)) |
292 | return false; |
293 | |
294 | if (Op2.isImm()) { |
295 | // It is not a match if the 2nd operand in the ALU operation is an |
296 | // immediate but the ALU operation is not an addition. |
297 | if (AluIter->getOpcode() != Lanai::ADD_I_LO) |
298 | return false; |
299 | |
300 | if (Offset.isReg() && Offset.getReg() == Lanai::R0) |
301 | return true; |
302 | |
303 | if (Offset.isImm() && |
304 | ((Offset.getImm() == 0 && |
305 | // Check that the Op2 would fit in the immediate field of the |
306 | // memory operation. |
307 | ((IsSpls && isInt<10>(x: Op2.getImm())) || |
308 | (!IsSpls && isInt<16>(x: Op2.getImm())))) || |
309 | Offset.getImm() == Op2.getImm())) |
310 | return true; |
311 | } else if (Op2.isReg()) { |
312 | // The Offset and 2nd operand are both registers and equal |
313 | if (Offset.isReg() && Op2.getReg() == Offset.getReg()) |
314 | return true; |
315 | } else |
316 | // Only consider operations with register or immediate values |
317 | return false; |
318 | |
319 | return false; |
320 | } |
321 | |
322 | MbbIterator LanaiMemAluCombiner::findClosestSuitableAluInstr( |
323 | MachineBasicBlock *BB, const MbbIterator &MemInstr, const bool Decrement) { |
324 | MachineOperand *Base = &MemInstr->getOperand(i: 1); |
325 | MachineOperand *Offset = &MemInstr->getOperand(i: 2); |
326 | bool IsSpls = isSpls(Opcode: MemInstr->getOpcode()); |
327 | |
328 | MbbIterator First = MemInstr; |
329 | MbbIterator Last = Decrement ? BB->begin() : BB->end(); |
330 | |
331 | while (First != Last) { |
332 | Decrement ? --First : ++First; |
333 | |
334 | if (First == Last) |
335 | break; |
336 | |
337 | // Skip over debug instructions |
338 | if (First->isDebugInstr()) |
339 | continue; |
340 | |
341 | if (isSuitableAluInstr(IsSpls, AluIter: First, Base: *Base, Offset: *Offset)) { |
342 | return First; |
343 | } |
344 | |
345 | // Usage of the base or offset register is not a form suitable for merging. |
346 | if (First != Last) { |
347 | if (InstrUsesReg(Instr: First, Reg: Base)) |
348 | break; |
349 | if (Offset->isReg() && InstrUsesReg(Instr: First, Reg: Offset)) |
350 | break; |
351 | } |
352 | } |
353 | |
354 | return MemInstr; |
355 | } |
356 | |
357 | bool LanaiMemAluCombiner::combineMemAluInBasicBlock(MachineBasicBlock *BB) { |
358 | bool Modified = false; |
359 | |
360 | MbbIterator MBBIter = BB->begin(), End = BB->end(); |
361 | while (MBBIter != End) { |
362 | bool IsMemOp = isNonVolatileMemoryOp(MI: *MBBIter); |
363 | |
364 | if (IsMemOp) { |
365 | MachineOperand AluOperand = MBBIter->getOperand(i: 3); |
366 | unsigned int DestReg = MBBIter->getOperand(i: 0).getReg(), |
367 | BaseReg = MBBIter->getOperand(i: 1).getReg(); |
368 | assert(AluOperand.isImm() && "Unexpected memory operator type" ); |
369 | LPAC::AluCode AluOpcode = static_cast<LPAC::AluCode>(AluOperand.getImm()); |
370 | |
371 | // Skip memory operations that already modify the base register or if |
372 | // the destination and base register are the same |
373 | if (!LPAC::modifiesOp(AluOp: AluOpcode) && DestReg != BaseReg) { |
374 | for (int Inc = 0; Inc <= 1; ++Inc) { |
375 | MbbIterator AluIter = |
376 | findClosestSuitableAluInstr(BB, MemInstr: MBBIter, Decrement: Inc == 0); |
377 | if (AluIter != MBBIter) { |
378 | insertMergedInstruction(BB, MemInstr: MBBIter, AluInstr: AluIter, Before: Inc == 0); |
379 | |
380 | ++NumLdStAluCombined; |
381 | Modified = true; |
382 | |
383 | // Erase the matching ALU instruction |
384 | BB->erase(I: AluIter); |
385 | // Erase old load/store instruction |
386 | BB->erase(I: MBBIter++); |
387 | break; |
388 | } |
389 | } |
390 | } |
391 | } |
392 | if (MBBIter == End) |
393 | break; |
394 | ++MBBIter; |
395 | } |
396 | |
397 | return Modified; |
398 | } |
399 | |
400 | // Driver function that iterates over the machine basic building blocks of a |
401 | // machine function |
402 | bool LanaiMemAluCombiner::runOnMachineFunction(MachineFunction &MF) { |
403 | if (DisableMemAluCombiner) |
404 | return false; |
405 | |
406 | TII = MF.getSubtarget<LanaiSubtarget>().getInstrInfo(); |
407 | bool Modified = false; |
408 | for (MachineBasicBlock &MBB : MF) |
409 | Modified |= combineMemAluInBasicBlock(BB: &MBB); |
410 | return Modified; |
411 | } |
412 | } // namespace |
413 | |
414 | FunctionPass *llvm::createLanaiMemAluCombinerPass() { |
415 | return new LanaiMemAluCombiner(); |
416 | } |
417 | |