1 | //===-- LanaiMemAluCombiner.cpp - Pass to combine memory & ALU operations -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // Simple pass to combine memory and ALU operations |
9 | // |
10 | // The Lanai ISA supports instructions where a load/store modifies the base |
11 | // register used in the load/store operation. This pass finds suitable |
12 | // load/store and ALU instructions and combines them into one instruction. |
13 | // |
14 | // For example, |
15 | // ld [ %r6 -- ], %r12 |
16 | // is a supported instruction that is not currently generated by the instruction |
17 | // selection pass of this backend. This pass generates these instructions by |
18 | // merging |
19 | // add %r6, -4, %r6 |
20 | // followed by |
21 | // ld [ %r6 ], %r12 |
22 | // in the same machine basic block into one machine instruction. |
23 | //===----------------------------------------------------------------------===// |
24 | |
25 | #include "LanaiAluCode.h" |
26 | #include "LanaiTargetMachine.h" |
27 | #include "llvm/ADT/Statistic.h" |
28 | #include "llvm/CodeGen/MachineFunctionPass.h" |
29 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
30 | #include "llvm/CodeGen/RegisterScavenging.h" |
31 | #include "llvm/CodeGen/TargetInstrInfo.h" |
32 | #include "llvm/Support/CommandLine.h" |
33 | using namespace llvm; |
34 | |
35 | #define GET_INSTRMAP_INFO |
36 | #include "LanaiGenInstrInfo.inc" |
37 | |
38 | #define DEBUG_TYPE "lanai-mem-alu-combiner" |
39 | |
40 | STATISTIC(NumLdStAluCombined, "Number of memory and ALU instructions combined" ); |
41 | |
42 | static llvm::cl::opt<bool> DisableMemAluCombiner( |
43 | "disable-lanai-mem-alu-combiner" , llvm::cl::init(Val: false), |
44 | llvm::cl::desc("Do not combine ALU and memory operators" ), |
45 | llvm::cl::Hidden); |
46 | |
47 | namespace llvm { |
48 | void initializeLanaiMemAluCombinerPass(PassRegistry &); |
49 | } // namespace llvm |
50 | |
51 | namespace { |
52 | typedef MachineBasicBlock::iterator MbbIterator; |
53 | typedef MachineFunction::iterator MfIterator; |
54 | |
55 | class LanaiMemAluCombiner : public MachineFunctionPass { |
56 | public: |
57 | static char ID; |
58 | explicit LanaiMemAluCombiner() : MachineFunctionPass(ID) { |
59 | initializeLanaiMemAluCombinerPass(*PassRegistry::getPassRegistry()); |
60 | } |
61 | |
62 | StringRef getPassName() const override { |
63 | return "Lanai load / store optimization pass" ; |
64 | } |
65 | |
66 | bool runOnMachineFunction(MachineFunction &F) override; |
67 | |
68 | MachineFunctionProperties getRequiredProperties() const override { |
69 | return MachineFunctionProperties().set( |
70 | MachineFunctionProperties::Property::NoVRegs); |
71 | } |
72 | |
73 | private: |
74 | MbbIterator findClosestSuitableAluInstr(MachineBasicBlock *BB, |
75 | const MbbIterator &MemInstr, |
76 | bool Decrement); |
77 | void insertMergedInstruction(MachineBasicBlock *BB, |
78 | const MbbIterator &MemInstr, |
79 | const MbbIterator &AluInstr, bool Before); |
80 | bool combineMemAluInBasicBlock(MachineBasicBlock *BB); |
81 | |
82 | // Target machine description which we query for register names, data |
83 | // layout, etc. |
84 | const TargetInstrInfo *TII; |
85 | }; |
86 | } // namespace |
87 | |
88 | char LanaiMemAluCombiner::ID = 0; |
89 | |
90 | INITIALIZE_PASS(LanaiMemAluCombiner, DEBUG_TYPE, |
91 | "Lanai memory ALU combiner pass" , false, false) |
92 | |
93 | namespace { |
94 | bool isSpls(uint16_t Opcode) { return Lanai::splsIdempotent(Opcode) == Opcode; } |
95 | |
96 | // Determine the opcode for the merged instruction created by considering the |
97 | // old memory operation's opcode and whether the merged opcode will have an |
98 | // immediate offset. |
99 | unsigned mergedOpcode(unsigned OldOpcode, bool ImmediateOffset) { |
100 | switch (OldOpcode) { |
101 | case Lanai::LDW_RI: |
102 | case Lanai::LDW_RR: |
103 | if (ImmediateOffset) |
104 | return Lanai::LDW_RI; |
105 | return Lanai::LDW_RR; |
106 | case Lanai::LDHs_RI: |
107 | case Lanai::LDHs_RR: |
108 | if (ImmediateOffset) |
109 | return Lanai::LDHs_RI; |
110 | return Lanai::LDHs_RR; |
111 | case Lanai::LDHz_RI: |
112 | case Lanai::LDHz_RR: |
113 | if (ImmediateOffset) |
114 | return Lanai::LDHz_RI; |
115 | return Lanai::LDHz_RR; |
116 | case Lanai::LDBs_RI: |
117 | case Lanai::LDBs_RR: |
118 | if (ImmediateOffset) |
119 | return Lanai::LDBs_RI; |
120 | return Lanai::LDBs_RR; |
121 | case Lanai::LDBz_RI: |
122 | case Lanai::LDBz_RR: |
123 | if (ImmediateOffset) |
124 | return Lanai::LDBz_RI; |
125 | return Lanai::LDBz_RR; |
126 | case Lanai::SW_RI: |
127 | case Lanai::SW_RR: |
128 | if (ImmediateOffset) |
129 | return Lanai::SW_RI; |
130 | return Lanai::SW_RR; |
131 | case Lanai::STB_RI: |
132 | case Lanai::STB_RR: |
133 | if (ImmediateOffset) |
134 | return Lanai::STB_RI; |
135 | return Lanai::STB_RR; |
136 | case Lanai::STH_RI: |
137 | case Lanai::STH_RR: |
138 | if (ImmediateOffset) |
139 | return Lanai::STH_RI; |
140 | return Lanai::STH_RR; |
141 | default: |
142 | return 0; |
143 | } |
144 | } |
145 | |
146 | // Check if the machine instruction has non-volatile memory operands of the type |
147 | // supported for combining with ALU instructions. |
148 | bool isNonVolatileMemoryOp(const MachineInstr &MI) { |
149 | if (!MI.hasOneMemOperand()) |
150 | return false; |
151 | |
152 | // Determine if the machine instruction is a supported memory operation by |
153 | // testing if the computed merge opcode is a valid memory operation opcode. |
154 | if (mergedOpcode(OldOpcode: MI.getOpcode(), ImmediateOffset: false) == 0) |
155 | return false; |
156 | |
157 | const MachineMemOperand *MemOperand = *MI.memoperands_begin(); |
158 | |
159 | // Don't move volatile memory accesses |
160 | // TODO: unclear if we need to be as conservative about atomics |
161 | if (MemOperand->isVolatile() || MemOperand->isAtomic()) |
162 | return false; |
163 | |
164 | return true; |
165 | } |
166 | |
167 | // Test to see if two machine operands are of the same type. This test is less |
168 | // strict than the MachineOperand::isIdenticalTo function. |
169 | bool isSameOperand(const MachineOperand &Op1, const MachineOperand &Op2) { |
170 | if (Op1.getType() != Op2.getType()) |
171 | return false; |
172 | |
173 | switch (Op1.getType()) { |
174 | case MachineOperand::MO_Register: |
175 | return Op1.getReg() == Op2.getReg(); |
176 | case MachineOperand::MO_Immediate: |
177 | return Op1.getImm() == Op2.getImm(); |
178 | default: |
179 | return false; |
180 | } |
181 | } |
182 | |
183 | bool isZeroOperand(const MachineOperand &Op) { |
184 | return ((Op.isReg() && Op.getReg() == Lanai::R0) || |
185 | (Op.isImm() && Op.getImm() == 0)); |
186 | } |
187 | |
188 | // Determines whether a register is used by an instruction. |
189 | bool InstrUsesReg(const MbbIterator &Instr, const MachineOperand *Reg) { |
190 | for (MachineInstr::const_mop_iterator Mop = Instr->operands_begin(); |
191 | Mop != Instr->operands_end(); ++Mop) { |
192 | if (isSameOperand(Op1: *Mop, Op2: *Reg)) |
193 | return true; |
194 | } |
195 | return false; |
196 | } |
197 | |
198 | // Converts between machine opcode and AluCode. |
199 | // Flag using/modifying ALU operations should not be considered for merging and |
200 | // are omitted from this list. |
201 | LPAC::AluCode mergedAluCode(unsigned AluOpcode) { |
202 | switch (AluOpcode) { |
203 | case Lanai::ADD_I_LO: |
204 | case Lanai::ADD_R: |
205 | return LPAC::ADD; |
206 | case Lanai::SUB_I_LO: |
207 | case Lanai::SUB_R: |
208 | return LPAC::SUB; |
209 | case Lanai::AND_I_LO: |
210 | case Lanai::AND_R: |
211 | return LPAC::AND; |
212 | case Lanai::OR_I_LO: |
213 | case Lanai::OR_R: |
214 | return LPAC::OR; |
215 | case Lanai::XOR_I_LO: |
216 | case Lanai::XOR_R: |
217 | return LPAC::XOR; |
218 | case Lanai::SHL_R: |
219 | return LPAC::SHL; |
220 | case Lanai::SRL_R: |
221 | return LPAC::SRL; |
222 | case Lanai::SRA_R: |
223 | return LPAC::SRA; |
224 | case Lanai::SA_I: |
225 | case Lanai::SL_I: |
226 | default: |
227 | return LPAC::UNKNOWN; |
228 | } |
229 | } |
230 | |
231 | // Insert a new combined memory and ALU operation instruction. |
232 | // |
233 | // This function builds a new machine instruction using the MachineInstrBuilder |
234 | // class and inserts it before the memory instruction. |
235 | void LanaiMemAluCombiner::insertMergedInstruction(MachineBasicBlock *BB, |
236 | const MbbIterator &MemInstr, |
237 | const MbbIterator &AluInstr, |
238 | bool Before) { |
239 | // Insert new combined load/store + alu operation |
240 | MachineOperand Dest = MemInstr->getOperand(i: 0); |
241 | MachineOperand Base = MemInstr->getOperand(i: 1); |
242 | MachineOperand MemOffset = MemInstr->getOperand(i: 2); |
243 | MachineOperand AluOffset = AluInstr->getOperand(i: 2); |
244 | |
245 | // Abort if ALU offset is not a register or immediate |
246 | assert((AluOffset.isReg() || AluOffset.isImm()) && |
247 | "Unsupported operand type in merge" ); |
248 | |
249 | // Determined merged instructions opcode and ALU code |
250 | LPAC::AluCode AluOpcode = mergedAluCode(AluOpcode: AluInstr->getOpcode()); |
251 | unsigned NewOpc = mergedOpcode(OldOpcode: MemInstr->getOpcode(), ImmediateOffset: AluOffset.isImm()); |
252 | |
253 | assert(AluOpcode != LPAC::UNKNOWN && "Unknown ALU code in merging" ); |
254 | assert(NewOpc != 0 && "Unknown merged node opcode" ); |
255 | |
256 | // Build and insert new machine instruction |
257 | MachineInstrBuilder InstrBuilder = |
258 | BuildMI(BB&: *BB, I: MemInstr, MIMD: MemInstr->getDebugLoc(), MCID: TII->get(Opcode: NewOpc)); |
259 | InstrBuilder.addReg(RegNo: Dest.getReg(), flags: getDefRegState(B: true)); |
260 | InstrBuilder.addReg(RegNo: Base.getReg(), flags: getKillRegState(B: true)); |
261 | |
262 | // Add offset to machine instruction |
263 | if (AluOffset.isReg()) |
264 | InstrBuilder.addReg(RegNo: AluOffset.getReg()); |
265 | else if (AluOffset.isImm()) |
266 | InstrBuilder.addImm(Val: AluOffset.getImm()); |
267 | else |
268 | llvm_unreachable("Unsupported ld/st ALU merge." ); |
269 | |
270 | // Create a pre-op if the ALU operation preceded the memory operation or the |
271 | // MemOffset is non-zero (i.e. the memory value should be adjusted before |
272 | // accessing it), else create a post-op. |
273 | if (Before || !isZeroOperand(Op: MemOffset)) |
274 | InstrBuilder.addImm(Val: LPAC::makePreOp(AluOp: AluOpcode)); |
275 | else |
276 | InstrBuilder.addImm(Val: LPAC::makePostOp(AluOp: AluOpcode)); |
277 | |
278 | // Transfer memory operands. |
279 | InstrBuilder.setMemRefs(MemInstr->memoperands()); |
280 | } |
281 | |
282 | // Function determines if ALU operation (in alu_iter) can be combined with |
283 | // a load/store with base and offset. |
284 | bool isSuitableAluInstr(bool IsSpls, const MbbIterator &AluIter, |
285 | const MachineOperand &Base, |
286 | const MachineOperand &Offset) { |
287 | // ALU operations have 3 operands |
288 | if (AluIter->getNumOperands() != 3) |
289 | return false; |
290 | |
291 | MachineOperand &Dest = AluIter->getOperand(i: 0); |
292 | MachineOperand &Op1 = AluIter->getOperand(i: 1); |
293 | MachineOperand &Op2 = AluIter->getOperand(i: 2); |
294 | |
295 | // Only match instructions using the base register as destination and with the |
296 | // base and first operand equal |
297 | if (!isSameOperand(Op1: Dest, Op2: Base) || !isSameOperand(Op1: Dest, Op2: Op1)) |
298 | return false; |
299 | |
300 | if (Op2.isImm()) { |
301 | // It is not a match if the 2nd operand in the ALU operation is an |
302 | // immediate but the ALU operation is not an addition. |
303 | if (AluIter->getOpcode() != Lanai::ADD_I_LO) |
304 | return false; |
305 | |
306 | if (Offset.isReg() && Offset.getReg() == Lanai::R0) |
307 | return true; |
308 | |
309 | if (Offset.isImm() && |
310 | ((Offset.getImm() == 0 && |
311 | // Check that the Op2 would fit in the immediate field of the |
312 | // memory operation. |
313 | ((IsSpls && isInt<10>(x: Op2.getImm())) || |
314 | (!IsSpls && isInt<16>(x: Op2.getImm())))) || |
315 | Offset.getImm() == Op2.getImm())) |
316 | return true; |
317 | } else if (Op2.isReg()) { |
318 | // The Offset and 2nd operand are both registers and equal |
319 | if (Offset.isReg() && Op2.getReg() == Offset.getReg()) |
320 | return true; |
321 | } else |
322 | // Only consider operations with register or immediate values |
323 | return false; |
324 | |
325 | return false; |
326 | } |
327 | |
328 | MbbIterator LanaiMemAluCombiner::findClosestSuitableAluInstr( |
329 | MachineBasicBlock *BB, const MbbIterator &MemInstr, const bool Decrement) { |
330 | MachineOperand *Base = &MemInstr->getOperand(i: 1); |
331 | MachineOperand *Offset = &MemInstr->getOperand(i: 2); |
332 | bool IsSpls = isSpls(Opcode: MemInstr->getOpcode()); |
333 | |
334 | MbbIterator First = MemInstr; |
335 | MbbIterator Last = Decrement ? BB->begin() : BB->end(); |
336 | |
337 | while (First != Last) { |
338 | Decrement ? --First : ++First; |
339 | |
340 | if (First == Last) |
341 | break; |
342 | |
343 | // Skip over debug instructions |
344 | if (First->isDebugInstr()) |
345 | continue; |
346 | |
347 | if (isSuitableAluInstr(IsSpls, AluIter: First, Base: *Base, Offset: *Offset)) { |
348 | return First; |
349 | } |
350 | |
351 | // Usage of the base or offset register is not a form suitable for merging. |
352 | if (First != Last) { |
353 | if (InstrUsesReg(Instr: First, Reg: Base)) |
354 | break; |
355 | if (Offset->isReg() && InstrUsesReg(Instr: First, Reg: Offset)) |
356 | break; |
357 | } |
358 | } |
359 | |
360 | return MemInstr; |
361 | } |
362 | |
363 | bool LanaiMemAluCombiner::combineMemAluInBasicBlock(MachineBasicBlock *BB) { |
364 | bool Modified = false; |
365 | |
366 | MbbIterator MBBIter = BB->begin(), End = BB->end(); |
367 | while (MBBIter != End) { |
368 | bool IsMemOp = isNonVolatileMemoryOp(MI: *MBBIter); |
369 | |
370 | if (IsMemOp) { |
371 | MachineOperand AluOperand = MBBIter->getOperand(i: 3); |
372 | unsigned int DestReg = MBBIter->getOperand(i: 0).getReg(), |
373 | BaseReg = MBBIter->getOperand(i: 1).getReg(); |
374 | assert(AluOperand.isImm() && "Unexpected memory operator type" ); |
375 | LPAC::AluCode AluOpcode = static_cast<LPAC::AluCode>(AluOperand.getImm()); |
376 | |
377 | // Skip memory operations that already modify the base register or if |
378 | // the destination and base register are the same |
379 | if (!LPAC::modifiesOp(AluOp: AluOpcode) && DestReg != BaseReg) { |
380 | for (int Inc = 0; Inc <= 1; ++Inc) { |
381 | MbbIterator AluIter = |
382 | findClosestSuitableAluInstr(BB, MemInstr: MBBIter, Decrement: Inc == 0); |
383 | if (AluIter != MBBIter) { |
384 | insertMergedInstruction(BB, MemInstr: MBBIter, AluInstr: AluIter, Before: Inc == 0); |
385 | |
386 | ++NumLdStAluCombined; |
387 | Modified = true; |
388 | |
389 | // Erase the matching ALU instruction |
390 | BB->erase(I: AluIter); |
391 | // Erase old load/store instruction |
392 | BB->erase(I: MBBIter++); |
393 | break; |
394 | } |
395 | } |
396 | } |
397 | } |
398 | if (MBBIter == End) |
399 | break; |
400 | ++MBBIter; |
401 | } |
402 | |
403 | return Modified; |
404 | } |
405 | |
406 | // Driver function that iterates over the machine basic building blocks of a |
407 | // machine function |
408 | bool LanaiMemAluCombiner::runOnMachineFunction(MachineFunction &MF) { |
409 | if (DisableMemAluCombiner) |
410 | return false; |
411 | |
412 | TII = MF.getSubtarget<LanaiSubtarget>().getInstrInfo(); |
413 | bool Modified = false; |
414 | for (MachineBasicBlock &MBB : MF) |
415 | Modified |= combineMemAluInBasicBlock(BB: &MBB); |
416 | return Modified; |
417 | } |
418 | } // namespace |
419 | |
420 | FunctionPass *llvm::createLanaiMemAluCombinerPass() { |
421 | return new LanaiMemAluCombiner(); |
422 | } |
423 | |