| 1 | //===-- LanaiMemAluCombiner.cpp - Pass to combine memory & ALU operations -===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // Simple pass to combine memory and ALU operations |
| 9 | // |
| 10 | // The Lanai ISA supports instructions where a load/store modifies the base |
| 11 | // register used in the load/store operation. This pass finds suitable |
| 12 | // load/store and ALU instructions and combines them into one instruction. |
| 13 | // |
| 14 | // For example, |
| 15 | // ld [ %r6 -- ], %r12 |
| 16 | // is a supported instruction that is not currently generated by the instruction |
| 17 | // selection pass of this backend. This pass generates these instructions by |
| 18 | // merging |
| 19 | // add %r6, -4, %r6 |
| 20 | // followed by |
| 21 | // ld [ %r6 ], %r12 |
| 22 | // in the same machine basic block into one machine instruction. |
| 23 | //===----------------------------------------------------------------------===// |
| 24 | |
| 25 | #include "Lanai.h" |
| 26 | #include "LanaiAluCode.h" |
| 27 | #include "LanaiTargetMachine.h" |
| 28 | #include "llvm/ADT/Statistic.h" |
| 29 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 30 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 31 | #include "llvm/CodeGen/RegisterScavenging.h" |
| 32 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| 33 | #include "llvm/Support/CommandLine.h" |
| 34 | using namespace llvm; |
| 35 | |
| 36 | #define GET_INSTRMAP_INFO |
| 37 | #include "LanaiGenInstrInfo.inc" |
| 38 | |
| 39 | #define DEBUG_TYPE "lanai-mem-alu-combiner" |
| 40 | |
| 41 | STATISTIC(NumLdStAluCombined, "Number of memory and ALU instructions combined" ); |
| 42 | |
| 43 | static llvm::cl::opt<bool> DisableMemAluCombiner( |
| 44 | "disable-lanai-mem-alu-combiner" , llvm::cl::init(Val: false), |
| 45 | llvm::cl::desc("Do not combine ALU and memory operators" ), |
| 46 | llvm::cl::Hidden); |
| 47 | |
| 48 | namespace { |
| 49 | typedef MachineBasicBlock::iterator MbbIterator; |
| 50 | typedef MachineFunction::iterator MfIterator; |
| 51 | |
| 52 | class LanaiMemAluCombiner : public MachineFunctionPass { |
| 53 | public: |
| 54 | static char ID; |
| 55 | explicit LanaiMemAluCombiner() : MachineFunctionPass(ID) {} |
| 56 | |
| 57 | StringRef getPassName() const override { |
| 58 | return "Lanai load / store optimization pass" ; |
| 59 | } |
| 60 | |
| 61 | bool runOnMachineFunction(MachineFunction &F) override; |
| 62 | |
| 63 | MachineFunctionProperties getRequiredProperties() const override { |
| 64 | return MachineFunctionProperties().setNoVRegs(); |
| 65 | } |
| 66 | |
| 67 | private: |
| 68 | MbbIterator findClosestSuitableAluInstr(MachineBasicBlock *BB, |
| 69 | const MbbIterator &MemInstr, |
| 70 | bool Decrement); |
| 71 | void insertMergedInstruction(MachineBasicBlock *BB, |
| 72 | const MbbIterator &MemInstr, |
| 73 | const MbbIterator &AluInstr, bool Before); |
| 74 | bool combineMemAluInBasicBlock(MachineBasicBlock *BB); |
| 75 | |
| 76 | // Target machine description which we query for register names, data |
| 77 | // layout, etc. |
| 78 | const TargetInstrInfo *TII; |
| 79 | }; |
| 80 | } // namespace |
| 81 | |
| 82 | char LanaiMemAluCombiner::ID = 0; |
| 83 | |
| 84 | INITIALIZE_PASS(LanaiMemAluCombiner, DEBUG_TYPE, |
| 85 | "Lanai memory ALU combiner pass" , false, false) |
| 86 | |
| 87 | namespace { |
| 88 | bool isSpls(uint16_t Opcode) { return Lanai::splsIdempotent(Opcode) == Opcode; } |
| 89 | |
| 90 | // Determine the opcode for the merged instruction created by considering the |
| 91 | // old memory operation's opcode and whether the merged opcode will have an |
| 92 | // immediate offset. |
| 93 | unsigned mergedOpcode(unsigned OldOpcode, bool ImmediateOffset) { |
| 94 | switch (OldOpcode) { |
| 95 | case Lanai::LDW_RI: |
| 96 | case Lanai::LDW_RR: |
| 97 | if (ImmediateOffset) |
| 98 | return Lanai::LDW_RI; |
| 99 | return Lanai::LDW_RR; |
| 100 | case Lanai::LDHs_RI: |
| 101 | case Lanai::LDHs_RR: |
| 102 | if (ImmediateOffset) |
| 103 | return Lanai::LDHs_RI; |
| 104 | return Lanai::LDHs_RR; |
| 105 | case Lanai::LDHz_RI: |
| 106 | case Lanai::LDHz_RR: |
| 107 | if (ImmediateOffset) |
| 108 | return Lanai::LDHz_RI; |
| 109 | return Lanai::LDHz_RR; |
| 110 | case Lanai::LDBs_RI: |
| 111 | case Lanai::LDBs_RR: |
| 112 | if (ImmediateOffset) |
| 113 | return Lanai::LDBs_RI; |
| 114 | return Lanai::LDBs_RR; |
| 115 | case Lanai::LDBz_RI: |
| 116 | case Lanai::LDBz_RR: |
| 117 | if (ImmediateOffset) |
| 118 | return Lanai::LDBz_RI; |
| 119 | return Lanai::LDBz_RR; |
| 120 | case Lanai::SW_RI: |
| 121 | case Lanai::SW_RR: |
| 122 | if (ImmediateOffset) |
| 123 | return Lanai::SW_RI; |
| 124 | return Lanai::SW_RR; |
| 125 | case Lanai::STB_RI: |
| 126 | case Lanai::STB_RR: |
| 127 | if (ImmediateOffset) |
| 128 | return Lanai::STB_RI; |
| 129 | return Lanai::STB_RR; |
| 130 | case Lanai::STH_RI: |
| 131 | case Lanai::STH_RR: |
| 132 | if (ImmediateOffset) |
| 133 | return Lanai::STH_RI; |
| 134 | return Lanai::STH_RR; |
| 135 | default: |
| 136 | return 0; |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | // Check if the machine instruction has non-volatile memory operands of the type |
| 141 | // supported for combining with ALU instructions. |
| 142 | bool isNonVolatileMemoryOp(const MachineInstr &MI) { |
| 143 | if (!MI.hasOneMemOperand()) |
| 144 | return false; |
| 145 | |
| 146 | // Determine if the machine instruction is a supported memory operation by |
| 147 | // testing if the computed merge opcode is a valid memory operation opcode. |
| 148 | if (mergedOpcode(OldOpcode: MI.getOpcode(), ImmediateOffset: false) == 0) |
| 149 | return false; |
| 150 | |
| 151 | const MachineMemOperand *MemOperand = *MI.memoperands_begin(); |
| 152 | |
| 153 | // Don't move volatile memory accesses |
| 154 | // TODO: unclear if we need to be as conservative about atomics |
| 155 | if (MemOperand->isVolatile() || MemOperand->isAtomic()) |
| 156 | return false; |
| 157 | |
| 158 | return true; |
| 159 | } |
| 160 | |
| 161 | // Test to see if two machine operands are of the same type. This test is less |
| 162 | // strict than the MachineOperand::isIdenticalTo function. |
| 163 | bool isSameOperand(const MachineOperand &Op1, const MachineOperand &Op2) { |
| 164 | if (Op1.getType() != Op2.getType()) |
| 165 | return false; |
| 166 | |
| 167 | switch (Op1.getType()) { |
| 168 | case MachineOperand::MO_Register: |
| 169 | return Op1.getReg() == Op2.getReg(); |
| 170 | case MachineOperand::MO_Immediate: |
| 171 | return Op1.getImm() == Op2.getImm(); |
| 172 | default: |
| 173 | return false; |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | bool isZeroOperand(const MachineOperand &Op) { |
| 178 | return ((Op.isReg() && Op.getReg() == Lanai::R0) || |
| 179 | (Op.isImm() && Op.getImm() == 0)); |
| 180 | } |
| 181 | |
| 182 | // Determines whether a register is used by an instruction. |
| 183 | bool InstrUsesReg(const MbbIterator &Instr, const MachineOperand *Reg) { |
| 184 | for (MachineInstr::const_mop_iterator Mop = Instr->operands_begin(); |
| 185 | Mop != Instr->operands_end(); ++Mop) { |
| 186 | if (isSameOperand(Op1: *Mop, Op2: *Reg)) |
| 187 | return true; |
| 188 | } |
| 189 | return false; |
| 190 | } |
| 191 | |
| 192 | // Converts between machine opcode and AluCode. |
| 193 | // Flag using/modifying ALU operations should not be considered for merging and |
| 194 | // are omitted from this list. |
| 195 | LPAC::AluCode mergedAluCode(unsigned AluOpcode) { |
| 196 | switch (AluOpcode) { |
| 197 | case Lanai::ADD_I_LO: |
| 198 | case Lanai::ADD_R: |
| 199 | return LPAC::ADD; |
| 200 | case Lanai::SUB_I_LO: |
| 201 | case Lanai::SUB_R: |
| 202 | return LPAC::SUB; |
| 203 | case Lanai::AND_I_LO: |
| 204 | case Lanai::AND_R: |
| 205 | return LPAC::AND; |
| 206 | case Lanai::OR_I_LO: |
| 207 | case Lanai::OR_R: |
| 208 | return LPAC::OR; |
| 209 | case Lanai::XOR_I_LO: |
| 210 | case Lanai::XOR_R: |
| 211 | return LPAC::XOR; |
| 212 | case Lanai::SHL_R: |
| 213 | return LPAC::SHL; |
| 214 | case Lanai::SRL_R: |
| 215 | return LPAC::SRL; |
| 216 | case Lanai::SRA_R: |
| 217 | return LPAC::SRA; |
| 218 | case Lanai::SA_I: |
| 219 | case Lanai::SL_I: |
| 220 | default: |
| 221 | return LPAC::UNKNOWN; |
| 222 | } |
| 223 | } |
| 224 | |
| 225 | // Insert a new combined memory and ALU operation instruction. |
| 226 | // |
| 227 | // This function builds a new machine instruction using the MachineInstrBuilder |
| 228 | // class and inserts it before the memory instruction. |
| 229 | void LanaiMemAluCombiner::insertMergedInstruction(MachineBasicBlock *BB, |
| 230 | const MbbIterator &MemInstr, |
| 231 | const MbbIterator &AluInstr, |
| 232 | bool Before) { |
| 233 | // Insert new combined load/store + alu operation |
| 234 | MachineOperand Dest = MemInstr->getOperand(i: 0); |
| 235 | MachineOperand Base = MemInstr->getOperand(i: 1); |
| 236 | MachineOperand MemOffset = MemInstr->getOperand(i: 2); |
| 237 | MachineOperand AluOffset = AluInstr->getOperand(i: 2); |
| 238 | |
| 239 | // Abort if ALU offset is not a register or immediate |
| 240 | assert((AluOffset.isReg() || AluOffset.isImm()) && |
| 241 | "Unsupported operand type in merge" ); |
| 242 | |
| 243 | // Determined merged instructions opcode and ALU code |
| 244 | LPAC::AluCode AluOpcode = mergedAluCode(AluOpcode: AluInstr->getOpcode()); |
| 245 | unsigned NewOpc = mergedOpcode(OldOpcode: MemInstr->getOpcode(), ImmediateOffset: AluOffset.isImm()); |
| 246 | |
| 247 | assert(AluOpcode != LPAC::UNKNOWN && "Unknown ALU code in merging" ); |
| 248 | assert(NewOpc != 0 && "Unknown merged node opcode" ); |
| 249 | |
| 250 | // Build and insert new machine instruction |
| 251 | MachineInstrBuilder InstrBuilder = |
| 252 | BuildMI(BB&: *BB, I: MemInstr, MIMD: MemInstr->getDebugLoc(), MCID: TII->get(Opcode: NewOpc)); |
| 253 | InstrBuilder.addReg(RegNo: Dest.getReg(), flags: getDefRegState(B: true)); |
| 254 | InstrBuilder.addReg(RegNo: Base.getReg(), flags: getKillRegState(B: true)); |
| 255 | |
| 256 | // Add offset to machine instruction |
| 257 | if (AluOffset.isReg()) |
| 258 | InstrBuilder.addReg(RegNo: AluOffset.getReg()); |
| 259 | else if (AluOffset.isImm()) |
| 260 | InstrBuilder.addImm(Val: AluOffset.getImm()); |
| 261 | else |
| 262 | llvm_unreachable("Unsupported ld/st ALU merge." ); |
| 263 | |
| 264 | // Create a pre-op if the ALU operation preceded the memory operation or the |
| 265 | // MemOffset is non-zero (i.e. the memory value should be adjusted before |
| 266 | // accessing it), else create a post-op. |
| 267 | if (Before || !isZeroOperand(Op: MemOffset)) |
| 268 | InstrBuilder.addImm(Val: LPAC::makePreOp(AluOp: AluOpcode)); |
| 269 | else |
| 270 | InstrBuilder.addImm(Val: LPAC::makePostOp(AluOp: AluOpcode)); |
| 271 | |
| 272 | // Transfer memory operands. |
| 273 | InstrBuilder.setMemRefs(MemInstr->memoperands()); |
| 274 | } |
| 275 | |
| 276 | // Function determines if ALU operation (in alu_iter) can be combined with |
| 277 | // a load/store with base and offset. |
| 278 | bool isSuitableAluInstr(bool IsSpls, const MbbIterator &AluIter, |
| 279 | const MachineOperand &Base, |
| 280 | const MachineOperand &Offset) { |
| 281 | // ALU operations have 3 operands |
| 282 | if (AluIter->getNumOperands() != 3) |
| 283 | return false; |
| 284 | |
| 285 | MachineOperand &Dest = AluIter->getOperand(i: 0); |
| 286 | MachineOperand &Op1 = AluIter->getOperand(i: 1); |
| 287 | MachineOperand &Op2 = AluIter->getOperand(i: 2); |
| 288 | |
| 289 | // Only match instructions using the base register as destination and with the |
| 290 | // base and first operand equal |
| 291 | if (!isSameOperand(Op1: Dest, Op2: Base) || !isSameOperand(Op1: Dest, Op2: Op1)) |
| 292 | return false; |
| 293 | |
| 294 | if (Op2.isImm()) { |
| 295 | // It is not a match if the 2nd operand in the ALU operation is an |
| 296 | // immediate but the ALU operation is not an addition. |
| 297 | if (AluIter->getOpcode() != Lanai::ADD_I_LO) |
| 298 | return false; |
| 299 | |
| 300 | if (Offset.isReg() && Offset.getReg() == Lanai::R0) |
| 301 | return true; |
| 302 | |
| 303 | if (Offset.isImm() && |
| 304 | ((Offset.getImm() == 0 && |
| 305 | // Check that the Op2 would fit in the immediate field of the |
| 306 | // memory operation. |
| 307 | ((IsSpls && isInt<10>(x: Op2.getImm())) || |
| 308 | (!IsSpls && isInt<16>(x: Op2.getImm())))) || |
| 309 | Offset.getImm() == Op2.getImm())) |
| 310 | return true; |
| 311 | } else if (Op2.isReg()) { |
| 312 | // The Offset and 2nd operand are both registers and equal |
| 313 | if (Offset.isReg() && Op2.getReg() == Offset.getReg()) |
| 314 | return true; |
| 315 | } else |
| 316 | // Only consider operations with register or immediate values |
| 317 | return false; |
| 318 | |
| 319 | return false; |
| 320 | } |
| 321 | |
| 322 | MbbIterator LanaiMemAluCombiner::findClosestSuitableAluInstr( |
| 323 | MachineBasicBlock *BB, const MbbIterator &MemInstr, const bool Decrement) { |
| 324 | MachineOperand *Base = &MemInstr->getOperand(i: 1); |
| 325 | MachineOperand *Offset = &MemInstr->getOperand(i: 2); |
| 326 | bool IsSpls = isSpls(Opcode: MemInstr->getOpcode()); |
| 327 | |
| 328 | MbbIterator First = MemInstr; |
| 329 | MbbIterator Last = Decrement ? BB->begin() : BB->end(); |
| 330 | |
| 331 | while (First != Last) { |
| 332 | Decrement ? --First : ++First; |
| 333 | |
| 334 | if (First == Last) |
| 335 | break; |
| 336 | |
| 337 | // Skip over debug instructions |
| 338 | if (First->isDebugInstr()) |
| 339 | continue; |
| 340 | |
| 341 | if (isSuitableAluInstr(IsSpls, AluIter: First, Base: *Base, Offset: *Offset)) { |
| 342 | return First; |
| 343 | } |
| 344 | |
| 345 | // Usage of the base or offset register is not a form suitable for merging. |
| 346 | if (First != Last) { |
| 347 | if (InstrUsesReg(Instr: First, Reg: Base)) |
| 348 | break; |
| 349 | if (Offset->isReg() && InstrUsesReg(Instr: First, Reg: Offset)) |
| 350 | break; |
| 351 | } |
| 352 | } |
| 353 | |
| 354 | return MemInstr; |
| 355 | } |
| 356 | |
| 357 | bool LanaiMemAluCombiner::combineMemAluInBasicBlock(MachineBasicBlock *BB) { |
| 358 | bool Modified = false; |
| 359 | |
| 360 | MbbIterator MBBIter = BB->begin(), End = BB->end(); |
| 361 | while (MBBIter != End) { |
| 362 | bool IsMemOp = isNonVolatileMemoryOp(MI: *MBBIter); |
| 363 | |
| 364 | if (IsMemOp) { |
| 365 | MachineOperand AluOperand = MBBIter->getOperand(i: 3); |
| 366 | unsigned int DestReg = MBBIter->getOperand(i: 0).getReg(), |
| 367 | BaseReg = MBBIter->getOperand(i: 1).getReg(); |
| 368 | assert(AluOperand.isImm() && "Unexpected memory operator type" ); |
| 369 | LPAC::AluCode AluOpcode = static_cast<LPAC::AluCode>(AluOperand.getImm()); |
| 370 | |
| 371 | // Skip memory operations that already modify the base register or if |
| 372 | // the destination and base register are the same |
| 373 | if (!LPAC::modifiesOp(AluOp: AluOpcode) && DestReg != BaseReg) { |
| 374 | for (int Inc = 0; Inc <= 1; ++Inc) { |
| 375 | MbbIterator AluIter = |
| 376 | findClosestSuitableAluInstr(BB, MemInstr: MBBIter, Decrement: Inc == 0); |
| 377 | if (AluIter != MBBIter) { |
| 378 | insertMergedInstruction(BB, MemInstr: MBBIter, AluInstr: AluIter, Before: Inc == 0); |
| 379 | |
| 380 | ++NumLdStAluCombined; |
| 381 | Modified = true; |
| 382 | |
| 383 | // Erase the matching ALU instruction |
| 384 | BB->erase(I: AluIter); |
| 385 | // Erase old load/store instruction |
| 386 | BB->erase(I: MBBIter++); |
| 387 | break; |
| 388 | } |
| 389 | } |
| 390 | } |
| 391 | } |
| 392 | if (MBBIter == End) |
| 393 | break; |
| 394 | ++MBBIter; |
| 395 | } |
| 396 | |
| 397 | return Modified; |
| 398 | } |
| 399 | |
| 400 | // Driver function that iterates over the machine basic building blocks of a |
| 401 | // machine function |
| 402 | bool LanaiMemAluCombiner::runOnMachineFunction(MachineFunction &MF) { |
| 403 | if (DisableMemAluCombiner) |
| 404 | return false; |
| 405 | |
| 406 | TII = MF.getSubtarget<LanaiSubtarget>().getInstrInfo(); |
| 407 | bool Modified = false; |
| 408 | for (MachineBasicBlock &MBB : MF) |
| 409 | Modified |= combineMemAluInBasicBlock(BB: &MBB); |
| 410 | return Modified; |
| 411 | } |
| 412 | } // namespace |
| 413 | |
| 414 | FunctionPass *llvm::createLanaiMemAluCombinerPass() { |
| 415 | return new LanaiMemAluCombiner(); |
| 416 | } |
| 417 | |