| 1 | //===- PPCMacroFusion.cpp - PowerPC Macro Fusion --------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file This file contains the PowerPC implementation of the DAG scheduling |
| 10 | /// mutation to pair instructions back to back. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "PPC.h" |
| 15 | #include "PPCSubtarget.h" |
| 16 | #include "llvm/ADT/DenseSet.h" |
| 17 | #include "llvm/CodeGen/MacroFusion.h" |
| 18 | #include "llvm/CodeGen/ScheduleDAGMutation.h" |
| 19 | #include <optional> |
| 20 | |
| 21 | using namespace llvm; |
| 22 | namespace { |
| 23 | |
| 24 | class FusionFeature { |
| 25 | public: |
| 26 | typedef SmallDenseSet<unsigned> FusionOpSet; |
| 27 | |
| 28 | enum FusionKind { |
| 29 | #define FUSION_KIND(KIND) FK_##KIND |
| 30 | #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) \ |
| 31 | FUSION_KIND(KIND), |
| 32 | #include "PPCMacroFusion.def" |
| 33 | FUSION_KIND(END) |
| 34 | }; |
| 35 | private: |
| 36 | // Each fusion feature is assigned with one fusion kind. All the |
| 37 | // instructions with the same fusion kind have the same fusion characteristic. |
| 38 | FusionKind Kd; |
| 39 | // True if this feature is enabled. |
| 40 | bool Supported; |
| 41 | // li rx, si |
| 42 | // load rt, ra, rx |
| 43 | // The dependent operand index in the second op(load). And the negative means |
| 44 | // it could be any one. |
| 45 | int DepOpIdx; |
| 46 | // The first fusion op set. |
| 47 | FusionOpSet OpSet1; |
| 48 | // The second fusion op set. |
| 49 | FusionOpSet OpSet2; |
| 50 | public: |
| 51 | FusionFeature(FusionKind Kind, bool HasFeature, int Index, |
| 52 | const FusionOpSet &First, const FusionOpSet &Second) : |
| 53 | Kd(Kind), Supported(HasFeature), DepOpIdx(Index), OpSet1(First), |
| 54 | OpSet2(Second) {} |
| 55 | |
| 56 | bool hasOp1(unsigned Opc) const { return OpSet1.contains(V: Opc); } |
| 57 | bool hasOp2(unsigned Opc) const { return OpSet2.contains(V: Opc); } |
| 58 | bool isSupported() const { return Supported; } |
| 59 | std::optional<unsigned> depOpIdx() const { |
| 60 | if (DepOpIdx < 0) |
| 61 | return std::nullopt; |
| 62 | return DepOpIdx; |
| 63 | } |
| 64 | |
| 65 | FusionKind getKind() const { return Kd; } |
| 66 | }; |
| 67 | |
| 68 | static bool matchingRegOps(const MachineInstr &FirstMI, |
| 69 | int FirstMIOpIndex, |
| 70 | const MachineInstr &SecondMI, |
| 71 | int SecondMIOpIndex) { |
| 72 | const MachineOperand &Op1 = FirstMI.getOperand(i: FirstMIOpIndex); |
| 73 | const MachineOperand &Op2 = SecondMI.getOperand(i: SecondMIOpIndex); |
| 74 | if (!Op1.isReg() || !Op2.isReg()) |
| 75 | return false; |
| 76 | |
| 77 | return Op1.getReg() == Op2.getReg(); |
| 78 | } |
| 79 | |
| 80 | static bool matchingImmOps(const MachineInstr &MI, |
| 81 | int MIOpIndex, |
| 82 | int64_t Expect, |
| 83 | unsigned ExtendFrom = 64) { |
| 84 | const MachineOperand &Op = MI.getOperand(i: MIOpIndex); |
| 85 | if (!Op.isImm()) |
| 86 | return false; |
| 87 | int64_t Imm = Op.getImm(); |
| 88 | if (ExtendFrom < 64) |
| 89 | Imm = SignExtend64(X: Imm, B: ExtendFrom); |
| 90 | return Imm == Expect; |
| 91 | } |
| 92 | |
| 93 | // Return true if the FirstMI meets the constraints of SecondMI according to |
| 94 | // fusion specification. |
| 95 | static bool checkOpConstraints(FusionFeature::FusionKind Kd, |
| 96 | const MachineInstr &FirstMI, |
| 97 | const MachineInstr &SecondMI) { |
| 98 | switch (Kd) { |
| 99 | // The hardware didn't require any specific check for the fused instructions' |
| 100 | // operands. Therefore, return true to indicate that, it is fusable. |
| 101 | default: return true; |
| 102 | // [addi rt,ra,si - lxvd2x xt,ra,rb] etc. |
| 103 | case FusionFeature::FK_AddiLoad: { |
| 104 | // lxvd2x(ra) cannot be zero |
| 105 | const MachineOperand &RA = SecondMI.getOperand(i: 1); |
| 106 | if (!RA.isReg()) |
| 107 | return true; |
| 108 | |
| 109 | return RA.getReg().isVirtual() || |
| 110 | (RA.getReg() != PPC::ZERO && RA.getReg() != PPC::ZERO8); |
| 111 | } |
| 112 | // [addis rt,ra,si - ld rt,ds(ra)] etc. |
| 113 | case FusionFeature::FK_AddisLoad: { |
| 114 | const MachineOperand &RT = SecondMI.getOperand(i: 0); |
| 115 | if (!RT.isReg()) |
| 116 | return true; |
| 117 | |
| 118 | // Only check it for non-virtual register. |
| 119 | if (!RT.getReg().isVirtual()) |
| 120 | // addis(rt) = ld(ra) = ld(rt) |
| 121 | // ld(rt) cannot be zero |
| 122 | if (!matchingRegOps(FirstMI: SecondMI, FirstMIOpIndex: 0, SecondMI, SecondMIOpIndex: 2) || |
| 123 | (RT.getReg() == PPC::ZERO || RT.getReg() == PPC::ZERO8)) |
| 124 | return false; |
| 125 | |
| 126 | // addis(si) first 12 bits must be all 1s or all 0s |
| 127 | const MachineOperand &SI = FirstMI.getOperand(i: 2); |
| 128 | if (!SI.isImm()) |
| 129 | return true; |
| 130 | int64_t Imm = SI.getImm(); |
| 131 | if (((Imm & 0xFFF0) != 0) && ((Imm & 0xFFF0) != 0xFFF0)) |
| 132 | return false; |
| 133 | |
| 134 | // If si = 1111111111110000 and the msb of the d/ds field of the load equals |
| 135 | // 1, then fusion does not occur. |
| 136 | if ((Imm & 0xFFF0) == 0xFFF0) { |
| 137 | const MachineOperand &D = SecondMI.getOperand(i: 1); |
| 138 | if (!D.isImm()) |
| 139 | return true; |
| 140 | |
| 141 | // 14 bit for DS field, while 16 bit for D field. |
| 142 | int MSB = 15; |
| 143 | if (SecondMI.getOpcode() == PPC::LD) |
| 144 | MSB = 13; |
| 145 | |
| 146 | return (D.getImm() & (1ULL << MSB)) == 0; |
| 147 | } |
| 148 | return true; |
| 149 | } |
| 150 | |
| 151 | case FusionFeature::FK_SldiAdd: |
| 152 | return (matchingImmOps(MI: FirstMI, MIOpIndex: 2, Expect: 3) && matchingImmOps(MI: FirstMI, MIOpIndex: 3, Expect: 60)) || |
| 153 | (matchingImmOps(MI: FirstMI, MIOpIndex: 2, Expect: 6) && matchingImmOps(MI: FirstMI, MIOpIndex: 3, Expect: 57)); |
| 154 | |
| 155 | // rldicl rx, ra, 1, 0 - xor |
| 156 | case FusionFeature::FK_RotateLeftXor: |
| 157 | return matchingImmOps(MI: FirstMI, MIOpIndex: 2, Expect: 1) && matchingImmOps(MI: FirstMI, MIOpIndex: 3, Expect: 0); |
| 158 | |
| 159 | // rldicr rx, ra, 1, 63 - xor |
| 160 | case FusionFeature::FK_RotateRightXor: |
| 161 | return matchingImmOps(MI: FirstMI, MIOpIndex: 2, Expect: 1) && matchingImmOps(MI: FirstMI, MIOpIndex: 3, Expect: 63); |
| 162 | |
| 163 | // We actually use CMPW* and CMPD*, 'l' doesn't exist as an operand in instr. |
| 164 | |
| 165 | // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 } |
| 166 | // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 } |
| 167 | case FusionFeature::FK_LoadCmp1: |
| 168 | // { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 } |
| 169 | // { ld,ldx } - cmpli 0,1,rx,{ 0,1 } |
| 170 | case FusionFeature::FK_LoadCmp2: { |
| 171 | const MachineOperand &BT = SecondMI.getOperand(i: 0); |
| 172 | if (!BT.isReg() || (!BT.getReg().isVirtual() && BT.getReg() != PPC::CR0)) |
| 173 | return false; |
| 174 | if (SecondMI.getOpcode() == PPC::CMPDI && |
| 175 | matchingImmOps(MI: SecondMI, MIOpIndex: 2, Expect: -1, ExtendFrom: 16)) |
| 176 | return true; |
| 177 | return matchingImmOps(MI: SecondMI, MIOpIndex: 2, Expect: 0) || matchingImmOps(MI: SecondMI, MIOpIndex: 2, Expect: 1); |
| 178 | } |
| 179 | |
| 180 | // { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 } |
| 181 | case FusionFeature::FK_LoadCmp3: { |
| 182 | const MachineOperand &BT = SecondMI.getOperand(i: 0); |
| 183 | if (!BT.isReg() || (!BT.getReg().isVirtual() && BT.getReg() != PPC::CR0)) |
| 184 | return false; |
| 185 | return matchingImmOps(MI: SecondMI, MIOpIndex: 2, Expect: 0) || matchingImmOps(MI: SecondMI, MIOpIndex: 2, Expect: 1) || |
| 186 | matchingImmOps(MI: SecondMI, MIOpIndex: 2, Expect: -1, ExtendFrom: 16); |
| 187 | } |
| 188 | |
| 189 | // mtctr - { bcctr,bcctrl } |
| 190 | case FusionFeature::FK_ZeroMoveCTR: |
| 191 | // ( mtctr rx ) is alias of ( mtspr 9, rx ) |
| 192 | return (FirstMI.getOpcode() != PPC::MTSPR && |
| 193 | FirstMI.getOpcode() != PPC::MTSPR8) || |
| 194 | matchingImmOps(MI: FirstMI, MIOpIndex: 0, Expect: 9); |
| 195 | |
| 196 | // mtlr - { bclr,bclrl } |
| 197 | case FusionFeature::FK_ZeroMoveLR: |
| 198 | // ( mtlr rx ) is alias of ( mtspr 8, rx ) |
| 199 | return (FirstMI.getOpcode() != PPC::MTSPR && |
| 200 | FirstMI.getOpcode() != PPC::MTSPR8) || |
| 201 | matchingImmOps(MI: FirstMI, MIOpIndex: 0, Expect: 8); |
| 202 | |
| 203 | // addis rx,ra,si - addi rt,rx,SI, SI >= 0 |
| 204 | case FusionFeature::FK_AddisAddi: { |
| 205 | const MachineOperand &RA = FirstMI.getOperand(i: 1); |
| 206 | const MachineOperand &SI = SecondMI.getOperand(i: 2); |
| 207 | if (!SI.isImm() || !RA.isReg()) |
| 208 | return false; |
| 209 | if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8) |
| 210 | return false; |
| 211 | return SignExtend64(X: SI.getImm(), B: 16) >= 0; |
| 212 | } |
| 213 | |
| 214 | // addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2 |
| 215 | case FusionFeature::FK_AddiAddis: { |
| 216 | const MachineOperand &RA = FirstMI.getOperand(i: 1); |
| 217 | const MachineOperand &SI = FirstMI.getOperand(i: 2); |
| 218 | if (!SI.isImm() || !RA.isReg()) |
| 219 | return false; |
| 220 | if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8) |
| 221 | return false; |
| 222 | int64_t ExtendedSI = SignExtend64(X: SI.getImm(), B: 16); |
| 223 | return ExtendedSI >= 2; |
| 224 | } |
| 225 | } |
| 226 | |
| 227 | llvm_unreachable("All the cases should have been handled" ); |
| 228 | return true; |
| 229 | } |
| 230 | |
| 231 | /// Check if the instr pair, FirstMI and SecondMI, should be fused together. |
| 232 | /// Given SecondMI, when FirstMI is unspecified, then check if SecondMI may be |
| 233 | /// part of a fused pair at all. |
| 234 | static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, |
| 235 | const TargetSubtargetInfo &TSI, |
| 236 | const MachineInstr *FirstMI, |
| 237 | const MachineInstr &SecondMI) { |
| 238 | // We use the PPC namespace to avoid the need to prefix opcodes with PPC:: in |
| 239 | // the def file. |
| 240 | using namespace PPC; |
| 241 | |
| 242 | const PPCSubtarget &ST = static_cast<const PPCSubtarget&>(TSI); |
| 243 | static const FusionFeature FusionFeatures[] = { |
| 244 | #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) { \ |
| 245 | FusionFeature::FUSION_KIND(KIND), ST.HAS_FEATURE(), DEP_OP_IDX, { OPSET1 },\ |
| 246 | { OPSET2 } }, |
| 247 | #include "PPCMacroFusion.def" |
| 248 | }; |
| 249 | #undef FUSION_KIND |
| 250 | |
| 251 | for (auto &Feature : FusionFeatures) { |
| 252 | // Skip if the feature is not supported. |
| 253 | if (!Feature.isSupported()) |
| 254 | continue; |
| 255 | |
| 256 | // Only when the SecondMI is fusable, we are starting to look for the |
| 257 | // fusable FirstMI. |
| 258 | if (Feature.hasOp2(Opc: SecondMI.getOpcode())) { |
| 259 | // If FirstMI == nullptr, that means, we're only checking whether SecondMI |
| 260 | // can be fused at all. |
| 261 | if (!FirstMI) |
| 262 | return true; |
| 263 | |
| 264 | // Checking if the FirstMI is fusable with the SecondMI. |
| 265 | if (!Feature.hasOp1(Opc: FirstMI->getOpcode())) |
| 266 | continue; |
| 267 | |
| 268 | auto DepOpIdx = Feature.depOpIdx(); |
| 269 | if (DepOpIdx) { |
| 270 | // Checking if the result of the FirstMI is the desired operand of the |
| 271 | // SecondMI if the DepOpIdx is set. Otherwise, ignore it. |
| 272 | if (!matchingRegOps(FirstMI: *FirstMI, FirstMIOpIndex: 0, SecondMI, SecondMIOpIndex: *DepOpIdx)) |
| 273 | return false; |
| 274 | } |
| 275 | |
| 276 | // Checking more on the instruction operands. |
| 277 | if (checkOpConstraints(Kd: Feature.getKind(), FirstMI: *FirstMI, SecondMI)) |
| 278 | return true; |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | return false; |
| 283 | } |
| 284 | |
| 285 | } // end anonymous namespace |
| 286 | |
| 287 | namespace llvm { |
| 288 | |
| 289 | std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation() { |
| 290 | return createMacroFusionDAGMutation(Predicates: shouldScheduleAdjacent); |
| 291 | } |
| 292 | |
| 293 | } // end namespace llvm |
| 294 | |