| 1 | //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file contains the Base ARM implementation of the TargetInstrInfo class. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "ARMBaseInstrInfo.h" |
| 14 | #include "ARMBaseRegisterInfo.h" |
| 15 | #include "ARMConstantPoolValue.h" |
| 16 | #include "ARMFeatures.h" |
| 17 | #include "ARMHazardRecognizer.h" |
| 18 | #include "ARMMachineFunctionInfo.h" |
| 19 | #include "ARMSubtarget.h" |
| 20 | #include "MCTargetDesc/ARMAddressingModes.h" |
| 21 | #include "MCTargetDesc/ARMBaseInfo.h" |
| 22 | #include "MVETailPredUtils.h" |
| 23 | #include "llvm/ADT/DenseMap.h" |
| 24 | #include "llvm/ADT/STLExtras.h" |
| 25 | #include "llvm/ADT/SmallSet.h" |
| 26 | #include "llvm/ADT/SmallVector.h" |
| 27 | #include "llvm/CodeGen/CFIInstBuilder.h" |
| 28 | #include "llvm/CodeGen/DFAPacketizer.h" |
| 29 | #include "llvm/CodeGen/LiveVariables.h" |
| 30 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 31 | #include "llvm/CodeGen/MachineConstantPool.h" |
| 32 | #include "llvm/CodeGen/MachineFrameInfo.h" |
| 33 | #include "llvm/CodeGen/MachineFunction.h" |
| 34 | #include "llvm/CodeGen/MachineInstr.h" |
| 35 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 36 | #include "llvm/CodeGen/MachineMemOperand.h" |
| 37 | #include "llvm/CodeGen/MachineModuleInfo.h" |
| 38 | #include "llvm/CodeGen/MachineOperand.h" |
| 39 | #include "llvm/CodeGen/MachinePipeliner.h" |
| 40 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 41 | #include "llvm/CodeGen/MachineScheduler.h" |
| 42 | #include "llvm/CodeGen/MultiHazardRecognizer.h" |
| 43 | #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" |
| 44 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
| 45 | #include "llvm/CodeGen/TargetInstrInfo.h" |
| 46 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
| 47 | #include "llvm/CodeGen/TargetSchedule.h" |
| 48 | #include "llvm/IR/Attributes.h" |
| 49 | #include "llvm/IR/DebugLoc.h" |
| 50 | #include "llvm/IR/Function.h" |
| 51 | #include "llvm/IR/GlobalValue.h" |
| 52 | #include "llvm/IR/Module.h" |
| 53 | #include "llvm/MC/MCAsmInfo.h" |
| 54 | #include "llvm/MC/MCInstrDesc.h" |
| 55 | #include "llvm/MC/MCInstrItineraries.h" |
| 56 | #include "llvm/Support/BranchProbability.h" |
| 57 | #include "llvm/Support/Casting.h" |
| 58 | #include "llvm/Support/Compiler.h" |
| 59 | #include "llvm/Support/Debug.h" |
| 60 | #include "llvm/Support/ErrorHandling.h" |
| 61 | #include "llvm/Support/raw_ostream.h" |
| 62 | #include "llvm/Target/TargetMachine.h" |
| 63 | #include <algorithm> |
| 64 | #include <cassert> |
| 65 | #include <cstdint> |
| 66 | #include <iterator> |
| 67 | #include <new> |
| 68 | #include <utility> |
| 69 | #include <vector> |
| 70 | |
| 71 | using namespace llvm; |
| 72 | |
| 73 | #define DEBUG_TYPE "arm-instrinfo" |
| 74 | |
| 75 | #define GET_INSTRINFO_CTOR_DTOR |
| 76 | #include "ARMGenInstrInfo.inc" |
| 77 | |
| 78 | /// ARM_MLxEntry - Record information about MLA / MLS instructions. |
| 79 | struct ARM_MLxEntry { |
| 80 | uint16_t MLxOpc; // MLA / MLS opcode |
| 81 | uint16_t MulOpc; // Expanded multiplication opcode |
| 82 | uint16_t AddSubOpc; // Expanded add / sub opcode |
| 83 | bool NegAcc; // True if the acc is negated before the add / sub. |
| 84 | bool HasLane; // True if instruction has an extra "lane" operand. |
| 85 | }; |
| 86 | |
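|  | // For example, the first entry in the table below records that a scalar |
|  | // VMLAS (floating-point multiply-accumulate) may be expanded into a VMULS |
|  | // followed by a VADDS, with no negated accumulator and no lane operand. |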
| 87 | static const ARM_MLxEntry ARM_MLxTable[] = { |
| 88 | // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane |
| 89 | // fp scalar ops |
| 90 | { .MLxOpc: ARM::VMLAS, .MulOpc: ARM::VMULS, .AddSubOpc: ARM::VADDS, .NegAcc: false, .HasLane: false }, |
| 91 | { .MLxOpc: ARM::VMLSS, .MulOpc: ARM::VMULS, .AddSubOpc: ARM::VSUBS, .NegAcc: false, .HasLane: false }, |
| 92 | { .MLxOpc: ARM::VMLAD, .MulOpc: ARM::VMULD, .AddSubOpc: ARM::VADDD, .NegAcc: false, .HasLane: false }, |
| 93 | { .MLxOpc: ARM::VMLSD, .MulOpc: ARM::VMULD, .AddSubOpc: ARM::VSUBD, .NegAcc: false, .HasLane: false }, |
| 94 | { .MLxOpc: ARM::VNMLAS, .MulOpc: ARM::VNMULS, .AddSubOpc: ARM::VSUBS, .NegAcc: true, .HasLane: false }, |
| 95 | { .MLxOpc: ARM::VNMLSS, .MulOpc: ARM::VMULS, .AddSubOpc: ARM::VSUBS, .NegAcc: true, .HasLane: false }, |
| 96 | { .MLxOpc: ARM::VNMLAD, .MulOpc: ARM::VNMULD, .AddSubOpc: ARM::VSUBD, .NegAcc: true, .HasLane: false }, |
| 97 | { .MLxOpc: ARM::VNMLSD, .MulOpc: ARM::VMULD, .AddSubOpc: ARM::VSUBD, .NegAcc: true, .HasLane: false }, |
| 98 | |
| 99 | // fp SIMD ops |
| 100 | { .MLxOpc: ARM::VMLAfd, .MulOpc: ARM::VMULfd, .AddSubOpc: ARM::VADDfd, .NegAcc: false, .HasLane: false }, |
| 101 | { .MLxOpc: ARM::VMLSfd, .MulOpc: ARM::VMULfd, .AddSubOpc: ARM::VSUBfd, .NegAcc: false, .HasLane: false }, |
| 102 | { .MLxOpc: ARM::VMLAfq, .MulOpc: ARM::VMULfq, .AddSubOpc: ARM::VADDfq, .NegAcc: false, .HasLane: false }, |
| 103 | { .MLxOpc: ARM::VMLSfq, .MulOpc: ARM::VMULfq, .AddSubOpc: ARM::VSUBfq, .NegAcc: false, .HasLane: false }, |
| 104 | { .MLxOpc: ARM::VMLAslfd, .MulOpc: ARM::VMULslfd, .AddSubOpc: ARM::VADDfd, .NegAcc: false, .HasLane: true }, |
| 105 | { .MLxOpc: ARM::VMLSslfd, .MulOpc: ARM::VMULslfd, .AddSubOpc: ARM::VSUBfd, .NegAcc: false, .HasLane: true }, |
| 106 | { .MLxOpc: ARM::VMLAslfq, .MulOpc: ARM::VMULslfq, .AddSubOpc: ARM::VADDfq, .NegAcc: false, .HasLane: true }, |
| 107 | { .MLxOpc: ARM::VMLSslfq, .MulOpc: ARM::VMULslfq, .AddSubOpc: ARM::VSUBfq, .NegAcc: false, .HasLane: true }, |
| 108 | }; |
| 109 | |
| 110 | ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) |
| 111 | : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), |
| 112 | Subtarget(STI) { |
| 113 | for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) { |
| 114 | if (!MLxEntryMap.insert(KV: std::make_pair(x: ARM_MLxTable[i].MLxOpc, y&: i)).second) |
| 115 | llvm_unreachable("Duplicated entries?" ); |
| 116 | MLxHazardOpcodes.insert(V: ARM_MLxTable[i].AddSubOpc); |
| 117 | MLxHazardOpcodes.insert(V: ARM_MLxTable[i].MulOpc); |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrInfo |
| 122 | // currently defaults to no prepass hazard recognizer. |
| 123 | ScheduleHazardRecognizer * |
| 124 | ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, |
| 125 | const ScheduleDAG *DAG) const { |
| 126 | if (usePreRAHazardRecognizer()) { |
| 127 | const InstrItineraryData *II = |
| 128 | static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); |
| 129 | return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched" ); |
| 130 | } |
| 131 | return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); |
| 132 | } |
| 133 | |
| 134 | // Called during: |
| 135 | // - pre-RA scheduling |
| 136 | // - post-RA scheduling when FeatureUseMISched is set |
| 137 | ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer( |
| 138 | const InstrItineraryData *II, const ScheduleDAGMI *DAG) const { |
| 139 | MultiHazardRecognizer *MHR = new MultiHazardRecognizer(); |
| 140 | |
| 141 | // We would like to restrict this hazard recognizer to only |
| 142 | // post-RA scheduling; we can tell that we're post-RA because we don't |
| 143 | // track VRegLiveness. |
| 144 | // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM |
| 145 | // banks banked on bit 2. Assume that TCMs are in use. |
| 146 | if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness()) |
| 147 | MHR->AddHazardRecognizer( |
| 148 | std::make_unique<ARMBankConflictHazardRecognizer>(args&: DAG, args: 0x4, args: true)); |
| 149 | |
| 150 | // Not inserting ARMHazardRecognizerFPMLx because that would change |
| 151 | // legacy behavior |
| 152 | |
| 153 | auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG); |
| 154 | MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR)); |
| 155 | return MHR; |
| 156 | } |
| 157 | |
| 158 | // Called during post-RA scheduling when FeatureUseMISched is not set |
| 159 | ScheduleHazardRecognizer *ARMBaseInstrInfo:: |
| 160 | CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, |
| 161 | const ScheduleDAG *DAG) const { |
| 162 | MultiHazardRecognizer *MHR = new MultiHazardRecognizer(); |
| 163 | |
| 164 | if (Subtarget.isThumb2() || Subtarget.hasVFP2Base()) |
| 165 | MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>()); |
| 166 | |
| 167 | auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); |
| 168 | if (BHR) |
| 169 | MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR)); |
| 170 | return MHR; |
| 171 | } |
| 172 | |
| 173 | // Branch analysis. |
| 174 | // Cond vector output format: |
| 175 | // 0 elements indicates an unconditional branch |
| 176 | // 2 elements indicates a conditional branch; the elements are |
| 177 | // the condition to check and the CPSR. |
| 178 | // 3 elements indicates a hardware loop end; the elements |
| 179 | // are the opcode, the operand value to test, and a dummy |
| 180 | // operand used to pad out to 3 operands. |
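|  | // For example, a conditional Bcc terminator yields Cond = { condition |
|  | // code imm, CPSR operand }, while a t2LoopEnd terminator yields Cond = |
|  | // { opcode imm, tested register operand, dummy imm 0 } (see below). |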
| 181 | bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, |
| 182 | MachineBasicBlock *&TBB, |
| 183 | MachineBasicBlock *&FBB, |
| 184 | SmallVectorImpl<MachineOperand> &Cond, |
| 185 | bool AllowModify) const { |
| 186 | TBB = nullptr; |
| 187 | FBB = nullptr; |
| 188 | |
| 189 | MachineBasicBlock::instr_iterator I = MBB.instr_end(); |
| 190 | if (I == MBB.instr_begin()) |
| 191 | return false; // Empty blocks are easy. |
| 192 | --I; |
| 193 | |
| 194 | // Walk backwards from the end of the basic block until the branch is |
| 195 | // analyzed or we give up. |
| 196 | while (isPredicated(MI: *I) || I->isTerminator() || I->isDebugValue()) { |
| 197 | // Flag to be raised on unanalyzable instructions. This is useful in cases |
| 198 | // where we want to clean up the end of the basic block before we bail |
| 199 | // out. |
| 200 | bool CantAnalyze = false; |
| 201 | |
| 202 | // Skip over DEBUG values, predicated nonterminators and speculation |
| 203 | // barrier terminators. |
| 204 | while (I->isDebugInstr() || !I->isTerminator() || |
| 205 | isSpeculationBarrierEndBBOpcode(Opc: I->getOpcode()) || |
| 206 | I->getOpcode() == ARM::t2DoLoopStartTP){ |
| 207 | if (I == MBB.instr_begin()) |
| 208 | return false; |
| 209 | --I; |
| 210 | } |
| 211 | |
| 212 | if (isIndirectBranchOpcode(Opc: I->getOpcode()) || |
| 213 | isJumpTableBranchOpcode(Opc: I->getOpcode())) { |
| 214 | // Indirect branches and jump tables can't be analyzed, but we still want |
| 215 | // to clean up any instructions at the tail of the basic block. |
| 216 | CantAnalyze = true; |
| 217 | } else if (isUncondBranchOpcode(Opc: I->getOpcode())) { |
| 218 | TBB = I->getOperand(i: 0).getMBB(); |
| 219 | } else if (isCondBranchOpcode(Opc: I->getOpcode())) { |
| 220 | // Bail out if we encounter multiple conditional branches. |
| 221 | if (!Cond.empty()) |
| 222 | return true; |
| 223 | |
| 224 | assert(!FBB && "FBB should have been null." ); |
| 225 | FBB = TBB; |
| 226 | TBB = I->getOperand(i: 0).getMBB(); |
| 227 | Cond.push_back(Elt: I->getOperand(i: 1)); |
| 228 | Cond.push_back(Elt: I->getOperand(i: 2)); |
| 229 | } else if (I->isReturn()) { |
| 230 | // Returns can't be analyzed, but we should run cleanup. |
| 231 | CantAnalyze = true; |
| 232 | } else if (I->getOpcode() == ARM::t2LoopEnd && |
| 233 | MBB.getParent() |
| 234 | ->getSubtarget<ARMSubtarget>() |
| 235 | .enableMachinePipeliner()) { |
| 236 | if (!Cond.empty()) |
| 237 | return true; |
| 238 | FBB = TBB; |
| 239 | TBB = I->getOperand(i: 1).getMBB(); |
| 240 | Cond.push_back(Elt: MachineOperand::CreateImm(Val: I->getOpcode())); |
| 241 | Cond.push_back(Elt: I->getOperand(i: 0)); |
| 242 | Cond.push_back(Elt: MachineOperand::CreateImm(Val: 0)); |
| 243 | } else { |
| 244 | // We encountered an unrecognized terminator. Bail out immediately. |
| 245 | return true; |
| 246 | } |
| 247 | |
| 248 | // Cleanup code - to be run for unpredicated unconditional branches and |
| 249 | // returns. |
| 250 | if (!isPredicated(MI: *I) && |
| 251 | (isUncondBranchOpcode(Opc: I->getOpcode()) || |
| 252 | isIndirectBranchOpcode(Opc: I->getOpcode()) || |
| 253 | isJumpTableBranchOpcode(Opc: I->getOpcode()) || |
| 254 | I->isReturn())) { |
| 255 | // Forget any previous conditional-branch information - it no longer applies. |
| 256 | Cond.clear(); |
| 257 | FBB = nullptr; |
| 258 | |
| 259 | // If we can modify the function, delete everything below this |
| 260 | // unconditional branch. |
| 261 | if (AllowModify) { |
| 262 | MachineBasicBlock::iterator DI = std::next(x: I); |
| 263 | while (DI != MBB.instr_end()) { |
| 264 | MachineInstr &InstToDelete = *DI; |
| 265 | ++DI; |
| 266 | // Speculation barriers must not be deleted. |
| 267 | if (isSpeculationBarrierEndBBOpcode(Opc: InstToDelete.getOpcode())) |
| 268 | continue; |
| 269 | InstToDelete.eraseFromParent(); |
| 270 | } |
| 271 | } |
| 272 | } |
| 273 | |
| 274 | if (CantAnalyze) { |
| 275 | // We may not be able to analyze the block, but we could still have |
| 276 | // an unconditional branch as the last instruction in the block, which |
| 277 | // just branches to layout successor. If this is the case, then just |
| 278 | // remove it if we're allowed to make modifications. |
| 279 | if (AllowModify && !isPredicated(MI: MBB.back()) && |
| 280 | isUncondBranchOpcode(Opc: MBB.back().getOpcode()) && |
| 281 | TBB && MBB.isLayoutSuccessor(MBB: TBB)) |
| 282 | removeBranch(MBB); |
| 283 | return true; |
| 284 | } |
| 285 | |
| 286 | if (I == MBB.instr_begin()) |
| 287 | return false; |
| 288 | |
| 289 | --I; |
| 290 | } |
| 291 | |
| 292 | // We made it past the terminators without bailing out - we must have |
| 293 | // analyzed this branch successfully. |
| 294 | return false; |
| 295 | } |
| 296 | |
| 297 | unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, |
| 298 | int *BytesRemoved) const { |
| 299 | assert(!BytesRemoved && "code size not handled" ); |
| 300 | |
| 301 | MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); |
| 302 | if (I == MBB.end()) |
| 303 | return 0; |
| 304 | |
| 305 | if (!isUncondBranchOpcode(Opc: I->getOpcode()) && |
| 306 | !isCondBranchOpcode(Opc: I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd) |
| 307 | return 0; |
| 308 | |
| 309 | // Remove the branch. |
| 310 | I->eraseFromParent(); |
| 311 | |
| 312 | I = MBB.end(); |
| 313 | |
| 314 | if (I == MBB.begin()) return 1; |
| 315 | --I; |
| 316 | if (!isCondBranchOpcode(Opc: I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd) |
| 317 | return 1; |
| 318 | |
| 319 | // Remove the branch. |
| 320 | I->eraseFromParent(); |
| 321 | return 2; |
| 322 | } |
| 323 | |
| 324 | unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, |
| 325 | MachineBasicBlock *TBB, |
| 326 | MachineBasicBlock *FBB, |
| 327 | ArrayRef<MachineOperand> Cond, |
| 328 | const DebugLoc &DL, |
| 329 | int *BytesAdded) const { |
| 330 | assert(!BytesAdded && "code size not handled" ); |
| 331 | ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); |
| 332 | int BOpc = !AFI->isThumbFunction() |
| 333 | ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); |
| 334 | int BccOpc = !AFI->isThumbFunction() |
| 335 | ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); |
| 336 | bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); |
| 337 | |
| 338 | // Shouldn't be a fall through. |
| 339 | assert(TBB && "insertBranch must not be told to insert a fallthrough" ); |
| 340 | assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) && |
| 341 | "ARM branch conditions have two or three components!" ); |
| 342 | |
| 343 | // For conditional branches, we copy the existing predicate operands so the CPSR flags are preserved. |
| 344 | |
| 345 | if (!FBB) { |
| 346 | if (Cond.empty()) { // Unconditional branch? |
| 347 | if (isThumb) |
| 348 | BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: BOpc)).addMBB(MBB: TBB).add(MOs: predOps(Pred: ARMCC::AL)); |
| 349 | else |
| 350 | BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: BOpc)).addMBB(MBB: TBB); |
| 351 | } else if (Cond.size() == 2) { |
| 352 | BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: BccOpc)) |
| 353 | .addMBB(MBB: TBB) |
| 354 | .addImm(Val: Cond[0].getImm()) |
| 355 | .add(MO: Cond[1]); |
| 356 | } else |
| 357 | BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: Cond[0].getImm())).add(MO: Cond[1]).addMBB(MBB: TBB); |
| 358 | return 1; |
| 359 | } |
| 360 | |
| 361 | // Two-way conditional branch. |
| 362 | if (Cond.size() == 2) |
| 363 | BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: BccOpc)) |
| 364 | .addMBB(MBB: TBB) |
| 365 | .addImm(Val: Cond[0].getImm()) |
| 366 | .add(MO: Cond[1]); |
| 367 | else if (Cond.size() == 3) |
| 368 | BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: Cond[0].getImm())).add(MO: Cond[1]).addMBB(MBB: TBB); |
| 369 | if (isThumb) |
| 370 | BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: BOpc)).addMBB(MBB: FBB).add(MOs: predOps(Pred: ARMCC::AL)); |
| 371 | else |
| 372 | BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: BOpc)).addMBB(MBB: FBB); |
| 373 | return 2; |
| 374 | } |
| 375 | |
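|  | // Per the TargetInstrInfo convention, returning true below means the |
|  | // condition could not be reversed; the three-operand hardware-loop |
|  | // conditions produced by analyzeBranch are not reversible here. |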
| 376 | bool ARMBaseInstrInfo:: |
| 377 | reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { |
| 378 | if (Cond.size() == 2) { |
| 379 | ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); |
| 380 | Cond[0].setImm(ARMCC::getOppositeCondition(CC)); |
| 381 | return false; |
| 382 | } |
| 383 | return true; |
| 384 | } |
| 385 | |
| 386 | bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const { |
| 387 | if (MI.isBundle()) { |
| 388 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
| 389 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
| 390 | while (++I != E && I->isInsideBundle()) { |
| 391 | int PIdx = I->findFirstPredOperandIdx(); |
| 392 | if (PIdx != -1 && I->getOperand(i: PIdx).getImm() != ARMCC::AL) |
| 393 | return true; |
| 394 | } |
| 395 | return false; |
| 396 | } |
| 397 | |
| 398 | int PIdx = MI.findFirstPredOperandIdx(); |
| 399 | return PIdx != -1 && MI.getOperand(i: PIdx).getImm() != ARMCC::AL; |
| 400 | } |
| 401 | |
| 402 | std::string ARMBaseInstrInfo::createMIROperandComment( |
| 403 | const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, |
| 404 | const TargetRegisterInfo *TRI) const { |
| 405 | |
| 406 | // First, let's see if there is a generic comment for this operand |
| 407 | std::string GenericComment = |
| 408 | TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI); |
| 409 | if (!GenericComment.empty()) |
| 410 | return GenericComment; |
| 411 | |
| 412 | // If not, check if we have an immediate operand. |
| 413 | if (!Op.isImm()) |
| 414 | return std::string(); |
| 415 | |
| 416 | // And print its corresponding condition code if the immediate is a |
| 417 | // predicate. |
| 418 | int FirstPredOp = MI.findFirstPredOperandIdx(); |
| 419 | if (FirstPredOp != (int) OpIdx) |
| 420 | return std::string(); |
| 421 | |
| 422 | std::string CC = "CC::" ; |
| 423 | CC += ARMCondCodeToString(CC: (ARMCC::CondCodes)Op.getImm()); |
| 424 | return CC; |
| 425 | } |
| 426 | |
| 427 | bool ARMBaseInstrInfo::PredicateInstruction( |
| 428 | MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { |
| 429 | unsigned Opc = MI.getOpcode(); |
| 430 | if (isUncondBranchOpcode(Opc)) { |
| 431 | MI.setDesc(get(Opcode: getMatchingCondBranchOpcode(Opc))); |
| 432 | MachineInstrBuilder(*MI.getParent()->getParent(), MI) |
| 433 | .addImm(Val: Pred[0].getImm()) |
| 434 | .addReg(RegNo: Pred[1].getReg()); |
| 435 | return true; |
| 436 | } |
| 437 | |
| 438 | int PIdx = MI.findFirstPredOperandIdx(); |
| 439 | if (PIdx != -1) { |
| 440 | MachineOperand &PMO = MI.getOperand(i: PIdx); |
| 441 | PMO.setImm(Pred[0].getImm()); |
| 442 | MI.getOperand(i: PIdx+1).setReg(Pred[1].getReg()); |
| 443 | |
| 444 | // Thumb 1 arithmetic instructions do not set CPSR when executed inside an |
| 445 | // IT block. This affects how they are printed. |
| 446 | const MCInstrDesc &MCID = MI.getDesc(); |
| 447 | if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) { |
| 448 | assert(MCID.operands()[1].isOptionalDef() && |
| 449 | "CPSR def isn't expected operand" ); |
| 450 | assert((MI.getOperand(1).isDead() || |
| 451 | MI.getOperand(1).getReg() != ARM::CPSR) && |
| 452 | "if conversion tried to stop defining used CPSR" ); |
| 453 | MI.getOperand(i: 1).setReg(ARM::NoRegister); |
| 454 | } |
| 455 | |
| 456 | return true; |
| 457 | } |
| 458 | return false; |
| 459 | } |
| 460 | |
| 461 | bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, |
| 462 | ArrayRef<MachineOperand> Pred2) const { |
| 463 | if (Pred1.size() > 2 || Pred2.size() > 2) |
| 464 | return false; |
| 465 | |
| 466 | ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); |
| 467 | ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); |
| 468 | if (CC1 == CC2) |
| 469 | return true; |
| 470 | |
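|  | // One predicate subsumes another if it holds whenever the other does, |
|  | // e.g. HS (unsigned >=) subsumes HI (unsigned >), and AL subsumes |
|  | // every condition. |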
| 471 | switch (CC1) { |
| 472 | default: |
| 473 | return false; |
| 474 | case ARMCC::AL: |
| 475 | return true; |
| 476 | case ARMCC::HS: |
| 477 | return CC2 == ARMCC::HI; |
| 478 | case ARMCC::LS: |
| 479 | return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; |
| 480 | case ARMCC::GE: |
| 481 | return CC2 == ARMCC::GT; |
| 482 | case ARMCC::LE: |
| 483 | return CC2 == ARMCC::LT; |
| 484 | } |
| 485 | } |
| 486 | |
| 487 | bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI, |
| 488 | std::vector<MachineOperand> &Pred, |
| 489 | bool SkipDead) const { |
| 490 | bool Found = false; |
| 491 | for (const MachineOperand &MO : MI.operands()) { |
| 492 | bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(PhysReg: ARM::CPSR); |
| 493 | bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR; |
| 494 | if (ClobbersCPSR || IsCPSR) { |
| 495 | |
| 496 | // Filter out T1 instructions that have a dead CPSR, allowing |
| 497 | // IT blocks containing T1 instructions to be generated. |
| 498 | const MCInstrDesc &MCID = MI.getDesc(); |
| 499 | if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() && |
| 500 | SkipDead) |
| 501 | continue; |
| 502 | |
| 503 | Pred.push_back(x: MO); |
| 504 | Found = true; |
| 505 | } |
| 506 | } |
| 507 | |
| 508 | return Found; |
| 509 | } |
| 510 | |
| 511 | bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) { |
| 512 | for (const auto &MO : MI.operands()) |
| 513 | if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) |
| 514 | return true; |
| 515 | return false; |
| 516 | } |
| 517 | |
| 518 | static bool isEligibleForITBlock(const MachineInstr *MI) { |
| 519 | switch (MI->getOpcode()) { |
| 520 | default: return true; |
| 521 | case ARM::tADC: // ADC (register) T1 |
| 522 | case ARM::tADDi3: // ADD (immediate) T1 |
| 523 | case ARM::tADDi8: // ADD (immediate) T2 |
| 524 | case ARM::tADDrr: // ADD (register) T1 |
| 525 | case ARM::tAND: // AND (register) T1 |
| 526 | case ARM::tASRri: // ASR (immediate) T1 |
| 527 | case ARM::tASRrr: // ASR (register) T1 |
| 528 | case ARM::tBIC: // BIC (register) T1 |
| 529 | case ARM::tEOR: // EOR (register) T1 |
| 530 | case ARM::tLSLri: // LSL (immediate) T1 |
| 531 | case ARM::tLSLrr: // LSL (register) T1 |
| 532 | case ARM::tLSRri: // LSR (immediate) T1 |
| 533 | case ARM::tLSRrr: // LSR (register) T1 |
| 534 | case ARM::tMUL: // MUL T1 |
| 535 | case ARM::tMVN: // MVN (register) T1 |
| 536 | case ARM::tORR: // ORR (register) T1 |
| 537 | case ARM::tROR: // ROR (register) T1 |
| 538 | case ARM::tRSB: // RSB (immediate) T1 |
| 539 | case ARM::tSBC: // SBC (register) T1 |
| 540 | case ARM::tSUBi3: // SUB (immediate) T1 |
| 541 | case ARM::tSUBi8: // SUB (immediate) T2 |
| 542 | case ARM::tSUBrr: // SUB (register) T1 |
| 543 | return !ARMBaseInstrInfo::isCPSRDefined(MI: *MI); |
| 544 | } |
| 545 | } |
| 546 | |
| 547 | /// isPredicable - Return true if the specified instruction can be predicated. |
| 548 | /// By default, this returns true for every instruction with a |
| 549 | /// PredicateOperand. |
| 550 | bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { |
| 551 | if (!MI.isPredicable()) |
| 552 | return false; |
| 553 | |
| 554 | if (MI.isBundle()) |
| 555 | return false; |
| 556 | |
| 557 | if (!isEligibleForITBlock(MI: &MI)) |
| 558 | return false; |
| 559 | |
| 560 | const MachineFunction *MF = MI.getParent()->getParent(); |
| 561 | const ARMFunctionInfo *AFI = |
| 562 | MF->getInfo<ARMFunctionInfo>(); |
| 563 | |
| 564 | // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM. |
| 565 | // In their ARM encoding, they can't be encoded in a conditional form. |
| 566 | if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) |
| 567 | return false; |
| 568 | |
| 569 | // Make indirect control flow changes unpredicable when SLS mitigation is |
| 570 | // enabled. |
| 571 | const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>(); |
| 572 | if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI)) |
| 573 | return false; |
| 574 | if (ST.hardenSlsBlr() && isIndirectCall(MI)) |
| 575 | return false; |
| 576 | |
| 577 | if (AFI->isThumb2Function()) { |
| 578 | if (getSubtarget().restrictIT()) |
| 579 | return isV8EligibleForIT(Instr: &MI); |
| 580 | } |
| 581 | |
| 582 | return true; |
| 583 | } |
| 584 | |
| 585 | namespace llvm { |
| 586 | |
| 587 | template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) { |
| 588 | for (const MachineOperand &MO : MI->operands()) { |
| 589 | if (!MO.isReg() || MO.isUndef() || MO.isUse()) |
| 590 | continue; |
| 591 | if (MO.getReg() != ARM::CPSR) |
| 592 | continue; |
| 593 | if (!MO.isDead()) |
| 594 | return false; |
| 595 | } |
| 596 | // all definitions of CPSR are dead |
| 597 | return true; |
| 598 | } |
| 599 | |
| 600 | } // end namespace llvm |
| 601 | |
| 602 | /// GetInstSize - Return the size of the specified MachineInstr. |
| 603 | /// |
| 604 | unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { |
| 605 | const MachineBasicBlock &MBB = *MI.getParent(); |
| 606 | const MachineFunction *MF = MBB.getParent(); |
| 607 | const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); |
| 608 | |
| 609 | const MCInstrDesc &MCID = MI.getDesc(); |
| 610 | |
| 611 | switch (MI.getOpcode()) { |
| 612 | default: |
| 613 | // Return the size specified in the .td file. If there is none, return 0, |
| 614 | // as we can't define a default size (Thumb1 instructions are 2 bytes, |
| 615 | // Thumb2 instructions are 2-4 bytes, and ARM instructions are 4 bytes), |
| 616 | // in contrast to AArch64 instructions, which have a default size of |
| 617 | // 4 bytes, for example. |
| 618 | return MCID.getSize(); |
| 619 | case TargetOpcode::BUNDLE: |
| 620 | return getInstBundleLength(MI); |
| 621 | case ARM::CONSTPOOL_ENTRY: |
| 622 | case ARM::JUMPTABLE_INSTS: |
| 623 | case ARM::JUMPTABLE_ADDRS: |
| 624 | case ARM::JUMPTABLE_TBB: |
| 625 | case ARM::JUMPTABLE_TBH: |
| 626 | // If this machine instr is a constant pool entry, its size is recorded as |
| 627 | // operand #2. |
| 628 | return MI.getOperand(i: 2).getImm(); |
| 629 | case ARM::SPACE: |
| 630 | return MI.getOperand(i: 1).getImm(); |
| 631 | case ARM::INLINEASM: |
| 632 | case ARM::INLINEASM_BR: { |
| 633 | // If this machine instr is an inline asm, measure it. |
| 634 | unsigned Size = getInlineAsmLength(Str: MI.getOperand(i: 0).getSymbolName(), MAI: *MAI); |
| 635 | if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction()) |
| 636 | Size = alignTo(Value: Size, Align: 4); |
| 637 | return Size; |
| 638 | } |
| 639 | } |
| 640 | } |
| 641 | |
| 642 | unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const { |
| 643 | unsigned Size = 0; |
| 644 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
| 645 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
| 646 | while (++I != E && I->isInsideBundle()) { |
| 647 | assert(!I->isBundle() && "No nested bundle!" ); |
| 648 | Size += getInstSizeInBytes(MI: *I); |
| 649 | } |
| 650 | return Size; |
| 651 | } |
| 652 | |
| 653 | void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB, |
| 654 | MachineBasicBlock::iterator I, |
| 655 | MCRegister DestReg, bool KillSrc, |
| 656 | const ARMSubtarget &Subtarget) const { |
| 657 | unsigned Opc = Subtarget.isThumb() |
| 658 | ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR) |
| 659 | : ARM::MRS; |
| 660 | |
| 661 | MachineInstrBuilder MIB = |
| 662 | BuildMI(BB&: MBB, I, MIMD: I->getDebugLoc(), MCID: get(Opcode: Opc), DestReg); |
| 663 | |
| 664 | // There is only 1 A/R class MRS instruction, and it always refers to |
| 665 | // APSR. However, there are lots of other possibilities on M-class cores. |
| 666 | if (Subtarget.isMClass()) |
| 667 | MIB.addImm(Val: 0x800); |
| 668 | |
| 669 | MIB.add(MOs: predOps(Pred: ARMCC::AL)) |
| 670 | .addReg(RegNo: ARM::CPSR, flags: RegState::Implicit | getKillRegState(B: KillSrc)); |
| 671 | } |
| 672 | |
| 673 | void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, |
| 674 | MachineBasicBlock::iterator I, |
| 675 | MCRegister SrcReg, bool KillSrc, |
| 676 | const ARMSubtarget &Subtarget) const { |
| 677 | unsigned Opc = Subtarget.isThumb() |
| 678 | ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR) |
| 679 | : ARM::MSR; |
| 680 | |
| 681 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: I->getDebugLoc(), MCID: get(Opcode: Opc)); |
| 682 | |
| 683 | if (Subtarget.isMClass()) |
| 684 | MIB.addImm(Val: 0x800); |
| 685 | else |
| 686 | MIB.addImm(Val: 8); |
| 687 | |
| 688 | MIB.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc)) |
| 689 | .add(MOs: predOps(Pred: ARMCC::AL)) |
| 690 | .addReg(RegNo: ARM::CPSR, flags: RegState::Implicit | RegState::Define); |
| 691 | } |
| 692 | |
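|  | // The helpers below append the MVE vpred operand bundle to an MIB: a |
|  | // "vpred_n" bundle is (predication mode, VPR mask register, tp_reg), |
|  | // while "vpred_r" additionally names the register that supplies the |
|  | // value of any inactive lanes. |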
| 693 | void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) { |
| 694 | MIB.addImm(Val: ARMVCC::None); |
| 695 | MIB.addReg(RegNo: 0); |
| 696 | MIB.addReg(RegNo: 0); // tp_reg |
| 697 | } |
| 698 | |
| 699 | void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, |
| 700 | Register DestReg) { |
| 701 | addUnpredicatedMveVpredNOp(MIB); |
| 702 | MIB.addReg(RegNo: DestReg, flags: RegState::Undef); |
| 703 | } |
| 704 | |
| 705 | void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) { |
| 706 | MIB.addImm(Val: Cond); |
| 707 | MIB.addReg(RegNo: ARM::VPR, flags: RegState::Implicit); |
| 708 | MIB.addReg(RegNo: 0); // tp_reg |
| 709 | } |
| 710 | |
| 711 | void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB, |
| 712 | unsigned Cond, unsigned Inactive) { |
| 713 | addPredicatedMveVpredNOp(MIB, Cond); |
| 714 | MIB.addReg(RegNo: Inactive); |
| 715 | } |
| 716 | |
| 717 | void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, |
| 718 | MachineBasicBlock::iterator I, |
| 719 | const DebugLoc &DL, Register DestReg, |
| 720 | Register SrcReg, bool KillSrc, |
| 721 | bool RenamableDest, |
| 722 | bool RenamableSrc) const { |
| 723 | bool GPRDest = ARM::GPRRegClass.contains(Reg: DestReg); |
| 724 | bool GPRSrc = ARM::GPRRegClass.contains(Reg: SrcReg); |
| 725 | |
| 726 | if (GPRDest && GPRSrc) { |
| 727 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::MOVr), DestReg) |
| 728 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc)) |
| 729 | .add(MOs: predOps(Pred: ARMCC::AL)) |
| 730 | .add(MO: condCodeOp()); |
| 731 | return; |
| 732 | } |
| 733 | |
| 734 | bool SPRDest = ARM::SPRRegClass.contains(Reg: DestReg); |
| 735 | bool SPRSrc = ARM::SPRRegClass.contains(Reg: SrcReg); |
| 736 | |
| 737 | unsigned Opc = 0; |
| 738 | if (SPRDest && SPRSrc) |
| 739 | Opc = ARM::VMOVS; |
| 740 | else if (GPRDest && SPRSrc) |
| 741 | Opc = ARM::VMOVRS; |
| 742 | else if (SPRDest && GPRSrc) |
| 743 | Opc = ARM::VMOVSR; |
| 744 | else if (ARM::DPRRegClass.contains(Reg1: DestReg, Reg2: SrcReg) && Subtarget.hasFP64()) |
| 745 | Opc = ARM::VMOVD; |
| 746 | else if (ARM::QPRRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) |
| 747 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy; |
| 748 | |
| 749 | if (Opc) { |
| 750 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: Opc), DestReg); |
| 751 | MIB.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc)); |
| 752 | if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) |
| 753 | MIB.addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc)); |
| 754 | if (Opc == ARM::MVE_VORR) |
| 755 | addUnpredicatedMveVpredROp(MIB, DestReg); |
| 756 | else if (Opc != ARM::MQPRCopy) |
| 757 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
| 758 | return; |
| 759 | } |
| 760 | |
| 761 | // Handle register classes that require multiple instructions. |
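|  | // (For example, with NEON a QQPR copy is emitted as two VORRq |
|  | // instructions, one for each qsub_0/qsub_1 sub-register of the tuple.) |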
| 762 | unsigned BeginIdx = 0; |
| 763 | unsigned SubRegs = 0; |
| 764 | int Spacing = 1; |
| 765 | |
| 766 | // Use VORRq when possible. |
| 767 | if (ARM::QQPRRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) { |
| 768 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR; |
| 769 | BeginIdx = ARM::qsub_0; |
| 770 | SubRegs = 2; |
| 771 | } else if (ARM::QQQQPRRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) { |
| 772 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR; |
| 773 | BeginIdx = ARM::qsub_0; |
| 774 | SubRegs = 4; |
| 775 | // Fall back to VMOVD. |
| 776 | } else if (ARM::DPairRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) { |
| 777 | Opc = ARM::VMOVD; |
| 778 | BeginIdx = ARM::dsub_0; |
| 779 | SubRegs = 2; |
| 780 | } else if (ARM::DTripleRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) { |
| 781 | Opc = ARM::VMOVD; |
| 782 | BeginIdx = ARM::dsub_0; |
| 783 | SubRegs = 3; |
| 784 | } else if (ARM::DQuadRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) { |
| 785 | Opc = ARM::VMOVD; |
| 786 | BeginIdx = ARM::dsub_0; |
| 787 | SubRegs = 4; |
| 788 | } else if (ARM::GPRPairRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) { |
| 789 | Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr; |
| 790 | BeginIdx = ARM::gsub_0; |
| 791 | SubRegs = 2; |
| 792 | } else if (ARM::DPairSpcRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) { |
| 793 | Opc = ARM::VMOVD; |
| 794 | BeginIdx = ARM::dsub_0; |
| 795 | SubRegs = 2; |
| 796 | Spacing = 2; |
| 797 | } else if (ARM::DTripleSpcRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) { |
| 798 | Opc = ARM::VMOVD; |
| 799 | BeginIdx = ARM::dsub_0; |
| 800 | SubRegs = 3; |
| 801 | Spacing = 2; |
| 802 | } else if (ARM::DQuadSpcRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) { |
| 803 | Opc = ARM::VMOVD; |
| 804 | BeginIdx = ARM::dsub_0; |
| 805 | SubRegs = 4; |
| 806 | Spacing = 2; |
| 807 | } else if (ARM::DPRRegClass.contains(Reg1: DestReg, Reg2: SrcReg) && |
| 808 | !Subtarget.hasFP64()) { |
| 809 | Opc = ARM::VMOVS; |
| 810 | BeginIdx = ARM::ssub_0; |
| 811 | SubRegs = 2; |
| 812 | } else if (SrcReg == ARM::CPSR) { |
| 813 | copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget); |
| 814 | return; |
| 815 | } else if (DestReg == ARM::CPSR) { |
| 816 | copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget); |
| 817 | return; |
| 818 | } else if (DestReg == ARM::VPR) { |
| 819 | assert(ARM::GPRRegClass.contains(SrcReg)); |
| 820 | BuildMI(BB&: MBB, I, MIMD: I->getDebugLoc(), MCID: get(Opcode: ARM::VMSR_P0), DestReg) |
| 821 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc)) |
| 822 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 823 | return; |
| 824 | } else if (SrcReg == ARM::VPR) { |
| 825 | assert(ARM::GPRRegClass.contains(DestReg)); |
| 826 | BuildMI(BB&: MBB, I, MIMD: I->getDebugLoc(), MCID: get(Opcode: ARM::VMRS_P0), DestReg) |
| 827 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc)) |
| 828 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 829 | return; |
| 830 | } else if (DestReg == ARM::FPSCR_NZCV) { |
| 831 | assert(ARM::GPRRegClass.contains(SrcReg)); |
| 832 | BuildMI(BB&: MBB, I, MIMD: I->getDebugLoc(), MCID: get(Opcode: ARM::VMSR_FPSCR_NZCVQC), DestReg) |
| 833 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc)) |
| 834 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 835 | return; |
| 836 | } else if (SrcReg == ARM::FPSCR_NZCV) { |
| 837 | assert(ARM::GPRRegClass.contains(DestReg)); |
| 838 | BuildMI(BB&: MBB, I, MIMD: I->getDebugLoc(), MCID: get(Opcode: ARM::VMRS_FPSCR_NZCVQC), DestReg) |
| 839 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc)) |
| 840 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 841 | return; |
| 842 | } |
| 843 | |
| 844 | assert(Opc && "Impossible reg-to-reg copy" ); |
| 845 | |
| 846 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
| 847 | MachineInstrBuilder Mov; |
| 848 | |
| 849 | // Copy register tuples backward when the first Dest reg overlaps with SrcReg. |
| 850 | if (TRI->regsOverlap(RegA: SrcReg, RegB: TRI->getSubReg(Reg: DestReg, Idx: BeginIdx))) { |
| 851 | BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing); |
| 852 | Spacing = -Spacing; |
| 853 | } |
| 854 | #ifndef NDEBUG |
| 855 | SmallSet<unsigned, 4> DstRegs; |
| 856 | #endif |
| 857 | for (unsigned i = 0; i != SubRegs; ++i) { |
| 858 | Register Dst = TRI->getSubReg(Reg: DestReg, Idx: BeginIdx + i * Spacing); |
| 859 | Register Src = TRI->getSubReg(Reg: SrcReg, Idx: BeginIdx + i * Spacing); |
| 860 | assert(Dst && Src && "Bad sub-register" ); |
| 861 | #ifndef NDEBUG |
| 862 | assert(!DstRegs.count(Src) && "destructive vector copy" ); |
| 863 | DstRegs.insert(Dst); |
| 864 | #endif |
| 865 | Mov = BuildMI(BB&: MBB, I, MIMD: I->getDebugLoc(), MCID: get(Opcode: Opc), DestReg: Dst).addReg(RegNo: Src); |
| 866 | // VORR (NEON or MVE) takes two source operands. |
| 867 | if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) { |
| 868 | Mov.addReg(RegNo: Src); |
| 869 | } |
| 870 | // MVE VORR takes predicate operands in place of an ordinary condition. |
| 871 | if (Opc == ARM::MVE_VORR) |
| 872 | addUnpredicatedMveVpredROp(MIB&: Mov, DestReg: Dst); |
| 873 | else |
| 874 | Mov = Mov.add(MOs: predOps(Pred: ARMCC::AL)); |
| 875 | // MOVr can set CC. |
| 876 | if (Opc == ARM::MOVr) |
| 877 | Mov = Mov.add(MO: condCodeOp()); |
| 878 | } |
| 879 | // Add implicit super-register defs and kills to the last instruction. |
| 880 | Mov->addRegisterDefined(Reg: DestReg, RegInfo: TRI); |
| 881 | if (KillSrc) |
| 882 | Mov->addRegisterKilled(IncomingReg: SrcReg, RegInfo: TRI); |
| 883 | } |
| 884 | |
| 885 | std::optional<DestSourcePair> |
| 886 | ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { |
| 887 | // VMOVRRD is also a copy instruction, but it requires special |
| 888 | // handling, so it is a more complex copy than the ones we |
| 889 | // consider here. For recognition of such instructions, the |
| 890 | // isExtractSubregLike MI interface function could be used |
| 891 | // instead. |
| 892 | // VORRq is considered a move only if its two source inputs are |
| 893 | // the same register. |
| 894 | if (!MI.isMoveReg() || |
| 895 | (MI.getOpcode() == ARM::VORRq && |
| 896 | MI.getOperand(i: 1).getReg() != MI.getOperand(i: 2).getReg())) |
| 897 | return std::nullopt; |
| 898 | return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)}; |
| 899 | } |
| 900 | |
| 901 | std::optional<ParamLoadedValue> |
| 902 | ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI, |
| 903 | Register Reg) const { |
| 904 | if (auto DstSrcPair = isCopyInstrImpl(MI)) { |
| 905 | Register DstReg = DstSrcPair->Destination->getReg(); |
| 906 | |
| 907 | // TODO: We don't handle cases where the forwarding reg is narrower/wider |
| 908 | // than the copy registers. Consider for example: |
| 909 | // |
| 910 | // s16 = VMOVS s0 |
| 911 | // s17 = VMOVS s1 |
| 912 | // call @callee(d0) |
| 913 | // |
| 914 | // We'd like to describe the call site value of d0 as d8, but this requires |
| 915 | // gathering and merging the descriptions for the two VMOVS instructions. |
| 916 | // |
| 917 | // We also don't handle the reverse situation, where the forwarding reg is |
| 918 | // narrower than the copy destination: |
| 919 | // |
| 920 | // d8 = VMOVD d0 |
| 921 | // call @callee(s1) |
| 922 | // |
| 923 | // We need to produce a fragment description (the call site value of s1 is |
| 924 | // /not/ just d8). |
| 925 | if (DstReg != Reg) |
| 926 | return std::nullopt; |
| 927 | } |
| 928 | return TargetInstrInfo::describeLoadedValue(MI, Reg); |
| 929 | } |
| 930 | |
| 931 | const MachineInstrBuilder & |
| 932 | ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, |
| 933 | unsigned SubIdx, unsigned State, |
| 934 | const TargetRegisterInfo *TRI) const { |
| 935 | if (!SubIdx) |
| 936 | return MIB.addReg(RegNo: Reg, flags: State); |
| 937 | |
| 938 | if (Register::isPhysicalRegister(Reg)) |
| 939 | return MIB.addReg(RegNo: TRI->getSubReg(Reg, Idx: SubIdx), flags: State); |
| 940 | return MIB.addReg(RegNo: Reg, flags: State, SubReg: SubIdx); |
| 941 | } |
| 942 | |
| 943 | void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, |
| 944 | MachineBasicBlock::iterator I, |
| 945 | Register SrcReg, bool isKill, int FI, |
| 946 | const TargetRegisterClass *RC, |
| 947 | const TargetRegisterInfo *TRI, |
| 948 | Register VReg, |
| 949 | MachineInstr::MIFlag Flags) const { |
| 950 | MachineFunction &MF = *MBB.getParent(); |
| 951 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 952 | Align Alignment = MFI.getObjectAlign(ObjectIdx: FI); |
| 953 | |
| 954 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
| 955 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI), F: MachineMemOperand::MOStore, |
| 956 | Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: Alignment); |
| 957 | |
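|  | // The store opcode is selected from the register class's spill size; |
|  | // for example, a 4-byte GPR spill uses STRi12, while a 16-byte MVE |
|  | // vector spill uses MVE_VSTRWU32. |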
| 958 | switch (TRI->getSpillSize(RC: *RC)) { |
| 959 | case 2: |
| 960 | if (ARM::HPRRegClass.hasSubClassEq(RC)) { |
| 961 | BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VSTRH)) |
| 962 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill)) |
| 963 | .addFrameIndex(Idx: FI) |
| 964 | .addImm(Val: 0) |
| 965 | .addMemOperand(MMO) |
| 966 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 967 | } else |
| 968 | llvm_unreachable("Unknown reg class!" ); |
| 969 | break; |
| 970 | case 4: |
| 971 | if (ARM::GPRRegClass.hasSubClassEq(RC)) { |
| 972 | BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::STRi12)) |
| 973 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill)) |
| 974 | .addFrameIndex(Idx: FI) |
| 975 | .addImm(Val: 0) |
| 976 | .addMemOperand(MMO) |
| 977 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 978 | } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { |
| 979 | BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VSTRS)) |
| 980 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill)) |
| 981 | .addFrameIndex(Idx: FI) |
| 982 | .addImm(Val: 0) |
| 983 | .addMemOperand(MMO) |
| 984 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 985 | } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) { |
| 986 | BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VSTR_P0_off)) |
| 987 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill)) |
| 988 | .addFrameIndex(Idx: FI) |
| 989 | .addImm(Val: 0) |
| 990 | .addMemOperand(MMO) |
| 991 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 992 | } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) { |
| 993 | BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VSTR_FPSCR_NZCVQC_off)) |
| 994 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill)) |
| 995 | .addFrameIndex(Idx: FI) |
| 996 | .addImm(Val: 0) |
| 997 | .addMemOperand(MMO) |
| 998 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 999 | } else |
| 1000 | llvm_unreachable("Unknown reg class!" ); |
| 1001 | break; |
| 1002 | case 8: |
| 1003 | if (ARM::DPRRegClass.hasSubClassEq(RC)) { |
| 1004 | BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VSTRD)) |
| 1005 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill)) |
| 1006 | .addFrameIndex(Idx: FI) |
| 1007 | .addImm(Val: 0) |
| 1008 | .addMemOperand(MMO) |
| 1009 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1010 | } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { |
| 1011 | if (Subtarget.hasV5TEOps()) { |
| 1012 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::STRD)); |
| 1013 | AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::gsub_0, State: getKillRegState(B: isKill), TRI); |
| 1014 | AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::gsub_1, State: 0, TRI); |
| 1015 | MIB.addFrameIndex(Idx: FI).addReg(RegNo: 0).addImm(Val: 0).addMemOperand(MMO) |
| 1016 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1017 | } else { |
| 1018 | // Fall back to the STM instruction, which has existed since the dawn |
| 1019 | // of time. |
| 1020 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::STMIA)) |
| 1021 | .addFrameIndex(Idx: FI) |
| 1022 | .addMemOperand(MMO) |
| 1023 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1024 | AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::gsub_0, State: getKillRegState(B: isKill), TRI); |
| 1025 | AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::gsub_1, State: 0, TRI); |
| 1026 | } |
| 1027 | } else |
| 1028 | llvm_unreachable("Unknown reg class!" ); |
| 1029 | break; |
| 1030 | case 16: |
| 1031 | if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) { |
| 1032 | // Use aligned spills if the stack can be realigned. |
| 1033 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) { |
| 1034 | BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VST1q64)) |
| 1035 | .addFrameIndex(Idx: FI) |
| 1036 | .addImm(Val: 16) |
| 1037 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill)) |
| 1038 | .addMemOperand(MMO) |
| 1039 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1040 | } else { |
| 1041 | BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VSTMQIA)) |
| 1042 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill)) |
| 1043 | .addFrameIndex(Idx: FI) |
| 1044 | .addMemOperand(MMO) |
| 1045 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1046 | } |
| 1047 | } else if (ARM::QPRRegClass.hasSubClassEq(RC) && |
| 1048 | Subtarget.hasMVEIntegerOps()) { |
| 1049 | auto MIB = BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::MVE_VSTRWU32)); |
| 1050 | MIB.addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill)) |
| 1051 | .addFrameIndex(Idx: FI) |
| 1052 | .addImm(Val: 0) |
| 1053 | .addMemOperand(MMO); |
| 1054 | addUnpredicatedMveVpredNOp(MIB); |
| 1055 | } else |
| 1056 | llvm_unreachable("Unknown reg class!" ); |
| 1057 | break; |
| 1058 | case 24: |
| 1059 | if (ARM::DTripleRegClass.hasSubClassEq(RC)) { |
| 1060 | // Use aligned spills if the stack can be realigned. |
| 1061 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && |
| 1062 | Subtarget.hasNEON()) { |
| 1063 | BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VST1d64TPseudo)) |
| 1064 | .addFrameIndex(Idx: FI) |
| 1065 | .addImm(Val: 16) |
| 1066 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill)) |
| 1067 | .addMemOperand(MMO) |
| 1068 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1069 | } else { |
| 1070 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DebugLoc(), |
| 1071 | MCID: get(Opcode: ARM::VSTMDIA)) |
| 1072 | .addFrameIndex(Idx: FI) |
| 1073 | .add(MOs: predOps(Pred: ARMCC::AL)) |
| 1074 | .addMemOperand(MMO); |
| 1075 | MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_0, State: getKillRegState(B: isKill), TRI); |
| 1076 | MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_1, State: 0, TRI); |
| 1077 | AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_2, State: 0, TRI); |
| 1078 | } |
| 1079 | } else |
| 1080 | llvm_unreachable("Unknown reg class!" ); |
| 1081 | break; |
| 1082 | case 32: |
| 1083 | if (ARM::QQPRRegClass.hasSubClassEq(RC) || |
| 1084 | ARM::MQQPRRegClass.hasSubClassEq(RC) || |
| 1085 | ARM::DQuadRegClass.hasSubClassEq(RC)) { |
| 1086 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && |
| 1087 | Subtarget.hasNEON()) { |
| 1088 | // FIXME: It's possible to only store part of the QQ register if the |
| 1089 | // spilled def has a sub-register index. |
| 1090 | BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VST1d64QPseudo)) |
| 1091 | .addFrameIndex(Idx: FI) |
| 1092 | .addImm(Val: 16) |
| 1093 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill)) |
| 1094 | .addMemOperand(MMO) |
| 1095 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1096 | } else if (Subtarget.hasMVEIntegerOps()) { |
| 1097 | BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::MQQPRStore)) |
| 1098 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill)) |
| 1099 | .addFrameIndex(Idx: FI) |
| 1100 | .addMemOperand(MMO); |
| 1101 | } else { |
| 1102 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DebugLoc(), |
| 1103 | MCID: get(Opcode: ARM::VSTMDIA)) |
| 1104 | .addFrameIndex(Idx: FI) |
| 1105 | .add(MOs: predOps(Pred: ARMCC::AL)) |
| 1106 | .addMemOperand(MMO); |
| 1107 | MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_0, State: getKillRegState(B: isKill), TRI); |
| 1108 | MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_1, State: 0, TRI); |
| 1109 | MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_2, State: 0, TRI); |
| 1110 | AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_3, State: 0, TRI); |
| 1111 | } |
| 1112 | } else |
| 1113 | llvm_unreachable("Unknown reg class!" ); |
| 1114 | break; |
| 1115 | case 64: |
| 1116 | if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) && |
| 1117 | Subtarget.hasMVEIntegerOps()) { |
| 1118 | BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::MQQQQPRStore)) |
| 1119 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill)) |
| 1120 | .addFrameIndex(Idx: FI) |
| 1121 | .addMemOperand(MMO); |
| 1122 | } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { |
| 1123 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DebugLoc(), MCID: get(Opcode: ARM::VSTMDIA)) |
| 1124 | .addFrameIndex(Idx: FI) |
| 1125 | .add(MOs: predOps(Pred: ARMCC::AL)) |
| 1126 | .addMemOperand(MMO); |
| 1127 | MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_0, State: getKillRegState(B: isKill), TRI); |
| 1128 | MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_1, State: 0, TRI); |
| 1129 | MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_2, State: 0, TRI); |
| 1130 | MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_3, State: 0, TRI); |
| 1131 | MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_4, State: 0, TRI); |
| 1132 | MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_5, State: 0, TRI); |
| 1133 | MIB = AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_6, State: 0, TRI); |
| 1134 | AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::dsub_7, State: 0, TRI); |
| 1135 | } else |
| 1136 | llvm_unreachable("Unknown reg class!" ); |
| 1137 | break; |
| 1138 | default: |
| 1139 | llvm_unreachable("Unknown reg class!" ); |
| 1140 | } |
| 1141 | } |
| 1142 | |
| 1143 | Register ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI, |
| 1144 | int &FrameIndex) const { |
| 1145 | switch (MI.getOpcode()) { |
| 1146 | default: break; |
| 1147 | case ARM::STRrs: |
| 1148 | case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. |
| 1149 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isReg() && |
| 1150 | MI.getOperand(i: 3).isImm() && MI.getOperand(i: 2).getReg() == 0 && |
| 1151 | MI.getOperand(i: 3).getImm() == 0) { |
| 1152 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
| 1153 | return MI.getOperand(i: 0).getReg(); |
| 1154 | } |
| 1155 | break; |
| 1156 | case ARM::STRi12: |
| 1157 | case ARM::t2STRi12: |
| 1158 | case ARM::tSTRspi: |
| 1159 | case ARM::VSTRD: |
| 1160 | case ARM::VSTRS: |
| 1161 | case ARM::VSTRH: |
| 1162 | case ARM::VSTR_P0_off: |
| 1163 | case ARM::VSTR_FPSCR_NZCVQC_off: |
| 1164 | case ARM::MVE_VSTRWU32: |
| 1165 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() && |
| 1166 | MI.getOperand(i: 2).getImm() == 0) { |
| 1167 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
| 1168 | return MI.getOperand(i: 0).getReg(); |
| 1169 | } |
| 1170 | break; |
| 1171 | case ARM::VST1q64: |
| 1172 | case ARM::VST1d64TPseudo: |
| 1173 | case ARM::VST1d64QPseudo: |
| 1174 | if (MI.getOperand(i: 0).isFI() && MI.getOperand(i: 2).getSubReg() == 0) { |
| 1175 | FrameIndex = MI.getOperand(i: 0).getIndex(); |
| 1176 | return MI.getOperand(i: 2).getReg(); |
| 1177 | } |
| 1178 | break; |
| 1179 | case ARM::VSTMQIA: |
| 1180 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 0).getSubReg() == 0) { |
| 1181 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
| 1182 | return MI.getOperand(i: 0).getReg(); |
| 1183 | } |
| 1184 | break; |
| 1185 | case ARM::MQQPRStore: |
| 1186 | case ARM::MQQQQPRStore: |
| 1187 | if (MI.getOperand(i: 1).isFI()) { |
| 1188 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
| 1189 | return MI.getOperand(i: 0).getReg(); |
| 1190 | } |
| 1191 | break; |
| 1192 | } |
| 1193 | |
| 1194 | return 0; |
| 1195 | } |
| 1196 | |
| 1197 | Register ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, |
| 1198 | int &FrameIndex) const { |
| 1199 | SmallVector<const MachineMemOperand *, 1> Accesses; |
| 1200 | if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) && |
| 1201 | Accesses.size() == 1) { |
| 1202 | FrameIndex = |
| 1203 | cast<FixedStackPseudoSourceValue>(Val: Accesses.front()->getPseudoValue()) |
| 1204 | ->getFrameIndex(); |
| 1205 | return true; |
| 1206 | } |
| 1207 | return false; |
| 1208 | } |
| 1209 | |
| 1210 | void ARMBaseInstrInfo::loadRegFromStackSlot( |
| 1211 | MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg, |
| 1212 | int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, |
| 1213 | Register VReg, MachineInstr::MIFlag Flags) const { |
| 1214 | DebugLoc DL; |
| 1215 | if (I != MBB.end()) DL = I->getDebugLoc(); |
| 1216 | MachineFunction &MF = *MBB.getParent(); |
| 1217 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 1218 | const Align Alignment = MFI.getObjectAlign(ObjectIdx: FI); |
| 1219 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
| 1220 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI), F: MachineMemOperand::MOLoad, |
| 1221 | Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: Alignment); |
| 1222 | |
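|  | // Reloads mirror the spill selection in storeRegToStackSlot; for |
|  | // example, a 4-byte GPR reload uses LDRi12 and a 16-byte MVE vector |
|  | // reload uses MVE_VLDRWU32. |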
| 1223 | switch (TRI->getSpillSize(RC: *RC)) { |
| 1224 | case 2: |
| 1225 | if (ARM::HPRRegClass.hasSubClassEq(RC)) { |
| 1226 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDRH), DestReg) |
| 1227 | .addFrameIndex(Idx: FI) |
| 1228 | .addImm(Val: 0) |
| 1229 | .addMemOperand(MMO) |
| 1230 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1231 | } else |
| 1232 | llvm_unreachable("Unknown reg class!" ); |
| 1233 | break; |
| 1234 | case 4: |
| 1235 | if (ARM::GPRRegClass.hasSubClassEq(RC)) { |
| 1236 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::LDRi12), DestReg) |
| 1237 | .addFrameIndex(Idx: FI) |
| 1238 | .addImm(Val: 0) |
| 1239 | .addMemOperand(MMO) |
| 1240 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1241 | } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { |
| 1242 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDRS), DestReg) |
| 1243 | .addFrameIndex(Idx: FI) |
| 1244 | .addImm(Val: 0) |
| 1245 | .addMemOperand(MMO) |
| 1246 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1247 | } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) { |
| 1248 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDR_P0_off), DestReg) |
| 1249 | .addFrameIndex(Idx: FI) |
| 1250 | .addImm(Val: 0) |
| 1251 | .addMemOperand(MMO) |
| 1252 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1253 | } else if (ARM::cl_FPSCR_NZCVRegClass.hasSubClassEq(RC)) { |
| 1254 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDR_FPSCR_NZCVQC_off), DestReg) |
| 1255 | .addFrameIndex(Idx: FI) |
| 1256 | .addImm(Val: 0) |
| 1257 | .addMemOperand(MMO) |
| 1258 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1259 | } else |
| 1260 | llvm_unreachable("Unknown reg class!" ); |
| 1261 | break; |
| 1262 | case 8: |
| 1263 | if (ARM::DPRRegClass.hasSubClassEq(RC)) { |
| 1264 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDRD), DestReg) |
| 1265 | .addFrameIndex(Idx: FI) |
| 1266 | .addImm(Val: 0) |
| 1267 | .addMemOperand(MMO) |
| 1268 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1269 | } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { |
| 1270 | MachineInstrBuilder MIB; |
| 1271 | |
| 1272 | if (Subtarget.hasV5TEOps()) { |
| 1273 | MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::LDRD)); |
| 1274 | AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_0, State: RegState::DefineNoRead, TRI); |
| 1275 | AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_1, State: RegState::DefineNoRead, TRI); |
| 1276 | MIB.addFrameIndex(Idx: FI).addReg(RegNo: 0).addImm(Val: 0).addMemOperand(MMO) |
| 1277 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1278 | } else { |
| 1279 | // Fall back to the LDM instruction, which has existed since the dawn of
| 1280 | // time.
| 1281 | MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::LDMIA)) |
| 1282 | .addFrameIndex(Idx: FI) |
| 1283 | .addMemOperand(MMO) |
| 1284 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1285 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_0, State: RegState::DefineNoRead, TRI); |
| 1286 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_1, State: RegState::DefineNoRead, TRI); |
| 1287 | } |
| 1288 | |
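|      | // The LDRD / LDMIA above defines the pair through its two halves, so when
|      | // DestReg is a physical register also mark the whole pair as implicitly
|      | // defined to keep super-register liveness correct.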
| 1289 | if (DestReg.isPhysical()) |
| 1290 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
| 1291 | } else |
| 1292 | llvm_unreachable("Unknown reg class!" ); |
| 1293 | break; |
| 1294 | case 16: |
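|      | // 128-bit spill slots: use the alignment-checking NEON VLD1q64 when the slot
|      | // is at least 16-byte aligned (or the stack can be realigned), otherwise
|      | // VLDMQIA; MVE targets use MVE_VLDRWU32 instead.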
| 1295 | if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) { |
| 1296 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) { |
| 1297 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLD1q64), DestReg) |
| 1298 | .addFrameIndex(Idx: FI) |
| 1299 | .addImm(Val: 16) |
| 1300 | .addMemOperand(MMO) |
| 1301 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1302 | } else { |
| 1303 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDMQIA), DestReg) |
| 1304 | .addFrameIndex(Idx: FI) |
| 1305 | .addMemOperand(MMO) |
| 1306 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1307 | } |
| 1308 | } else if (ARM::QPRRegClass.hasSubClassEq(RC) && |
| 1309 | Subtarget.hasMVEIntegerOps()) { |
| 1310 | auto MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::MVE_VLDRWU32), DestReg); |
| 1311 | MIB.addFrameIndex(Idx: FI) |
| 1312 | .addImm(Val: 0) |
| 1313 | .addMemOperand(MMO); |
| 1314 | addUnpredicatedMveVpredNOp(MIB); |
| 1315 | } else |
| 1316 | llvm_unreachable("Unknown reg class!" ); |
| 1317 | break; |
| 1318 | case 24: |
| 1319 | if (ARM::DTripleRegClass.hasSubClassEq(RC)) { |
| 1320 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && |
| 1321 | Subtarget.hasNEON()) { |
| 1322 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLD1d64TPseudo), DestReg) |
| 1323 | .addFrameIndex(Idx: FI) |
| 1324 | .addImm(Val: 16) |
| 1325 | .addMemOperand(MMO) |
| 1326 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1327 | } else { |
| 1328 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDMDIA)) |
| 1329 | .addFrameIndex(Idx: FI) |
| 1330 | .addMemOperand(MMO) |
| 1331 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1332 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_0, State: RegState::DefineNoRead, TRI); |
| 1333 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_1, State: RegState::DefineNoRead, TRI); |
| 1334 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_2, State: RegState::DefineNoRead, TRI); |
| 1335 | if (DestReg.isPhysical()) |
| 1336 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
| 1337 | } |
| 1338 | } else |
| 1339 | llvm_unreachable("Unknown reg class!" ); |
| 1340 | break; |
| 1341 | case 32: |
| 1342 | if (ARM::QQPRRegClass.hasSubClassEq(RC) || |
| 1343 | ARM::MQQPRRegClass.hasSubClassEq(RC) || |
| 1344 | ARM::DQuadRegClass.hasSubClassEq(RC)) { |
| 1345 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && |
| 1346 | Subtarget.hasNEON()) { |
| 1347 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLD1d64QPseudo), DestReg) |
| 1348 | .addFrameIndex(Idx: FI) |
| 1349 | .addImm(Val: 16) |
| 1350 | .addMemOperand(MMO) |
| 1351 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 1352 | } else if (Subtarget.hasMVEIntegerOps()) { |
| 1353 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::MQQPRLoad), DestReg) |
| 1354 | .addFrameIndex(Idx: FI) |
| 1355 | .addMemOperand(MMO); |
| 1356 | } else { |
| 1357 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDMDIA)) |
| 1358 | .addFrameIndex(Idx: FI) |
| 1359 | .add(MOs: predOps(Pred: ARMCC::AL)) |
| 1360 | .addMemOperand(MMO); |
| 1361 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_0, State: RegState::DefineNoRead, TRI); |
| 1362 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_1, State: RegState::DefineNoRead, TRI); |
| 1363 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_2, State: RegState::DefineNoRead, TRI); |
| 1364 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_3, State: RegState::DefineNoRead, TRI); |
| 1365 | if (DestReg.isPhysical()) |
| 1366 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
| 1367 | } |
| 1368 | } else |
| 1369 | llvm_unreachable("Unknown reg class!" ); |
| 1370 | break; |
| 1371 | case 64: |
| 1372 | if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) && |
| 1373 | Subtarget.hasMVEIntegerOps()) { |
| 1374 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::MQQQQPRLoad), DestReg) |
| 1375 | .addFrameIndex(Idx: FI) |
| 1376 | .addMemOperand(MMO); |
| 1377 | } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { |
| 1378 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDMDIA)) |
| 1379 | .addFrameIndex(Idx: FI) |
| 1380 | .add(MOs: predOps(Pred: ARMCC::AL)) |
| 1381 | .addMemOperand(MMO); |
| 1382 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_0, State: RegState::DefineNoRead, TRI); |
| 1383 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_1, State: RegState::DefineNoRead, TRI); |
| 1384 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_2, State: RegState::DefineNoRead, TRI); |
| 1385 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_3, State: RegState::DefineNoRead, TRI); |
| 1386 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_4, State: RegState::DefineNoRead, TRI); |
| 1387 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_5, State: RegState::DefineNoRead, TRI); |
| 1388 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_6, State: RegState::DefineNoRead, TRI); |
| 1389 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_7, State: RegState::DefineNoRead, TRI); |
| 1390 | if (DestReg.isPhysical()) |
| 1391 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
| 1392 | } else |
| 1393 | llvm_unreachable("Unknown reg class!" ); |
| 1394 | break; |
| 1395 | default: |
| 1396 | llvm_unreachable("Unknown regclass!" ); |
| 1397 | } |
| 1398 | } |
| 1399 | |
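|      | /// isLoadFromStackSlot - If MI is a load from a stack slot with no offset,
|      | /// set FrameIndex to the slot's frame index and return the destination
|      | /// register; otherwise return 0.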
| 1400 | Register ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, |
| 1401 | int &FrameIndex) const { |
| 1402 | switch (MI.getOpcode()) { |
| 1403 | default: break; |
| 1404 | case ARM::LDRrs: |
| 1405 | case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. |
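|      | // These forms take a register offset (operand 2) and a shift immediate
|      | // (operand 3); they only read a plain stack slot when the offset register
|      | // is 0 and the shift amount is 0.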
| 1406 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isReg() && |
| 1407 | MI.getOperand(i: 3).isImm() && MI.getOperand(i: 2).getReg() == 0 && |
| 1408 | MI.getOperand(i: 3).getImm() == 0) { |
| 1409 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
| 1410 | return MI.getOperand(i: 0).getReg(); |
| 1411 | } |
| 1412 | break; |
| 1413 | case ARM::LDRi12: |
| 1414 | case ARM::t2LDRi12: |
| 1415 | case ARM::tLDRspi: |
| 1416 | case ARM::VLDRD: |
| 1417 | case ARM::VLDRS: |
| 1418 | case ARM::VLDRH: |
| 1419 | case ARM::VLDR_P0_off: |
| 1420 | case ARM::VLDR_FPSCR_NZCVQC_off: |
| 1421 | case ARM::MVE_VLDRWU32: |
| 1422 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() && |
| 1423 | MI.getOperand(i: 2).getImm() == 0) { |
| 1424 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
| 1425 | return MI.getOperand(i: 0).getReg(); |
| 1426 | } |
| 1427 | break; |
| 1428 | case ARM::VLD1q64: |
| 1429 | case ARM::VLD1d8TPseudo: |
| 1430 | case ARM::VLD1d16TPseudo: |
| 1431 | case ARM::VLD1d32TPseudo: |
| 1432 | case ARM::VLD1d64TPseudo: |
| 1433 | case ARM::VLD1d8QPseudo: |
| 1434 | case ARM::VLD1d16QPseudo: |
| 1435 | case ARM::VLD1d32QPseudo: |
| 1436 | case ARM::VLD1d64QPseudo: |
| 1437 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 0).getSubReg() == 0) { |
| 1438 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
| 1439 | return MI.getOperand(i: 0).getReg(); |
| 1440 | } |
| 1441 | break; |
| 1442 | case ARM::VLDMQIA: |
| 1443 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 0).getSubReg() == 0) { |
| 1444 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
| 1445 | return MI.getOperand(i: 0).getReg(); |
| 1446 | } |
| 1447 | break; |
| 1448 | case ARM::MQQPRLoad: |
| 1449 | case ARM::MQQQQPRLoad: |
| 1450 | if (MI.getOperand(i: 1).isFI()) { |
| 1451 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
| 1452 | return MI.getOperand(i: 0).getReg(); |
| 1453 | } |
| 1454 | break; |
| 1455 | } |
| 1456 | |
| 1457 | return 0; |
| 1458 | } |
| 1459 | |
| 1460 | Register ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI, |
| 1461 | int &FrameIndex) const { |
| 1462 | SmallVector<const MachineMemOperand *, 1> Accesses; |
| 1463 | if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) && |
| 1464 | Accesses.size() == 1) { |
| 1465 | FrameIndex = |
| 1466 | cast<FixedStackPseudoSourceValue>(Val: Accesses.front()->getPseudoValue()) |
| 1467 | ->getFrameIndex(); |
| 1468 | return true; |
| 1469 | } |
| 1470 | return false; |
| 1471 | } |
| 1472 | |
| 1473 | /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
| 1474 | /// depending on whether the result is used. |
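|      | /// The MEMCPY pseudo operands, as consumed below, are: 0 = store-base
|      | /// write-back def, 1 = load-base write-back def, 2 = store base, 3 = load
|      | /// base, and operands from index 5 onwards are the scratch registers.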
| 1475 | void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { |
| 1476 | bool isThumb1 = Subtarget.isThumb1Only(); |
| 1477 | bool isThumb2 = Subtarget.isThumb2(); |
| 1478 | const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo(); |
| 1479 | |
| 1480 | DebugLoc dl = MI->getDebugLoc(); |
| 1481 | MachineBasicBlock *BB = MI->getParent(); |
| 1482 | |
| 1483 | MachineInstrBuilder LDM, STM; |
| 1484 | if (isThumb1 || !MI->getOperand(i: 1).isDead()) { |
| 1485 | MachineOperand LDWb(MI->getOperand(i: 1)); |
| 1486 | LDM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2LDMIA_UPD |
| 1487 | : isThumb1 ? ARM::tLDMIA_UPD |
| 1488 | : ARM::LDMIA_UPD)) |
| 1489 | .add(MO: LDWb); |
| 1490 | } else { |
| 1491 | LDM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2LDMIA : ARM::LDMIA)); |
| 1492 | } |
| 1493 | |
| 1494 | if (isThumb1 || !MI->getOperand(i: 0).isDead()) { |
| 1495 | MachineOperand STWb(MI->getOperand(i: 0)); |
| 1496 | STM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2STMIA_UPD |
| 1497 | : isThumb1 ? ARM::tSTMIA_UPD |
| 1498 | : ARM::STMIA_UPD)) |
| 1499 | .add(MO: STWb); |
| 1500 | } else { |
| 1501 | STM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2STMIA : ARM::STMIA)); |
| 1502 | } |
| 1503 | |
| 1504 | MachineOperand LDBase(MI->getOperand(i: 3)); |
| 1505 | LDM.add(MO: LDBase).add(MOs: predOps(Pred: ARMCC::AL)); |
| 1506 | |
| 1507 | MachineOperand STBase(MI->getOperand(i: 2)); |
| 1508 | STM.add(MO: STBase).add(MOs: predOps(Pred: ARMCC::AL)); |
| 1509 | |
| 1510 | // Sort the scratch registers into ascending order. |
| 1511 | const TargetRegisterInfo &TRI = getRegisterInfo(); |
| 1512 | SmallVector<unsigned, 6> ScratchRegs; |
| 1513 | for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands(), N: 5)) |
| 1514 | ScratchRegs.push_back(Elt: MO.getReg()); |
| 1515 | llvm::sort(C&: ScratchRegs, |
| 1516 | Comp: [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool { |
| 1517 | return TRI.getEncodingValue(Reg: Reg1) < |
| 1518 | TRI.getEncodingValue(Reg: Reg2); |
| 1519 | }); |
| 1520 | |
| 1521 | for (const auto &Reg : ScratchRegs) { |
| 1522 | LDM.addReg(RegNo: Reg, flags: RegState::Define); |
| 1523 | STM.addReg(RegNo: Reg, flags: RegState::Kill); |
| 1524 | } |
| 1525 | |
| 1526 | BB->erase(I: MI); |
| 1527 | } |
| 1528 | |
| 1529 | bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { |
| 1530 | if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { |
| 1531 | expandLoadStackGuard(MI); |
| 1532 | MI.getParent()->erase(I: MI); |
| 1533 | return true; |
| 1534 | } |
| 1535 | |
| 1536 | if (MI.getOpcode() == ARM::MEMCPY) { |
| 1537 | expandMEMCPY(MI); |
| 1538 | return true; |
| 1539 | } |
| 1540 | |
| 1541 | // This hook gets to expand COPY instructions before they become |
| 1542 | // copyPhysReg() calls. Look for VMOVS instructions that can legally be |
| 1543 | // widened to VMOVD. We prefer the VMOVD when possible because it may be |
| 1544 | // changed into a VORR that can go down the NEON pipeline. |
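|      | // For example, "%s0 = COPY %s2" is widened below into (roughly)
|      | // "%d0 = VMOVD undef %d1, 14, $noreg, implicit %s2".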
| 1545 | if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64()) |
| 1546 | return false; |
| 1547 | |
| 1548 | // Look for a copy between even S-registers. That is where we keep floats |
| 1549 | // when using NEON v2f32 instructions for f32 arithmetic. |
| 1550 | Register DstRegS = MI.getOperand(i: 0).getReg(); |
| 1551 | Register SrcRegS = MI.getOperand(i: 1).getReg(); |
| 1552 | if (!ARM::SPRRegClass.contains(Reg1: DstRegS, Reg2: SrcRegS)) |
| 1553 | return false; |
| 1554 | |
| 1555 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
| 1556 | MCRegister DstRegD = |
| 1557 | TRI->getMatchingSuperReg(Reg: DstRegS, SubIdx: ARM::ssub_0, RC: &ARM::DPRRegClass); |
| 1558 | MCRegister SrcRegD = |
| 1559 | TRI->getMatchingSuperReg(Reg: SrcRegS, SubIdx: ARM::ssub_0, RC: &ARM::DPRRegClass); |
| 1560 | if (!DstRegD || !SrcRegD) |
| 1561 | return false; |
| 1562 | |
| 1563 | // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only |
| 1564 | // legal if the COPY already defines the full DstRegD, and it isn't a |
| 1565 | // sub-register insertion. |
| 1566 | if (!MI.definesRegister(Reg: DstRegD, TRI) || MI.readsRegister(Reg: DstRegD, TRI)) |
| 1567 | return false; |
| 1568 | |
| 1569 | // A dead copy shouldn't show up here, but reject it just in case. |
| 1570 | if (MI.getOperand(i: 0).isDead()) |
| 1571 | return false; |
| 1572 | |
| 1573 | // All clear, widen the COPY. |
| 1574 | LLVM_DEBUG(dbgs() << "widening: " << MI); |
| 1575 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); |
| 1576 | |
| 1577 | // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg |
| 1578 | // or some other super-register. |
| 1579 | int ImpDefIdx = MI.findRegisterDefOperandIdx(Reg: DstRegD, /*TRI=*/nullptr); |
| 1580 | if (ImpDefIdx != -1) |
| 1581 | MI.removeOperand(OpNo: ImpDefIdx); |
| 1582 | |
| 1583 | // Change the opcode and operands. |
| 1584 | MI.setDesc(get(Opcode: ARM::VMOVD)); |
| 1585 | MI.getOperand(i: 0).setReg(DstRegD); |
| 1586 | MI.getOperand(i: 1).setReg(SrcRegD); |
| 1587 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
| 1588 | |
| 1589 | // We are now reading SrcRegD instead of SrcRegS. This may upset the |
| 1590 | // register scavenger and machine verifier, so we need to indicate that we |
| 1591 | // are reading an undefined value from SrcRegD, but a proper value from |
| 1592 | // SrcRegS. |
| 1593 | MI.getOperand(i: 1).setIsUndef(); |
| 1594 | MIB.addReg(RegNo: SrcRegS, flags: RegState::Implicit); |
| 1595 | |
| 1596 | // SrcRegD may actually contain an unrelated value in the ssub_1 |
| 1597 | // sub-register. Don't kill it. Only kill the ssub_0 sub-register. |
| 1598 | if (MI.getOperand(i: 1).isKill()) { |
| 1599 | MI.getOperand(i: 1).setIsKill(false); |
| 1600 | MI.addRegisterKilled(IncomingReg: SrcRegS, RegInfo: TRI, AddIfNotFound: true); |
| 1601 | } |
| 1602 | |
| 1603 | LLVM_DEBUG(dbgs() << "replaced by: " << MI); |
| 1604 | return true; |
| 1605 | } |
| 1606 | |
| 1607 | /// Create a copy of a const pool value. Update CPI to the new index and return |
| 1608 | /// the label UID. |
| 1609 | static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { |
| 1610 | MachineConstantPool *MCP = MF.getConstantPool(); |
| 1611 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
| 1612 | |
| 1613 | const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; |
| 1614 | assert(MCPE.isMachineConstantPoolEntry() && |
| 1615 | "Expecting a machine constantpool entry!" ); |
| 1616 | ARMConstantPoolValue *ACPV = |
| 1617 | static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); |
| 1618 | |
| 1619 | unsigned PCLabelId = AFI->createPICLabelUId(); |
| 1620 | ARMConstantPoolValue *NewCPV = nullptr; |
| 1621 | |
| 1622 | // FIXME: The below assumes PIC relocation model and that the function |
| 1623 | // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and |
| 1624 | // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR |
| 1625 | // instructions, so that's probably OK, but is PIC always correct when |
| 1626 | // we get here? |
| 1627 | if (ACPV->isGlobalValue()) |
| 1628 | NewCPV = ARMConstantPoolConstant::Create( |
| 1629 | C: cast<ARMConstantPoolConstant>(Val: ACPV)->getGV(), ID: PCLabelId, Kind: ARMCP::CPValue, |
| 1630 | PCAdj: 4, Modifier: ACPV->getModifier(), AddCurrentAddress: ACPV->mustAddCurrentAddress()); |
| 1631 | else if (ACPV->isExtSymbol()) |
| 1632 | NewCPV = ARMConstantPoolSymbol:: |
| 1633 | Create(C&: MF.getFunction().getContext(), |
| 1634 | s: cast<ARMConstantPoolSymbol>(Val: ACPV)->getSymbol(), ID: PCLabelId, PCAdj: 4); |
| 1635 | else if (ACPV->isBlockAddress()) |
| 1636 | NewCPV = ARMConstantPoolConstant:: |
| 1637 | Create(C: cast<ARMConstantPoolConstant>(Val: ACPV)->getBlockAddress(), ID: PCLabelId, |
| 1638 | Kind: ARMCP::CPBlockAddress, PCAdj: 4); |
| 1639 | else if (ACPV->isLSDA()) |
| 1640 | NewCPV = ARMConstantPoolConstant::Create(C: &MF.getFunction(), ID: PCLabelId, |
| 1641 | Kind: ARMCP::CPLSDA, PCAdj: 4); |
| 1642 | else if (ACPV->isMachineBasicBlock()) |
| 1643 | NewCPV = ARMConstantPoolMBB:: |
| 1644 | Create(C&: MF.getFunction().getContext(), |
| 1645 | mbb: cast<ARMConstantPoolMBB>(Val: ACPV)->getMBB(), ID: PCLabelId, PCAdj: 4); |
| 1646 | else |
| 1647 | llvm_unreachable("Unexpected ARM constantpool value type!!" ); |
| 1648 | CPI = MCP->getConstantPoolIndex(V: NewCPV, Alignment: MCPE.getAlign()); |
| 1649 | return PCLabelId; |
| 1650 | } |
| 1651 | |
| 1652 | void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB, |
| 1653 | MachineBasicBlock::iterator I, |
| 1654 | Register DestReg, unsigned SubIdx, |
| 1655 | const MachineInstr &Orig, |
| 1656 | const TargetRegisterInfo &TRI) const { |
| 1657 | unsigned Opcode = Orig.getOpcode(); |
| 1658 | switch (Opcode) { |
| 1659 | default: { |
| 1660 | MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig: &Orig); |
| 1661 | MI->substituteRegister(FromReg: Orig.getOperand(i: 0).getReg(), ToReg: DestReg, SubIdx, RegInfo: TRI); |
| 1662 | MBB.insert(I, MI); |
| 1663 | break; |
| 1664 | } |
| 1665 | case ARM::tLDRpci_pic: |
| 1666 | case ARM::t2LDRpci_pic: { |
| 1667 | MachineFunction &MF = *MBB.getParent(); |
| 1668 | unsigned CPI = Orig.getOperand(i: 1).getIndex(); |
| 1669 | unsigned PCLabelId = duplicateCPV(MF, CPI); |
| 1670 | BuildMI(BB&: MBB, I, MIMD: Orig.getDebugLoc(), MCID: get(Opcode), DestReg) |
| 1671 | .addConstantPoolIndex(Idx: CPI) |
| 1672 | .addImm(Val: PCLabelId) |
| 1673 | .cloneMemRefs(OtherMI: Orig); |
| 1674 | break; |
| 1675 | } |
| 1676 | } |
| 1677 | } |
| 1678 | |
| 1679 | MachineInstr & |
| 1680 | ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB, |
| 1681 | MachineBasicBlock::iterator InsertBefore, |
| 1682 | const MachineInstr &Orig) const { |
| 1683 | MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig); |
| 1684 | MachineBasicBlock::instr_iterator I = Cloned.getIterator(); |
| 1685 | for (;;) { |
| 1686 | switch (I->getOpcode()) { |
| 1687 | case ARM::tLDRpci_pic: |
| 1688 | case ARM::t2LDRpci_pic: { |
| 1689 | MachineFunction &MF = *MBB.getParent(); |
| 1690 | unsigned CPI = I->getOperand(i: 1).getIndex(); |
| 1691 | unsigned PCLabelId = duplicateCPV(MF, CPI); |
| 1692 | I->getOperand(i: 1).setIndex(CPI); |
| 1693 | I->getOperand(i: 2).setImm(PCLabelId); |
| 1694 | break; |
| 1695 | } |
| 1696 | } |
| 1697 | if (!I->isBundledWithSucc()) |
| 1698 | break; |
| 1699 | ++I; |
| 1700 | } |
| 1701 | return Cloned; |
| 1702 | } |
| 1703 | |
| 1704 | bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, |
| 1705 | const MachineInstr &MI1, |
| 1706 | const MachineRegisterInfo *MRI) const { |
| 1707 | unsigned Opcode = MI0.getOpcode(); |
| 1708 | if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic || |
| 1709 | Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic || |
| 1710 | Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr || |
| 1711 | Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel || |
| 1712 | Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || |
| 1713 | Opcode == ARM::t2MOV_ga_pcrel) { |
| 1714 | if (MI1.getOpcode() != Opcode) |
| 1715 | return false; |
| 1716 | if (MI0.getNumOperands() != MI1.getNumOperands()) |
| 1717 | return false; |
| 1718 | |
| 1719 | const MachineOperand &MO0 = MI0.getOperand(i: 1); |
| 1720 | const MachineOperand &MO1 = MI1.getOperand(i: 1); |
| 1721 | if (MO0.getOffset() != MO1.getOffset()) |
| 1722 | return false; |
| 1723 | |
| 1724 | if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr || |
| 1725 | Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel || |
| 1726 | Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || |
| 1727 | Opcode == ARM::t2MOV_ga_pcrel) |
| 1728 | // Ignore the PC labels. |
| 1729 | return MO0.getGlobal() == MO1.getGlobal(); |
| 1730 | |
| 1731 | const MachineFunction *MF = MI0.getParent()->getParent(); |
| 1732 | const MachineConstantPool *MCP = MF->getConstantPool(); |
| 1733 | int CPI0 = MO0.getIndex(); |
| 1734 | int CPI1 = MO1.getIndex(); |
| 1735 | const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; |
| 1736 | const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; |
| 1737 | bool isARMCP0 = MCPE0.isMachineConstantPoolEntry(); |
| 1738 | bool isARMCP1 = MCPE1.isMachineConstantPoolEntry(); |
| 1739 | if (isARMCP0 && isARMCP1) { |
| 1740 | ARMConstantPoolValue *ACPV0 = |
| 1741 | static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); |
| 1742 | ARMConstantPoolValue *ACPV1 = |
| 1743 | static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); |
| 1744 | return ACPV0->hasSameValue(ACPV: ACPV1); |
| 1745 | } else if (!isARMCP0 && !isARMCP1) { |
| 1746 | return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal; |
| 1747 | } |
| 1748 | return false; |
| 1749 | } else if (Opcode == ARM::PICLDR) { |
| 1750 | if (MI1.getOpcode() != Opcode) |
| 1751 | return false; |
| 1752 | if (MI0.getNumOperands() != MI1.getNumOperands()) |
| 1753 | return false; |
| 1754 | |
| 1755 | Register Addr0 = MI0.getOperand(i: 1).getReg(); |
| 1756 | Register Addr1 = MI1.getOperand(i: 1).getReg(); |
| 1757 | if (Addr0 != Addr1) { |
| 1758 | if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual()) |
| 1759 | return false; |
| 1760 | |
| 1761 | // This assumes SSA form. |
| 1762 | MachineInstr *Def0 = MRI->getVRegDef(Reg: Addr0); |
| 1763 | MachineInstr *Def1 = MRI->getVRegDef(Reg: Addr1); |
| 1764 | // Check if the loaded values, e.g. constant-pool entries for a global
| 1765 | // address, are the same.
| 1766 | if (!produceSameValue(MI0: *Def0, MI1: *Def1, MRI)) |
| 1767 | return false; |
| 1768 | } |
| 1769 | |
| 1770 | for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) { |
| 1771 | // %12 = PICLDR %11, 0, 14, %noreg |
| 1772 | const MachineOperand &MO0 = MI0.getOperand(i); |
| 1773 | const MachineOperand &MO1 = MI1.getOperand(i); |
| 1774 | if (!MO0.isIdenticalTo(Other: MO1)) |
| 1775 | return false; |
| 1776 | } |
| 1777 | return true; |
| 1778 | } |
| 1779 | |
| 1780 | return MI0.isIdenticalTo(Other: MI1, Check: MachineInstr::IgnoreVRegDefs); |
| 1781 | } |
| 1782 | |
| 1783 | /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to |
| 1784 | /// determine if two loads are loading from the same base address. It should |
| 1785 | /// only return true if the base pointers are the same and the only differences |
| 1786 | /// between the two addresses is the offset. It also returns the offsets by |
| 1787 | /// reference. |
| 1788 | /// |
| 1789 | /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched |
| 1790 | /// is permanently disabled. |
| 1791 | bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, |
| 1792 | int64_t &Offset1, |
| 1793 | int64_t &Offset2) const { |
| 1794 | // Don't worry about Thumb: just ARM and Thumb2. |
| 1795 | if (Subtarget.isThumb1Only()) return false; |
| 1796 | |
| 1797 | if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) |
| 1798 | return false; |
| 1799 | |
| 1800 | auto IsLoadOpcode = [&](unsigned Opcode) { |
| 1801 | switch (Opcode) { |
| 1802 | default: |
| 1803 | return false; |
| 1804 | case ARM::LDRi12: |
| 1805 | case ARM::LDRBi12: |
| 1806 | case ARM::LDRD: |
| 1807 | case ARM::LDRH: |
| 1808 | case ARM::LDRSB: |
| 1809 | case ARM::LDRSH: |
| 1810 | case ARM::VLDRD: |
| 1811 | case ARM::VLDRS: |
| 1812 | case ARM::t2LDRi8: |
| 1813 | case ARM::t2LDRBi8: |
| 1814 | case ARM::t2LDRDi8: |
| 1815 | case ARM::t2LDRSHi8: |
| 1816 | case ARM::t2LDRi12: |
| 1817 | case ARM::t2LDRBi12: |
| 1818 | case ARM::t2LDRSHi12: |
| 1819 | return true; |
| 1820 | } |
| 1821 | }; |
| 1822 | |
| 1823 | if (!IsLoadOpcode(Load1->getMachineOpcode()) || |
| 1824 | !IsLoadOpcode(Load2->getMachineOpcode())) |
| 1825 | return false; |
| 1826 | |
| 1827 | // Check if base addresses and chain operands match. |
| 1828 | if (Load1->getOperand(Num: 0) != Load2->getOperand(Num: 0) || |
| 1829 | Load1->getOperand(Num: 4) != Load2->getOperand(Num: 4)) |
| 1830 | return false; |
| 1831 | |
| 1832 | // Index should be Reg0. |
| 1833 | if (Load1->getOperand(Num: 3) != Load2->getOperand(Num: 3)) |
| 1834 | return false; |
| 1835 | |
| 1836 | // Determine the offsets. |
| 1837 | if (isa<ConstantSDNode>(Val: Load1->getOperand(Num: 1)) && |
| 1838 | isa<ConstantSDNode>(Val: Load2->getOperand(Num: 1))) { |
| 1839 | Offset1 = cast<ConstantSDNode>(Val: Load1->getOperand(Num: 1))->getSExtValue(); |
| 1840 | Offset2 = cast<ConstantSDNode>(Val: Load2->getOperand(Num: 1))->getSExtValue(); |
| 1841 | return true; |
| 1842 | } |
| 1843 | |
| 1844 | return false; |
| 1845 | } |
| 1846 | |
| 1847 | /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
| 1848 | /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
| 1849 | /// be scheduled together. On some targets if two loads are loading from
| 1850 | /// addresses in the same cache line, it's better if they are scheduled |
| 1851 | /// together. This function takes two integers that represent the load offsets |
| 1852 | /// from the common base address. It returns true if it decides it's desirable |
| 1853 | /// to schedule the two loads together. "NumLoads" is the number of loads that |
| 1854 | /// have already been scheduled after Load1. |
| 1855 | /// |
| 1856 | /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched |
| 1857 | /// is permanently disabled. |
| 1858 | bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, |
| 1859 | int64_t Offset1, int64_t Offset2, |
| 1860 | unsigned NumLoads) const { |
| 1861 | // Don't worry about Thumb: just ARM and Thumb2. |
| 1862 | if (Subtarget.isThumb1Only()) return false; |
| 1863 | |
| 1864 | assert(Offset2 > Offset1); |
| 1865 | |
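|      | // Bail out when the loads are far apart (more than roughly 512 bytes).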
| 1866 | if ((Offset2 - Offset1) / 8 > 64) |
| 1867 | return false; |
| 1868 | |
| 1869 | // Check if the machine opcodes are different. If they are, conservatively
| 1870 | // assume the loads do not share a base address, EXCEPT for Thumb2 byte loads
| 1871 | // where one is t2LDRBi8 and the other t2LDRBi12. Those are considered the
| 1872 | // same because they are merely different encoding forms of the same basic
| 1873 | // instruction.
| 1874 | if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) && |
| 1875 | !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && |
| 1876 | Load2->getMachineOpcode() == ARM::t2LDRBi12) || |
| 1877 | (Load1->getMachineOpcode() == ARM::t2LDRBi12 && |
| 1878 | Load2->getMachineOpcode() == ARM::t2LDRBi8))) |
| 1879 | return false; // FIXME: overly conservative? |
| 1880 | |
| 1881 | // Four loads in a row should be sufficient. |
| 1882 | if (NumLoads >= 3) |
| 1883 | return false; |
| 1884 | |
| 1885 | return true; |
| 1886 | } |
| 1887 | |
| 1888 | bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI, |
| 1889 | const MachineBasicBlock *MBB, |
| 1890 | const MachineFunction &MF) const { |
| 1891 | // Debug info is never a scheduling boundary. It's necessary to be explicit |
| 1892 | // due to the special treatment of IT instructions below, otherwise a |
| 1893 | // dbg_value followed by an IT will result in the IT instruction being |
| 1894 | // considered a scheduling hazard, which is wrong. It should be the actual |
| 1895 | // instruction preceding the dbg_value instruction(s), just like it is |
| 1896 | // when debug info is not present. |
| 1897 | if (MI.isDebugInstr()) |
| 1898 | return false; |
| 1899 | |
| 1900 | // Terminators and labels can't be scheduled around. |
| 1901 | if (MI.isTerminator() || MI.isPosition()) |
| 1902 | return true; |
| 1903 | |
| 1904 | // INLINEASM_BR can jump to another block |
| 1905 | if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) |
| 1906 | return true; |
| 1907 | |
| 1908 | if (isSEHInstruction(MI)) |
| 1909 | return true; |
| 1910 | |
| 1911 | // Treat the start of the IT block as a scheduling boundary, but schedule |
| 1912 | // t2IT along with all instructions following it. |
| 1913 | // FIXME: This is a big hammer. But the alternative is to add all potential |
| 1914 | // true and anti dependencies to IT block instructions as implicit operands |
| 1915 | // to the t2IT instruction. The added compile time and complexity does not |
| 1916 | // seem worth it. |
| 1917 | MachineBasicBlock::const_iterator I = MI; |
| 1918 | // Make sure to skip any debug instructions |
| 1919 | while (++I != MBB->end() && I->isDebugInstr()) |
| 1920 | ; |
| 1921 | if (I != MBB->end() && I->getOpcode() == ARM::t2IT) |
| 1922 | return true; |
| 1923 | |
| 1924 | // Don't attempt to schedule around any instruction that defines |
| 1925 | // a stack-oriented pointer, as it's unlikely to be profitable. This |
| 1926 | // saves compile time, because it doesn't require every single |
| 1927 | // stack slot reference to depend on the instruction that does the |
| 1928 | // modification. |
| 1929 | // Calls don't actually change the stack pointer, even if they have imp-defs. |
| 1930 | // No ARM calling conventions change the stack pointer. (X86 calling |
| 1931 | // conventions sometimes do). |
| 1932 | if (!MI.isCall() && MI.definesRegister(Reg: ARM::SP, /*TRI=*/nullptr)) |
| 1933 | return true; |
| 1934 | |
| 1935 | return false; |
| 1936 | } |
| 1937 | |
| 1938 | bool ARMBaseInstrInfo:: |
| 1939 | isProfitableToIfCvt(MachineBasicBlock &MBB, |
| 1940 | unsigned NumCycles, unsigned ExtraPredCycles,
| 1941 | BranchProbability Probability) const { |
| 1942 | if (!NumCycles) |
| 1943 | return false; |
| 1944 | |
| 1945 | // If we are optimizing for size, see if the branch in the predecessor can be |
| 1946 | // lowered to cbn?z by the constant island lowering pass, and return false if |
| 1947 | // so. This results in a shorter instruction sequence. |
| 1948 | if (MBB.getParent()->getFunction().hasOptSize()) { |
| 1949 | MachineBasicBlock *Pred = *MBB.pred_begin(); |
| 1950 | if (!Pred->empty()) { |
| 1951 | MachineInstr *LastMI = &*Pred->rbegin(); |
| 1952 | if (LastMI->getOpcode() == ARM::t2Bcc) { |
| 1953 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
| 1954 | MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br: LastMI, TRI); |
| 1955 | if (CmpMI) |
| 1956 | return false; |
| 1957 | } |
| 1958 | } |
| 1959 | } |
| 1960 | return isProfitableToIfCvt(TMBB&: MBB, NumT: NumCycles, ExtraT: ExtraPredCycles, |
| 1961 | FMBB&: MBB, NumF: 0, ExtraF: 0, Probability); |
| 1962 | } |
| 1963 | |
| 1964 | bool ARMBaseInstrInfo:: |
| 1965 | isProfitableToIfCvt(MachineBasicBlock &TBB, |
| 1966 | unsigned TCycles, unsigned TExtra,
| 1967 | MachineBasicBlock &FBB, |
| 1968 | unsigned FCycles, unsigned FExtra,
| 1969 | BranchProbability Probability) const { |
| 1970 | if (!TCycles) |
| 1971 | return false; |
| 1972 | |
| 1973 | // In Thumb2 code we often end up trading one branch for an IT block, and
| 1974 | // if we have to clone instructions this can increase code size. To prevent
| 1975 | // that cloning, do not if-convert blocks that have multiple predecessors
| 1976 | // when optimizing for minimum size.
| 1977 | if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) { |
| 1978 | if (TBB.pred_size() != 1 || FBB.pred_size() != 1) |
| 1979 | return false; |
| 1980 | } |
| 1981 | |
| 1982 | // Attempt to estimate the relative costs of predication versus branching. |
| 1983 | // Here we scale up each component of UnpredCost to avoid precision issues when
| 1984 | // scaling TCycles/FCycles by Probability. |
| 1985 | const unsigned ScalingUpFactor = 1024; |
| 1986 | |
| 1987 | unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor; |
| 1988 | unsigned UnpredCost; |
| 1989 | if (!Subtarget.hasBranchPredictor()) { |
| 1990 | // When we don't have a branch predictor it's always cheaper to not take a |
| 1991 | // branch than take it, so we have to take that into account. |
| 1992 | unsigned NotTakenBranchCost = 1; |
| 1993 | unsigned TakenBranchCost = Subtarget.getMispredictionPenalty(); |
| 1994 | unsigned TUnpredCycles, FUnpredCycles; |
| 1995 | if (!FCycles) { |
| 1996 | // Triangle: TBB is the fallthrough |
| 1997 | TUnpredCycles = TCycles + NotTakenBranchCost; |
| 1998 | FUnpredCycles = TakenBranchCost; |
| 1999 | } else { |
| 2000 | // Diamond: TBB is the block that is branched to, FBB is the fallthrough |
| 2001 | TUnpredCycles = TCycles + TakenBranchCost; |
| 2002 | FUnpredCycles = FCycles + NotTakenBranchCost; |
| 2003 | // The branch at the end of FBB will disappear when it's predicated, so |
| 2004 | // discount it from PredCost. |
| 2005 | PredCost -= 1 * ScalingUpFactor; |
| 2006 | } |
| 2007 | // The total cost is the cost of each path scaled by their probabilities.
| 2008 | unsigned TUnpredCost = Probability.scale(Num: TUnpredCycles * ScalingUpFactor); |
| 2009 | unsigned FUnpredCost = Probability.getCompl().scale(Num: FUnpredCycles * ScalingUpFactor); |
| 2010 | UnpredCost = TUnpredCost + FUnpredCost; |
| 2011 | // When predicating, assume that the first IT can be folded away but later
| 2012 | // ones cost one cycle each.
| 2013 | if (Subtarget.isThumb2() && TCycles + FCycles > 4) { |
| 2014 | PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor; |
| 2015 | } |
| 2016 | } else { |
| 2017 | unsigned TUnpredCost = Probability.scale(Num: TCycles * ScalingUpFactor); |
| 2018 | unsigned FUnpredCost = |
| 2019 | Probability.getCompl().scale(Num: FCycles * ScalingUpFactor); |
| 2020 | UnpredCost = TUnpredCost + FUnpredCost; |
| 2021 | UnpredCost += 1 * ScalingUpFactor; // The branch itself |
| 2022 | UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; |
| 2023 | } |
| 2024 | |
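|      | // As an illustrative (made-up) example: with no branch predictor, a triangle
|      | // with TCycles = 2, TExtra = FExtra = 0, a 50% probability and a
|      | // misprediction penalty of 10 gives PredCost = 2 * 1024 = 2048 and
|      | // UnpredCost = (2 + 1) * 512 + 10 * 512 = 6656, so predication wins.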
| 2025 | return PredCost <= UnpredCost; |
| 2026 | } |
| 2027 | |
| 2028 | unsigned |
| 2029 | ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
| 2030 | unsigned NumInsts) const { |
| 2031 | // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions. |
| 2032 | // ARM has a condition code field in every predicable instruction, using it |
| 2033 | // doesn't change code size. |
| 2034 | if (!Subtarget.isThumb2()) |
| 2035 | return 0; |
| 2036 | |
| 2037 | // It's possible that the IT block is restricted to a single instruction.
| 2038 | unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4; |
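|      | // For example, predicating 5 instructions normally needs divideCeil(5, 4) = 2
|      | // ITs (4 bytes), but 5 ITs (10 bytes) when each IT may only cover one
|      | // instruction.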
| 2039 | return divideCeil(Numerator: NumInsts, Denominator: MaxInsts) * 2; |
| 2040 | } |
| 2041 | |
| 2042 | unsigned |
| 2043 | ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const { |
| 2044 | // If this branch is likely to be folded into the comparison to form a |
| 2045 | // CB(N)Z, then removing it won't reduce code size at all, because that will |
| 2046 | // just replace the CB(N)Z with a CMP. |
| 2047 | if (MI.getOpcode() == ARM::t2Bcc && |
| 2048 | findCMPToFoldIntoCBZ(Br: &MI, TRI: &getRegisterInfo())) |
| 2049 | return 0; |
| 2050 | |
| 2051 | unsigned Size = getInstSizeInBytes(MI); |
| 2052 | |
| 2053 | // For Thumb2, all branches are 32-bit instructions during the if conversion |
| 2054 | // pass, but may be replaced with 16-bit instructions during size reduction. |
| 2055 | // Since the branches considered by if conversion tend to be forward branches |
| 2056 | // over small basic blocks, they are very likely to be in range for the |
| 2057 | // narrow instructions, so we assume the final code size will be half what it |
| 2058 | // currently is. |
| 2059 | if (Subtarget.isThumb2()) |
| 2060 | Size /= 2; |
| 2061 | |
| 2062 | return Size; |
| 2063 | } |
| 2064 | |
| 2065 | bool |
| 2066 | ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, |
| 2067 | MachineBasicBlock &FMBB) const { |
| 2068 | // Reduce false anti-dependencies to let the target's out-of-order execution |
| 2069 | // engine do its thing. |
| 2070 | return Subtarget.isProfitableToUnpredicate(); |
| 2071 | } |
| 2072 | |
| 2073 | /// getInstrPredicate - If instruction is predicated, returns its predicate |
| 2074 | /// condition, otherwise returns AL. It also returns the condition code |
| 2075 | /// register by reference. |
| 2076 | ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI, |
| 2077 | Register &PredReg) { |
| 2078 | int PIdx = MI.findFirstPredOperandIdx(); |
| 2079 | if (PIdx == -1) { |
| 2080 | PredReg = 0; |
| 2081 | return ARMCC::AL; |
| 2082 | } |
| 2083 | |
| 2084 | PredReg = MI.getOperand(i: PIdx+1).getReg(); |
| 2085 | return (ARMCC::CondCodes)MI.getOperand(i: PIdx).getImm(); |
| 2086 | } |
| 2087 | |
| 2088 | unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) { |
| 2089 | if (Opc == ARM::B) |
| 2090 | return ARM::Bcc; |
| 2091 | if (Opc == ARM::tB) |
| 2092 | return ARM::tBcc; |
| 2093 | if (Opc == ARM::t2B) |
| 2094 | return ARM::t2Bcc; |
| 2095 | |
| 2096 | llvm_unreachable("Unknown unconditional branch opcode!" ); |
| 2097 | } |
| 2098 | |
| 2099 | MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, |
| 2100 | bool NewMI, |
| 2101 | unsigned OpIdx1, |
| 2102 | unsigned OpIdx2) const { |
| 2103 | switch (MI.getOpcode()) { |
| 2104 | case ARM::MOVCCr: |
| 2105 | case ARM::t2MOVCCr: { |
| 2106 | // MOVCC can be commuted by inverting the condition. |
| 2107 | Register PredReg; |
| 2108 | ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); |
| 2109 | // MOVCC AL can't be inverted. Shouldn't happen. |
| 2110 | if (CC == ARMCC::AL || PredReg != ARM::CPSR) |
| 2111 | return nullptr; |
| 2112 | MachineInstr *CommutedMI = |
| 2113 | TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
| 2114 | if (!CommutedMI) |
| 2115 | return nullptr; |
| 2116 | // After swapping the MOVCC operands, also invert the condition. |
| 2117 | CommutedMI->getOperand(i: CommutedMI->findFirstPredOperandIdx()) |
| 2118 | .setImm(ARMCC::getOppositeCondition(CC)); |
| 2119 | return CommutedMI; |
| 2120 | } |
| 2121 | } |
| 2122 | return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
| 2123 | } |
| 2124 | |
| 2125 | /// Identify instructions that can be folded into a MOVCC instruction, and |
| 2126 | /// return the defining instruction. |
| 2127 | MachineInstr * |
| 2128 | ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI, |
| 2129 | const TargetInstrInfo *TII) const { |
| 2130 | if (!Reg.isVirtual()) |
| 2131 | return nullptr; |
| 2132 | if (!MRI.hasOneNonDBGUse(RegNo: Reg)) |
| 2133 | return nullptr; |
| 2134 | MachineInstr *MI = MRI.getVRegDef(Reg); |
| 2135 | if (!MI) |
| 2136 | return nullptr; |
| 2137 | // Check if MI can be predicated and folded into the MOVCC. |
| 2138 | if (!isPredicable(MI: *MI)) |
| 2139 | return nullptr; |
| 2140 | // Check if MI has any non-dead defs or physreg uses. This also detects |
| 2141 | // predicated instructions which will be reading CPSR. |
| 2142 | for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands(), N: 1)) { |
| 2143 | // Reject frame index operands; PEI can't handle the predicated pseudos.
| 2144 | if (MO.isFI() || MO.isCPI() || MO.isJTI()) |
| 2145 | return nullptr; |
| 2146 | if (!MO.isReg()) |
| 2147 | continue; |
| 2148 | // MI can't have any tied operands, that would conflict with predication. |
| 2149 | if (MO.isTied()) |
| 2150 | return nullptr; |
| 2151 | if (MO.getReg().isPhysical()) |
| 2152 | return nullptr; |
| 2153 | if (MO.isDef() && !MO.isDead()) |
| 2154 | return nullptr; |
| 2155 | } |
| 2156 | bool DontMoveAcrossStores = true; |
| 2157 | if (!MI->isSafeToMove(SawStore&: DontMoveAcrossStores)) |
| 2158 | return nullptr; |
| 2159 | return MI; |
| 2160 | } |
| 2161 | |
| 2162 | bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI, |
| 2163 | SmallVectorImpl<MachineOperand> &Cond, |
| 2164 | unsigned &TrueOp, unsigned &FalseOp, |
| 2165 | bool &Optimizable) const { |
| 2166 | assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && |
| 2167 | "Unknown select instruction" ); |
| 2168 | // MOVCC operands: |
| 2169 | // 0: Def. |
| 2170 | // 1: True use. |
| 2171 | // 2: False use. |
| 2172 | // 3: Condition code. |
| 2173 | // 4: CPSR use. |
| 2174 | TrueOp = 1; |
| 2175 | FalseOp = 2; |
| 2176 | Cond.push_back(Elt: MI.getOperand(i: 3)); |
| 2177 | Cond.push_back(Elt: MI.getOperand(i: 4)); |
| 2178 | // We can always fold a def. |
| 2179 | Optimizable = true; |
| 2180 | return false; |
| 2181 | } |
| 2182 | |
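|      | // optimizeSelect: when one MOVCC operand is produced by a single-use,
|      | // predicable instruction, re-emit that instruction in predicated form with
|      | // the other (false) operand tied to its result; the caller then erases the
|      | // MOVCC.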
| 2183 | MachineInstr * |
| 2184 | ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI, |
| 2185 | SmallPtrSetImpl<MachineInstr *> &SeenMIs, |
| 2186 | bool PreferFalse) const { |
| 2187 | assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && |
| 2188 | "Unknown select instruction" ); |
| 2189 | MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
| 2190 | MachineInstr *DefMI = canFoldIntoMOVCC(Reg: MI.getOperand(i: 2).getReg(), MRI, TII: this); |
| 2191 | bool Invert = !DefMI; |
| 2192 | if (!DefMI) |
| 2193 | DefMI = canFoldIntoMOVCC(Reg: MI.getOperand(i: 1).getReg(), MRI, TII: this); |
| 2194 | if (!DefMI) |
| 2195 | return nullptr; |
| 2196 | |
| 2197 | // Find new register class to use. |
| 2198 | MachineOperand FalseReg = MI.getOperand(i: Invert ? 2 : 1); |
| 2199 | MachineOperand TrueReg = MI.getOperand(i: Invert ? 1 : 2); |
| 2200 | Register DestReg = MI.getOperand(i: 0).getReg(); |
| 2201 | const TargetRegisterClass *FalseClass = MRI.getRegClass(Reg: FalseReg.getReg()); |
| 2202 | const TargetRegisterClass *TrueClass = MRI.getRegClass(Reg: TrueReg.getReg()); |
| 2203 | if (!MRI.constrainRegClass(Reg: DestReg, RC: FalseClass)) |
| 2204 | return nullptr; |
| 2205 | if (!MRI.constrainRegClass(Reg: DestReg, RC: TrueClass)) |
| 2206 | return nullptr; |
| 2207 | |
| 2208 | // Create a new predicated version of DefMI. |
| 2209 | // Rfalse is the first use. |
| 2210 | MachineInstrBuilder NewMI = |
| 2211 | BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: DefMI->getDesc(), DestReg); |
| 2212 | |
| 2213 | // Copy all the DefMI operands, excluding its (null) predicate. |
| 2214 | const MCInstrDesc &DefDesc = DefMI->getDesc(); |
| 2215 | for (unsigned i = 1, e = DefDesc.getNumOperands(); |
| 2216 | i != e && !DefDesc.operands()[i].isPredicate(); ++i) |
| 2217 | NewMI.add(MO: DefMI->getOperand(i)); |
| 2218 | |
| 2219 | unsigned CondCode = MI.getOperand(i: 3).getImm(); |
| 2220 | if (Invert) |
| 2221 | NewMI.addImm(Val: ARMCC::getOppositeCondition(CC: ARMCC::CondCodes(CondCode))); |
| 2222 | else |
| 2223 | NewMI.addImm(Val: CondCode); |
| 2224 | NewMI.add(MO: MI.getOperand(i: 4)); |
| 2225 | |
| 2226 | // DefMI is not the -S version that sets CPSR, so add an optional %noreg. |
| 2227 | if (NewMI->hasOptionalDef()) |
| 2228 | NewMI.add(MO: condCodeOp()); |
| 2229 | |
| 2230 | // The output register value when the predicate is false is an implicit |
| 2231 | // register operand tied to the first def. |
| 2232 | // The tie makes the register allocator ensure the FalseReg is allocated the |
| 2233 | // same register as operand 0. |
| 2234 | FalseReg.setImplicit(); |
| 2235 | NewMI.add(MO: FalseReg); |
| 2236 | NewMI->tieOperands(DefIdx: 0, UseIdx: NewMI->getNumOperands() - 1); |
| 2237 | |
| 2238 | // Update SeenMIs set: register newly created MI and erase removed DefMI. |
| 2239 | SeenMIs.insert(Ptr: NewMI); |
| 2240 | SeenMIs.erase(Ptr: DefMI); |
| 2241 | |
| 2242 | // If MI is inside a loop, and DefMI is outside the loop, then kill flags on |
| 2243 | // DefMI would be invalid when transferred inside the loop. Checking for a
| 2244 | // loop is expensive, but at least remove kill flags if they are in different |
| 2245 | // BBs. |
| 2246 | if (DefMI->getParent() != MI.getParent()) |
| 2247 | NewMI->clearKillInfo(); |
| 2248 | |
| 2249 | // The caller will erase MI, but not DefMI. |
| 2250 | DefMI->eraseFromParent(); |
| 2251 | return NewMI; |
| 2252 | } |
| 2253 | |
| 2254 | /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the |
| 2255 | /// instruction is encoded with an 'S' bit is determined by the optional CPSR |
| 2256 | /// def operand. |
| 2257 | /// |
| 2258 | /// This will go away once we can teach tblgen how to set the optional CPSR def |
| 2259 | /// operand itself. |
| 2260 | struct AddSubFlagsOpcodePair { |
| 2261 | uint16_t PseudoOpc; |
| 2262 | uint16_t MachineOpc; |
| 2263 | }; |
| 2264 | |
| 2265 | static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { |
| 2266 | {.PseudoOpc: ARM::ADDSri, .MachineOpc: ARM::ADDri}, |
| 2267 | {.PseudoOpc: ARM::ADDSrr, .MachineOpc: ARM::ADDrr}, |
| 2268 | {.PseudoOpc: ARM::ADDSrsi, .MachineOpc: ARM::ADDrsi}, |
| 2269 | {.PseudoOpc: ARM::ADDSrsr, .MachineOpc: ARM::ADDrsr}, |
| 2270 | |
| 2271 | {.PseudoOpc: ARM::SUBSri, .MachineOpc: ARM::SUBri}, |
| 2272 | {.PseudoOpc: ARM::SUBSrr, .MachineOpc: ARM::SUBrr}, |
| 2273 | {.PseudoOpc: ARM::SUBSrsi, .MachineOpc: ARM::SUBrsi}, |
| 2274 | {.PseudoOpc: ARM::SUBSrsr, .MachineOpc: ARM::SUBrsr}, |
| 2275 | |
| 2276 | {.PseudoOpc: ARM::RSBSri, .MachineOpc: ARM::RSBri}, |
| 2277 | {.PseudoOpc: ARM::RSBSrsi, .MachineOpc: ARM::RSBrsi}, |
| 2278 | {.PseudoOpc: ARM::RSBSrsr, .MachineOpc: ARM::RSBrsr}, |
| 2279 | |
| 2280 | {.PseudoOpc: ARM::tADDSi3, .MachineOpc: ARM::tADDi3}, |
| 2281 | {.PseudoOpc: ARM::tADDSi8, .MachineOpc: ARM::tADDi8}, |
| 2282 | {.PseudoOpc: ARM::tADDSrr, .MachineOpc: ARM::tADDrr}, |
| 2283 | {.PseudoOpc: ARM::tADCS, .MachineOpc: ARM::tADC}, |
| 2284 | |
| 2285 | {.PseudoOpc: ARM::tSUBSi3, .MachineOpc: ARM::tSUBi3}, |
| 2286 | {.PseudoOpc: ARM::tSUBSi8, .MachineOpc: ARM::tSUBi8}, |
| 2287 | {.PseudoOpc: ARM::tSUBSrr, .MachineOpc: ARM::tSUBrr}, |
| 2288 | {.PseudoOpc: ARM::tSBCS, .MachineOpc: ARM::tSBC}, |
| 2289 | {.PseudoOpc: ARM::tRSBS, .MachineOpc: ARM::tRSB}, |
| 2290 | {.PseudoOpc: ARM::tLSLSri, .MachineOpc: ARM::tLSLri}, |
| 2291 | |
| 2292 | {.PseudoOpc: ARM::t2ADDSri, .MachineOpc: ARM::t2ADDri}, |
| 2293 | {.PseudoOpc: ARM::t2ADDSrr, .MachineOpc: ARM::t2ADDrr}, |
| 2294 | {.PseudoOpc: ARM::t2ADDSrs, .MachineOpc: ARM::t2ADDrs}, |
| 2295 | |
| 2296 | {.PseudoOpc: ARM::t2SUBSri, .MachineOpc: ARM::t2SUBri}, |
| 2297 | {.PseudoOpc: ARM::t2SUBSrr, .MachineOpc: ARM::t2SUBrr}, |
| 2298 | {.PseudoOpc: ARM::t2SUBSrs, .MachineOpc: ARM::t2SUBrs}, |
| 2299 | |
| 2300 | {.PseudoOpc: ARM::t2RSBSri, .MachineOpc: ARM::t2RSBri}, |
| 2301 | {.PseudoOpc: ARM::t2RSBSrs, .MachineOpc: ARM::t2RSBrs}, |
| 2302 | }; |
| 2303 | |
| 2304 | unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { |
| 2305 | for (const auto &Entry : AddSubFlagsOpcodeMap) |
| 2306 | if (OldOpc == Entry.PseudoOpc) |
| 2307 | return Entry.MachineOpc; |
| 2308 | return 0; |
| 2309 | } |
| 2310 | |
| 2311 | void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, |
| 2312 | MachineBasicBlock::iterator &MBBI, |
| 2313 | const DebugLoc &dl, Register DestReg, |
| 2314 | Register BaseReg, int NumBytes, |
| 2315 | ARMCC::CondCodes Pred, Register PredReg, |
| 2316 | const ARMBaseInstrInfo &TII, |
| 2317 | unsigned MIFlags) { |
| 2318 | if (NumBytes == 0 && DestReg != BaseReg) { |
| 2319 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::MOVr), DestReg) |
| 2320 | .addReg(RegNo: BaseReg, flags: RegState::Kill) |
| 2321 | .add(MOs: predOps(Pred, PredReg)) |
| 2322 | .add(MO: condCodeOp()) |
| 2323 | .setMIFlags(MIFlags); |
| 2324 | return; |
| 2325 | } |
| 2326 | |
| 2327 | bool isSub = NumBytes < 0; |
| 2328 | if (isSub) NumBytes = -NumBytes; |
| 2329 | |
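|      | // The loop below peels NumBytes into chunks that are valid ARM rotated
|      | // immediates. For example, an adjustment of 4097 (0x1001) is not encodable
|      | // in one instruction and is emitted as two ADDs/SUBs of #1 and #4096.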
| 2330 | while (NumBytes) { |
| 2331 | unsigned RotAmt = ARM_AM::getSOImmValRotate(Imm: NumBytes); |
| 2332 | unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(V: 0xFF, R: RotAmt); |
| 2333 | assert(ThisVal && "Didn't extract field correctly" ); |
| 2334 | |
| 2335 | // We will handle these bits from offset, clear them. |
| 2336 | NumBytes &= ~ThisVal; |
| 2337 | |
| 2338 | assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?" ); |
| 2339 | |
| 2340 | // Build the new ADD / SUB. |
| 2341 | unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; |
| 2342 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: Opc), DestReg) |
| 2343 | .addReg(RegNo: BaseReg, flags: RegState::Kill) |
| 2344 | .addImm(Val: ThisVal) |
| 2345 | .add(MOs: predOps(Pred, PredReg)) |
| 2346 | .add(MO: condCodeOp()) |
| 2347 | .setMIFlags(MIFlags); |
| 2348 | BaseReg = DestReg; |
| 2349 | } |
| 2350 | } |
| 2351 | |
| 2352 | bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, |
| 2353 | MachineFunction &MF, MachineInstr *MI, |
| 2354 | unsigned NumBytes) { |
| 2355 | // This optimisation potentially adds lots of load and store |
| 2356 | // micro-operations, it's only really a great benefit to code-size. |
| 2357 | if (!Subtarget.hasMinSize()) |
| 2358 | return false; |
| 2359 | |
| 2360 | // If only one register is pushed/popped, LLVM can use an LDR/STR |
| 2361 | // instead. We can't modify those so make sure we're dealing with an |
| 2362 | // instruction we understand. |
| 2363 | bool IsPop = isPopOpcode(Opc: MI->getOpcode()); |
| 2364 | bool IsPush = isPushOpcode(Opc: MI->getOpcode()); |
| 2365 | if (!IsPush && !IsPop) |
| 2366 | return false; |
| 2367 | |
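|      | // As an illustration: an 8-byte SP decrement can be folded into
|      | // "push {r4, r5}" by pushing two extra registers with lower encodings, e.g.
|      | // "push {r2, r3, r4, r5}" with the extra registers marked undef.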
| 2368 | bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD || |
| 2369 | MI->getOpcode() == ARM::VLDMDIA_UPD; |
| 2370 | bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH || |
| 2371 | MI->getOpcode() == ARM::tPOP || |
| 2372 | MI->getOpcode() == ARM::tPOP_RET; |
| 2373 | |
| 2374 | assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP && |
| 2375 | MI->getOperand(1).getReg() == ARM::SP)) && |
| 2376 | "trying to fold sp update into non-sp-updating push/pop" ); |
| 2377 | |
| 2378 | // The VFP push & pop act on D-registers, so we can only correctly fold an
| 2379 | // adjustment that is a multiple of 8 bytes. Similarly, GPR pushes and pops
| 2380 | // work in 4-byte units. Don't try if this is violated.
| 2381 | if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0) |
| 2382 | return false; |
| 2383 | |
| 2384 | // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ |
| 2385 | // pred) so the list starts at 4. Thumb1 starts after the predicate. |
| 2386 | int RegListIdx = IsT1PushPop ? 2 : 4; |
| 2387 | |
| 2388 | // Calculate the space we'll need in terms of registers. |
| 2389 | unsigned RegsNeeded; |
| 2390 | const TargetRegisterClass *RegClass; |
| 2391 | if (IsVFPPushPop) { |
| 2392 | RegsNeeded = NumBytes / 8; |
| 2393 | RegClass = &ARM::DPRRegClass; |
| 2394 | } else { |
| 2395 | RegsNeeded = NumBytes / 4; |
| 2396 | RegClass = &ARM::GPRRegClass; |
| 2397 | } |
| 2398 | |
| 2399 | // We're going to have to strip all list operands off before |
| 2400 | // re-adding them since the order matters, so save the existing ones |
| 2401 | // for later. |
| 2402 | SmallVector<MachineOperand, 4> RegList; |
| 2403 | |
| 2404 | // We're also going to need the first register transferred by this |
| 2405 | // instruction, which won't necessarily be the first register in the list. |
| 2406 | unsigned FirstRegEnc = -1; |
| 2407 | |
| 2408 | const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); |
| 2409 | for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) { |
| 2410 | MachineOperand &MO = MI->getOperand(i); |
| 2411 | RegList.push_back(Elt: MO); |
| 2412 | |
| 2413 | if (MO.isReg() && !MO.isImplicit() && |
| 2414 | TRI->getEncodingValue(Reg: MO.getReg()) < FirstRegEnc) |
| 2415 | FirstRegEnc = TRI->getEncodingValue(Reg: MO.getReg()); |
| 2416 | } |
| 2417 | |
| 2418 | const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(MF: &MF); |
| 2419 | |
| 2420 | // Now try to find enough space in the reglist to allocate NumBytes. |
| 2421 | for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded; |
| 2422 | --CurRegEnc) { |
| 2423 | MCRegister CurReg = RegClass->getRegister(i: CurRegEnc); |
| 2424 | if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(Reg: ARM::R7)) |
| 2425 | continue; |
| 2426 | if (!IsPop) { |
| 2427 | // Pushing any register is completely harmless; mark the register involved
| 2428 | // as undef since we don't care about its value and must not restore it |
| 2429 | // during stack unwinding. |
| 2430 | RegList.push_back(Elt: MachineOperand::CreateReg(Reg: CurReg, isDef: false, isImp: false, |
| 2431 | isKill: false, isDead: false, isUndef: true)); |
| 2432 | --RegsNeeded; |
| 2433 | continue; |
| 2434 | } |
| 2435 | |
| 2436 | // However, we can only pop an extra register if it's not live. For |
| 2437 | // registers live within the function we might clobber a return value |
| 2438 | // register; the other way a register can be live here is if it's |
| 2439 | // callee-saved. |
| 2440 | if (isCalleeSavedRegister(Reg: CurReg, CSRegs) || |
| 2441 | MI->getParent()->computeRegisterLiveness(TRI, Reg: CurReg, Before: MI) != |
| 2442 | MachineBasicBlock::LQR_Dead) { |
| 2443 | // VFP pops don't allow holes in the register list, so any skip is fatal |
| 2444 | // for our transformation. GPR pops do, so we should just keep looking. |
| 2445 | if (IsVFPPushPop) |
| 2446 | return false; |
| 2447 | else |
| 2448 | continue; |
| 2449 | } |
| 2450 | |
| 2451 | // Mark the unimportant registers as <def,dead> in the POP. |
| 2452 | RegList.push_back(Elt: MachineOperand::CreateReg(Reg: CurReg, isDef: true, isImp: false, isKill: false, |
| 2453 | isDead: true)); |
| 2454 | --RegsNeeded; |
| 2455 | } |
| 2456 | |
| 2457 | if (RegsNeeded > 0) |
| 2458 | return false; |
| 2459 | |
| 2460 | // Finally we know we can profitably perform the optimisation so go |
| 2461 | // ahead: strip all existing registers off and add them back again |
| 2462 | // in the right order. |
| 2463 | for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) |
| 2464 | MI->removeOperand(OpNo: i); |
| 2465 | |
| 2466 | // Add the complete list back in. |
| 2467 | MachineInstrBuilder MIB(MF, &*MI); |
| 2468 | for (const MachineOperand &MO : llvm::reverse(C&: RegList)) |
| 2469 | MIB.add(MO); |
| 2470 | |
| 2471 | return true; |
| 2472 | } |
| 2473 | |
| 2474 | bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, |
| 2475 | Register FrameReg, int &Offset, |
| 2476 | const ARMBaseInstrInfo &TII) { |
| 2477 | unsigned Opcode = MI.getOpcode(); |
| 2478 | const MCInstrDesc &Desc = MI.getDesc(); |
| 2479 | unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); |
| 2480 | bool isSub = false; |
| 2481 | |
| 2482 | // Memory operands in inline assembly always use AddrMode2. |
| 2483 | if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR) |
| 2484 | AddrMode = ARMII::AddrMode2; |
| 2485 | |
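|      | // ADDri of a frame index is handled specially: a zero offset turns it into a
|      | // plain MOVr, a negative offset flips it to SUBri, and any part of the offset
|      | // that does not fit a rotated immediate is left in Offset for the caller.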
| 2486 | if (Opcode == ARM::ADDri) { |
| 2487 | Offset += MI.getOperand(i: FrameRegIdx+1).getImm(); |
| 2488 | if (Offset == 0) { |
| 2489 | // Turn it into a move. |
| 2490 | MI.setDesc(TII.get(Opcode: ARM::MOVr)); |
| 2491 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
| 2492 | MI.removeOperand(OpNo: FrameRegIdx+1); |
| 2493 | Offset = 0; |
| 2494 | return true; |
| 2495 | } else if (Offset < 0) { |
| 2496 | Offset = -Offset; |
| 2497 | isSub = true; |
| 2498 | MI.setDesc(TII.get(Opcode: ARM::SUBri)); |
| 2499 | } |
| 2500 | |
| 2501 | // Common case: small offset, fits into instruction. |
| 2502 | if (ARM_AM::getSOImmVal(Arg: Offset) != -1) { |
| 2503 | // Replace the FrameIndex with sp / fp |
| 2504 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
| 2505 | MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: Offset); |
| 2506 | Offset = 0; |
| 2507 | return true; |
| 2508 | } |
| 2509 | |
| 2510 | // Otherwise, pull as much of the immediate into this ADDri/SUBri
| 2511 | // as possible. |
| 2512 | unsigned RotAmt = ARM_AM::getSOImmValRotate(Imm: Offset); |
| 2513 | unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(V: 0xFF, R: RotAmt); |
| 2514 | |
| 2515 | // We will handle these bits from offset, clear them. |
| 2516 | Offset &= ~ThisImmVal; |
| 2517 | |
| 2518 | // Get the properly encoded SOImmVal field. |
| 2519 | assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 && |
| 2520 | "Bit extraction didn't work?");
| 2521 | MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: ThisImmVal); |
| 2522 | } else { |
| 2523 | unsigned ImmIdx = 0; |
| 2524 | int InstrOffs = 0; |
| 2525 | unsigned NumBits = 0; |
| 2526 | unsigned Scale = 1; |
| 2527 | switch (AddrMode) { |
| 2528 | case ARMII::AddrMode_i12: |
| 2529 | ImmIdx = FrameRegIdx + 1; |
| 2530 | InstrOffs = MI.getOperand(i: ImmIdx).getImm(); |
| 2531 | NumBits = 12; |
| 2532 | break; |
| 2533 | case ARMII::AddrMode2: |
| 2534 | ImmIdx = FrameRegIdx+2; |
| 2535 | InstrOffs = ARM_AM::getAM2Offset(AM2Opc: MI.getOperand(i: ImmIdx).getImm()); |
| 2536 | if (ARM_AM::getAM2Op(AM2Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
| 2537 | InstrOffs *= -1; |
| 2538 | NumBits = 12; |
| 2539 | break; |
| 2540 | case ARMII::AddrMode3: |
| 2541 | ImmIdx = FrameRegIdx+2; |
| 2542 | InstrOffs = ARM_AM::getAM3Offset(AM3Opc: MI.getOperand(i: ImmIdx).getImm()); |
| 2543 | if (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
| 2544 | InstrOffs *= -1; |
| 2545 | NumBits = 8; |
| 2546 | break; |
| 2547 | case ARMII::AddrMode4: |
| 2548 | case ARMII::AddrMode6: |
| 2549 | // Can't fold any offset even if it's zero. |
| 2550 | return false; |
| 2551 | case ARMII::AddrMode5: |
| 2552 | ImmIdx = FrameRegIdx+1; |
| 2553 | InstrOffs = ARM_AM::getAM5Offset(AM5Opc: MI.getOperand(i: ImmIdx).getImm()); |
| 2554 | if (ARM_AM::getAM5Op(AM5Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
| 2555 | InstrOffs *= -1; |
| 2556 | NumBits = 8; |
| 2557 | Scale = 4; |
| 2558 | break; |
| 2559 | case ARMII::AddrMode5FP16: |
| 2560 | ImmIdx = FrameRegIdx+1; |
| 2561 | InstrOffs = ARM_AM::getAM5Offset(AM5Opc: MI.getOperand(i: ImmIdx).getImm()); |
| 2562 | if (ARM_AM::getAM5Op(AM5Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
| 2563 | InstrOffs *= -1; |
| 2564 | NumBits = 8; |
| 2565 | Scale = 2; |
| 2566 | break; |
| 2567 | case ARMII::AddrModeT2_i7: |
| 2568 | case ARMII::AddrModeT2_i7s2: |
| 2569 | case ARMII::AddrModeT2_i7s4: |
| 2570 | ImmIdx = FrameRegIdx+1; |
| 2571 | InstrOffs = MI.getOperand(i: ImmIdx).getImm(); |
| 2572 | NumBits = 7; |
| 2573 | Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 : |
| 2574 | AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1); |
| 2575 | break; |
| 2576 | default: |
| 2577 | llvm_unreachable("Unsupported addressing mode!");
| 2578 | } |
| 2579 | |
| 2580 | Offset += InstrOffs * Scale; |
| 2581 | assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
| 2582 | if (Offset < 0) { |
| 2583 | Offset = -Offset; |
| 2584 | isSub = true; |
| 2585 | } |
| 2586 | |
| 2587 | // Attempt to fold address comp. if opcode has offset bits |
| 2588 | if (NumBits > 0) { |
| 2589 | // Common case: small offset, fits into instruction. |
| 2590 | MachineOperand &ImmOp = MI.getOperand(i: ImmIdx); |
| 2591 | int ImmedOffset = Offset / Scale; |
| 2592 | unsigned Mask = (1 << NumBits) - 1; |
| 2593 | if ((unsigned)Offset <= Mask * Scale) { |
| 2594 | // Replace the FrameIndex with sp |
| 2595 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
| 2596 | // FIXME: When addrmode2 goes away, this will simplify (like the |
| 2597 | // T2 version), as the LDR.i12 versions don't need the encoding |
| 2598 | // tricks for the offset value. |
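|      | // For the legacy addressing modes the add/sub (U) flag is encoded just
|      | // above the offset field, so a negative offset is represented by setting
|      | // bit NumBits rather than by negating the immediate itself.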
| 2599 | if (isSub) { |
| 2600 | if (AddrMode == ARMII::AddrMode_i12) |
| 2601 | ImmedOffset = -ImmedOffset; |
| 2602 | else |
| 2603 | ImmedOffset |= 1 << NumBits; |
| 2604 | } |
| 2605 | ImmOp.ChangeToImmediate(ImmVal: ImmedOffset); |
| 2606 | Offset = 0; |
| 2607 | return true; |
| 2608 | } |
| 2609 | |
| 2610 | // Otherwise, it didn't fit. Pull in what we can to simplify the immed. |
| 2611 | ImmedOffset = ImmedOffset & Mask; |
| 2612 | if (isSub) { |
| 2613 | if (AddrMode == ARMII::AddrMode_i12) |
| 2614 | ImmedOffset = -ImmedOffset; |
| 2615 | else |
| 2616 | ImmedOffset |= 1 << NumBits; |
| 2617 | } |
| 2618 | ImmOp.ChangeToImmediate(ImmVal: ImmedOffset); |
| 2619 | Offset &= ~(Mask*Scale); |
| 2620 | } |
| 2621 | } |
| 2622 | |
| 2623 | Offset = (isSub) ? -Offset : Offset; |
| 2624 | return Offset == 0; |
| 2625 | } |
| 2626 | |
| 2627 | /// analyzeCompare - For a comparison instruction, return the source registers |
| 2628 | /// in SrcReg and SrcReg2 if it has two register operands, and the value it
| 2629 | /// compares against in CmpValue. Return true if the comparison instruction |
| 2630 | /// can be analyzed. |
| 2631 | bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, |
| 2632 | Register &SrcReg2, int64_t &CmpMask, |
| 2633 | int64_t &CmpValue) const { |
| 2634 | switch (MI.getOpcode()) { |
| 2635 | default: break; |
| 2636 | case ARM::CMPri: |
| 2637 | case ARM::t2CMPri: |
| 2638 | case ARM::tCMPi8: |
| 2639 | SrcReg = MI.getOperand(i: 0).getReg(); |
| 2640 | SrcReg2 = 0; |
| 2641 | CmpMask = ~0; |
| 2642 | CmpValue = MI.getOperand(i: 1).getImm(); |
| 2643 | return true; |
| 2644 | case ARM::CMPrr: |
| 2645 | case ARM::t2CMPrr: |
| 2646 | case ARM::tCMPr: |
| 2647 | SrcReg = MI.getOperand(i: 0).getReg(); |
| 2648 | SrcReg2 = MI.getOperand(i: 1).getReg(); |
| 2649 | CmpMask = ~0; |
| 2650 | CmpValue = 0; |
| 2651 | return true; |
| 2652 | case ARM::TSTri: |
| 2653 | case ARM::t2TSTri: |
| 2654 | SrcReg = MI.getOperand(i: 0).getReg(); |
| 2655 | SrcReg2 = 0; |
| 2656 | CmpMask = MI.getOperand(i: 1).getImm(); |
| 2657 | CmpValue = 0; |
| 2658 | return true; |
| 2659 | } |
| 2660 | |
| 2661 | return false; |
| 2662 | } |
| 2663 | |
| 2664 | /// isSuitableForMask - Identify a suitable 'and' instruction that |
| 2665 | /// operates on the given source register and applies the same mask |
| 2666 | /// as a 'tst' instruction. Provide a limited look-through for copies. |
| 2667 | /// When successful, MI will hold the found instruction. |
| 2668 | static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg, |
| 2669 | int CmpMask, bool CommonUse) { |
| 2670 | switch (MI->getOpcode()) { |
| 2671 | case ARM::ANDri: |
| 2672 | case ARM::t2ANDri: |
| 2673 | if (CmpMask != MI->getOperand(i: 2).getImm()) |
| 2674 | return false; |
| 2675 | if (SrcReg == MI->getOperand(i: CommonUse ? 1 : 0).getReg()) |
| 2676 | return true; |
| 2677 | break; |
| 2678 | } |
| 2679 | |
| 2680 | return false; |
| 2681 | } |
| 2682 | |
| 2683 | /// getCmpToAddCondition - assuming the flags are set by CMP(a,b), return
| 2684 | /// the condition code to use if we modify the instructions such that the
| 2685 | /// flags are instead set by ADD(a,b,X).
| 2686 | inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) { |
| 2687 | switch (CC) { |
| 2688 | default: return ARMCC::AL; |
| 2689 | case ARMCC::HS: return ARMCC::LO; |
| 2690 | case ARMCC::LO: return ARMCC::HS; |
| 2691 | case ARMCC::VS: return ARMCC::VS; |
| 2692 | case ARMCC::VC: return ARMCC::VC; |
| 2693 | } |
| 2694 | } |
| 2695 | |
| 2696 | /// isRedundantFlagInstr - check whether the first instruction, whose only |
| 2697 | /// purpose is to update flags, can be made redundant. |
| 2698 | /// CMPrr can be made redundant by SUBrr if the operands are the same. |
| 2699 | /// CMPri can be made redundant by SUBri if the operands are the same. |
| 2700 | /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X). |
| 2701 | /// This function can be extended later on. |
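|      | /// For example, a CMPrr(r1, r2) following SUBrr(r0, r1, r2) becomes
|      | /// redundant once the SUB is converted to its flag-setting form; if the
|      | /// operands appear swapped, the users' condition codes are adjusted instead.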
| 2702 | inline static bool isRedundantFlagInstr(const MachineInstr *CmpI, |
| 2703 | Register SrcReg, Register SrcReg2, |
| 2704 | int64_t ImmValue, |
| 2705 | const MachineInstr *OI, |
| 2706 | bool &IsThumb1) { |
| 2707 | if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && |
| 2708 | (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) && |
| 2709 | ((OI->getOperand(i: 1).getReg() == SrcReg && |
| 2710 | OI->getOperand(i: 2).getReg() == SrcReg2) || |
| 2711 | (OI->getOperand(i: 1).getReg() == SrcReg2 && |
| 2712 | OI->getOperand(i: 2).getReg() == SrcReg))) { |
| 2713 | IsThumb1 = false; |
| 2714 | return true; |
| 2715 | } |
| 2716 | |
| 2717 | if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr && |
| 2718 | ((OI->getOperand(i: 2).getReg() == SrcReg && |
| 2719 | OI->getOperand(i: 3).getReg() == SrcReg2) || |
| 2720 | (OI->getOperand(i: 2).getReg() == SrcReg2 && |
| 2721 | OI->getOperand(i: 3).getReg() == SrcReg))) { |
| 2722 | IsThumb1 = true; |
| 2723 | return true; |
| 2724 | } |
| 2725 | |
| 2726 | if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) && |
| 2727 | (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) && |
| 2728 | OI->getOperand(i: 1).getReg() == SrcReg && |
| 2729 | OI->getOperand(i: 2).getImm() == ImmValue) { |
| 2730 | IsThumb1 = false; |
| 2731 | return true; |
| 2732 | } |
| 2733 | |
| 2734 | if (CmpI->getOpcode() == ARM::tCMPi8 && |
| 2735 | (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) && |
| 2736 | OI->getOperand(i: 2).getReg() == SrcReg && |
| 2737 | OI->getOperand(i: 3).getImm() == ImmValue) { |
| 2738 | IsThumb1 = true; |
| 2739 | return true; |
| 2740 | } |
| 2741 | |
| 2742 | if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && |
| 2743 | (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr || |
| 2744 | OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) && |
| 2745 | OI->getOperand(i: 0).isReg() && OI->getOperand(i: 1).isReg() && |
| 2746 | OI->getOperand(i: 0).getReg() == SrcReg && |
| 2747 | OI->getOperand(i: 1).getReg() == SrcReg2) { |
| 2748 | IsThumb1 = false; |
| 2749 | return true; |
| 2750 | } |
| 2751 | |
| 2752 | if (CmpI->getOpcode() == ARM::tCMPr && |
| 2753 | (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 || |
| 2754 | OI->getOpcode() == ARM::tADDrr) && |
| 2755 | OI->getOperand(i: 0).getReg() == SrcReg && |
| 2756 | OI->getOperand(i: 2).getReg() == SrcReg2) { |
| 2757 | IsThumb1 = true; |
| 2758 | return true; |
| 2759 | } |
| 2760 | |
| 2761 | return false; |
| 2762 | } |
| 2763 | |
| 2764 | static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) { |
| 2765 | switch (MI->getOpcode()) { |
| 2766 | default: return false; |
| 2767 | case ARM::tLSLri: |
| 2768 | case ARM::tLSRri: |
| 2769 | case ARM::tLSLrr: |
| 2770 | case ARM::tLSRrr: |
| 2771 | case ARM::tSUBrr: |
| 2772 | case ARM::tADDrr: |
| 2773 | case ARM::tADDi3: |
| 2774 | case ARM::tADDi8: |
| 2775 | case ARM::tSUBi3: |
| 2776 | case ARM::tSUBi8: |
| 2777 | case ARM::tMUL: |
| 2778 | case ARM::tADC: |
| 2779 | case ARM::tSBC: |
| 2780 | case ARM::tRSB: |
| 2781 | case ARM::tAND: |
| 2782 | case ARM::tORR: |
| 2783 | case ARM::tEOR: |
| 2784 | case ARM::tBIC: |
| 2785 | case ARM::tMVN: |
| 2786 | case ARM::tASRri: |
| 2787 | case ARM::tASRrr: |
| 2788 | case ARM::tROR: |
| 2789 | IsThumb1 = true; |
| 2790 | [[fallthrough]]; |
| 2791 | case ARM::RSBrr: |
| 2792 | case ARM::RSBri: |
| 2793 | case ARM::RSCrr: |
| 2794 | case ARM::RSCri: |
| 2795 | case ARM::ADDrr: |
| 2796 | case ARM::ADDri: |
| 2797 | case ARM::ADCrr: |
| 2798 | case ARM::ADCri: |
| 2799 | case ARM::SUBrr: |
| 2800 | case ARM::SUBri: |
| 2801 | case ARM::SBCrr: |
| 2802 | case ARM::SBCri: |
| 2803 | case ARM::t2RSBri: |
| 2804 | case ARM::t2ADDrr: |
| 2805 | case ARM::t2ADDri: |
| 2806 | case ARM::t2ADCrr: |
| 2807 | case ARM::t2ADCri: |
| 2808 | case ARM::t2SUBrr: |
| 2809 | case ARM::t2SUBri: |
| 2810 | case ARM::t2SBCrr: |
| 2811 | case ARM::t2SBCri: |
| 2812 | case ARM::ANDrr: |
| 2813 | case ARM::ANDri: |
| 2814 | case ARM::ANDrsr: |
| 2815 | case ARM::ANDrsi: |
| 2816 | case ARM::t2ANDrr: |
| 2817 | case ARM::t2ANDri: |
| 2818 | case ARM::t2ANDrs: |
| 2819 | case ARM::ORRrr: |
| 2820 | case ARM::ORRri: |
| 2821 | case ARM::ORRrsr: |
| 2822 | case ARM::ORRrsi: |
| 2823 | case ARM::t2ORRrr: |
| 2824 | case ARM::t2ORRri: |
| 2825 | case ARM::t2ORRrs: |
| 2826 | case ARM::EORrr: |
| 2827 | case ARM::EORri: |
| 2828 | case ARM::EORrsr: |
| 2829 | case ARM::EORrsi: |
| 2830 | case ARM::t2EORrr: |
| 2831 | case ARM::t2EORri: |
| 2832 | case ARM::t2EORrs: |
| 2833 | case ARM::BICri: |
| 2834 | case ARM::BICrr: |
| 2835 | case ARM::BICrsi: |
| 2836 | case ARM::BICrsr: |
| 2837 | case ARM::t2BICri: |
| 2838 | case ARM::t2BICrr: |
| 2839 | case ARM::t2BICrs: |
| 2840 | case ARM::t2LSRri: |
| 2841 | case ARM::t2LSRrr: |
| 2842 | case ARM::t2LSLri: |
| 2843 | case ARM::t2LSLrr: |
| 2844 | case ARM::MOVsr: |
| 2845 | case ARM::MOVsi: |
| 2846 | return true; |
| 2847 | } |
| 2848 | } |
| 2849 | |
| 2850 | /// optimizeCompareInstr - Convert the instruction supplying the argument to the
| 2851 | /// comparison into one that sets the zero bit in the flags register, and
| 2852 | /// remove the now-redundant Compare instruction if an earlier instruction can
| 2853 | /// set the flags in the same way as the Compare.
| 2854 | /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where the two
| 2855 | /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
| 2856 | /// condition code of instructions which use the flags.
| 2857 | bool ARMBaseInstrInfo::optimizeCompareInstr( |
| 2858 | MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, |
| 2859 | int64_t CmpValue, const MachineRegisterInfo *MRI) const { |
| 2860 | // Get the unique definition of SrcReg. |
| 2861 | MachineInstr *MI = MRI->getUniqueVRegDef(Reg: SrcReg); |
| 2862 | if (!MI) return false; |
| 2863 | |
| 2864 | // Masked compares sometimes use the same register as the corresponding 'and'. |
| 2865 | if (CmpMask != ~0) { |
| 2866 | if (!isSuitableForMask(MI, SrcReg, CmpMask, CommonUse: false) || isPredicated(MI: *MI)) { |
| 2867 | MI = nullptr; |
| 2868 | for (MachineRegisterInfo::use_instr_iterator |
| 2869 | UI = MRI->use_instr_begin(RegNo: SrcReg), UE = MRI->use_instr_end(); |
| 2870 | UI != UE; ++UI) { |
| 2871 | if (UI->getParent() != CmpInstr.getParent()) |
| 2872 | continue; |
| 2873 | MachineInstr *PotentialAND = &*UI; |
| 2874 | if (!isSuitableForMask(MI&: PotentialAND, SrcReg, CmpMask, CommonUse: true) || |
| 2875 | isPredicated(MI: *PotentialAND)) |
| 2876 | continue; |
| 2877 | MI = PotentialAND; |
| 2878 | break; |
| 2879 | } |
| 2880 | if (!MI) return false; |
| 2881 | } |
| 2882 | } |
| 2883 | |
| 2884 | // Get ready to iterate backward from CmpInstr. |
| 2885 | MachineBasicBlock::iterator I = CmpInstr, E = MI, |
| 2886 | B = CmpInstr.getParent()->begin(); |
| 2887 | |
| 2888 | // Early exit if CmpInstr is at the beginning of the BB. |
| 2889 | if (I == B) return false; |
| 2890 | |
| 2891 | // There are two possible candidates which can be changed to set CPSR: |
| 2892 | // One is MI, the other is a SUB or ADD instruction. |
| 2893 | // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or |
| 2894 | // ADDr[ri](r1, r2, X). |
| 2895 | // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). |
| 2896 | MachineInstr *SubAdd = nullptr; |
| 2897 | if (SrcReg2 != 0) |
| 2898 | // MI is not a candidate for CMPrr. |
| 2899 | MI = nullptr; |
| 2900 | else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) { |
| 2901 | // Conservatively refuse to convert an instruction which isn't in the same |
| 2902 | // BB as the comparison. |
| 2903 | // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate. |
| 2904 | // Thus we cannot return here. |
| 2905 | if (CmpInstr.getOpcode() == ARM::CMPri || |
| 2906 | CmpInstr.getOpcode() == ARM::t2CMPri || |
| 2907 | CmpInstr.getOpcode() == ARM::tCMPi8) |
| 2908 | MI = nullptr; |
| 2909 | else |
| 2910 | return false; |
| 2911 | } |
| 2912 | |
| 2913 | bool IsThumb1 = false; |
| 2914 | if (MI && !isOptimizeCompareCandidate(MI, IsThumb1)) |
| 2915 | return false; |
| 2916 | |
| 2917 | // We also want to do this peephole for cases like this: if (a*b == 0), |
| 2918 | // and optimise away the CMP instruction from the generated code sequence: |
| 2919 | // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values |
| 2920 | // resulting from the select instruction, but these MOVS instructions for |
| 2921 | // Thumb1 (V6M) are flag setting and are thus preventing this optimisation. |
| 2922 | // However, if we only have MOVS instructions in between the CMP and the |
| 2923 | // other instruction (the MULS in this example), then the CPSR is dead so we |
| 2924 | // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this |
| 2925 | // reordering and then continue the analysis hoping we can eliminate the |
| 2926 | // CMP. This peephole works on the vregs, so is still in SSA form. As a |
| 2927 | // consequence, the movs won't redefine/kill the MUL operands which would |
| 2928 | // make this reordering illegal. |
| 2929 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
| 2930 | if (MI && IsThumb1) { |
| 2931 | --I; |
| 2932 | if (I != E && !MI->readsRegister(Reg: ARM::CPSR, TRI)) { |
| 2933 | bool CanReorder = true; |
| 2934 | for (; I != E; --I) { |
| 2935 | if (I->getOpcode() != ARM::tMOVi8) { |
| 2936 | CanReorder = false; |
| 2937 | break; |
| 2938 | } |
| 2939 | } |
| 2940 | if (CanReorder) { |
| 2941 | MI = MI->removeFromParent(); |
| 2942 | E = CmpInstr; |
| 2943 | CmpInstr.getParent()->insert(I: E, MI); |
| 2944 | } |
| 2945 | } |
| 2946 | I = CmpInstr; |
| 2947 | E = MI; |
| 2948 | } |
| 2949 | |
| 2950 | // Check that CPSR isn't set between the comparison instruction and the one we |
| 2951 | // want to change. At the same time, search for SubAdd. |
| 2952 | bool SubAddIsThumb1 = false; |
| 2953 | do { |
| 2954 | const MachineInstr &Instr = *--I; |
| 2955 | |
| 2956 | // Check whether CmpInstr can be made redundant by the current instruction. |
| 2957 | if (isRedundantFlagInstr(CmpI: &CmpInstr, SrcReg, SrcReg2, ImmValue: CmpValue, OI: &Instr, |
| 2958 | IsThumb1&: SubAddIsThumb1)) { |
| 2959 | SubAdd = &*I; |
| 2960 | break; |
| 2961 | } |
| 2962 | |
| 2963 | // Allow E (which was initially MI) to be SubAdd but do not search before E. |
| 2964 | if (I == E) |
| 2965 | break; |
| 2966 | |
| 2967 | if (Instr.modifiesRegister(Reg: ARM::CPSR, TRI) || |
| 2968 | Instr.readsRegister(Reg: ARM::CPSR, TRI)) |
| 2969 | // This instruction modifies or uses CPSR after the one we want to |
| 2970 | // change. We can't do this transformation. |
| 2971 | return false; |
| 2972 | |
| 2973 | if (I == B) { |
| 2974 | // In some cases, we scan the use-list of an instruction for an AND; |
| 2975 | // that AND is in the same BB, but may not be scheduled before the |
| 2976 | // corresponding TST. In that case, bail out. |
| 2977 | // |
| 2978 | // FIXME: We could try to reschedule the AND. |
| 2979 | return false; |
| 2980 | } |
| 2981 | } while (true); |
| 2982 | |
| 2983 | // Return false if no candidates exist. |
| 2984 | if (!MI && !SubAdd) |
| 2985 | return false; |
| 2986 | |
| 2987 | // If we found a SubAdd, use it as it will be closer to the CMP |
| 2988 | if (SubAdd) { |
| 2989 | MI = SubAdd; |
| 2990 | IsThumb1 = SubAddIsThumb1; |
| 2991 | } |
| 2992 | |
| 2993 | // We can't use a predicated instruction - it doesn't always write the flags. |
| 2994 | if (isPredicated(MI: *MI)) |
| 2995 | return false; |
| 2996 | |
| 2997 | // Scan forward for the use of CPSR |
| 2998 | // When checking against MI: if it's a conditional code that requires |
| 2999 | // checking of the V bit or C bit, then this is not safe to do. |
| 3000 | // It is safe to remove CmpInstr if CPSR is redefined or killed. |
| 3001 | // If we are done with the basic block, we need to check whether CPSR is |
| 3002 | // live-out. |
| 3003 | SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> |
| 3004 | OperandsToUpdate; |
| 3005 | bool isSafe = false; |
| 3006 | I = CmpInstr; |
| 3007 | E = CmpInstr.getParent()->end(); |
| 3008 | while (!isSafe && ++I != E) { |
| 3009 | const MachineInstr &Instr = *I; |
| 3010 | for (unsigned IO = 0, EO = Instr.getNumOperands(); |
| 3011 | !isSafe && IO != EO; ++IO) { |
| 3012 | const MachineOperand &MO = Instr.getOperand(i: IO); |
| 3013 | if (MO.isRegMask() && MO.clobbersPhysReg(PhysReg: ARM::CPSR)) { |
| 3014 | isSafe = true; |
| 3015 | break; |
| 3016 | } |
| 3017 | if (!MO.isReg() || MO.getReg() != ARM::CPSR) |
| 3018 | continue; |
| 3019 | if (MO.isDef()) { |
| 3020 | isSafe = true; |
| 3021 | break; |
| 3022 | } |
| 3023 | // The condition code operand is the operand immediately before CPSR, except for VSELs.
| 3024 | ARMCC::CondCodes CC; |
| 3025 | bool IsInstrVSel = true; |
| 3026 | switch (Instr.getOpcode()) { |
| 3027 | default: |
| 3028 | IsInstrVSel = false; |
| 3029 | CC = (ARMCC::CondCodes)Instr.getOperand(i: IO - 1).getImm(); |
| 3030 | break; |
| 3031 | case ARM::VSELEQD: |
| 3032 | case ARM::VSELEQS: |
| 3033 | case ARM::VSELEQH: |
| 3034 | CC = ARMCC::EQ; |
| 3035 | break; |
| 3036 | case ARM::VSELGTD: |
| 3037 | case ARM::VSELGTS: |
| 3038 | case ARM::VSELGTH: |
| 3039 | CC = ARMCC::GT; |
| 3040 | break; |
| 3041 | case ARM::VSELGED: |
| 3042 | case ARM::VSELGES: |
| 3043 | case ARM::VSELGEH: |
| 3044 | CC = ARMCC::GE; |
| 3045 | break; |
| 3046 | case ARM::VSELVSD: |
| 3047 | case ARM::VSELVSS: |
| 3048 | case ARM::VSELVSH: |
| 3049 | CC = ARMCC::VS; |
| 3050 | break; |
| 3051 | } |
| 3052 | |
| 3053 | if (SubAdd) { |
| 3054 | // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based |
| 3055 | // on CMP needs to be updated to be based on SUB. |
| 3056 | // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also |
| 3057 | // needs to be modified. |
| 3058 | // Push the condition code operands to OperandsToUpdate. |
| 3059 | // If it is safe to remove CmpInstr, the condition code of these |
| 3060 | // operands will be modified. |
| 3061 | unsigned Opc = SubAdd->getOpcode(); |
| 3062 | bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr || |
| 3063 | Opc == ARM::SUBri || Opc == ARM::t2SUBri || |
| 3064 | Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 || |
| 3065 | Opc == ARM::tSUBi8; |
| 3066 | unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2; |
| 3067 | if (!IsSub || |
| 3068 | (SrcReg2 != 0 && SubAdd->getOperand(i: OpI).getReg() == SrcReg2 && |
| 3069 | SubAdd->getOperand(i: OpI + 1).getReg() == SrcReg)) { |
| 3070 | // VSel doesn't support condition code update. |
| 3071 | if (IsInstrVSel) |
| 3072 | return false; |
| 3073 | // Ensure we can swap the condition. |
| 3074 | ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC)); |
| 3075 | if (NewCC == ARMCC::AL) |
| 3076 | return false; |
| 3077 | OperandsToUpdate.push_back( |
| 3078 | Elt: std::make_pair(x: &((*I).getOperand(i: IO - 1)), y&: NewCC)); |
| 3079 | } |
| 3080 | } else { |
| 3081 | // No SubAdd, so this is x = <op> y, z; cmp x, 0. |
| 3082 | switch (CC) { |
| 3083 | case ARMCC::EQ: // Z |
| 3084 | case ARMCC::NE: // Z |
| 3085 | case ARMCC::MI: // N |
| 3086 | case ARMCC::PL: // N |
| 3087 | case ARMCC::AL: // none |
| 3088 | // CPSR can be used multiple times, we should continue. |
| 3089 | break; |
| 3090 | case ARMCC::HS: // C |
| 3091 | case ARMCC::LO: // C |
| 3092 | case ARMCC::VS: // V |
| 3093 | case ARMCC::VC: // V |
| 3094 | case ARMCC::HI: // C Z |
| 3095 | case ARMCC::LS: // C Z |
| 3096 | case ARMCC::GE: // N V |
| 3097 | case ARMCC::LT: // N V |
| 3098 | case ARMCC::GT: // Z N V |
| 3099 | case ARMCC::LE: // Z N V |
| 3100 | // The instruction uses the V bit or C bit which is not safe. |
| 3101 | return false; |
| 3102 | } |
| 3103 | } |
| 3104 | } |
| 3105 | } |
| 3106 | |
| 3107 | // If CPSR is not killed nor re-defined, we should check whether it is |
| 3108 | // live-out. If it is live-out, do not optimize. |
| 3109 | if (!isSafe) { |
| 3110 | MachineBasicBlock *MBB = CmpInstr.getParent(); |
| 3111 | for (MachineBasicBlock *Succ : MBB->successors()) |
| 3112 | if (Succ->isLiveIn(Reg: ARM::CPSR)) |
| 3113 | return false; |
| 3114 | } |
| 3115 | |
| 3116 | // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always |
| 3117 | // set CPSR so this is represented as an explicit output) |
| 3118 | if (!IsThumb1) { |
| 3119 | unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1; |
| 3120 | MI->getOperand(i: CPSRRegNum).setReg(ARM::CPSR); |
| 3121 | MI->getOperand(i: CPSRRegNum).setIsDef(true); |
| 3122 | } |
| 3123 | assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
| 3124 | CmpInstr.eraseFromParent(); |
| 3125 | |
| 3126 | // Modify the condition code of operands in OperandsToUpdate. |
| 3127 | // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to |
| 3128 | // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. |
| 3129 | for (auto &[MO, Cond] : OperandsToUpdate) |
| 3130 | MO->setImm(Cond); |
| 3131 | |
| 3132 | MI->clearRegisterDeads(Reg: ARM::CPSR); |
| 3133 | |
| 3134 | return true; |
| 3135 | } |
| 3136 | |
| 3137 | bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const { |
| 3138 | // Do not sink MI if it might be used to optimize a redundant compare. |
| 3139 | // We heuristically only look at the instruction immediately following MI to |
| 3140 | // avoid potentially searching the entire basic block. |
| 3141 | if (isPredicated(MI)) |
| 3142 | return true; |
| 3143 | MachineBasicBlock::const_iterator Next = &MI; |
| 3144 | ++Next; |
| 3145 | Register SrcReg, SrcReg2; |
| 3146 | int64_t CmpMask, CmpValue; |
| 3147 | bool IsThumb1; |
| 3148 | if (Next != MI.getParent()->end() && |
| 3149 | analyzeCompare(MI: *Next, SrcReg, SrcReg2, CmpMask, CmpValue) && |
| 3150 | isRedundantFlagInstr(CmpI: &*Next, SrcReg, SrcReg2, ImmValue: CmpValue, OI: &MI, IsThumb1)) |
| 3151 | return false; |
| 3152 | return true; |
| 3153 | } |
| 3154 | |
| 3155 | bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, |
| 3156 | Register Reg, |
| 3157 | MachineRegisterInfo *MRI) const { |
| 3158 | // Fold large immediates into add, sub, or, xor. |
| 3159 | unsigned DefOpc = DefMI.getOpcode(); |
| 3160 | if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm && |
| 3161 | DefOpc != ARM::tMOVi32imm) |
| 3162 | return false; |
| 3163 | if (!DefMI.getOperand(i: 1).isImm()) |
| 3164 | // Not a plain immediate; it could be e.g. a t2MOVi32imm of a global address (@xx).
| 3165 | return false; |
| 3166 | |
| 3167 | if (!MRI->hasOneNonDBGUse(RegNo: Reg)) |
| 3168 | return false; |
| 3169 | |
| 3170 | const MCInstrDesc &DefMCID = DefMI.getDesc(); |
| 3171 | if (DefMCID.hasOptionalDef()) { |
| 3172 | unsigned NumOps = DefMCID.getNumOperands(); |
| 3173 | const MachineOperand &MO = DefMI.getOperand(i: NumOps - 1); |
| 3174 | if (MO.getReg() == ARM::CPSR && !MO.isDead()) |
| 3175 | // If DefMI defines CPSR and it is not dead, it's obviously not safe |
| 3176 | // to delete DefMI. |
| 3177 | return false; |
| 3178 | } |
| 3179 | |
| 3180 | const MCInstrDesc &UseMCID = UseMI.getDesc(); |
| 3181 | if (UseMCID.hasOptionalDef()) { |
| 3182 | unsigned NumOps = UseMCID.getNumOperands(); |
| 3183 | if (UseMI.getOperand(i: NumOps - 1).getReg() == ARM::CPSR) |
| 3184 | // If the instruction sets the flag, do not attempt this optimization |
| 3185 | // since it may change the semantics of the code. |
| 3186 | return false; |
| 3187 | } |
| 3188 | |
| 3189 | unsigned UseOpc = UseMI.getOpcode(); |
| 3190 | unsigned NewUseOpc = 0; |
| 3191 | uint32_t ImmVal = (uint32_t)DefMI.getOperand(i: 1).getImm(); |
| 3192 | uint32_t SOImmValV1 = 0, SOImmValV2 = 0; |
| 3193 | bool Commute = false; |
| 3194 | switch (UseOpc) { |
| 3195 | default: return false; |
| 3196 | case ARM::SUBrr: |
| 3197 | case ARM::ADDrr: |
| 3198 | case ARM::ORRrr: |
| 3199 | case ARM::EORrr: |
| 3200 | case ARM::t2SUBrr: |
| 3201 | case ARM::t2ADDrr: |
| 3202 | case ARM::t2ORRrr: |
| 3203 | case ARM::t2EORrr: { |
| 3204 | Commute = UseMI.getOperand(i: 2).getReg() != Reg; |
| 3205 | switch (UseOpc) { |
| 3206 | default: break; |
| 3207 | case ARM::ADDrr: |
| 3208 | case ARM::SUBrr: |
| 3209 | if (UseOpc == ARM::SUBrr && Commute) |
| 3210 | return false; |
| 3211 | |
| 3212 | // ADD/SUB are special because they're essentially the same operation, so |
| 3213 | // we can handle a larger range of immediates. |
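|      | // E.g. (illustrative) an ADDrr whose other operand is MOVi32imm 0x00F000F0
|      | // can be rewritten as two ADDri instructions adding 0xF0 and 0x00F00000,
|      | // since both halves are valid shifter-operand immediates.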
| 3214 | if (ARM_AM::isSOImmTwoPartVal(V: ImmVal)) |
| 3215 | NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri; |
| 3216 | else if (ARM_AM::isSOImmTwoPartVal(V: -ImmVal)) { |
| 3217 | ImmVal = -ImmVal; |
| 3218 | NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri; |
| 3219 | } else |
| 3220 | return false; |
| 3221 | SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(V: ImmVal); |
| 3222 | SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(V: ImmVal); |
| 3223 | break; |
| 3224 | case ARM::ORRrr: |
| 3225 | case ARM::EORrr: |
| 3226 | if (!ARM_AM::isSOImmTwoPartVal(V: ImmVal)) |
| 3227 | return false; |
| 3228 | SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(V: ImmVal); |
| 3229 | SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(V: ImmVal); |
| 3230 | switch (UseOpc) { |
| 3231 | default: break; |
| 3232 | case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; |
| 3233 | case ARM::EORrr: NewUseOpc = ARM::EORri; break; |
| 3234 | } |
| 3235 | break; |
| 3236 | case ARM::t2ADDrr: |
| 3237 | case ARM::t2SUBrr: { |
| 3238 | if (UseOpc == ARM::t2SUBrr && Commute) |
| 3239 | return false; |
| 3240 | |
| 3241 | // ADD/SUB are special because they're essentially the same operation, so |
| 3242 | // we can handle a larger range of immediates. |
| 3243 | const bool ToSP = DefMI.getOperand(i: 0).getReg() == ARM::SP; |
| 3244 | const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri; |
| 3245 | const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri; |
| 3246 | if (ARM_AM::isT2SOImmTwoPartVal(Imm: ImmVal)) |
| 3247 | NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB; |
| 3248 | else if (ARM_AM::isT2SOImmTwoPartVal(Imm: -ImmVal)) { |
| 3249 | ImmVal = -ImmVal; |
| 3250 | NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD; |
| 3251 | } else |
| 3252 | return false; |
| 3253 | SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(Imm: ImmVal); |
| 3254 | SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(Imm: ImmVal); |
| 3255 | break; |
| 3256 | } |
| 3257 | case ARM::t2ORRrr: |
| 3258 | case ARM::t2EORrr: |
| 3259 | if (!ARM_AM::isT2SOImmTwoPartVal(Imm: ImmVal)) |
| 3260 | return false; |
| 3261 | SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(Imm: ImmVal); |
| 3262 | SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(Imm: ImmVal); |
| 3263 | switch (UseOpc) { |
| 3264 | default: break; |
| 3265 | case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; |
| 3266 | case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; |
| 3267 | } |
| 3268 | break; |
| 3269 | } |
| 3270 | } |
| 3271 | } |
| 3272 | |
| 3273 | unsigned OpIdx = Commute ? 2 : 1; |
| 3274 | Register Reg1 = UseMI.getOperand(i: OpIdx).getReg(); |
| 3275 | bool isKill = UseMI.getOperand(i: OpIdx).isKill(); |
| 3276 | const TargetRegisterClass *TRC = MRI->getRegClass(Reg); |
| 3277 | Register NewReg = MRI->createVirtualRegister(RegClass: TRC); |
| 3278 | BuildMI(BB&: *UseMI.getParent(), I&: UseMI, MIMD: UseMI.getDebugLoc(), MCID: get(Opcode: NewUseOpc), |
| 3279 | DestReg: NewReg) |
| 3280 | .addReg(RegNo: Reg1, flags: getKillRegState(B: isKill)) |
| 3281 | .addImm(Val: SOImmValV1) |
| 3282 | .add(MOs: predOps(Pred: ARMCC::AL)) |
| 3283 | .add(MO: condCodeOp()); |
| 3284 | UseMI.setDesc(get(Opcode: NewUseOpc)); |
| 3285 | UseMI.getOperand(i: 1).setReg(NewReg); |
| 3286 | UseMI.getOperand(i: 1).setIsKill(); |
| 3287 | UseMI.getOperand(i: 2).ChangeToImmediate(ImmVal: SOImmValV2); |
| 3288 | DefMI.eraseFromParent(); |
| 3289 | // FIXME: t2ADDrr should be split, as different rules apply when writing to SP,
| 3290 | // just as t2ADDri was split into [t2ADDri, t2ADDspImm].
| 3291 | // Then the below code will not be needed, as the input/output register |
| 3292 | // classes will be rgpr or gprSP. |
| 3293 | // For now, we fix the UseMI operand explicitly here: |
| 3294 | switch(NewUseOpc){ |
| 3295 | case ARM::t2ADDspImm: |
| 3296 | case ARM::t2SUBspImm: |
| 3297 | case ARM::t2ADDri: |
| 3298 | case ARM::t2SUBri: |
| 3299 | MRI->constrainRegClass(Reg: UseMI.getOperand(i: 0).getReg(), RC: TRC); |
| 3300 | } |
| 3301 | return true; |
| 3302 | } |
| 3303 | |
| 3304 | static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, |
| 3305 | const MachineInstr &MI) { |
| 3306 | switch (MI.getOpcode()) { |
| 3307 | default: { |
| 3308 | const MCInstrDesc &Desc = MI.getDesc(); |
| 3309 | int UOps = ItinData->getNumMicroOps(ItinClassIndx: Desc.getSchedClass()); |
| 3310 | assert(UOps >= 0 && "bad # UOps");
| 3311 | return UOps; |
| 3312 | } |
| 3313 | |
| 3314 | case ARM::LDRrs: |
| 3315 | case ARM::LDRBrs: |
| 3316 | case ARM::STRrs: |
| 3317 | case ARM::STRBrs: { |
| 3318 | unsigned ShOpVal = MI.getOperand(i: 3).getImm(); |
| 3319 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
| 3320 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
| 3321 | if (!isSub && |
| 3322 | (ShImm == 0 || |
| 3323 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
| 3324 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
| 3325 | return 1; |
| 3326 | return 2; |
| 3327 | } |
| 3328 | |
| 3329 | case ARM::LDRH: |
| 3330 | case ARM::STRH: { |
| 3331 | if (!MI.getOperand(i: 2).getReg()) |
| 3332 | return 1; |
| 3333 | |
| 3334 | unsigned ShOpVal = MI.getOperand(i: 3).getImm(); |
| 3335 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
| 3336 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
| 3337 | if (!isSub && |
| 3338 | (ShImm == 0 || |
| 3339 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
| 3340 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
| 3341 | return 1; |
| 3342 | return 2; |
| 3343 | } |
| 3344 | |
| 3345 | case ARM::LDRSB: |
| 3346 | case ARM::LDRSH: |
| 3347 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 3).getImm()) == ARM_AM::sub) ? 3 : 2; |
| 3348 | |
| 3349 | case ARM::LDRSB_POST: |
| 3350 | case ARM::LDRSH_POST: { |
| 3351 | Register Rt = MI.getOperand(i: 0).getReg(); |
| 3352 | Register Rm = MI.getOperand(i: 3).getReg(); |
| 3353 | return (Rt == Rm) ? 4 : 3; |
| 3354 | } |
| 3355 | |
| 3356 | case ARM::LDR_PRE_REG: |
| 3357 | case ARM::LDRB_PRE_REG: { |
| 3358 | Register Rt = MI.getOperand(i: 0).getReg(); |
| 3359 | Register Rm = MI.getOperand(i: 3).getReg(); |
| 3360 | if (Rt == Rm) |
| 3361 | return 3; |
| 3362 | unsigned ShOpVal = MI.getOperand(i: 4).getImm(); |
| 3363 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
| 3364 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
| 3365 | if (!isSub && |
| 3366 | (ShImm == 0 || |
| 3367 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
| 3368 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
| 3369 | return 2; |
| 3370 | return 3; |
| 3371 | } |
| 3372 | |
| 3373 | case ARM::STR_PRE_REG: |
| 3374 | case ARM::STRB_PRE_REG: { |
| 3375 | unsigned ShOpVal = MI.getOperand(i: 4).getImm(); |
| 3376 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
| 3377 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
| 3378 | if (!isSub && |
| 3379 | (ShImm == 0 || |
| 3380 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
| 3381 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
| 3382 | return 2; |
| 3383 | return 3; |
| 3384 | } |
| 3385 | |
| 3386 | case ARM::LDRH_PRE: |
| 3387 | case ARM::STRH_PRE: { |
| 3388 | Register Rt = MI.getOperand(i: 0).getReg(); |
| 3389 | Register Rm = MI.getOperand(i: 3).getReg(); |
| 3390 | if (!Rm) |
| 3391 | return 2; |
| 3392 | if (Rt == Rm) |
| 3393 | return 3; |
| 3394 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 3 : 2; |
| 3395 | } |
| 3396 | |
| 3397 | case ARM::LDR_POST_REG: |
| 3398 | case ARM::LDRB_POST_REG: |
| 3399 | case ARM::LDRH_POST: { |
| 3400 | Register Rt = MI.getOperand(i: 0).getReg(); |
| 3401 | Register Rm = MI.getOperand(i: 3).getReg(); |
| 3402 | return (Rt == Rm) ? 3 : 2; |
| 3403 | } |
| 3404 | |
| 3405 | case ARM::LDR_PRE_IMM: |
| 3406 | case ARM::LDRB_PRE_IMM: |
| 3407 | case ARM::LDR_POST_IMM: |
| 3408 | case ARM::LDRB_POST_IMM: |
| 3409 | case ARM::STRB_POST_IMM: |
| 3410 | case ARM::STRB_POST_REG: |
| 3411 | case ARM::STRB_PRE_IMM: |
| 3412 | case ARM::STRH_POST: |
| 3413 | case ARM::STR_POST_IMM: |
| 3414 | case ARM::STR_POST_REG: |
| 3415 | case ARM::STR_PRE_IMM: |
| 3416 | return 2; |
| 3417 | |
| 3418 | case ARM::LDRSB_PRE: |
| 3419 | case ARM::LDRSH_PRE: { |
| 3420 | Register Rm = MI.getOperand(i: 3).getReg(); |
| 3421 | if (Rm == 0) |
| 3422 | return 3; |
| 3423 | Register Rt = MI.getOperand(i: 0).getReg(); |
| 3424 | if (Rt == Rm) |
| 3425 | return 4; |
| 3426 | unsigned ShOpVal = MI.getOperand(i: 4).getImm(); |
| 3427 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
| 3428 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
| 3429 | if (!isSub && |
| 3430 | (ShImm == 0 || |
| 3431 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
| 3432 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
| 3433 | return 3; |
| 3434 | return 4; |
| 3435 | } |
| 3436 | |
| 3437 | case ARM::LDRD: { |
| 3438 | Register Rt = MI.getOperand(i: 0).getReg(); |
| 3439 | Register Rn = MI.getOperand(i: 2).getReg(); |
| 3440 | Register Rm = MI.getOperand(i: 3).getReg(); |
| 3441 | if (Rm) |
| 3442 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 4 |
| 3443 | : 3; |
| 3444 | return (Rt == Rn) ? 3 : 2; |
| 3445 | } |
| 3446 | |
| 3447 | case ARM::STRD: { |
| 3448 | Register Rm = MI.getOperand(i: 3).getReg(); |
| 3449 | if (Rm) |
| 3450 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 4 |
| 3451 | : 3; |
| 3452 | return 2; |
| 3453 | } |
| 3454 | |
| 3455 | case ARM::LDRD_POST: |
| 3456 | case ARM::t2LDRD_POST: |
| 3457 | return 3; |
| 3458 | |
| 3459 | case ARM::STRD_POST: |
| 3460 | case ARM::t2STRD_POST: |
| 3461 | return 4; |
| 3462 | |
| 3463 | case ARM::LDRD_PRE: { |
| 3464 | Register Rt = MI.getOperand(i: 0).getReg(); |
| 3465 | Register Rn = MI.getOperand(i: 3).getReg(); |
| 3466 | Register Rm = MI.getOperand(i: 4).getReg(); |
| 3467 | if (Rm) |
| 3468 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 5).getImm()) == ARM_AM::sub) ? 5 |
| 3469 | : 4; |
| 3470 | return (Rt == Rn) ? 4 : 3; |
| 3471 | } |
| 3472 | |
| 3473 | case ARM::t2LDRD_PRE: { |
| 3474 | Register Rt = MI.getOperand(i: 0).getReg(); |
| 3475 | Register Rn = MI.getOperand(i: 3).getReg(); |
| 3476 | return (Rt == Rn) ? 4 : 3; |
| 3477 | } |
| 3478 | |
| 3479 | case ARM::STRD_PRE: { |
| 3480 | Register Rm = MI.getOperand(i: 4).getReg(); |
| 3481 | if (Rm) |
| 3482 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 5).getImm()) == ARM_AM::sub) ? 5 |
| 3483 | : 4; |
| 3484 | return 3; |
| 3485 | } |
| 3486 | |
| 3487 | case ARM::t2STRD_PRE: |
| 3488 | return 3; |
| 3489 | |
| 3490 | case ARM::t2LDR_POST: |
| 3491 | case ARM::t2LDRB_POST: |
| 3492 | case ARM::t2LDRB_PRE: |
| 3493 | case ARM::t2LDRSBi12: |
| 3494 | case ARM::t2LDRSBi8: |
| 3495 | case ARM::t2LDRSBpci: |
| 3496 | case ARM::t2LDRSBs: |
| 3497 | case ARM::t2LDRH_POST: |
| 3498 | case ARM::t2LDRH_PRE: |
| 3499 | case ARM::t2LDRSBT: |
| 3500 | case ARM::t2LDRSB_POST: |
| 3501 | case ARM::t2LDRSB_PRE: |
| 3502 | case ARM::t2LDRSH_POST: |
| 3503 | case ARM::t2LDRSH_PRE: |
| 3504 | case ARM::t2LDRSHi12: |
| 3505 | case ARM::t2LDRSHi8: |
| 3506 | case ARM::t2LDRSHpci: |
| 3507 | case ARM::t2LDRSHs: |
| 3508 | return 2; |
| 3509 | |
| 3510 | case ARM::t2LDRDi8: { |
| 3511 | Register Rt = MI.getOperand(i: 0).getReg(); |
| 3512 | Register Rn = MI.getOperand(i: 2).getReg(); |
| 3513 | return (Rt == Rn) ? 3 : 2; |
| 3514 | } |
| 3515 | |
| 3516 | case ARM::t2STRB_POST: |
| 3517 | case ARM::t2STRB_PRE: |
| 3518 | case ARM::t2STRBs: |
| 3519 | case ARM::t2STRDi8: |
| 3520 | case ARM::t2STRH_POST: |
| 3521 | case ARM::t2STRH_PRE: |
| 3522 | case ARM::t2STRHs: |
| 3523 | case ARM::t2STR_POST: |
| 3524 | case ARM::t2STR_PRE: |
| 3525 | case ARM::t2STRs: |
| 3526 | return 2; |
| 3527 | } |
| 3528 | } |
| 3529 | |
| 3530 | // Return the number of 32-bit words loaded by LDM or stored by STM. If this |
| 3531 | // can't be easily determined, return 0 (missing MachineMemOperand).
| 3532 | // |
| 3533 | // FIXME: The current MachineInstr design does not support relying on machine |
| 3534 | // mem operands to determine the width of a memory access. Instead, we expect |
| 3535 | // the target to provide this information based on the instruction opcode and |
| 3536 | // operands. However, using MachineMemOperand is the best solution now for |
| 3537 | // two reasons: |
| 3538 | // |
| 3539 | // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI |
| 3540 | // operands. This is much more dangerous than using the MachineMemOperand |
| 3541 | // sizes because CodeGen passes can insert/remove optional machine operands. In |
| 3542 | // fact, it's totally incorrect for preRA passes and appears to be wrong for |
| 3543 | // postRA passes as well. |
| 3544 | // |
| 3545 | // 2) getNumLDMAddresses is only used by the scheduling machine model and any |
| 3546 | // machine model that calls this should handle the unknown (zero size) case. |
| 3547 | // |
| 3548 | // Long term, we should require a target hook that verifies MachineMemOperand |
| 3549 | // sizes during MC lowering. That target hook should be local to MC lowering |
| 3550 | // because we can't ensure that it is aware of other MI forms. Doing this will |
| 3551 | // ensure that MachineMemOperands are correctly propagated through all passes. |
| 3552 | unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const { |
| 3553 | unsigned Size = 0; |
| 3554 | for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), |
| 3555 | E = MI.memoperands_end(); |
| 3556 | I != E; ++I) { |
| 3557 | Size += (*I)->getSize().getValue(); |
| 3558 | } |
| 3559 | // FIXME: The scheduler currently can't handle values larger than 16. But |
| 3560 | // the values can actually go up to 32 for floating-point load/store |
| 3561 | // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory |
| 3562 | // operations isn't right; we could end up with "extra" memory operands for |
| 3563 | // various reasons, like tail merge merging two memory operations. |
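|      | // E.g. an LDM covered by a single 16-byte MachineMemOperand reports
|      | // 16 / 4 = 4 addresses; with no memory operands attached this returns 0.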
| 3564 | return std::min(a: Size / 4, b: 16U); |
| 3565 | } |
| 3566 | |
| 3567 | static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
| 3568 | unsigned NumRegs) { |
| 3569 | unsigned UOps = 1 + NumRegs; // 1 for address computation. |
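|      | // E.g. a plain LDMIA of 3 registers is 1 + 3 = 4 uops here; the _UPD and
|      | // _RET forms below add one or two more for the writeback / PC write.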
| 3570 | switch (Opc) { |
| 3571 | default: |
| 3572 | break; |
| 3573 | case ARM::VLDMDIA_UPD: |
| 3574 | case ARM::VLDMDDB_UPD: |
| 3575 | case ARM::VLDMSIA_UPD: |
| 3576 | case ARM::VLDMSDB_UPD: |
| 3577 | case ARM::VSTMDIA_UPD: |
| 3578 | case ARM::VSTMDDB_UPD: |
| 3579 | case ARM::VSTMSIA_UPD: |
| 3580 | case ARM::VSTMSDB_UPD: |
| 3581 | case ARM::LDMIA_UPD: |
| 3582 | case ARM::LDMDA_UPD: |
| 3583 | case ARM::LDMDB_UPD: |
| 3584 | case ARM::LDMIB_UPD: |
| 3585 | case ARM::STMIA_UPD: |
| 3586 | case ARM::STMDA_UPD: |
| 3587 | case ARM::STMDB_UPD: |
| 3588 | case ARM::STMIB_UPD: |
| 3589 | case ARM::tLDMIA_UPD: |
| 3590 | case ARM::tSTMIA_UPD: |
| 3591 | case ARM::t2LDMIA_UPD: |
| 3592 | case ARM::t2LDMDB_UPD: |
| 3593 | case ARM::t2STMIA_UPD: |
| 3594 | case ARM::t2STMDB_UPD: |
| 3595 | ++UOps; // One for base register writeback. |
| 3596 | break; |
| 3597 | case ARM::LDMIA_RET: |
| 3598 | case ARM::tPOP_RET: |
| 3599 | case ARM::t2LDMIA_RET: |
| 3600 | UOps += 2; // One for base reg wb, one for write to pc. |
| 3601 | break; |
| 3602 | } |
| 3603 | return UOps; |
| 3604 | } |
| 3605 | |
| 3606 | unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, |
| 3607 | const MachineInstr &MI) const { |
| 3608 | if (!ItinData || ItinData->isEmpty()) |
| 3609 | return 1; |
| 3610 | |
| 3611 | const MCInstrDesc &Desc = MI.getDesc(); |
| 3612 | unsigned Class = Desc.getSchedClass(); |
| 3613 | int ItinUOps = ItinData->getNumMicroOps(ItinClassIndx: Class); |
| 3614 | if (ItinUOps >= 0) { |
| 3615 | if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore())) |
| 3616 | return getNumMicroOpsSwiftLdSt(ItinData, MI); |
| 3617 | |
| 3618 | return ItinUOps; |
| 3619 | } |
| 3620 | |
| 3621 | unsigned Opc = MI.getOpcode(); |
| 3622 | switch (Opc) { |
| 3623 | default: |
| 3624 | llvm_unreachable("Unexpected multi-uops instruction!");
| 3625 | case ARM::VLDMQIA: |
| 3626 | case ARM::VSTMQIA: |
| 3627 | return 2; |
| 3628 | |
| 3629 | // The number of uOps for load / store multiple is determined by the number
| 3630 | // of registers.
| 3631 | // |
| 3632 | // On Cortex-A8, each pair of register loads / stores can be scheduled on the |
| 3633 | // same cycle. The scheduling for the first load / store must be done |
| 3634 | // separately by assuming the address is not 64-bit aligned. |
| 3635 | // |
| 3636 | // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address |
| 3637 | // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON |
| 3638 | // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1. |
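|      | // E.g. a VLDMDIA of 5 D registers costs 5/2 + 5%2 + 1 = 4 uops under the
|      | // VFP / NEON formula above.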
| 3639 | case ARM::VLDMDIA: |
| 3640 | case ARM::VLDMDIA_UPD: |
| 3641 | case ARM::VLDMDDB_UPD: |
| 3642 | case ARM::VLDMSIA: |
| 3643 | case ARM::VLDMSIA_UPD: |
| 3644 | case ARM::VLDMSDB_UPD: |
| 3645 | case ARM::VSTMDIA: |
| 3646 | case ARM::VSTMDIA_UPD: |
| 3647 | case ARM::VSTMDDB_UPD: |
| 3648 | case ARM::VSTMSIA: |
| 3649 | case ARM::VSTMSIA_UPD: |
| 3650 | case ARM::VSTMSDB_UPD: { |
| 3651 | unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands(); |
| 3652 | return (NumRegs / 2) + (NumRegs % 2) + 1; |
| 3653 | } |
| 3654 | |
| 3655 | case ARM::LDMIA_RET: |
| 3656 | case ARM::LDMIA: |
| 3657 | case ARM::LDMDA: |
| 3658 | case ARM::LDMDB: |
| 3659 | case ARM::LDMIB: |
| 3660 | case ARM::LDMIA_UPD: |
| 3661 | case ARM::LDMDA_UPD: |
| 3662 | case ARM::LDMDB_UPD: |
| 3663 | case ARM::LDMIB_UPD: |
| 3664 | case ARM::STMIA: |
| 3665 | case ARM::STMDA: |
| 3666 | case ARM::STMDB: |
| 3667 | case ARM::STMIB: |
| 3668 | case ARM::STMIA_UPD: |
| 3669 | case ARM::STMDA_UPD: |
| 3670 | case ARM::STMDB_UPD: |
| 3671 | case ARM::STMIB_UPD: |
| 3672 | case ARM::tLDMIA: |
| 3673 | case ARM::tLDMIA_UPD: |
| 3674 | case ARM::tSTMIA_UPD: |
| 3675 | case ARM::tPOP_RET: |
| 3676 | case ARM::tPOP: |
| 3677 | case ARM::tPUSH: |
| 3678 | case ARM::t2LDMIA_RET: |
| 3679 | case ARM::t2LDMIA: |
| 3680 | case ARM::t2LDMDB: |
| 3681 | case ARM::t2LDMIA_UPD: |
| 3682 | case ARM::t2LDMDB_UPD: |
| 3683 | case ARM::t2STMIA: |
| 3684 | case ARM::t2STMDB: |
| 3685 | case ARM::t2STMIA_UPD: |
| 3686 | case ARM::t2STMDB_UPD: { |
| 3687 | unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1; |
| 3688 | switch (Subtarget.getLdStMultipleTiming()) { |
| 3689 | case ARMSubtarget::SingleIssuePlusExtras: |
| 3690 | return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs); |
| 3691 | case ARMSubtarget::SingleIssue: |
| 3692 | // Assume the worst. |
| 3693 | return NumRegs; |
| 3694 | case ARMSubtarget::DoubleIssue: { |
| 3695 | if (NumRegs < 4) |
| 3696 | return 2; |
| 3697 | // 4 registers would be issued: 2, 2. |
| 3698 | // 5 registers would be issued: 2, 2, 1. |
| 3699 | unsigned UOps = (NumRegs / 2); |
| 3700 | if (NumRegs % 2) |
| 3701 | ++UOps; |
| 3702 | return UOps; |
| 3703 | } |
| 3704 | case ARMSubtarget::DoubleIssueCheckUnalignedAccess: { |
| 3705 | unsigned UOps = (NumRegs / 2); |
| 3706 | // If there is an odd number of registers or if it's not 64-bit aligned,
| 3707 | // then it takes an extra AGU (Address Generation Unit) cycle. |
| 3708 | if ((NumRegs % 2) || !MI.hasOneMemOperand() || |
| 3709 | (*MI.memoperands_begin())->getAlign() < Align(8)) |
| 3710 | ++UOps; |
| 3711 | return UOps; |
| 3712 | } |
| 3713 | } |
| 3714 | } |
| 3715 | } |
| 3716 | llvm_unreachable("Didn't find the number of microops");
| 3717 | } |
| 3718 | |
| 3719 | std::optional<unsigned> |
| 3720 | ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, |
| 3721 | const MCInstrDesc &DefMCID, unsigned DefClass, |
| 3722 | unsigned DefIdx, unsigned DefAlign) const { |
| 3723 | int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; |
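|      | // RegNo approximates the position of this def within the variadic register
|      | // list (operands past the fixed ones); non-positive values mean the def is
|      | // one of the fixed operands, i.e. the address writeback handled below.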
| 3724 | if (RegNo <= 0) |
| 3725 | // Def is the address writeback. |
| 3726 | return ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx); |
| 3727 | |
| 3728 | unsigned DefCycle; |
| 3729 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
| 3730 | // (regno / 2) + (regno % 2) + 1 |
| 3731 | DefCycle = RegNo / 2 + 1; |
| 3732 | if (RegNo % 2) |
| 3733 | ++DefCycle; |
| 3734 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
| 3735 | DefCycle = RegNo; |
| 3736 | bool isSLoad = false; |
| 3737 | |
| 3738 | switch (DefMCID.getOpcode()) { |
| 3739 | default: break; |
| 3740 | case ARM::VLDMSIA: |
| 3741 | case ARM::VLDMSIA_UPD: |
| 3742 | case ARM::VLDMSDB_UPD: |
| 3743 | isSLoad = true; |
| 3744 | break; |
| 3745 | } |
| 3746 | |
| 3747 | // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
| 3748 | // then it takes an extra cycle. |
| 3749 | if ((isSLoad && (RegNo % 2)) || DefAlign < 8) |
| 3750 | ++DefCycle; |
| 3751 | } else { |
| 3752 | // Assume the worst. |
| 3753 | DefCycle = RegNo + 2; |
| 3754 | } |
| 3755 | |
| 3756 | return DefCycle; |
| 3757 | } |
| 3758 | |
| 3759 | std::optional<unsigned> |
| 3760 | ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, |
| 3761 | const MCInstrDesc &DefMCID, unsigned DefClass, |
| 3762 | unsigned DefIdx, unsigned DefAlign) const { |
| 3763 | int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; |
| 3764 | if (RegNo <= 0) |
| 3765 | // Def is the address writeback. |
| 3766 | return ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx); |
| 3767 | |
| 3768 | unsigned DefCycle; |
| 3769 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
| 3770 | // 4 registers would be issued: 1, 2, 1. |
| 3771 | // 5 registers would be issued: 1, 2, 2. |
| 3772 | DefCycle = RegNo / 2; |
| 3773 | if (DefCycle < 1) |
| 3774 | DefCycle = 1; |
| 3775 | // Result latency is issue cycle + 2: E2. |
| 3776 | DefCycle += 2; |
| 3777 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
| 3778 | DefCycle = (RegNo / 2); |
| 3779 | // If there is an odd number of registers or if it's not 64-bit aligned,
| 3780 | // then it takes an extra AGU (Address Generation Unit) cycle. |
| 3781 | if ((RegNo % 2) || DefAlign < 8) |
| 3782 | ++DefCycle; |
| 3783 | // Result latency is AGU cycles + 2. |
| 3784 | DefCycle += 2; |
| 3785 | } else { |
| 3786 | // Assume the worst. |
| 3787 | DefCycle = RegNo + 2; |
| 3788 | } |
| 3789 | |
| 3790 | return DefCycle; |
| 3791 | } |
| 3792 | |
| 3793 | std::optional<unsigned> |
| 3794 | ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, |
| 3795 | const MCInstrDesc &UseMCID, unsigned UseClass, |
| 3796 | unsigned UseIdx, unsigned UseAlign) const { |
| 3797 | int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; |
| 3798 | if (RegNo <= 0) |
| 3799 | return ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx); |
| 3800 | |
| 3801 | unsigned UseCycle; |
| 3802 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
| 3803 | // (regno / 2) + (regno % 2) + 1 |
| 3804 | UseCycle = RegNo / 2 + 1; |
| 3805 | if (RegNo % 2) |
| 3806 | ++UseCycle; |
| 3807 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
| 3808 | UseCycle = RegNo; |
| 3809 | bool isSStore = false; |
| 3810 | |
| 3811 | switch (UseMCID.getOpcode()) { |
| 3812 | default: break; |
| 3813 | case ARM::VSTMSIA: |
| 3814 | case ARM::VSTMSIA_UPD: |
| 3815 | case ARM::VSTMSDB_UPD: |
| 3816 | isSStore = true; |
| 3817 | break; |
| 3818 | } |
| 3819 | |
| 3820 | // If there is an odd number of 'S' registers or if it's not 64-bit aligned,
| 3821 | // then it takes an extra cycle. |
| 3822 | if ((isSStore && (RegNo % 2)) || UseAlign < 8) |
| 3823 | ++UseCycle; |
| 3824 | } else { |
| 3825 | // Assume the worst. |
| 3826 | UseCycle = RegNo + 2; |
| 3827 | } |
| 3828 | |
| 3829 | return UseCycle; |
| 3830 | } |
| 3831 | |
| 3832 | std::optional<unsigned> |
| 3833 | ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, |
| 3834 | const MCInstrDesc &UseMCID, unsigned UseClass, |
| 3835 | unsigned UseIdx, unsigned UseAlign) const { |
| 3836 | int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; |
| 3837 | if (RegNo <= 0) |
| 3838 | return ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx); |
| 3839 | |
| 3840 | unsigned UseCycle; |
| 3841 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
| 3842 | UseCycle = RegNo / 2; |
| 3843 | if (UseCycle < 2) |
| 3844 | UseCycle = 2; |
| 3845 | // Read in E3. |
| 3846 | UseCycle += 2; |
| 3847 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
| 3848 | UseCycle = (RegNo / 2); |
| 3849 | // If there is an odd number of registers or if it's not 64-bit aligned,
| 3850 | // then it takes an extra AGU (Address Generation Unit) cycle. |
| 3851 | if ((RegNo % 2) || UseAlign < 8) |
| 3852 | ++UseCycle; |
| 3853 | } else { |
| 3854 | // Assume the worst. |
| 3855 | UseCycle = 1; |
| 3856 | } |
| 3857 | return UseCycle; |
| 3858 | } |
| 3859 | |
| 3860 | std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency( |
| 3861 | const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID, |
| 3862 | unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID, |
| 3863 | unsigned UseIdx, unsigned UseAlign) const { |
| 3864 | unsigned DefClass = DefMCID.getSchedClass(); |
| 3865 | unsigned UseClass = UseMCID.getSchedClass(); |
| 3866 | |
| 3867 | if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands()) |
| 3868 | return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); |
| 3869 | |
| 3870 | // This may be a def / use of a variable_ops instruction, the operand |
| 3871 | // latency might be determinable dynamically. Let the target try to |
| 3872 | // figure it out. |
| 3873 | std::optional<unsigned> DefCycle; |
| 3874 | bool LdmBypass = false; |
| 3875 | switch (DefMCID.getOpcode()) { |
| 3876 | default: |
| 3877 | DefCycle = ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx); |
| 3878 | break; |
| 3879 | |
| 3880 | case ARM::VLDMDIA: |
| 3881 | case ARM::VLDMDIA_UPD: |
| 3882 | case ARM::VLDMDDB_UPD: |
| 3883 | case ARM::VLDMSIA: |
| 3884 | case ARM::VLDMSIA_UPD: |
| 3885 | case ARM::VLDMSDB_UPD: |
| 3886 | DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); |
| 3887 | break; |
| 3888 | |
| 3889 | case ARM::LDMIA_RET: |
| 3890 | case ARM::LDMIA: |
| 3891 | case ARM::LDMDA: |
| 3892 | case ARM::LDMDB: |
| 3893 | case ARM::LDMIB: |
| 3894 | case ARM::LDMIA_UPD: |
| 3895 | case ARM::LDMDA_UPD: |
| 3896 | case ARM::LDMDB_UPD: |
| 3897 | case ARM::LDMIB_UPD: |
| 3898 | case ARM::tLDMIA: |
| 3899 | case ARM::tLDMIA_UPD: |
| 3900 | case ARM::tPUSH: |
| 3901 | case ARM::t2LDMIA_RET: |
| 3902 | case ARM::t2LDMIA: |
| 3903 | case ARM::t2LDMDB: |
| 3904 | case ARM::t2LDMIA_UPD: |
| 3905 | case ARM::t2LDMDB_UPD: |
| 3906 | LdmBypass = true; |
| 3907 | DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); |
| 3908 | break; |
| 3909 | } |
| 3910 | |
| 3911 | if (!DefCycle) |
| 3912 | // We can't seem to determine the result latency of the def, assume it's 2. |
| 3913 | DefCycle = 2; |
| 3914 | |
| 3915 | std::optional<unsigned> UseCycle; |
| 3916 | switch (UseMCID.getOpcode()) { |
| 3917 | default: |
| 3918 | UseCycle = ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx); |
| 3919 | break; |
| 3920 | |
| 3921 | case ARM::VSTMDIA: |
| 3922 | case ARM::VSTMDIA_UPD: |
| 3923 | case ARM::VSTMDDB_UPD: |
| 3924 | case ARM::VSTMSIA: |
| 3925 | case ARM::VSTMSIA_UPD: |
| 3926 | case ARM::VSTMSDB_UPD: |
| 3927 | UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); |
| 3928 | break; |
| 3929 | |
| 3930 | case ARM::STMIA: |
| 3931 | case ARM::STMDA: |
| 3932 | case ARM::STMDB: |
| 3933 | case ARM::STMIB: |
| 3934 | case ARM::STMIA_UPD: |
| 3935 | case ARM::STMDA_UPD: |
| 3936 | case ARM::STMDB_UPD: |
| 3937 | case ARM::STMIB_UPD: |
| 3938 | case ARM::tSTMIA_UPD: |
| 3939 | case ARM::tPOP_RET: |
| 3940 | case ARM::tPOP: |
| 3941 | case ARM::t2STMIA: |
| 3942 | case ARM::t2STMDB: |
| 3943 | case ARM::t2STMIA_UPD: |
| 3944 | case ARM::t2STMDB_UPD: |
| 3945 | UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); |
| 3946 | break; |
| 3947 | } |
| 3948 | |
| 3949 | if (!UseCycle) |
| 3950 | // Assume it's read in the first stage. |
| 3951 | UseCycle = 1; |
| 3952 | |
| 3953 | if (UseCycle > *DefCycle + 1) |
| 3954 | return std::nullopt; |
| 3955 | |
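|      |   // The def's result is ready DefCycle stages into the pipeline and the use
|      |   // reads it UseCycle stages in, so the operand latency is the gap plus one.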
| 3956 | UseCycle = *DefCycle - *UseCycle + 1; |
| 3957 | if (UseCycle > 0u) { |
| 3958 | if (LdmBypass) { |
| 3959 | // It's a variable_ops instruction so we can't use DefIdx here. Just use |
| 3960 | // first def operand. |
| 3961 |       if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
| 3962 | UseClass, UseIdx)) |
| 3963 | UseCycle = *UseCycle - 1; |
| 3964 | } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, |
| 3965 | UseClass, UseIdx)) { |
| 3966 | UseCycle = *UseCycle - 1; |
| 3967 | } |
| 3968 | } |
| 3969 | |
| 3970 | return UseCycle; |
| 3971 | } |
| 3972 | |
| 3973 | static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, |
| 3974 | const MachineInstr *MI, unsigned Reg, |
| 3975 | unsigned &DefIdx, unsigned &Dist) { |
| 3976 | Dist = 0; |
| 3977 | |
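|      |   // MI is the bundle header; position II on the last instruction inside the
|      |   // bundle and walk backwards looking for the instruction that defines Reg.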
| 3978 | MachineBasicBlock::const_iterator I = MI; ++I; |
| 3979 |   MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());
| 3980 |   assert(II->isInsideBundle() && "Empty bundle?");
| 3981 | |
| 3982 | int Idx = -1; |
| 3983 | while (II->isInsideBundle()) { |
| 3984 |     Idx = II->findRegisterDefOperandIdx(Reg, TRI, false, true);
| 3985 | if (Idx != -1) |
| 3986 | break; |
| 3987 | --II; |
| 3988 | ++Dist; |
| 3989 | } |
| 3990 | |
| 3991 |   assert(Idx != -1 && "Cannot find bundled definition!");
| 3992 | DefIdx = Idx; |
| 3993 | return &*II; |
| 3994 | } |
| 3995 | |
| 3996 | static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, |
| 3997 | const MachineInstr &MI, unsigned Reg, |
| 3998 | unsigned &UseIdx, unsigned &Dist) { |
| 3999 | Dist = 0; |
| 4000 | |
| 4001 | MachineBasicBlock::const_instr_iterator II = ++MI.getIterator(); |
| 4002 |   assert(II->isInsideBundle() && "Empty bundle?");
| 4003 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
| 4004 | |
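|      |   // Walk forward through the bundle looking for the first instruction that
|      |   // uses Reg; Dist counts the non-IT instructions skipped along the way.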
| 4005 | // FIXME: This doesn't properly handle multiple uses. |
| 4006 | int Idx = -1; |
| 4007 | while (II != E && II->isInsideBundle()) { |
| 4008 |     Idx = II->findRegisterUseOperandIdx(Reg, TRI, false);
| 4009 | if (Idx != -1) |
| 4010 | break; |
| 4011 | if (II->getOpcode() != ARM::t2IT) |
| 4012 | ++Dist; |
| 4013 | ++II; |
| 4014 | } |
| 4015 | |
| 4016 | if (Idx == -1) { |
| 4017 | Dist = 0; |
| 4018 | return nullptr; |
| 4019 | } |
| 4020 | |
| 4021 | UseIdx = Idx; |
| 4022 | return &*II; |
| 4023 | } |
| 4024 | |
| 4025 | /// Return the number of cycles to add to (or subtract from) the static |
| 4026 | /// itinerary based on the def opcode and alignment. The caller will ensure that |
| 4027 | /// adjusted latency is at least one cycle. |
| 4028 | static int adjustDefLatency(const ARMSubtarget &Subtarget, |
| 4029 | const MachineInstr &DefMI, |
| 4030 | const MCInstrDesc &DefMCID, unsigned DefAlign) { |
| 4031 | int Adjust = 0; |
| 4032 | if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) { |
| 4033 | // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] |
| 4034 | // variants are one cycle cheaper. |
| 4035 | switch (DefMCID.getOpcode()) { |
| 4036 | default: break; |
| 4037 | case ARM::LDRrs: |
| 4038 | case ARM::LDRBrs: { |
| 4039 |       unsigned ShOpVal = DefMI.getOperand(3).getImm();
| 4040 |       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
| 4041 |       if (ShImm == 0 ||
| 4042 |           (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
| 4043 | --Adjust; |
| 4044 | break; |
| 4045 | } |
| 4046 | case ARM::t2LDRs: |
| 4047 | case ARM::t2LDRBs: |
| 4048 | case ARM::t2LDRHs: |
| 4049 | case ARM::t2LDRSHs: { |
| 4050 | // Thumb2 mode: lsl only. |
| 4051 |       unsigned ShAmt = DefMI.getOperand(3).getImm();
| 4052 | if (ShAmt == 0 || ShAmt == 2) |
| 4053 | --Adjust; |
| 4054 | break; |
| 4055 | } |
| 4056 | } |
| 4057 | } else if (Subtarget.isSwift()) { |
| 4058 | // FIXME: Properly handle all of the latency adjustments for address |
| 4059 | // writeback. |
| 4060 | switch (DefMCID.getOpcode()) { |
| 4061 | default: break; |
| 4062 | case ARM::LDRrs: |
| 4063 | case ARM::LDRBrs: { |
| 4064 |       unsigned ShOpVal = DefMI.getOperand(3).getImm();
| 4065 |       bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
| 4066 |       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
| 4067 |       if (!isSub &&
| 4068 |           (ShImm == 0 ||
| 4069 |            ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
| 4070 |             ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
| 4071 |         Adjust -= 2;
| 4072 |       else if (!isSub &&
| 4073 |                ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
| 4074 | --Adjust; |
| 4075 | break; |
| 4076 | } |
| 4077 | case ARM::t2LDRs: |
| 4078 | case ARM::t2LDRBs: |
| 4079 | case ARM::t2LDRHs: |
| 4080 | case ARM::t2LDRSHs: { |
| 4081 | // Thumb2 mode: lsl only. |
| 4082 |       unsigned ShAmt = DefMI.getOperand(3).getImm();
| 4083 | if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3) |
| 4084 | Adjust -= 2; |
| 4085 | break; |
| 4086 | } |
| 4087 | } |
| 4088 | } |
| 4089 | |
| 4090 | if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) { |
| 4091 | switch (DefMCID.getOpcode()) { |
| 4092 | default: break; |
| 4093 | case ARM::VLD1q8: |
| 4094 | case ARM::VLD1q16: |
| 4095 | case ARM::VLD1q32: |
| 4096 | case ARM::VLD1q64: |
| 4097 | case ARM::VLD1q8wb_fixed: |
| 4098 | case ARM::VLD1q16wb_fixed: |
| 4099 | case ARM::VLD1q32wb_fixed: |
| 4100 | case ARM::VLD1q64wb_fixed: |
| 4101 | case ARM::VLD1q8wb_register: |
| 4102 | case ARM::VLD1q16wb_register: |
| 4103 | case ARM::VLD1q32wb_register: |
| 4104 | case ARM::VLD1q64wb_register: |
| 4105 | case ARM::VLD2d8: |
| 4106 | case ARM::VLD2d16: |
| 4107 | case ARM::VLD2d32: |
| 4108 | case ARM::VLD2q8: |
| 4109 | case ARM::VLD2q16: |
| 4110 | case ARM::VLD2q32: |
| 4111 | case ARM::VLD2d8wb_fixed: |
| 4112 | case ARM::VLD2d16wb_fixed: |
| 4113 | case ARM::VLD2d32wb_fixed: |
| 4114 | case ARM::VLD2q8wb_fixed: |
| 4115 | case ARM::VLD2q16wb_fixed: |
| 4116 | case ARM::VLD2q32wb_fixed: |
| 4117 | case ARM::VLD2d8wb_register: |
| 4118 | case ARM::VLD2d16wb_register: |
| 4119 | case ARM::VLD2d32wb_register: |
| 4120 | case ARM::VLD2q8wb_register: |
| 4121 | case ARM::VLD2q16wb_register: |
| 4122 | case ARM::VLD2q32wb_register: |
| 4123 | case ARM::VLD3d8: |
| 4124 | case ARM::VLD3d16: |
| 4125 | case ARM::VLD3d32: |
| 4126 | case ARM::VLD1d64T: |
| 4127 | case ARM::VLD3d8_UPD: |
| 4128 | case ARM::VLD3d16_UPD: |
| 4129 | case ARM::VLD3d32_UPD: |
| 4130 | case ARM::VLD1d64Twb_fixed: |
| 4131 | case ARM::VLD1d64Twb_register: |
| 4132 | case ARM::VLD3q8_UPD: |
| 4133 | case ARM::VLD3q16_UPD: |
| 4134 | case ARM::VLD3q32_UPD: |
| 4135 | case ARM::VLD4d8: |
| 4136 | case ARM::VLD4d16: |
| 4137 | case ARM::VLD4d32: |
| 4138 | case ARM::VLD1d64Q: |
| 4139 | case ARM::VLD4d8_UPD: |
| 4140 | case ARM::VLD4d16_UPD: |
| 4141 | case ARM::VLD4d32_UPD: |
| 4142 | case ARM::VLD1d64Qwb_fixed: |
| 4143 | case ARM::VLD1d64Qwb_register: |
| 4144 | case ARM::VLD4q8_UPD: |
| 4145 | case ARM::VLD4q16_UPD: |
| 4146 | case ARM::VLD4q32_UPD: |
| 4147 | case ARM::VLD1DUPq8: |
| 4148 | case ARM::VLD1DUPq16: |
| 4149 | case ARM::VLD1DUPq32: |
| 4150 | case ARM::VLD1DUPq8wb_fixed: |
| 4151 | case ARM::VLD1DUPq16wb_fixed: |
| 4152 | case ARM::VLD1DUPq32wb_fixed: |
| 4153 | case ARM::VLD1DUPq8wb_register: |
| 4154 | case ARM::VLD1DUPq16wb_register: |
| 4155 | case ARM::VLD1DUPq32wb_register: |
| 4156 | case ARM::VLD2DUPd8: |
| 4157 | case ARM::VLD2DUPd16: |
| 4158 | case ARM::VLD2DUPd32: |
| 4159 | case ARM::VLD2DUPd8wb_fixed: |
| 4160 | case ARM::VLD2DUPd16wb_fixed: |
| 4161 | case ARM::VLD2DUPd32wb_fixed: |
| 4162 | case ARM::VLD2DUPd8wb_register: |
| 4163 | case ARM::VLD2DUPd16wb_register: |
| 4164 | case ARM::VLD2DUPd32wb_register: |
| 4165 | case ARM::VLD4DUPd8: |
| 4166 | case ARM::VLD4DUPd16: |
| 4167 | case ARM::VLD4DUPd32: |
| 4168 | case ARM::VLD4DUPd8_UPD: |
| 4169 | case ARM::VLD4DUPd16_UPD: |
| 4170 | case ARM::VLD4DUPd32_UPD: |
| 4171 | case ARM::VLD1LNd8: |
| 4172 | case ARM::VLD1LNd16: |
| 4173 | case ARM::VLD1LNd32: |
| 4174 | case ARM::VLD1LNd8_UPD: |
| 4175 | case ARM::VLD1LNd16_UPD: |
| 4176 | case ARM::VLD1LNd32_UPD: |
| 4177 | case ARM::VLD2LNd8: |
| 4178 | case ARM::VLD2LNd16: |
| 4179 | case ARM::VLD2LNd32: |
| 4180 | case ARM::VLD2LNq16: |
| 4181 | case ARM::VLD2LNq32: |
| 4182 | case ARM::VLD2LNd8_UPD: |
| 4183 | case ARM::VLD2LNd16_UPD: |
| 4184 | case ARM::VLD2LNd32_UPD: |
| 4185 | case ARM::VLD2LNq16_UPD: |
| 4186 | case ARM::VLD2LNq32_UPD: |
| 4187 | case ARM::VLD4LNd8: |
| 4188 | case ARM::VLD4LNd16: |
| 4189 | case ARM::VLD4LNd32: |
| 4190 | case ARM::VLD4LNq16: |
| 4191 | case ARM::VLD4LNq32: |
| 4192 | case ARM::VLD4LNd8_UPD: |
| 4193 | case ARM::VLD4LNd16_UPD: |
| 4194 | case ARM::VLD4LNd32_UPD: |
| 4195 | case ARM::VLD4LNq16_UPD: |
| 4196 | case ARM::VLD4LNq32_UPD: |
| 4197 |       // If the address is not 64-bit aligned, the latencies of these
| 4198 |       // instructions increase by one.
| 4199 | ++Adjust; |
| 4200 | break; |
| 4201 | } |
| 4202 | } |
| 4203 | return Adjust; |
| 4204 | } |
| 4205 | |
| 4206 | std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency( |
| 4207 | const InstrItineraryData *ItinData, const MachineInstr &DefMI, |
| 4208 | unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const { |
| 4209 | // No operand latency. The caller may fall back to getInstrLatency. |
| 4210 | if (!ItinData || ItinData->isEmpty()) |
| 4211 | return std::nullopt; |
| 4212 | |
| 4213 |   const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
| 4214 | Register Reg = DefMO.getReg(); |
| 4215 | |
| 4216 | const MachineInstr *ResolvedDefMI = &DefMI; |
| 4217 | unsigned DefAdj = 0; |
| 4218 | if (DefMI.isBundle()) |
| 4219 | ResolvedDefMI = |
| 4220 |         getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
| 4221 | if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() || |
| 4222 | ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) { |
| 4223 | return 1; |
| 4224 | } |
| 4225 | |
| 4226 | const MachineInstr *ResolvedUseMI = &UseMI; |
| 4227 | unsigned UseAdj = 0; |
| 4228 | if (UseMI.isBundle()) { |
| 4229 | ResolvedUseMI = |
| 4230 |         getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
| 4231 | if (!ResolvedUseMI) |
| 4232 | return std::nullopt; |
| 4233 | } |
| 4234 | |
| 4235 | return getOperandLatencyImpl( |
| 4236 |       ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
| 4237 |       Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
| 4238 | } |
| 4239 | |
| 4240 | std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl( |
| 4241 | const InstrItineraryData *ItinData, const MachineInstr &DefMI, |
| 4242 | unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj, |
| 4243 | const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI, |
| 4244 | unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const { |
| 4245 | if (Reg == ARM::CPSR) { |
| 4246 | if (DefMI.getOpcode() == ARM::FMSTAT) { |
| 4247 | // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) |
| 4248 | return Subtarget.isLikeA9() ? 1 : 20; |
| 4249 | } |
| 4250 | |
| 4251 | // CPSR set and branch can be paired in the same cycle. |
| 4252 | if (UseMI.isBranch()) |
| 4253 | return 0; |
| 4254 | |
| 4255 | // Otherwise it takes the instruction latency (generally one). |
| 4256 |     unsigned Latency = getInstrLatency(ItinData, DefMI);
| 4257 | |
| 4258 | // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to |
| 4259 | // its uses. Instructions which are otherwise scheduled between them may |
| 4260 | // incur a code size penalty (not able to use the CPSR setting 16-bit |
| 4261 | // instructions). |
| 4262 | if (Latency > 0 && Subtarget.isThumb2()) { |
| 4263 | const MachineFunction *MF = DefMI.getParent()->getParent(); |
| 4264 | // FIXME: Use Function::hasOptSize(). |
| 4265 |       if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
| 4266 | --Latency; |
| 4267 | } |
| 4268 | return Latency; |
| 4269 | } |
| 4270 | |
| 4271 |   if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
| 4272 | return std::nullopt; |
| 4273 | |
| 4274 | unsigned DefAlign = DefMI.hasOneMemOperand() |
| 4275 | ? (*DefMI.memoperands_begin())->getAlign().value() |
| 4276 | : 0; |
| 4277 | unsigned UseAlign = UseMI.hasOneMemOperand() |
| 4278 | ? (*UseMI.memoperands_begin())->getAlign().value() |
| 4279 | : 0; |
| 4280 | |
| 4281 | // Get the itinerary's latency if possible, and handle variable_ops. |
| 4282 | std::optional<unsigned> Latency = getOperandLatency( |
| 4283 | ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign); |
| 4284 | // Unable to find operand latency. The caller may resort to getInstrLatency. |
| 4285 | if (!Latency) |
| 4286 | return std::nullopt; |
| 4287 | |
| 4288 | // Adjust for IT block position. |
| 4289 | int Adj = DefAdj + UseAdj; |
| 4290 | |
| 4291 | // Adjust for dynamic def-side opcode variants not captured by the itinerary. |
| 4292 | Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign); |
| 4293 | if (Adj >= 0 || (int)*Latency > -Adj) { |
| 4294 | return *Latency + Adj; |
| 4295 | } |
| 4296 | // Return the itinerary latency, which may be zero but not less than zero. |
| 4297 | return Latency; |
| 4298 | } |
| 4299 | |
| 4300 | std::optional<unsigned> |
| 4301 | ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, |
| 4302 | SDNode *DefNode, unsigned DefIdx, |
| 4303 | SDNode *UseNode, unsigned UseIdx) const { |
| 4304 | if (!DefNode->isMachineOpcode()) |
| 4305 | return 1; |
| 4306 | |
| 4307 |   const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
| 4308 | |
| 4309 |   if (isZeroCost(DefMCID.Opcode))
| 4310 | return 0; |
| 4311 | |
| 4312 | if (!ItinData || ItinData->isEmpty()) |
| 4313 | return DefMCID.mayLoad() ? 3 : 1; |
| 4314 | |
| 4315 | if (!UseNode->isMachineOpcode()) { |
| 4316 | std::optional<unsigned> Latency = |
| 4317 |         ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
| 4318 | int Adj = Subtarget.getPreISelOperandLatencyAdjustment(); |
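|      |     // Operand latencies at or below the threshold collapse to 1; larger ones
|      |     // are reduced by the subtarget's pre-ISel latency adjustment.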
| 4319 | int Threshold = 1 + Adj; |
| 4320 | return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj; |
| 4321 | } |
| 4322 | |
| 4323 |   const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
| 4324 |   auto *DefMN = cast<MachineSDNode>(DefNode);
| 4325 | unsigned DefAlign = !DefMN->memoperands_empty() |
| 4326 | ? (*DefMN->memoperands_begin())->getAlign().value() |
| 4327 | : 0; |
| 4328 |   auto *UseMN = cast<MachineSDNode>(UseNode);
| 4329 | unsigned UseAlign = !UseMN->memoperands_empty() |
| 4330 | ? (*UseMN->memoperands_begin())->getAlign().value() |
| 4331 | : 0; |
| 4332 | std::optional<unsigned> Latency = getOperandLatency( |
| 4333 | ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign); |
| 4334 | if (!Latency) |
| 4335 | return std::nullopt; |
| 4336 | |
| 4337 | if (Latency > 1U && |
| 4338 | (Subtarget.isCortexA8() || Subtarget.isLikeA9() || |
| 4339 | Subtarget.isCortexA7())) { |
| 4340 | // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] |
| 4341 | // variants are one cycle cheaper. |
| 4342 | switch (DefMCID.getOpcode()) { |
| 4343 | default: break; |
| 4344 | case ARM::LDRrs: |
| 4345 | case ARM::LDRBrs: { |
| 4346 |       unsigned ShOpVal = DefNode->getConstantOperandVal(2);
| 4347 |       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
| 4348 |       if (ShImm == 0 ||
| 4349 |           (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
| 4350 | Latency = *Latency - 1; |
| 4351 | break; |
| 4352 | } |
| 4353 | case ARM::t2LDRs: |
| 4354 | case ARM::t2LDRBs: |
| 4355 | case ARM::t2LDRHs: |
| 4356 | case ARM::t2LDRSHs: { |
| 4357 | // Thumb2 mode: lsl only. |
| 4358 |       unsigned ShAmt = DefNode->getConstantOperandVal(2);
| 4359 | if (ShAmt == 0 || ShAmt == 2) |
| 4360 | Latency = *Latency - 1; |
| 4361 | break; |
| 4362 | } |
| 4363 | } |
| 4364 | } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) { |
| 4365 | // FIXME: Properly handle all of the latency adjustments for address |
| 4366 | // writeback. |
| 4367 | switch (DefMCID.getOpcode()) { |
| 4368 | default: break; |
| 4369 | case ARM::LDRrs: |
| 4370 | case ARM::LDRBrs: { |
| 4371 |       unsigned ShOpVal = DefNode->getConstantOperandVal(2);
| 4372 |       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
| 4373 |       if (ShImm == 0 ||
| 4374 |           ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
| 4375 |            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
| 4376 |         Latency = *Latency - 2;
| 4377 |       else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
| 4378 | Latency = *Latency - 1; |
| 4379 | break; |
| 4380 | } |
| 4381 | case ARM::t2LDRs: |
| 4382 | case ARM::t2LDRBs: |
| 4383 | case ARM::t2LDRHs: |
| 4384 | case ARM::t2LDRSHs: |
| 4385 | // Thumb2 mode: lsl 0-3 only. |
| 4386 | Latency = *Latency - 2; |
| 4387 | break; |
| 4388 | } |
| 4389 | } |
| 4390 | |
| 4391 | if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) |
| 4392 | switch (DefMCID.getOpcode()) { |
| 4393 | default: break; |
| 4394 | case ARM::VLD1q8: |
| 4395 | case ARM::VLD1q16: |
| 4396 | case ARM::VLD1q32: |
| 4397 | case ARM::VLD1q64: |
| 4398 | case ARM::VLD1q8wb_register: |
| 4399 | case ARM::VLD1q16wb_register: |
| 4400 | case ARM::VLD1q32wb_register: |
| 4401 | case ARM::VLD1q64wb_register: |
| 4402 | case ARM::VLD1q8wb_fixed: |
| 4403 | case ARM::VLD1q16wb_fixed: |
| 4404 | case ARM::VLD1q32wb_fixed: |
| 4405 | case ARM::VLD1q64wb_fixed: |
| 4406 | case ARM::VLD2d8: |
| 4407 | case ARM::VLD2d16: |
| 4408 | case ARM::VLD2d32: |
| 4409 | case ARM::VLD2q8Pseudo: |
| 4410 | case ARM::VLD2q16Pseudo: |
| 4411 | case ARM::VLD2q32Pseudo: |
| 4412 | case ARM::VLD2d8wb_fixed: |
| 4413 | case ARM::VLD2d16wb_fixed: |
| 4414 | case ARM::VLD2d32wb_fixed: |
| 4415 | case ARM::VLD2q8PseudoWB_fixed: |
| 4416 | case ARM::VLD2q16PseudoWB_fixed: |
| 4417 | case ARM::VLD2q32PseudoWB_fixed: |
| 4418 | case ARM::VLD2d8wb_register: |
| 4419 | case ARM::VLD2d16wb_register: |
| 4420 | case ARM::VLD2d32wb_register: |
| 4421 | case ARM::VLD2q8PseudoWB_register: |
| 4422 | case ARM::VLD2q16PseudoWB_register: |
| 4423 | case ARM::VLD2q32PseudoWB_register: |
| 4424 | case ARM::VLD3d8Pseudo: |
| 4425 | case ARM::VLD3d16Pseudo: |
| 4426 | case ARM::VLD3d32Pseudo: |
| 4427 | case ARM::VLD1d8TPseudo: |
| 4428 | case ARM::VLD1d16TPseudo: |
| 4429 | case ARM::VLD1d32TPseudo: |
| 4430 | case ARM::VLD1d64TPseudo: |
| 4431 | case ARM::VLD1d64TPseudoWB_fixed: |
| 4432 | case ARM::VLD1d64TPseudoWB_register: |
| 4433 | case ARM::VLD3d8Pseudo_UPD: |
| 4434 | case ARM::VLD3d16Pseudo_UPD: |
| 4435 | case ARM::VLD3d32Pseudo_UPD: |
| 4436 | case ARM::VLD3q8Pseudo_UPD: |
| 4437 | case ARM::VLD3q16Pseudo_UPD: |
| 4438 | case ARM::VLD3q32Pseudo_UPD: |
| 4439 | case ARM::VLD3q8oddPseudo: |
| 4440 | case ARM::VLD3q16oddPseudo: |
| 4441 | case ARM::VLD3q32oddPseudo: |
| 4442 | case ARM::VLD3q8oddPseudo_UPD: |
| 4443 | case ARM::VLD3q16oddPseudo_UPD: |
| 4444 | case ARM::VLD3q32oddPseudo_UPD: |
| 4445 | case ARM::VLD4d8Pseudo: |
| 4446 | case ARM::VLD4d16Pseudo: |
| 4447 | case ARM::VLD4d32Pseudo: |
| 4448 | case ARM::VLD1d8QPseudo: |
| 4449 | case ARM::VLD1d16QPseudo: |
| 4450 | case ARM::VLD1d32QPseudo: |
| 4451 | case ARM::VLD1d64QPseudo: |
| 4452 | case ARM::VLD1d64QPseudoWB_fixed: |
| 4453 | case ARM::VLD1d64QPseudoWB_register: |
| 4454 | case ARM::VLD1q8HighQPseudo: |
| 4455 | case ARM::VLD1q8LowQPseudo_UPD: |
| 4456 | case ARM::VLD1q8HighTPseudo: |
| 4457 | case ARM::VLD1q8LowTPseudo_UPD: |
| 4458 | case ARM::VLD1q16HighQPseudo: |
| 4459 | case ARM::VLD1q16LowQPseudo_UPD: |
| 4460 | case ARM::VLD1q16HighTPseudo: |
| 4461 | case ARM::VLD1q16LowTPseudo_UPD: |
| 4462 | case ARM::VLD1q32HighQPseudo: |
| 4463 | case ARM::VLD1q32LowQPseudo_UPD: |
| 4464 | case ARM::VLD1q32HighTPseudo: |
| 4465 | case ARM::VLD1q32LowTPseudo_UPD: |
| 4466 | case ARM::VLD1q64HighQPseudo: |
| 4467 | case ARM::VLD1q64LowQPseudo_UPD: |
| 4468 | case ARM::VLD1q64HighTPseudo: |
| 4469 | case ARM::VLD1q64LowTPseudo_UPD: |
| 4470 | case ARM::VLD4d8Pseudo_UPD: |
| 4471 | case ARM::VLD4d16Pseudo_UPD: |
| 4472 | case ARM::VLD4d32Pseudo_UPD: |
| 4473 | case ARM::VLD4q8Pseudo_UPD: |
| 4474 | case ARM::VLD4q16Pseudo_UPD: |
| 4475 | case ARM::VLD4q32Pseudo_UPD: |
| 4476 | case ARM::VLD4q8oddPseudo: |
| 4477 | case ARM::VLD4q16oddPseudo: |
| 4478 | case ARM::VLD4q32oddPseudo: |
| 4479 | case ARM::VLD4q8oddPseudo_UPD: |
| 4480 | case ARM::VLD4q16oddPseudo_UPD: |
| 4481 | case ARM::VLD4q32oddPseudo_UPD: |
| 4482 | case ARM::VLD1DUPq8: |
| 4483 | case ARM::VLD1DUPq16: |
| 4484 | case ARM::VLD1DUPq32: |
| 4485 | case ARM::VLD1DUPq8wb_fixed: |
| 4486 | case ARM::VLD1DUPq16wb_fixed: |
| 4487 | case ARM::VLD1DUPq32wb_fixed: |
| 4488 | case ARM::VLD1DUPq8wb_register: |
| 4489 | case ARM::VLD1DUPq16wb_register: |
| 4490 | case ARM::VLD1DUPq32wb_register: |
| 4491 | case ARM::VLD2DUPd8: |
| 4492 | case ARM::VLD2DUPd16: |
| 4493 | case ARM::VLD2DUPd32: |
| 4494 | case ARM::VLD2DUPd8wb_fixed: |
| 4495 | case ARM::VLD2DUPd16wb_fixed: |
| 4496 | case ARM::VLD2DUPd32wb_fixed: |
| 4497 | case ARM::VLD2DUPd8wb_register: |
| 4498 | case ARM::VLD2DUPd16wb_register: |
| 4499 | case ARM::VLD2DUPd32wb_register: |
| 4500 | case ARM::VLD2DUPq8EvenPseudo: |
| 4501 | case ARM::VLD2DUPq8OddPseudo: |
| 4502 | case ARM::VLD2DUPq16EvenPseudo: |
| 4503 | case ARM::VLD2DUPq16OddPseudo: |
| 4504 | case ARM::VLD2DUPq32EvenPseudo: |
| 4505 | case ARM::VLD2DUPq32OddPseudo: |
| 4506 | case ARM::VLD3DUPq8EvenPseudo: |
| 4507 | case ARM::VLD3DUPq8OddPseudo: |
| 4508 | case ARM::VLD3DUPq16EvenPseudo: |
| 4509 | case ARM::VLD3DUPq16OddPseudo: |
| 4510 | case ARM::VLD3DUPq32EvenPseudo: |
| 4511 | case ARM::VLD3DUPq32OddPseudo: |
| 4512 | case ARM::VLD4DUPd8Pseudo: |
| 4513 | case ARM::VLD4DUPd16Pseudo: |
| 4514 | case ARM::VLD4DUPd32Pseudo: |
| 4515 | case ARM::VLD4DUPd8Pseudo_UPD: |
| 4516 | case ARM::VLD4DUPd16Pseudo_UPD: |
| 4517 | case ARM::VLD4DUPd32Pseudo_UPD: |
| 4518 | case ARM::VLD4DUPq8EvenPseudo: |
| 4519 | case ARM::VLD4DUPq8OddPseudo: |
| 4520 | case ARM::VLD4DUPq16EvenPseudo: |
| 4521 | case ARM::VLD4DUPq16OddPseudo: |
| 4522 | case ARM::VLD4DUPq32EvenPseudo: |
| 4523 | case ARM::VLD4DUPq32OddPseudo: |
| 4524 | case ARM::VLD1LNq8Pseudo: |
| 4525 | case ARM::VLD1LNq16Pseudo: |
| 4526 | case ARM::VLD1LNq32Pseudo: |
| 4527 | case ARM::VLD1LNq8Pseudo_UPD: |
| 4528 | case ARM::VLD1LNq16Pseudo_UPD: |
| 4529 | case ARM::VLD1LNq32Pseudo_UPD: |
| 4530 | case ARM::VLD2LNd8Pseudo: |
| 4531 | case ARM::VLD2LNd16Pseudo: |
| 4532 | case ARM::VLD2LNd32Pseudo: |
| 4533 | case ARM::VLD2LNq16Pseudo: |
| 4534 | case ARM::VLD2LNq32Pseudo: |
| 4535 | case ARM::VLD2LNd8Pseudo_UPD: |
| 4536 | case ARM::VLD2LNd16Pseudo_UPD: |
| 4537 | case ARM::VLD2LNd32Pseudo_UPD: |
| 4538 | case ARM::VLD2LNq16Pseudo_UPD: |
| 4539 | case ARM::VLD2LNq32Pseudo_UPD: |
| 4540 | case ARM::VLD4LNd8Pseudo: |
| 4541 | case ARM::VLD4LNd16Pseudo: |
| 4542 | case ARM::VLD4LNd32Pseudo: |
| 4543 | case ARM::VLD4LNq16Pseudo: |
| 4544 | case ARM::VLD4LNq32Pseudo: |
| 4545 | case ARM::VLD4LNd8Pseudo_UPD: |
| 4546 | case ARM::VLD4LNd16Pseudo_UPD: |
| 4547 | case ARM::VLD4LNd32Pseudo_UPD: |
| 4548 | case ARM::VLD4LNq16Pseudo_UPD: |
| 4549 | case ARM::VLD4LNq32Pseudo_UPD: |
| 4550 |     // If the address is not 64-bit aligned, the latencies of these
| 4551 |     // instructions increase by one.
| 4552 | Latency = *Latency + 1; |
| 4553 | break; |
| 4554 | } |
| 4555 | |
| 4556 | return Latency; |
| 4557 | } |
| 4558 | |
| 4559 | unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const { |
| 4560 | if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || |
| 4561 | MI.isImplicitDef()) |
| 4562 | return 0; |
| 4563 | |
| 4564 | if (MI.isBundle()) |
| 4565 | return 0; |
| 4566 | |
| 4567 | const MCInstrDesc &MCID = MI.getDesc(); |
| 4568 | |
| 4569 |   if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
| 4570 |                         !Subtarget.cheapPredicableCPSRDef())) {
| 4571 |     // When predicated, CPSR is an additional source operand for CPSR updating
| 4572 |     // instructions; this apparently increases their latencies.
| 4573 | return 1; |
| 4574 | } |
| 4575 | return 0; |
| 4576 | } |
| 4577 | |
| 4578 | unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
| 4579 | const MachineInstr &MI, |
| 4580 | unsigned *PredCost) const { |
| 4581 | if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || |
| 4582 | MI.isImplicitDef()) |
| 4583 | return 1; |
| 4584 | |
| 4585 |   // An instruction scheduler typically runs on unbundled instructions; however,
| 4586 |   // other passes may query the latency of a bundled instruction.
| 4587 | if (MI.isBundle()) { |
| 4588 | unsigned Latency = 0; |
| 4589 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
| 4590 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
| 4591 | while (++I != E && I->isInsideBundle()) { |
| 4592 | if (I->getOpcode() != ARM::t2IT) |
| 4593 |         Latency += getInstrLatency(ItinData, *I, PredCost);
| 4594 | } |
| 4595 | return Latency; |
| 4596 | } |
| 4597 | |
| 4598 | const MCInstrDesc &MCID = MI.getDesc(); |
| 4599 |   if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
| 4600 |                                      !Subtarget.cheapPredicableCPSRDef()))) {
| 4601 |     // When predicated, CPSR is an additional source operand for CPSR updating
| 4602 |     // instructions; this apparently increases their latencies.
| 4603 | *PredCost = 1; |
| 4604 | } |
| 4605 | // Be sure to call getStageLatency for an empty itinerary in case it has a |
| 4606 | // valid MinLatency property. |
| 4607 | if (!ItinData) |
| 4608 | return MI.mayLoad() ? 3 : 1; |
| 4609 | |
| 4610 | unsigned Class = MCID.getSchedClass(); |
| 4611 | |
| 4612 | // For instructions with variable uops, use uops as latency. |
| 4613 |   if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
| 4614 | return getNumMicroOps(ItinData, MI); |
| 4615 | |
| 4616 | // For the common case, fall back on the itinerary's latency. |
| 4617 |   unsigned Latency = ItinData->getStageLatency(Class);
| 4618 | |
| 4619 | // Adjust for dynamic def-side opcode variants not captured by the itinerary. |
| 4620 | unsigned DefAlign = |
| 4621 | MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0; |
| 4622 |   int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
| 4623 | if (Adj >= 0 || (int)Latency > -Adj) { |
| 4624 | return Latency + Adj; |
| 4625 | } |
| 4626 | return Latency; |
| 4627 | } |
| 4628 | |
| 4629 | unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
| 4630 | SDNode *Node) const { |
| 4631 | if (!Node->isMachineOpcode()) |
| 4632 | return 1; |
| 4633 | |
| 4634 | if (!ItinData || ItinData->isEmpty()) |
| 4635 | return 1; |
| 4636 | |
| 4637 | unsigned Opcode = Node->getMachineOpcode(); |
| 4638 | switch (Opcode) { |
| 4639 | default: |
| 4640 |     return ItinData->getStageLatency(get(Opcode).getSchedClass());
| 4641 | case ARM::VLDMQIA: |
| 4642 | case ARM::VSTMQIA: |
| 4643 | return 2; |
| 4644 | } |
| 4645 | } |
| 4646 | |
| 4647 | bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel, |
| 4648 | const MachineRegisterInfo *MRI, |
| 4649 | const MachineInstr &DefMI, |
| 4650 | unsigned DefIdx, |
| 4651 | const MachineInstr &UseMI, |
| 4652 | unsigned UseIdx) const { |
| 4653 | unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; |
| 4654 | unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask; |
| 4655 | if (Subtarget.nonpipelinedVFP() && |
| 4656 | (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) |
| 4657 | return true; |
| 4658 | |
| 4659 | // Hoist VFP / NEON instructions with 4 or higher latency. |
| 4660 | unsigned Latency = |
| 4661 |       SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
| 4662 | if (Latency <= 3) |
| 4663 | return false; |
| 4664 | return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || |
| 4665 | UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; |
| 4666 | } |
| 4667 | |
| 4668 | bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, |
| 4669 | const MachineInstr &DefMI, |
| 4670 | unsigned DefIdx) const { |
| 4671 | const InstrItineraryData *ItinData = SchedModel.getInstrItineraries(); |
| 4672 | if (!ItinData || ItinData->isEmpty()) |
| 4673 | return false; |
| 4674 | |
| 4675 | unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; |
| 4676 | if (DDomain == ARMII::DomainGeneral) { |
| 4677 | unsigned DefClass = DefMI.getDesc().getSchedClass(); |
| 4678 | std::optional<unsigned> DefCycle = |
| 4679 |         ItinData->getOperandCycle(DefClass, DefIdx);
| 4680 | return DefCycle && DefCycle <= 2U; |
| 4681 | } |
| 4682 | return false; |
| 4683 | } |
| 4684 | |
| 4685 | bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, |
| 4686 | StringRef &ErrInfo) const { |
| 4687 |   if (convertAddSubFlagsOpcode(MI.getOpcode())) {
| 4688 |     ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
| 4689 | return false; |
| 4690 | } |
| 4691 | if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) { |
| 4692 | // Make sure we don't generate a lo-lo mov that isn't supported. |
| 4693 |     if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
| 4694 |         !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
| 4695 |       ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
| 4696 | return false; |
| 4697 | } |
| 4698 | } |
| 4699 | if (MI.getOpcode() == ARM::tPUSH || |
| 4700 | MI.getOpcode() == ARM::tPOP || |
| 4701 | MI.getOpcode() == ARM::tPOP_RET) { |
| 4702 |     for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
| 4703 | if (MO.isImplicit() || !MO.isReg()) |
| 4704 | continue; |
| 4705 | Register Reg = MO.getReg(); |
| 4706 | if (Reg < ARM::R0 || Reg > ARM::R7) { |
| 4707 | if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) && |
| 4708 | !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) { |
| 4709 |           ErrInfo = "Unsupported register in Thumb1 push/pop";
| 4710 | return false; |
| 4711 | } |
| 4712 | } |
| 4713 | } |
| 4714 | } |
| 4715 | if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) { |
| 4716 | assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm()); |
| 4717 |     if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
| 4718 |         MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
| 4719 |       ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
| 4720 | return false; |
| 4721 | } |
| 4722 | } |
| 4723 | |
| 4724 | // Check the address model by taking the first Imm operand and checking it is |
| 4725 | // legal for that addressing mode. |
| 4726 | ARMII::AddrMode AddrMode = |
| 4727 | (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask); |
| 4728 | switch (AddrMode) { |
| 4729 | default: |
| 4730 | break; |
| 4731 | case ARMII::AddrModeT2_i7: |
| 4732 | case ARMII::AddrModeT2_i7s2: |
| 4733 | case ARMII::AddrModeT2_i7s4: |
| 4734 | case ARMII::AddrModeT2_i8: |
| 4735 | case ARMII::AddrModeT2_i8pos: |
| 4736 | case ARMII::AddrModeT2_i8neg: |
| 4737 | case ARMII::AddrModeT2_i8s4: |
| 4738 | case ARMII::AddrModeT2_i12: { |
| 4739 | uint32_t Imm = 0; |
| 4740 | for (auto Op : MI.operands()) { |
| 4741 | if (Op.isImm()) { |
| 4742 | Imm = Op.getImm(); |
| 4743 | break; |
| 4744 | } |
| 4745 | } |
| 4746 |     if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
| 4747 |       ErrInfo = "Incorrect AddrMode Imm for instruction";
| 4748 | return false; |
| 4749 | } |
| 4750 | break; |
| 4751 | } |
| 4752 | } |
| 4753 | return true; |
| 4754 | } |
| 4755 | |
| 4756 | void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, |
| 4757 | unsigned LoadImmOpc, |
| 4758 | unsigned LoadOpc) const { |
| 4759 | assert(!Subtarget.isROPI() && !Subtarget.isRWPI() && |
| 4760 |          "ROPI/RWPI not currently supported with stack guard");
| 4761 | |
| 4762 | MachineBasicBlock &MBB = *MI->getParent(); |
| 4763 | DebugLoc DL = MI->getDebugLoc(); |
| 4764 |   Register Reg = MI->getOperand(0).getReg();
| 4765 | MachineInstrBuilder MIB; |
| 4766 | unsigned int Offset = 0; |
| 4767 | |
| 4768 | if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) { |
| 4769 | assert(!Subtarget.isReadTPSoft() && |
| 4770 |            "TLS stack protector requires hardware TLS register");
| 4771 | |
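|      |     // Read the TLS base from the user read-only thread ID register:
|      |     //   MRC p15, #0, Reg, c13, c0, #3 (TPIDRURO).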
| 4772 |     BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
| 4773 |         .addImm(15)
| 4774 |         .addImm(0)
| 4775 |         .addImm(13)
| 4776 |         .addImm(0)
| 4777 |         .addImm(3)
| 4778 |         .add(predOps(ARMCC::AL));
| 4779 | |
| 4780 | Module &M = *MBB.getParent()->getFunction().getParent(); |
| 4781 | Offset = M.getStackProtectorGuardOffset(); |
| 4782 | if (Offset & ~0xfffU) { |
| 4783 | // The offset won't fit in the LDR's 12-bit immediate field, so emit an |
| 4784 | // extra ADD to cover the delta. This gives us a guaranteed 8 additional |
| 4785 | // bits, resulting in a range of 0 to +1 MiB for the guard offset. |
| 4786 | unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri; |
| 4787 |       BuildMI(MBB, MI, DL, get(AddOpc), Reg)
| 4788 |           .addReg(Reg, RegState::Kill)
| 4789 |           .addImm(Offset & ~0xfffU)
| 4790 |           .add(predOps(ARMCC::AL))
| 4791 |           .addReg(0);
| 4792 | Offset &= 0xfffU; |
| 4793 | } |
| 4794 | } else { |
| 4795 | const GlobalValue *GV = |
| 4796 |         cast<GlobalValue>((*MI->memoperands_begin())->getValue());
| 4797 | bool IsIndirect = Subtarget.isGVIndirectSymbol(GV); |
| 4798 | |
| 4799 | unsigned TargetFlags = ARMII::MO_NO_FLAG; |
| 4800 | if (Subtarget.isTargetMachO()) { |
| 4801 | TargetFlags |= ARMII::MO_NONLAZY; |
| 4802 | } else if (Subtarget.isTargetCOFF()) { |
| 4803 | if (GV->hasDLLImportStorageClass()) |
| 4804 | TargetFlags |= ARMII::MO_DLLIMPORT; |
| 4805 | else if (IsIndirect) |
| 4806 | TargetFlags |= ARMII::MO_COFFSTUB; |
| 4807 | } else if (IsIndirect) { |
| 4808 | TargetFlags |= ARMII::MO_GOT; |
| 4809 | } |
| 4810 | |
| 4811 | if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only |
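|      |       // The execute-only tMOVi32imm expansion clobbers the flags, so preserve
|      |       // APSR_nzcvq in a scratch register around the materialization.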
| 4812 | Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register |
| 4813 | auto APSREncoding = |
| 4814 |           ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
| 4815 |       BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)
| 4816 |           .addImm(APSREncoding)
| 4817 |           .add(predOps(ARMCC::AL));
| 4818 |       BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
| 4819 |           .addGlobalAddress(GV, 0, TargetFlags);
| 4820 |       BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))
| 4821 |           .addImm(APSREncoding)
| 4822 |           .addReg(CPSRSaveReg, RegState::Kill)
| 4823 |           .add(predOps(ARMCC::AL));
| 4824 |     } else {
| 4825 |       BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
| 4826 |           .addGlobalAddress(GV, 0, TargetFlags);
| 4827 | } |
| 4828 | |
| 4829 | if (IsIndirect) { |
| 4830 |       MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
| 4831 |       MIB.addReg(Reg, RegState::Kill).addImm(0);
| 4832 |       auto Flags = MachineMemOperand::MOLoad |
| 4833 |                    MachineMemOperand::MODereferenceable |
| 4834 |                    MachineMemOperand::MOInvariant;
| 4835 |       MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
| 4836 |           MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
| 4837 |       MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
| 4838 | } |
| 4839 | } |
| 4840 | |
| 4841 |   MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
| 4842 |   MIB.addReg(Reg, RegState::Kill)
| 4843 |       .addImm(Offset)
| 4844 |       .cloneMemRefs(*MI)
| 4845 |       .add(predOps(ARMCC::AL));
| 4846 | } |
| 4847 | |
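|      | /// isFpMLxInstruction - If Opcode is a floating-point MLA / MLS instruction in
|      | /// the MLx table, return the multiply and add / subtract opcodes it expands
|      | /// to, along with whether the accumulator is negated and whether a lane
|      | /// operand is present.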
| 4848 | bool |
| 4849 | ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, |
| 4850 | unsigned &AddSubOpc, |
| 4851 | bool &NegAcc, bool &HasLane) const { |
| 4852 |   DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
| 4853 | if (I == MLxEntryMap.end()) |
| 4854 | return false; |
| 4855 | |
| 4856 | const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; |
| 4857 | MulOpc = Entry.MulOpc; |
| 4858 | AddSubOpc = Entry.AddSubOpc; |
| 4859 | NegAcc = Entry.NegAcc; |
| 4860 | HasLane = Entry.HasLane; |
| 4861 | return true; |
| 4862 | } |
| 4863 | |
| 4864 | //===----------------------------------------------------------------------===// |
| 4865 | // Execution domains. |
| 4866 | //===----------------------------------------------------------------------===// |
| 4867 | // |
| 4868 | // Some instructions go down the NEON pipeline, some go down the VFP pipeline, |
| 4869 | // and some can go down both. The vmov instructions go down the VFP pipeline, |
| 4870 | // but they can be changed to vorr equivalents that are executed by the NEON |
| 4871 | // pipeline. |
| 4872 | // |
| 4873 | // We use the following execution domain numbering: |
| 4874 | // |
| 4875 | enum ARMExeDomain { |
| 4876 | ExeGeneric = 0, |
| 4877 | ExeVFP = 1, |
| 4878 | ExeNEON = 2 |
| 4879 | }; |
| 4880 | |
| 4881 | // |
| 4882 | // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h |
| 4883 | // |
| 4884 | std::pair<uint16_t, uint16_t> |
| 4885 | ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const { |
| 4886 | // If we don't have access to NEON instructions then we won't be able |
| 4887 | // to swizzle anything to the NEON domain. Check to make sure. |
| 4888 | if (Subtarget.hasNEON()) { |
| 4889 | // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON |
| 4890 | // if they are not predicated. |
| 4891 | if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI)) |
| 4892 |       return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
| 4893 | |
| 4894 | // CortexA9 is particularly picky about mixing the two and wants these |
| 4895 | // converted. |
| 4896 | if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) && |
| 4897 | (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR || |
| 4898 | MI.getOpcode() == ARM::VMOVS)) |
| 4899 |       return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
| 4900 | } |
| 4901 | // No other instructions can be swizzled, so just determine their domain. |
| 4902 | unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask; |
| 4903 | |
| 4904 | if (Domain & ARMII::DomainNEON) |
| 4905 |     return std::make_pair(ExeNEON, 0);
| 4906 | |
| 4907 | // Certain instructions can go either way on Cortex-A8. |
| 4908 | // Treat them as NEON instructions. |
| 4909 | if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8()) |
| 4910 |     return std::make_pair(ExeNEON, 0);
| 4911 | |
| 4912 | if (Domain & ARMII::DomainVFP) |
| 4913 |     return std::make_pair(ExeVFP, 0);
| 4914 | |
| 4915 |   return std::make_pair(ExeGeneric, 0);
| 4916 | } |
| 4917 | |
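|      | /// Return the D-register that contains SReg and set Lane to the half (0 or 1)
|      | /// of that D-register which SReg occupies.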
| 4918 | static MCRegister getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, |
| 4919 | unsigned SReg, unsigned &Lane) { |
| 4920 | MCRegister DReg = |
| 4921 |       TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
| 4922 | Lane = 0; |
| 4923 | |
| 4924 | if (DReg) |
| 4925 | return DReg; |
| 4926 | |
| 4927 | Lane = 1; |
| 4928 |   DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
| 4929 | |
| 4930 |   assert(DReg && "S-register with no D super-register?");
| 4931 | return DReg; |
| 4932 | } |
| 4933 | |
| 4934 | /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, |
| 4935 | /// set ImplicitSReg to a register number that must be marked as implicit-use or |
| 4936 | /// zero if no register needs to be defined as implicit-use. |
| 4937 | /// |
| 4938 | /// If the function cannot determine if an SPR should be marked implicit use or |
| 4939 | /// not, it returns false. |
| 4940 | /// |
| 4941 | /// This function handles cases where an instruction is being modified from taking |
| 4942 | /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict |
| 4943 | /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other |
| 4944 | /// lane of the DPR). |
| 4945 | /// |
| 4946 | /// If the other SPR is defined, an implicit-use of it should be added. Otherwise
| 4947 | /// (including the case where the DPR itself is defined), it should not.
| 4948 | /// |
| 4949 | static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, |
| 4950 | MachineInstr &MI, MCRegister DReg, |
| 4951 | unsigned Lane, |
| 4952 | MCRegister &ImplicitSReg) { |
| 4953 | // If the DPR is defined or used already, the other SPR lane will be chained |
| 4954 | // correctly, so there is nothing to be done. |
| 4955 |   if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
| 4956 | ImplicitSReg = MCRegister(); |
| 4957 | return true; |
| 4958 | } |
| 4959 | |
| 4960 | // Otherwise we need to go searching to see if the SPR is set explicitly. |
| 4961 |   ImplicitSReg = TRI->getSubReg(DReg,
| 4962 |                                 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
| 4963 | MachineBasicBlock::LivenessQueryResult LQR = |
| 4964 |       MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
| 4965 | |
| 4966 | if (LQR == MachineBasicBlock::LQR_Live) |
| 4967 | return true; |
| 4968 | else if (LQR == MachineBasicBlock::LQR_Unknown) |
| 4969 | return false; |
| 4970 | |
| 4971 | // If the register is known not to be live, there is no need to add an |
| 4972 | // implicit-use. |
| 4973 | ImplicitSReg = MCRegister(); |
| 4974 | return true; |
| 4975 | } |
| 4976 | |
| 4977 | void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, |
| 4978 | unsigned Domain) const { |
| 4979 | unsigned DstReg, SrcReg; |
| 4980 | MCRegister DReg; |
| 4981 | unsigned Lane; |
| 4982 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); |
| 4983 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
| 4984 | switch (MI.getOpcode()) { |
| 4985 | default: |
| 4986 |     llvm_unreachable("cannot handle opcode!");
| 4987 | break; |
| 4988 | case ARM::VMOVD: |
| 4989 | if (Domain != ExeNEON) |
| 4990 | break; |
| 4991 | |
| 4992 | // Zap the predicate operands. |
| 4993 |     assert(!isPredicated(MI) && "Cannot predicate a VORRd");
| 4994 | 
| 4995 |     // Make sure we've got NEON instructions.
| 4996 |     assert(Subtarget.hasNEON() && "VORRd requires NEON");
| 4997 | |
| 4998 | // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) |
| 4999 |     DstReg = MI.getOperand(0).getReg();
| 5000 |     SrcReg = MI.getOperand(1).getReg();
| 5001 | |
| 5002 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
| 5003 |       MI.removeOperand(i - 1);
| 5004 | |
| 5005 | // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) |
| 5006 |     MI.setDesc(get(ARM::VORRd));
| 5007 |     MIB.addReg(DstReg, RegState::Define)
| 5008 |         .addReg(SrcReg)
| 5009 |         .addReg(SrcReg)
| 5010 |         .add(predOps(ARMCC::AL));
| 5011 | break; |
| 5012 | case ARM::VMOVRS: |
| 5013 | if (Domain != ExeNEON) |
| 5014 | break; |
| 5015 |     assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
| 5016 | |
| 5017 | // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) |
| 5018 |     DstReg = MI.getOperand(0).getReg();
| 5019 |     SrcReg = MI.getOperand(1).getReg();
| 5020 | |
| 5021 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
| 5022 |       MI.removeOperand(i - 1);
| 5023 | |
| 5024 |     DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
| 5025 | |
| 5026 | // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) |
| 5027 | // Note that DSrc has been widened and the other lane may be undef, which |
| 5028 | // contaminates the entire register. |
| 5029 |     MI.setDesc(get(ARM::VGETLNi32));
| 5030 |     MIB.addReg(DstReg, RegState::Define)
| 5031 |         .addReg(DReg, RegState::Undef)
| 5032 |         .addImm(Lane)
| 5033 |         .add(predOps(ARMCC::AL));
| 5034 | |
| 5035 | // The old source should be an implicit use, otherwise we might think it |
| 5036 | // was dead before here. |
| 5037 |     MIB.addReg(SrcReg, RegState::Implicit);
| 5038 | break; |
| 5039 | case ARM::VMOVSR: { |
| 5040 | if (Domain != ExeNEON) |
| 5041 | break; |
| 5042 |     assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
| 5043 | |
| 5044 | // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) |
| 5045 |     DstReg = MI.getOperand(0).getReg();
| 5046 |     SrcReg = MI.getOperand(1).getReg();
| 5047 | 
| 5048 |     DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
| 5049 | |
| 5050 | MCRegister ImplicitSReg; |
| 5051 | if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg)) |
| 5052 | break; |
| 5053 | |
| 5054 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
| 5055 |       MI.removeOperand(i - 1);
| 5056 | |
| 5057 | // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) |
| 5058 | // Again DDst may be undefined at the beginning of this instruction. |
| 5059 |     MI.setDesc(get(ARM::VSETLNi32));
| 5060 |     MIB.addReg(DReg, RegState::Define)
| 5061 |         .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
| 5062 |         .addReg(SrcReg)
| 5063 |         .addImm(Lane)
| 5064 |         .add(predOps(ARMCC::AL));
| 5065 | |
| 5066 | // The narrower destination must be marked as set to keep previous chains |
| 5067 | // in place. |
| 5068 |     MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
| 5069 |     if (ImplicitSReg)
| 5070 |       MIB.addReg(ImplicitSReg, RegState::Implicit);
| 5071 | break; |
| 5072 | } |
| 5073 | case ARM::VMOVS: { |
| 5074 | if (Domain != ExeNEON) |
| 5075 | break; |
| 5076 | |
| 5077 | // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) |
| 5078 |     DstReg = MI.getOperand(0).getReg();
| 5079 |     SrcReg = MI.getOperand(1).getReg();
| 5080 | |
| 5081 | unsigned DstLane = 0, SrcLane = 0; |
| 5082 | MCRegister DDst, DSrc; |
| 5083 |     DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
| 5084 |     DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
| 5085 | |
| 5086 | MCRegister ImplicitSReg; |
| 5087 |     if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
| 5088 | break; |
| 5089 | |
| 5090 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
| 5091 |       MI.removeOperand(i - 1);
| 5092 | |
| 5093 | if (DSrc == DDst) { |
| 5094 | // Destination can be: |
| 5095 | // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) |
| 5096 |       MI.setDesc(get(ARM::VDUPLN32d));
| 5097 |       MIB.addReg(DDst, RegState::Define)
| 5098 |           .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
| 5099 |           .addImm(SrcLane)
| 5100 |           .add(predOps(ARMCC::AL));
| 5101 | 
| 5102 |       // Neither the source nor the destination is naturally represented any
| 5103 |       // more, so add them in manually.
| 5104 |       MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
| 5105 |       MIB.addReg(SrcReg, RegState::Implicit);
| 5106 |       if (ImplicitSReg)
| 5107 |         MIB.addReg(ImplicitSReg, RegState::Implicit);
| 5108 | break; |
| 5109 | } |
| 5110 | |
| 5111 | // In general there's no single instruction that can perform an S <-> S |
| 5112 | // move in NEON space, but a pair of VEXT instructions *can* do the |
| 5113 | // job. It turns out that the VEXTs needed will only use DSrc once, with |
| 5114 | // the position based purely on the combination of lane-0 and lane-1 |
| 5115 | // involved. For example |
| 5116 | // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 |
| 5117 | // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 |
| 5118 | // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 |
| 5119 | // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 |
| 5120 | // |
| 5121 | // Pattern of the MachineInstrs is: |
| 5122 | // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) |
| 5123 | MachineInstrBuilder NewMIB; |
| 5124 |     NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
| 5125 |                      DDst);
| 5126 | |
| 5127 | // On the first instruction, both DSrc and DDst may be undef if present. |
| 5128 | // Specifically when the original instruction didn't have them as an |
| 5129 | // <imp-use>. |
| 5130 | MCRegister CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; |
| 5131 |     bool CurUndef = !MI.readsRegister(CurReg, TRI);
| 5132 |     NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
| 5133 | |
| 5134 | CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; |
| 5135 |     CurUndef = !MI.readsRegister(CurReg, TRI);
| 5136 |     NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
| 5137 |         .addImm(1)
| 5138 |         .add(predOps(ARMCC::AL));
| 5139 | |
| 5140 | if (SrcLane == DstLane) |
| 5141 |       NewMIB.addReg(SrcReg, RegState::Implicit);
| 5142 | |
| 5143 |     MI.setDesc(get(ARM::VEXTd32));
| 5144 |     MIB.addReg(DDst, RegState::Define);
| 5145 | |
| 5146 | // On the second instruction, DDst has definitely been defined above, so |
| 5147 | // it is not undef. DSrc, if present, can be undef as above. |
| 5148 | CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; |
| 5149 |     CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
| 5150 |     MIB.addReg(CurReg, getUndefRegState(CurUndef));
| 5151 | |
| 5152 | CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst; |
| 5153 |     CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
| 5154 |     MIB.addReg(CurReg, getUndefRegState(CurUndef))
| 5155 |         .addImm(1)
| 5156 |         .add(predOps(ARMCC::AL));
| 5157 | |
| 5158 | if (SrcLane != DstLane) |
| 5159 |       MIB.addReg(SrcReg, RegState::Implicit);
| 5160 | |
| 5161 | // As before, the original destination is no longer represented, add it |
| 5162 | // implicitly. |
| 5163 |     MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
| 5164 |     if (ImplicitSReg != 0)
| 5165 |       MIB.addReg(ImplicitSReg, RegState::Implicit);
| 5166 | break; |
| 5167 | } |
| 5168 | } |
| 5169 | } |
| 5170 | |
| 5171 | //===----------------------------------------------------------------------===// |
| 5172 | // Partial register updates |
| 5173 | //===----------------------------------------------------------------------===// |
| 5174 | // |
| 5175 | // Swift renames NEON registers with 64-bit granularity. That means any |
| 5176 | // instruction writing an S-reg implicitly reads the containing D-reg. The |
| 5177 | // problem is mostly avoided by translating f32 operations to v2f32 operations |
| 5178 | // on D-registers, but f32 loads are still a problem. |
| 5179 | // |
| 5180 | // These instructions can load an f32 into a NEON register: |
| 5181 | // |
| 5182 | // VLDRS - Only writes S, partial D update. |
| 5183 | // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops. |
| 5184 | // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. |
| 5185 | // |
| 5186 | // FCONSTD can be used as a dependency-breaking instruction. |
| 5187 | unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( |
| 5188 | const MachineInstr &MI, unsigned OpNum, |
| 5189 | const TargetRegisterInfo *TRI) const { |
| 5190 | auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance(); |
| 5191 | if (!PartialUpdateClearance) |
| 5192 | return 0; |
| 5193 | |
| 5194 |   assert(TRI && "Need TRI instance");
| 5195 | |
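|      |   // If the operand already reads the register there is a true dependence, so
|      |   // no dependency-breaking clearance is needed.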
| 5196 |   const MachineOperand &MO = MI.getOperand(OpNum);
| 5197 | if (MO.readsReg()) |
| 5198 | return 0; |
| 5199 | Register Reg = MO.getReg(); |
| 5200 | int UseOp = -1; |
| 5201 | |
| 5202 | switch (MI.getOpcode()) { |
| 5203 | // Normal instructions writing only an S-register. |
| 5204 | case ARM::VLDRS: |
| 5205 | case ARM::FCONSTS: |
| 5206 | case ARM::VMOVSR: |
| 5207 | case ARM::VMOVv8i8: |
| 5208 | case ARM::VMOVv4i16: |
| 5209 | case ARM::VMOVv2i32: |
| 5210 | case ARM::VMOVv2f32: |
| 5211 | case ARM::VMOVv1i64: |
| 5212 |     UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, false);
| 5213 | break; |
| 5214 | |
| 5215 | // Explicitly reads the dependency. |
| 5216 | case ARM::VLD1LNd32: |
| 5217 | UseOp = 3; |
| 5218 | break; |
| 5219 | default: |
| 5220 | return 0; |
| 5221 | } |
| 5222 | |
| 5223 | // If this instruction actually reads a value from Reg, there is no unwanted |
| 5224 | // dependency. |
| 5225 |   if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
| 5226 | return 0; |
| 5227 | |
| 5228 | // We must be able to clobber the whole D-reg. |
| 5229 | if (Reg.isVirtual()) { |
| 5230 | // Virtual register must be a def undef foo:ssub_0 operand. |
| 5231 | if (!MO.getSubReg() || MI.readsVirtualRegister(Reg)) |
| 5232 | return 0; |
| 5233 | } else if (ARM::SPRRegClass.contains(Reg)) { |
| 5234 | // Physical register: MI must define the full D-reg. |
| 5235 | MCRegister DReg = |
| 5236 |         TRI->getMatchingSuperReg(Reg, ARM::ssub_0, &ARM::DPRRegClass);
| 5237 |     if (!DReg || !MI.definesRegister(DReg, TRI))
| 5238 | return 0; |
| 5239 | } |
| 5240 | |
| 5241 | // MI has an unwanted D-register dependency. |
| 5242 |   // Avoid defs in the previous N instructions.
| 5243 | return PartialUpdateClearance; |
| 5244 | } |
| 5245 | |
| 5246 | // Break a partial register dependency after getPartialRegUpdateClearance |
| 5247 | // returned non-zero. |
| 5248 | void ARMBaseInstrInfo::breakPartialRegDependency( |
| 5249 | MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { |
| 5250 |   assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
| 5251 |   assert(TRI && "Need TRI instance");
| 5252 | 
| 5253 |   const MachineOperand &MO = MI.getOperand(OpNum);
| 5254 |   Register Reg = MO.getReg();
| 5255 |   assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
| 5256 | unsigned DReg = Reg; |
| 5257 | |
| 5258 | // If MI defines an S-reg, find the corresponding D super-register. |
| 5259 | if (ARM::SPRRegClass.contains(Reg)) { |
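|      |     // S-registers pair up into D-registers (S0/S1 -> D0, S2/S3 -> D1, ...), so
|      |     // the containing D-reg is D0 plus half the S-reg index.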
| 5260 | DReg = ARM::D0 + (Reg - ARM::S0) / 2; |
| 5261 |     assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
| 5262 | } |
| 5263 | |
| 5264 |   assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
| 5265 |   assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
| 5266 | |
| 5267 | // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines |
| 5268 | // the full D-register by loading the same value to both lanes. The |
| 5269 | // instruction is micro-coded with 2 uops, so don't do this until we can |
| 5270 | // properly schedule micro-coded instructions. The dispatcher stalls cause |
| 5271 | // too big regressions. |
| 5272 | |
| 5273 | // Insert the dependency-breaking FCONSTD before MI. |
| 5274 | // 96 is the encoding of 0.5, but the actual value doesn't matter here. |
| 5275 |   BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
| 5276 |       .addImm(96)
| 5277 |       .add(predOps(ARMCC::AL));
| 5278 |   MI.addRegisterKilled(DReg, TRI, true);
| 5279 | } |
| 5280 | |
| 5281 | bool ARMBaseInstrInfo::hasNOP() const { |
| 5282 |   return Subtarget.hasFeature(ARM::HasV6KOps);
| 5283 | } |
| 5284 | |
| 5285 | bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { |
| 5286 | if (MI->getNumOperands() < 4) |
| 5287 | return true; |
| 5288 | unsigned ShOpVal = MI->getOperand(i: 3).getImm(); |
| 5289 | unsigned ShImm = ARM_AM::getSORegOffset(Op: ShOpVal); |
| 5290 | // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. |
| 5291 | if ((ShImm == 1 && ARM_AM::getSORegShOp(Op: ShOpVal) == ARM_AM::lsr) || |
| 5292 | ((ShImm == 1 || ShImm == 2) && |
| 5293 | ARM_AM::getSORegShOp(Op: ShOpVal) == ARM_AM::lsl)) |
| 5294 | return true; |
| 5295 | |
| 5296 | return false; |
| 5297 | } |
| 5298 | |
| 5299 | bool ARMBaseInstrInfo::getRegSequenceLikeInputs( |
| 5300 | const MachineInstr &MI, unsigned DefIdx, |
| 5301 | SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { |
| 5302 | assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index" ); |
| 5303 | assert(MI.isRegSequenceLike() && "Invalid kind of instruction" ); |
| 5304 | |
| 5305 | switch (MI.getOpcode()) { |
| 5306 | case ARM::VMOVDRR: |
| 5307 | // dX = VMOVDRR rY, rZ |
| 5308 | // is the same as: |
| 5309 | // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1 |
| 5310 | // Populate the InputRegs accordingly. |
| 5311 | // rY |
| 5312 | const MachineOperand *MOReg = &MI.getOperand(i: 1); |
| 5313 | if (!MOReg->isUndef()) |
| 5314 | InputRegs.push_back(Elt: RegSubRegPairAndIdx(MOReg->getReg(), |
| 5315 | MOReg->getSubReg(), ARM::ssub_0)); |
| 5316 | // rZ |
| 5317 | MOReg = &MI.getOperand(i: 2); |
| 5318 | if (!MOReg->isUndef()) |
| 5319 | InputRegs.push_back(Elt: RegSubRegPairAndIdx(MOReg->getReg(), |
| 5320 | MOReg->getSubReg(), ARM::ssub_1)); |
| 5321 | return true; |
| 5322 | } |
| 5323 | llvm_unreachable("Target dependent opcode missing" ); |
| 5324 | } |
| 5325 | |
| 5326 | bool ARMBaseInstrInfo::getExtractSubregLikeInputs( |
| 5327 | const MachineInstr &MI, unsigned DefIdx, |
| 5328 | RegSubRegPairAndIdx &InputReg) const { |
| 5329 | assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index" ); |
| 5330 | assert(MI.isExtractSubregLike() && "Invalid kind of instruction" ); |
| 5331 | |
| 5332 | switch (MI.getOpcode()) { |
| 5333 | case ARM::VMOVRRD: |
| 5334 | // rX, rY = VMOVRRD dZ |
| 5335 | // is the same as: |
| 5336 | // rX = EXTRACT_SUBREG dZ, ssub_0 |
| 5337 | // rY = EXTRACT_SUBREG dZ, ssub_1 |
| 5338 | const MachineOperand &MOReg = MI.getOperand(i: 2); |
| 5339 | if (MOReg.isUndef()) |
| 5340 | return false; |
| 5341 | InputReg.Reg = MOReg.getReg(); |
| 5342 | InputReg.SubReg = MOReg.getSubReg(); |
| 5343 | InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1; |
| 5344 | return true; |
| 5345 | } |
| 5346 | llvm_unreachable("Target dependent opcode missing" ); |
| 5347 | } |
| 5348 | |
| 5349 | bool ARMBaseInstrInfo::getInsertSubregLikeInputs( |
| 5350 | const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, |
| 5351 | RegSubRegPairAndIdx &InsertedReg) const { |
| 5352 | assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index" ); |
| 5353 | assert(MI.isInsertSubregLike() && "Invalid kind of instruction" ); |
| 5354 | |
| 5355 | switch (MI.getOpcode()) { |
| 5356 | case ARM::VSETLNi32: |
| 5357 | case ARM::MVE_VMOV_to_lane_32: |
| 5358 | // dX = VSETLNi32 dY, rZ, imm |
| 5359 | // qX = MVE_VMOV_to_lane_32 qY, rZ, imm |
| 5360 | const MachineOperand &MOBaseReg = MI.getOperand(i: 1); |
| 5361 | const MachineOperand &MOInsertedReg = MI.getOperand(i: 2); |
| 5362 | if (MOInsertedReg.isUndef()) |
| 5363 | return false; |
| 5364 | const MachineOperand &MOIndex = MI.getOperand(i: 3); |
| 5365 | BaseReg.Reg = MOBaseReg.getReg(); |
| 5366 | BaseReg.SubReg = MOBaseReg.getSubReg(); |
| 5367 | |
| 5368 | InsertedReg.Reg = MOInsertedReg.getReg(); |
| 5369 | InsertedReg.SubReg = MOInsertedReg.getSubReg(); |
| 5370 | InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm(); |
| 5371 | return true; |
| 5372 | } |
| 5373 | llvm_unreachable("Target dependent opcode missing" ); |
| 5374 | } |
| 5375 | |
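|      | // Split a target-flag word into its "direct" part (the bits inside
|      | // ARMII::MO_OPTION_MASK, e.g. MO_LO16/MO_HI16 listed below) and its bitmask
|      | // part (e.g. MO_GOT, MO_SECREL). Illustrative only; the exact flag layout is
|      | // defined elsewhere in the ARM backend.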
| 5376 | std::pair<unsigned, unsigned> |
| 5377 | ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { |
| 5378 | const unsigned Mask = ARMII::MO_OPTION_MASK; |
| 5379 | return std::make_pair(x: TF & Mask, y: TF & ~Mask); |
| 5380 | } |
| 5381 | |
| 5382 | ArrayRef<std::pair<unsigned, const char *>> |
| 5383 | ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { |
| 5384 | using namespace ARMII; |
| 5385 | |
| 5386 | static const std::pair<unsigned, const char *> TargetFlags[] = { |
| 5387 | {MO_LO16, "arm-lo16" }, {MO_HI16, "arm-hi16" }, |
| 5388 | {MO_LO_0_7, "arm-lo-0-7" }, {MO_HI_0_7, "arm-hi-0-7" }, |
| 5389 | {MO_LO_8_15, "arm-lo-8-15" }, {MO_HI_8_15, "arm-hi-8-15" }, |
| 5390 | }; |
| 5391 | return ArrayRef(TargetFlags); |
| 5392 | } |
| 5393 | |
| 5394 | ArrayRef<std::pair<unsigned, const char *>> |
| 5395 | ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { |
| 5396 | using namespace ARMII; |
| 5397 | |
| 5398 | static const std::pair<unsigned, const char *> TargetFlags[] = { |
| 5399 | {MO_COFFSTUB, "arm-coffstub" }, |
| 5400 | {MO_GOT, "arm-got" }, |
| 5401 | {MO_SBREL, "arm-sbrel" }, |
| 5402 | {MO_DLLIMPORT, "arm-dllimport" }, |
| 5403 | {MO_SECREL, "arm-secrel" }, |
| 5404 | {MO_NONLAZY, "arm-nonlazy" }}; |
| 5405 | return ArrayRef(TargetFlags); |
| 5406 | } |
| 5407 | |
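|      | // Example of what this describes (sketch): for "ADDri r0, r1, #16, ..." with
|      | // Reg == r0 this returns {r1, 16}; for the SUBri form the offset is negated,
|      | // giving {r1, -16}.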
| 5408 | std::optional<RegImmPair> |
| 5409 | ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const { |
| 5410 | int Sign = 1; |
| 5411 | unsigned Opcode = MI.getOpcode(); |
| 5412 | int64_t Offset = 0; |
| 5413 | |
| 5414 | // TODO: Handle cases where Reg is a super- or sub-register of the |
| 5415 | // destination register. |
| 5416 | const MachineOperand &Op0 = MI.getOperand(i: 0); |
| 5417 | if (!Op0.isReg() || Reg != Op0.getReg()) |
| 5418 | return std::nullopt; |
| 5419 | |
| 5420 | // We describe SUBri or ADDri instructions. |
| 5421 | if (Opcode == ARM::SUBri) |
| 5422 | Sign = -1; |
| 5423 | else if (Opcode != ARM::ADDri) |
| 5424 | return std::nullopt; |
| 5425 | |
| 5426 | // TODO: Third operand can be global address (usually some string). Since |
| 5427 | // strings can be relocated we cannot calculate their offsets for |
| 5428 | // now. |
| 5429 | if (!MI.getOperand(i: 1).isReg() || !MI.getOperand(i: 2).isImm()) |
| 5430 | return std::nullopt; |
| 5431 | |
| 5432 | Offset = MI.getOperand(i: 2).getImm() * Sign; |
| 5433 | return RegImmPair{MI.getOperand(i: 1).getReg(), Offset}; |
| 5434 | } |
| 5435 | |
| 5436 | bool llvm::registerDefinedBetween(unsigned Reg, |
| 5437 | MachineBasicBlock::iterator From, |
| 5438 | MachineBasicBlock::iterator To, |
| 5439 | const TargetRegisterInfo *TRI) { |
| 5440 | for (auto I = From; I != To; ++I) |
| 5441 | if (I->modifiesRegister(Reg, TRI)) |
| 5442 | return true; |
| 5443 | return false; |
| 5444 | } |
| 5445 | |
| 5446 | MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br, |
| 5447 | const TargetRegisterInfo *TRI) { |
| 5448 | // Search backwards to the instruction that defines CPSR. This may or may
| 5449 | // not be a CMP; we check that after this loop. If we find another
| 5450 | // instruction that reads CPSR, we return nullptr.
| 5451 | MachineBasicBlock::iterator CmpMI = Br; |
| 5452 | while (CmpMI != Br->getParent()->begin()) { |
| 5453 | --CmpMI; |
| 5454 | if (CmpMI->modifiesRegister(Reg: ARM::CPSR, TRI)) |
| 5455 | break; |
| 5456 | if (CmpMI->readsRegister(Reg: ARM::CPSR, TRI)) |
| 5457 | break; |
| 5458 | } |
| 5459 | |
| 5460 | // Check that this inst is a CMP r[0-7], #0 and that the register |
| 5461 | // is not redefined between the cmp and the br. |
| 5462 | if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri) |
| 5463 | return nullptr; |
| 5464 | Register Reg = CmpMI->getOperand(i: 0).getReg(); |
| 5465 | Register PredReg; |
| 5466 | ARMCC::CondCodes Pred = getInstrPredicate(MI: *CmpMI, PredReg); |
| 5467 | if (Pred != ARMCC::AL || CmpMI->getOperand(i: 1).getImm() != 0) |
| 5468 | return nullptr; |
| 5469 | if (!isARMLowRegister(Reg)) |
| 5470 | return nullptr; |
| 5471 | if (registerDefinedBetween(Reg, From: CmpMI->getNextNode(), To: Br, TRI)) |
| 5472 | return nullptr; |
| 5473 | |
| 5474 | return &*CmpMI; |
| 5475 | } |
| 5476 | |
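|      | // Returns an approximate cost of materializing Val: bytes when ForCodesize,
|      | // otherwise instruction count. As a rough illustration, in Thumb code without
|      | // MOVW (pre-v6T2), Val = 300 is too large for a single MOV but is <= 510, so
|      | // it falls into the MOV + ADDi8 case, i.e. 4 bytes or 2 instructions.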
| 5477 | unsigned llvm::ConstantMaterializationCost(unsigned Val, |
| 5478 | const ARMSubtarget *Subtarget, |
| 5479 | bool ForCodesize) { |
| 5480 | if (Subtarget->isThumb()) { |
| 5481 | if (Val <= 255) // MOV |
| 5482 | return ForCodesize ? 2 : 1; |
| 5483 | if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV |
| 5484 | ARM_AM::getT2SOImmVal(Arg: Val) != -1 || // MOVW |
| 5485 | ARM_AM::getT2SOImmVal(Arg: ~Val) != -1)) // MVN |
| 5486 | return ForCodesize ? 4 : 1; |
| 5487 | if (Val <= 510) // MOV + ADDi8 |
| 5488 | return ForCodesize ? 4 : 2; |
| 5489 | if (~Val <= 255) // MOV + MVN |
| 5490 | return ForCodesize ? 4 : 2; |
| 5491 | if (ARM_AM::isThumbImmShiftedVal(V: Val)) // MOV + LSL |
| 5492 | return ForCodesize ? 4 : 2; |
| 5493 | } else { |
| 5494 | if (ARM_AM::getSOImmVal(Arg: Val) != -1) // MOV |
| 5495 | return ForCodesize ? 4 : 1; |
| 5496 | if (ARM_AM::getSOImmVal(Arg: ~Val) != -1) // MVN |
| 5497 | return ForCodesize ? 4 : 1; |
| 5498 | if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW |
| 5499 | return ForCodesize ? 4 : 1; |
| 5500 | if (ARM_AM::isSOImmTwoPartVal(V: Val)) // two instrs |
| 5501 | return ForCodesize ? 8 : 2; |
| 5502 | if (ARM_AM::isSOImmTwoPartValNeg(V: Val)) // two instrs |
| 5503 | return ForCodesize ? 8 : 2; |
| 5504 | } |
| 5505 | if (Subtarget->useMovt()) // MOVW + MOVT |
| 5506 | return ForCodesize ? 8 : 2; |
| 5507 | return ForCodesize ? 8 : 3; // Literal pool load |
| 5508 | } |
| 5509 | |
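|      | // Compare the materialization costs of two constants, first by the requested
|      | // metric and, on a tie, by the other metric (e.g. two constants that both
|      | // need 8 bytes are then ordered by instruction count instead).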
| 5510 | bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, |
| 5511 | const ARMSubtarget *Subtarget, |
| 5512 | bool ForCodesize) { |
| 5513 | // Check with ForCodesize |
| 5514 | unsigned Cost1 = ConstantMaterializationCost(Val: Val1, Subtarget, ForCodesize); |
| 5515 | unsigned Cost2 = ConstantMaterializationCost(Val: Val2, Subtarget, ForCodesize); |
| 5516 | if (Cost1 < Cost2) |
| 5517 | return true; |
| 5518 | if (Cost1 > Cost2) |
| 5519 | return false; |
| 5520 | |
| 5521 | // If they are equal, try with !ForCodesize |
| 5522 | return ConstantMaterializationCost(Val: Val1, Subtarget, ForCodesize: !ForCodesize) < |
| 5523 | ConstantMaterializationCost(Val: Val2, Subtarget, ForCodesize: !ForCodesize); |
| 5524 | } |
| 5525 | |
| 5526 | /// Constants defining how certain sequences should be outlined. |
| 5527 | /// This encompasses how an outlined function should be called, and what kind of |
| 5528 | /// frame should be emitted for that outlined function. |
| 5529 | /// |
| 5530 | /// \p MachineOutlinerTailCall implies that the function is being created from |
| 5531 | /// a sequence of instructions ending in a return. |
| 5532 | /// |
| 5533 | /// That is, |
| 5534 | /// |
| 5535 | /// I1 OUTLINED_FUNCTION: |
| 5536 | /// I2 --> B OUTLINED_FUNCTION I1 |
| 5537 | /// BX LR I2 |
| 5538 | /// BX LR |
| 5539 | /// |
| 5540 | /// +-------------------------+--------+-----+ |
| 5541 | /// | | Thumb2 | ARM | |
| 5542 | /// +-------------------------+--------+-----+ |
| 5543 | /// | Call overhead in Bytes | 4 | 4 | |
| 5544 | /// | Frame overhead in Bytes | 0 | 0 | |
| 5545 | /// | Stack fixup required | No | No | |
| 5546 | /// +-------------------------+--------+-----+ |
| 5547 | /// |
| 5548 | /// \p MachineOutlinerThunk implies that the function is being created from |
| 5549 | /// a sequence of instructions ending in a call. The outlined function is |
| 5550 | /// called with a BL instruction, and the outlined function tail-calls the |
| 5551 | /// original call destination. |
| 5552 | /// |
| 5553 | /// That is, |
| 5554 | /// |
| 5555 | /// I1 OUTLINED_FUNCTION: |
| 5556 | /// I2 --> BL OUTLINED_FUNCTION I1 |
| 5557 | /// BL f I2 |
| 5558 | /// B f |
| 5559 | /// |
| 5560 | /// +-------------------------+--------+-----+ |
| 5561 | /// | | Thumb2 | ARM | |
| 5562 | /// +-------------------------+--------+-----+ |
| 5563 | /// | Call overhead in Bytes | 4 | 4 | |
| 5564 | /// | Frame overhead in Bytes | 0 | 0 | |
| 5565 | /// | Stack fixup required | No | No | |
| 5566 | /// +-------------------------+--------+-----+ |
| 5567 | /// |
| 5568 | /// \p MachineOutlinerNoLRSave implies that the function should be called using |
| 5569 | /// a BL instruction, but doesn't require LR to be saved and restored. This |
| 5570 | /// happens when LR is known to be dead. |
| 5571 | /// |
| 5572 | /// That is, |
| 5573 | /// |
| 5574 | /// I1 OUTLINED_FUNCTION: |
| 5575 | /// I2 --> BL OUTLINED_FUNCTION I1 |
| 5576 | /// I3 I2 |
| 5577 | /// I3 |
| 5578 | /// BX LR |
| 5579 | /// |
| 5580 | /// +-------------------------+--------+-----+ |
| 5581 | /// | | Thumb2 | ARM | |
| 5582 | /// +-------------------------+--------+-----+ |
| 5583 | /// | Call overhead in Bytes | 4 | 4 | |
| 5584 | /// | Frame overhead in Bytes | 2 | 4 | |
| 5585 | /// | Stack fixup required | No | No | |
| 5586 | /// +-------------------------+--------+-----+ |
| 5587 | /// |
| 5588 | /// \p MachineOutlinerRegSave implies that the function should be called with a |
| 5589 | /// save and restore of LR to an available register. This allows us to avoid |
| 5590 | /// stack fixups. Note that this outlining variant is compatible with the |
| 5591 | /// NoLRSave case. |
| 5592 | /// |
| 5593 | /// That is, |
| 5594 | /// |
| 5595 | /// I1 Save LR OUTLINED_FUNCTION: |
| 5596 | /// I2 --> BL OUTLINED_FUNCTION I1 |
| 5597 | /// I3 Restore LR I2 |
| 5598 | /// I3 |
| 5599 | /// BX LR |
| 5600 | /// |
| 5601 | /// +-------------------------+--------+-----+ |
| 5602 | /// | | Thumb2 | ARM | |
| 5603 | /// +-------------------------+--------+-----+ |
| 5604 | /// | Call overhead in Bytes | 8 | 12 | |
| 5605 | /// | Frame overhead in Bytes | 2 | 4 | |
| 5606 | /// | Stack fixup required | No | No | |
| 5607 | /// +-------------------------+--------+-----+ |
| 5608 | /// |
| 5609 | /// \p MachineOutlinerDefault implies that the function should be called with |
| 5610 | /// a save and restore of LR to the stack. |
| 5611 | /// |
| 5612 | /// That is, |
| 5613 | /// |
| 5614 | /// I1 Save LR OUTLINED_FUNCTION: |
| 5615 | /// I2 --> BL OUTLINED_FUNCTION I1 |
| 5616 | /// I3 Restore LR I2 |
| 5617 | /// I3 |
| 5618 | /// BX LR |
| 5619 | /// |
| 5620 | /// +-------------------------+--------+-----+ |
| 5621 | /// | | Thumb2 | ARM | |
| 5622 | /// +-------------------------+--------+-----+ |
| 5623 | /// | Call overhead in Bytes | 8 | 12 | |
| 5624 | /// | Frame overhead in Bytes | 2 | 4 | |
| 5625 | /// | Stack fixup required | Yes | Yes | |
| 5626 | /// +-------------------------+--------+-----+ |
| 5627 | |
| 5628 | enum MachineOutlinerClass { |
| 5629 | MachineOutlinerTailCall, |
| 5630 | MachineOutlinerThunk, |
| 5631 | MachineOutlinerNoLRSave, |
| 5632 | MachineOutlinerRegSave, |
| 5633 | MachineOutlinerDefault |
| 5634 | }; |
| 5635 | |
| 5636 | enum MachineOutlinerMBBFlags { |
| 5637 | LRUnavailableSomewhere = 0x2, |
| 5638 | HasCalls = 0x4, |
| 5639 | UnsafeRegsDead = 0x8 |
| 5640 | }; |
| 5641 | |
| 5642 | struct OutlinerCosts { |
| 5643 | int CallTailCall; |
| 5644 | int FrameTailCall; |
| 5645 | int CallThunk; |
| 5646 | int FrameThunk; |
| 5647 | int CallNoLRSave; |
| 5648 | int FrameNoLRSave; |
| 5649 | int CallRegSave; |
| 5650 | int FrameRegSave; |
| 5651 | int CallDefault; |
| 5652 | int FrameDefault; |
| 5653 | int SaveRestoreLROnStack; |
| 5654 | |
| 5655 | OutlinerCosts(const ARMSubtarget &target) |
| 5656 | : CallTailCall(target.isThumb() ? 4 : 4), |
| 5657 | FrameTailCall(target.isThumb() ? 0 : 0), |
| 5658 | CallThunk(target.isThumb() ? 4 : 4), |
| 5659 | FrameThunk(target.isThumb() ? 0 : 0), |
| 5660 | CallNoLRSave(target.isThumb() ? 4 : 4), |
| 5661 | FrameNoLRSave(target.isThumb() ? 2 : 4), |
| 5662 | CallRegSave(target.isThumb() ? 8 : 12), |
| 5663 | FrameRegSave(target.isThumb() ? 2 : 4), |
| 5664 | CallDefault(target.isThumb() ? 8 : 12), |
| 5665 | FrameDefault(target.isThumb() ? 2 : 4), |
| 5666 | SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {} |
| 5667 | }; |
| 5668 | |
| 5669 | Register |
| 5670 | ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const { |
| 5671 | MachineFunction *MF = C.getMF(); |
| 5672 | const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); |
| 5673 | const ARMBaseRegisterInfo *ARI = |
| 5674 | static_cast<const ARMBaseRegisterInfo *>(&TRI); |
| 5675 | |
| 5676 | BitVector regsReserved = ARI->getReservedRegs(MF: *MF); |
| 5677 | // Check if there is an available register across the sequence that we can |
| 5678 | // use. |
| 5679 | for (Register Reg : ARM::rGPRRegClass) { |
| 5680 | if (!(Reg < regsReserved.size() && regsReserved.test(Idx: Reg)) && |
| 5681 | Reg != ARM::LR && // LR is not reserved, but don't use it. |
| 5682 | Reg != ARM::R12 && // R12 is not guaranteed to be preserved. |
| 5683 | C.isAvailableAcrossAndOutOfSeq(Reg, TRI) && |
| 5684 | C.isAvailableInsideSeq(Reg, TRI)) |
| 5685 | return Reg; |
| 5686 | } |
| 5687 | return Register(); |
| 5688 | } |
| 5689 | |
| 5690 | // Compute liveness of LR at the point after the interval [I, E), which |
| 5691 | // denotes a *backward* iteration through instructions. Used only for return |
| 5692 | // basic blocks, which do not end with a tail call. |
| 5693 | static bool isLRAvailable(const TargetRegisterInfo &TRI, |
| 5694 | MachineBasicBlock::reverse_iterator I, |
| 5695 | MachineBasicBlock::reverse_iterator E) { |
| 5696 | // At the end of the function, LR is dead.
| 5697 | bool Live = false; |
| 5698 | for (; I != E; ++I) { |
| 5699 | const MachineInstr &MI = *I; |
| 5700 | |
| 5701 | // Check defs of LR. |
| 5702 | if (MI.modifiesRegister(Reg: ARM::LR, TRI: &TRI)) |
| 5703 | Live = false; |
| 5704 | |
| 5705 | // Check uses of LR. |
| 5706 | unsigned Opcode = MI.getOpcode(); |
| 5707 | if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR || |
| 5708 | Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET || |
| 5709 | Opcode == ARM::tBXNS_RET) { |
| 5710 | // These instructions use LR, but it's not an (explicit or implicit) |
| 5711 | // operand. |
| 5712 | Live = true; |
| 5713 | continue; |
| 5714 | } |
| 5715 | if (MI.readsRegister(Reg: ARM::LR, TRI: &TRI)) |
| 5716 | Live = true; |
| 5717 | } |
| 5718 | return !Live; |
| 5719 | } |
| 5720 | |
| 5721 | std::optional<std::unique_ptr<outliner::OutlinedFunction>> |
| 5722 | ARMBaseInstrInfo::getOutliningCandidateInfo( |
| 5723 | const MachineModuleInfo &MMI, |
| 5724 | std::vector<outliner::Candidate> &RepeatedSequenceLocs, |
| 5725 | unsigned MinRepeats) const { |
| 5726 | unsigned SequenceSize = 0; |
| 5727 | for (auto &MI : RepeatedSequenceLocs[0]) |
| 5728 | SequenceSize += getInstSizeInBytes(MI); |
| 5729 | |
| 5730 | // Properties about candidate MBBs that hold for all of them. |
| 5731 | unsigned FlagsSetInAll = 0xF; |
| 5732 | |
| 5733 | // Compute liveness information for each candidate, and set FlagsSetInAll. |
| 5734 | const TargetRegisterInfo &TRI = getRegisterInfo(); |
| 5735 | for (outliner::Candidate &C : RepeatedSequenceLocs) |
| 5736 | FlagsSetInAll &= C.Flags; |
| 5737 | |
| 5738 | // According to the ARM Procedure Call Standard, the following are |
| 5739 | // undefined on entry/exit from a function call: |
| 5740 | // |
| 5741 | // * Register R12(IP), |
| 5742 | // * Condition codes (and thus the CPSR register) |
| 5743 | // |
| 5744 | // Since we control the instructions which are part of the outlined regions |
| 5745 | // we don't need to be fully compliant with the AAPCS, but we have to |
| 5746 | // guarantee that if a veneer is inserted at link time the code is still |
| 5747 | // correct. Because of this, we can't outline any sequence of instructions |
| 5748 | // where one of these registers is live into/across it. Thus, we need to |
| 5749 | // delete those candidates. |
| 5750 | auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) { |
| 5751 | // If the unsafe registers in this block are all dead, then we don't need |
| 5752 | // to compute liveness here. |
| 5753 | if (C.Flags & UnsafeRegsDead) |
| 5754 | return false; |
| 5755 | return C.isAnyUnavailableAcrossOrOutOfSeq(Regs: {ARM::R12, ARM::CPSR}, TRI); |
| 5756 | }; |
| 5757 | |
| 5758 | // Are there any candidates where those registers are live? |
| 5759 | if (!(FlagsSetInAll & UnsafeRegsDead)) { |
| 5760 | // Erase every candidate that violates the restrictions above. (It could be |
| 5761 | // true that we have viable candidates, so it's not worth bailing out in |
| 5762 | // the case that, say, 1 out of 20 candidates violates the restrictions.)
| 5763 | llvm::erase_if(C&: RepeatedSequenceLocs, P: CantGuaranteeValueAcrossCall); |
| 5764 | |
| 5765 | // If the sequence doesn't have enough candidates left, then we're done. |
| 5766 | if (RepeatedSequenceLocs.size() < MinRepeats) |
| 5767 | return std::nullopt; |
| 5768 | } |
| 5769 | |
| 5770 | // We expect the majority of the outlining candidates to be in consensus with
| 5771 | // regard to return address signing and authentication, and branch target
| 5772 | // enforcement; in other words, partitioning according to all four
| 5773 | // possible combinations of PAC-RET and BTI is going to yield one big subset
| 5774 | // and three small (likely empty) subsets. That allows us to cull incompatible |
| 5775 | // candidates separately for PAC-RET and BTI. |
| 5776 | |
| 5777 | // Partition the candidates into two sets: one with BTI enabled and one with
| 5778 | // BTI disabled. Remove the candidates from the smaller set. If both sets are
| 5779 | // the same size, prefer the non-BTI ones for outlining, since they have less
| 5780 | // overhead.
| 5781 | auto NoBTI = |
| 5782 | llvm::partition(Range&: RepeatedSequenceLocs, P: [](const outliner::Candidate &C) { |
| 5783 | const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); |
| 5784 | return AFI.branchTargetEnforcement(); |
| 5785 | }); |
| 5786 | if (std::distance(first: RepeatedSequenceLocs.begin(), last: NoBTI) > |
| 5787 | std::distance(first: NoBTI, last: RepeatedSequenceLocs.end())) |
| 5788 | RepeatedSequenceLocs.erase(first: NoBTI, last: RepeatedSequenceLocs.end()); |
| 5789 | else |
| 5790 | RepeatedSequenceLocs.erase(first: RepeatedSequenceLocs.begin(), last: NoBTI); |
| 5791 | |
| 5792 | if (RepeatedSequenceLocs.size() < MinRepeats) |
| 5793 | return std::nullopt; |
| 5794 | |
| 5795 | // Likewise, partition the candidates according to PAC-RET enablement. |
| 5796 | auto NoPAC = |
| 5797 | llvm::partition(Range&: RepeatedSequenceLocs, P: [](const outliner::Candidate &C) { |
| 5798 | const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); |
| 5799 | // If the function happens to not spill the LR, do not disqualify it |
| 5800 | // from the outlining. |
| 5801 | return AFI.shouldSignReturnAddress(SpillsLR: true); |
| 5802 | }); |
| 5803 | if (std::distance(first: RepeatedSequenceLocs.begin(), last: NoPAC) > |
| 5804 | std::distance(first: NoPAC, last: RepeatedSequenceLocs.end())) |
| 5805 | RepeatedSequenceLocs.erase(first: NoPAC, last: RepeatedSequenceLocs.end()); |
| 5806 | else |
| 5807 | RepeatedSequenceLocs.erase(first: RepeatedSequenceLocs.begin(), last: NoPAC); |
| 5808 | |
| 5809 | if (RepeatedSequenceLocs.size() < MinRepeats) |
| 5810 | return std::nullopt; |
| 5811 | |
| 5812 | // At this point, we have only "safe" candidates to outline. Figure out |
| 5813 | // frame + call instruction information. |
| 5814 | |
| 5815 | unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode(); |
| 5816 | |
| 5817 | // Helper lambda which sets call information for every candidate. |
| 5818 | auto SetCandidateCallInfo = |
| 5819 | [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) { |
| 5820 | for (outliner::Candidate &C : RepeatedSequenceLocs) |
| 5821 | C.setCallInfo(CID: CallID, CO: NumBytesForCall); |
| 5822 | }; |
| 5823 | |
| 5824 | OutlinerCosts Costs(Subtarget); |
| 5825 | |
| 5826 | const auto &SomeMFI = |
| 5827 | *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>(); |
| 5828 | // Adjust costs to account for the BTI instructions. |
| 5829 | if (SomeMFI.branchTargetEnforcement()) { |
| 5830 | Costs.FrameDefault += 4; |
| 5831 | Costs.FrameNoLRSave += 4; |
| 5832 | Costs.FrameRegSave += 4; |
| 5833 | Costs.FrameTailCall += 4; |
| 5834 | Costs.FrameThunk += 4; |
| 5835 | } |
| 5836 | |
| 5837 | // Adjust costs to account for sign and authentication instructions. |
| 5838 | if (SomeMFI.shouldSignReturnAddress(SpillsLR: true)) { |
| 5839 | Costs.CallDefault += 8; // +PAC instr, +AUT instr |
| 5840 | Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr |
| 5841 | } |
| 5842 | |
| 5843 | unsigned FrameID = MachineOutlinerDefault; |
| 5844 | unsigned NumBytesToCreateFrame = Costs.FrameDefault; |
| 5845 | |
| 5846 | // If the last instruction in any candidate is a terminator, then we should |
| 5847 | // tail call all of the candidates. |
| 5848 | if (RepeatedSequenceLocs[0].back().isTerminator()) { |
| 5849 | FrameID = MachineOutlinerTailCall; |
| 5850 | NumBytesToCreateFrame = Costs.FrameTailCall; |
| 5851 | SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall); |
| 5852 | } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX || |
| 5853 | LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL || |
| 5854 | LastInstrOpcode == ARM::tBLXr || |
| 5855 | LastInstrOpcode == ARM::tBLXr_noip || |
| 5856 | LastInstrOpcode == ARM::tBLXi) { |
| 5857 | FrameID = MachineOutlinerThunk; |
| 5858 | NumBytesToCreateFrame = Costs.FrameThunk; |
| 5859 | SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk); |
| 5860 | } else { |
| 5861 | // We need to decide how to emit calls + frames. We can always emit the same |
| 5862 | // frame if we don't need to save to the stack. If we have to save to the |
| 5863 | // stack, then we need a different frame. |
| 5864 | unsigned NumBytesNoStackCalls = 0; |
| 5865 | std::vector<outliner::Candidate> CandidatesWithoutStackFixups; |
| 5866 | |
| 5867 | for (outliner::Candidate &C : RepeatedSequenceLocs) { |
| 5868 | // LR liveness is overestimated in return blocks, unless they end with a |
| 5869 | // tail call. |
| 5870 | const auto Last = C.getMBB()->rbegin(); |
| 5871 | const bool LRIsAvailable = |
| 5872 | C.getMBB()->isReturnBlock() && !Last->isCall() |
| 5873 | ? isLRAvailable(TRI, I: Last, |
| 5874 | E: (MachineBasicBlock::reverse_iterator)C.begin()) |
| 5875 | : C.isAvailableAcrossAndOutOfSeq(Reg: ARM::LR, TRI); |
| 5876 | if (LRIsAvailable) { |
| 5877 | FrameID = MachineOutlinerNoLRSave; |
| 5878 | NumBytesNoStackCalls += Costs.CallNoLRSave; |
| 5879 | C.setCallInfo(CID: MachineOutlinerNoLRSave, CO: Costs.CallNoLRSave); |
| 5880 | CandidatesWithoutStackFixups.push_back(x: C); |
| 5881 | } |
| 5882 | |
| 5883 | // Is an unused register available? If so, we won't modify the stack, so |
| 5884 | // we can outline with the same frame type as those that don't save LR. |
| 5885 | else if (findRegisterToSaveLRTo(C)) { |
| 5886 | FrameID = MachineOutlinerRegSave; |
| 5887 | NumBytesNoStackCalls += Costs.CallRegSave; |
| 5888 | C.setCallInfo(CID: MachineOutlinerRegSave, CO: Costs.CallRegSave); |
| 5889 | CandidatesWithoutStackFixups.push_back(x: C); |
| 5890 | } |
| 5891 | |
| 5892 | // Is SP used in the sequence at all? If not, we don't have to modify |
| 5893 | // the stack, so we are guaranteed to get the same frame. |
| 5894 | else if (C.isAvailableInsideSeq(Reg: ARM::SP, TRI)) { |
| 5895 | NumBytesNoStackCalls += Costs.CallDefault; |
| 5896 | C.setCallInfo(CID: MachineOutlinerDefault, CO: Costs.CallDefault); |
| 5897 | CandidatesWithoutStackFixups.push_back(x: C); |
| 5898 | } |
| 5899 | |
| 5900 | // If we outline this, we need to modify the stack. Pretend we don't |
| 5901 | // outline this by saving all of its bytes. |
| 5902 | else |
| 5903 | NumBytesNoStackCalls += SequenceSize; |
| 5904 | } |
| 5905 | |
| 5906 | // If there are no places where we have to save LR, then note that we don't |
| 5907 | // have to update the stack. Otherwise, give every candidate the default |
| 5908 | // call type.
| 5909 | if (NumBytesNoStackCalls <= |
| 5910 | RepeatedSequenceLocs.size() * Costs.CallDefault) { |
| 5911 | RepeatedSequenceLocs = CandidatesWithoutStackFixups; |
| 5912 | FrameID = MachineOutlinerNoLRSave; |
| 5913 | if (RepeatedSequenceLocs.size() < MinRepeats) |
| 5914 | return std::nullopt; |
| 5915 | } else |
| 5916 | SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault); |
| 5917 | } |
| 5918 | |
| 5919 | // Does every candidate's MBB contain a call? If so, then we might have a |
| 5920 | // call in the range. |
| 5921 | if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { |
| 5922 | // Check if the range contains a call. These require a save + restore of
| 5923 | // the link register. |
| 5924 | outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; |
| 5925 | if (any_of(Range: drop_end(RangeOrContainer&: FirstCand), |
| 5926 | P: [](const MachineInstr &MI) { return MI.isCall(); })) |
| 5927 | NumBytesToCreateFrame += Costs.SaveRestoreLROnStack; |
| 5928 | |
| 5929 | // Handle the last instruction separately. If it is a tail call, then the
| 5930 | // last instruction is a call and we don't want to save + restore in this
| 5931 | // case. However, it is possible that the last instruction is a call
| 5932 | // without it being valid to tail call this sequence. We should consider
| 5933 | // this as well.
| 5934 | else if (FrameID != MachineOutlinerThunk && |
| 5935 | FrameID != MachineOutlinerTailCall && FirstCand.back().isCall()) |
| 5936 | NumBytesToCreateFrame += Costs.SaveRestoreLROnStack; |
| 5937 | } |
| 5938 | |
| 5939 | return std::make_unique<outliner::OutlinedFunction>( |
| 5940 | args&: RepeatedSequenceLocs, args&: SequenceSize, args&: NumBytesToCreateFrame, args&: FrameID); |
| 5941 | } |
| 5942 | |
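|      | // Check whether an SP-relative offset in MI stays encodable after adding
|      | // Fixup, and optionally (Updt) rewrite it. For example (sketch): a t2LDRi12
|      | // load from [sp, #8] with a 16-byte fixup becomes [sp, #24]; AddrModeT2_i12
|      | // has 12 unscaled offset bits, so anything up to 4095 still encodes and the
|      | // function returns true.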
| 5943 | bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, |
| 5944 | int64_t Fixup, |
| 5945 | bool Updt) const { |
| 5946 | int SPIdx = MI->findRegisterUseOperandIdx(Reg: ARM::SP, /*TRI=*/nullptr); |
| 5947 | unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask); |
| 5948 | if (SPIdx < 0) |
| 5949 | // No SP operand |
| 5950 | return true; |
| 5951 | else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2)) |
| 5952 | // If SP is not the base register we can't do much |
| 5953 | return false; |
| 5954 | |
| 5955 | // The stack might be involved, but the addressing mode doesn't handle any
| 5956 | // offset. Note: AddrModeT1_[1|2|4] don't operate on SP.
| 5957 | if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions |
| 5958 | AddrMode == ARMII::AddrMode4 || // Load/Store Multiple |
| 5959 | AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple |
| 5960 | AddrMode == ARMII::AddrModeT2_so || // SP can't be used as base register
| 5961 | AddrMode == ARMII::AddrModeT2_pc || // PCrel access |
| 5962 | AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST |
| 5963 | AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE |
| 5964 | AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE |
| 5965 | AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR |
| 5966 | AddrMode == ARMII::AddrModeNone || |
| 5967 | AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions |
| 5968 | AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm |
| 5969 | return false; |
| 5970 | |
| 5971 | unsigned NumOps = MI->getDesc().getNumOperands(); |
| 5972 | unsigned ImmIdx = NumOps - 3; |
| 5973 | |
| 5974 | const MachineOperand &Offset = MI->getOperand(i: ImmIdx); |
| 5975 | assert(Offset.isImm() && "Is not an immediate" ); |
| 5976 | int64_t OffVal = Offset.getImm(); |
| 5977 | |
| 5978 | if (OffVal < 0) |
| 5979 | // Don't override data if they are below SP.
| 5980 | return false; |
| 5981 | |
| 5982 | unsigned NumBits = 0; |
| 5983 | unsigned Scale = 1; |
| 5984 | |
| 5985 | switch (AddrMode) { |
| 5986 | case ARMII::AddrMode3: |
| 5987 | if (ARM_AM::getAM3Op(AM3Opc: OffVal) == ARM_AM::sub) |
| 5988 | return false; |
| 5989 | OffVal = ARM_AM::getAM3Offset(AM3Opc: OffVal); |
| 5990 | NumBits = 8; |
| 5991 | break; |
| 5992 | case ARMII::AddrMode5: |
| 5993 | if (ARM_AM::getAM5Op(AM5Opc: OffVal) == ARM_AM::sub) |
| 5994 | return false; |
| 5995 | OffVal = ARM_AM::getAM5Offset(AM5Opc: OffVal); |
| 5996 | NumBits = 8; |
| 5997 | Scale = 4; |
| 5998 | break; |
| 5999 | case ARMII::AddrMode5FP16: |
| 6000 | if (ARM_AM::getAM5FP16Op(AM5Opc: OffVal) == ARM_AM::sub) |
| 6001 | return false; |
| 6002 | OffVal = ARM_AM::getAM5FP16Offset(AM5Opc: OffVal); |
| 6003 | NumBits = 8; |
| 6004 | Scale = 2; |
| 6005 | break; |
| 6006 | case ARMII::AddrModeT2_i8pos: |
| 6007 | NumBits = 8; |
| 6008 | break; |
| 6009 | case ARMII::AddrModeT2_i8s4: |
| 6010 | // FIXME: Values are already scaled in this addressing mode. |
| 6011 | assert((Fixup & 3) == 0 && "Can't encode this offset!" ); |
| 6012 | NumBits = 10; |
| 6013 | break; |
| 6014 | case ARMII::AddrModeT2_ldrex: |
| 6015 | NumBits = 8; |
| 6016 | Scale = 4; |
| 6017 | break; |
| 6018 | case ARMII::AddrModeT2_i12: |
| 6019 | case ARMII::AddrMode_i12: |
| 6020 | NumBits = 12; |
| 6021 | break; |
| 6022 | case ARMII::AddrModeT1_s: // SP-relative LD/ST |
| 6023 | NumBits = 8; |
| 6024 | Scale = 4; |
| 6025 | break; |
| 6026 | default: |
| 6027 | llvm_unreachable("Unsupported addressing mode!" ); |
| 6028 | } |
| 6029 | // Make sure the offset is encodable for instructions that scale the |
| 6030 | // immediate. |
| 6031 | assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 && |
| 6032 | "Can't encode this offset!" ); |
| 6033 | OffVal += Fixup / Scale; |
| 6034 | |
| 6035 | unsigned Mask = (1 << NumBits) - 1; |
| 6036 | |
| 6037 | if (OffVal <= Mask) { |
| 6038 | if (Updt) |
| 6039 | MI->getOperand(i: ImmIdx).setImm(OffVal); |
| 6040 | return true; |
| 6041 | } |
| 6042 | |
| 6043 | return false; |
| 6044 | } |
| 6045 | |
| 6046 | void ARMBaseInstrInfo::mergeOutliningCandidateAttributes( |
| 6047 | Function &F, std::vector<outliner::Candidate> &Candidates) const { |
| 6048 | outliner::Candidate &C = Candidates.front(); |
| 6049 | // branch-target-enforcement is guaranteed to be consistent between all |
| 6050 | // candidates, so we only need to look at one. |
| 6051 | const Function &CFn = C.getMF()->getFunction(); |
| 6052 | if (CFn.hasFnAttribute(Kind: "branch-target-enforcement" )) |
| 6053 | F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "branch-target-enforcement" )); |
| 6054 | |
| 6055 | if (CFn.hasFnAttribute(Kind: "sign-return-address" )) |
| 6056 | F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "sign-return-address" )); |
| 6057 | |
| 6058 | ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates); |
| 6059 | } |
| 6060 | |
| 6061 | bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom( |
| 6062 | MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { |
| 6063 | const Function &F = MF.getFunction(); |
| 6064 | |
| 6065 | // Can F be deduplicated by the linker? If it can, don't outline from it. |
| 6066 | if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) |
| 6067 | return false; |
| 6068 | |
| 6069 | // Don't outline from functions with section markings; the program could |
| 6070 | // expect that all the code is in the named section. |
| 6071 | // FIXME: Allow outlining from multiple functions with the same section |
| 6072 | // marking. |
| 6073 | if (F.hasSection()) |
| 6074 | return false; |
| 6075 | |
| 6076 | // FIXME: Thumb1 outlining is not handled |
| 6077 | if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction()) |
| 6078 | return false; |
| 6079 | |
| 6080 | // It's safe to outline from MF. |
| 6081 | return true; |
| 6082 | } |
| 6083 | |
| 6084 | bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, |
| 6085 | unsigned &Flags) const { |
| 6086 | // Check if LR is available through all of the MBB. If it's not, then set |
| 6087 | // a flag. |
| 6088 | assert(MBB.getParent()->getRegInfo().tracksLiveness() && |
| 6089 | "Suitable Machine Function for outlining must track liveness" ); |
| 6090 | |
| 6091 | LiveRegUnits LRU(getRegisterInfo()); |
| 6092 | |
| 6093 | for (MachineInstr &MI : llvm::reverse(C&: MBB)) |
| 6094 | LRU.accumulate(MI); |
| 6095 | |
| 6096 | // Check if each of the unsafe registers are available... |
| 6097 | bool R12AvailableInBlock = LRU.available(Reg: ARM::R12); |
| 6098 | bool CPSRAvailableInBlock = LRU.available(Reg: ARM::CPSR); |
| 6099 | |
| 6100 | // If all of these are dead (and not live out), we know we don't have to check |
| 6101 | // them later. |
| 6102 | if (R12AvailableInBlock && CPSRAvailableInBlock) |
| 6103 | Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; |
| 6104 | |
| 6105 | // Now, add the live outs to the set. |
| 6106 | LRU.addLiveOuts(MBB); |
| 6107 | |
| 6108 | // If any of these registers is available in the MBB, but also a live out of |
| 6109 | // the block, then we know outlining is unsafe. |
| 6110 | if (R12AvailableInBlock && !LRU.available(Reg: ARM::R12)) |
| 6111 | return false; |
| 6112 | if (CPSRAvailableInBlock && !LRU.available(Reg: ARM::CPSR)) |
| 6113 | return false; |
| 6114 | |
| 6115 | // Check if there's a call inside this MachineBasicBlock. If there is, then |
| 6116 | // set a flag. |
| 6117 | if (any_of(Range&: MBB, P: [](MachineInstr &MI) { return MI.isCall(); })) |
| 6118 | Flags |= MachineOutlinerMBBFlags::HasCalls; |
| 6119 | |
| 6120 | // LR liveness is overestimated in return blocks. |
| 6121 | |
| 6122 | bool LRIsAvailable = |
| 6123 | MBB.isReturnBlock() && !MBB.back().isCall() |
| 6124 | ? isLRAvailable(TRI: getRegisterInfo(), I: MBB.rbegin(), E: MBB.rend()) |
| 6125 | : LRU.available(Reg: ARM::LR); |
| 6126 | if (!LRIsAvailable) |
| 6127 | Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; |
| 6128 | |
| 6129 | return true; |
| 6130 | } |
| 6131 | |
| 6132 | outliner::InstrType |
| 6133 | ARMBaseInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI, |
| 6134 | MachineBasicBlock::iterator &MIT, |
| 6135 | unsigned Flags) const { |
| 6136 | MachineInstr &MI = *MIT; |
| 6137 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
| 6138 | |
| 6139 | // PIC instructions contain labels; outlining them would break offset
| 6140 | // computation.
| 6141 | unsigned Opc = MI.getOpcode(); |
| 6142 | if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR || |
| 6143 | Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR || |
| 6144 | Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB || |
| 6145 | Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic || |
| 6146 | Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel || |
| 6147 | Opc == ARM::t2MOV_ga_pcrel) |
| 6148 | return outliner::InstrType::Illegal; |
| 6149 | |
| 6150 | // Be conservative with ARMv8.1 MVE instructions. |
| 6151 | if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart || |
| 6152 | Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart || |
| 6153 | Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP || |
| 6154 | Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd || |
| 6155 | Opc == ARM::t2LoopEndDec) |
| 6156 | return outliner::InstrType::Illegal; |
| 6157 | |
| 6158 | const MCInstrDesc &MCID = MI.getDesc(); |
| 6159 | uint64_t MIFlags = MCID.TSFlags; |
| 6160 | if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE) |
| 6161 | return outliner::InstrType::Illegal; |
| 6162 | |
| 6163 | // Is this a terminator for a basic block? |
| 6164 | if (MI.isTerminator()) |
| 6165 | // TargetInstrInfo::getOutliningType has already filtered out anything |
| 6166 | // that would break this, so we can allow it here. |
| 6167 | return outliner::InstrType::Legal; |
| 6168 | |
| 6169 | // Don't outline if link register or program counter value are used. |
| 6170 | if (MI.readsRegister(Reg: ARM::LR, TRI) || MI.readsRegister(Reg: ARM::PC, TRI)) |
| 6171 | return outliner::InstrType::Illegal; |
| 6172 | |
| 6173 | if (MI.isCall()) { |
| 6174 | // Get the function associated with the call. Look at each operand and find |
| 6175 | // the one that represents the callee and get its name.
| 6176 | const Function *Callee = nullptr; |
| 6177 | for (const MachineOperand &MOP : MI.operands()) { |
| 6178 | if (MOP.isGlobal()) { |
| 6179 | Callee = dyn_cast<Function>(Val: MOP.getGlobal()); |
| 6180 | break; |
| 6181 | } |
| 6182 | } |
| 6183 | |
| 6184 | // Don't outline calls to "mcount"-like functions; in particular, Linux
| 6185 | // kernel function tracing relies on them.
| 6186 | if (Callee && |
| 6187 | (Callee->getName() == "\01__gnu_mcount_nc" || |
| 6188 | Callee->getName() == "\01mcount" || Callee->getName() == "__mcount" )) |
| 6189 | return outliner::InstrType::Illegal; |
| 6190 | |
| 6191 | // If we don't know anything about the callee, assume it depends on the |
| 6192 | // stack layout of the caller. In that case, it's only legal to outline |
| 6193 | // as a tail-call. Explicitly list the call instructions we know about so |
| 6194 | // we don't get unexpected results with call pseudo-instructions. |
| 6195 | auto UnknownCallOutlineType = outliner::InstrType::Illegal; |
| 6196 | if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX || |
| 6197 | Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip || |
| 6198 | Opc == ARM::tBLXi) |
| 6199 | UnknownCallOutlineType = outliner::InstrType::LegalTerminator; |
| 6200 | |
| 6201 | if (!Callee) |
| 6202 | return UnknownCallOutlineType; |
| 6203 | |
| 6204 | // We have a function we have information about. Check if it's something we |
| 6205 | // can safely outline. |
| 6206 | MachineFunction *CalleeMF = MMI.getMachineFunction(F: *Callee); |
| 6207 | |
| 6208 | // We don't know what's going on with the callee at all. Don't touch it. |
| 6209 | if (!CalleeMF) |
| 6210 | return UnknownCallOutlineType; |
| 6211 | |
| 6212 | // Check if we know anything about the callee saves on the function. If we |
| 6213 | // don't, then don't touch it, since that implies that we haven't computed |
| 6214 | // anything about its stack frame yet. |
| 6215 | MachineFrameInfo &MFI = CalleeMF->getFrameInfo(); |
| 6216 | if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 || |
| 6217 | MFI.getNumObjects() > 0) |
| 6218 | return UnknownCallOutlineType; |
| 6219 | |
| 6220 | // At this point, we can say that CalleeMF ought to not pass anything on the |
| 6221 | // stack. Therefore, we can outline it. |
| 6222 | return outliner::InstrType::Legal; |
| 6223 | } |
| 6224 | |
| 6225 | // Since calls are handled, don't touch LR or PC |
| 6226 | if (MI.modifiesRegister(Reg: ARM::LR, TRI) || MI.modifiesRegister(Reg: ARM::PC, TRI)) |
| 6227 | return outliner::InstrType::Illegal; |
| 6228 | |
| 6229 | // Does this use the stack? |
| 6230 | if (MI.modifiesRegister(Reg: ARM::SP, TRI) || MI.readsRegister(Reg: ARM::SP, TRI)) { |
| 6231 | // True if there is no chance that any outlined candidate from this range |
| 6232 | // could require stack fixups. That is, both |
| 6233 | // * LR is available in the range (No save/restore around call) |
| 6234 | // * The range doesn't include calls (No save/restore in outlined frame) |
| 6235 | // are true. |
| 6236 | // These conditions also ensure correctness of the return address |
| 6237 | // authentication - we insert sign and authentication instructions only if |
| 6238 | // we save/restore LR on stack, but then this condition ensures that the |
| 6239 | // outlined range does not modify the SP, therefore the SP value used for |
| 6240 | // signing is the same as the one used for authentication. |
| 6241 | // FIXME: This is very restrictive; the flags check the whole block, |
| 6242 | // not just the bit we will try to outline. |
| 6243 | bool MightNeedStackFixUp = |
| 6244 | (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere | |
| 6245 | MachineOutlinerMBBFlags::HasCalls)); |
| 6246 | |
| 6247 | if (!MightNeedStackFixUp) |
| 6248 | return outliner::InstrType::Legal; |
| 6249 | |
| 6250 | // Any modification of SP will break our code to save/restore LR. |
| 6251 | // FIXME: We could handle some instructions which add a constant offset to |
| 6252 | // SP, with a bit more work. |
| 6253 | if (MI.modifiesRegister(Reg: ARM::SP, TRI)) |
| 6254 | return outliner::InstrType::Illegal; |
| 6255 | |
| 6256 | // At this point, we have a stack instruction that we might need to fix
| 6257 | // up. We'll handle it if it's a load or store.
| 6258 | if (checkAndUpdateStackOffset(MI: &MI, Fixup: Subtarget.getStackAlignment().value(), |
| 6259 | Updt: false)) |
| 6260 | return outliner::InstrType::Legal; |
| 6261 | |
| 6262 | // We can't fix it up, so don't outline it. |
| 6263 | return outliner::InstrType::Illegal; |
| 6264 | } |
| 6265 | |
| 6266 | // Be conservative with IT blocks. |
| 6267 | if (MI.readsRegister(Reg: ARM::ITSTATE, TRI) || |
| 6268 | MI.modifiesRegister(Reg: ARM::ITSTATE, TRI)) |
| 6269 | return outliner::InstrType::Illegal; |
| 6270 | |
| 6271 | // Don't outline CFI instructions. |
| 6272 | if (MI.isCFIInstruction()) |
| 6273 | return outliner::InstrType::Illegal; |
| 6274 | |
| 6275 | return outliner::InstrType::Legal; |
| 6276 | } |
| 6277 | |
| 6278 | void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { |
| 6279 | for (MachineInstr &MI : MBB) { |
| 6280 | checkAndUpdateStackOffset(MI: &MI, Fixup: Subtarget.getStackAlignment().value(), Updt: true); |
| 6281 | } |
| 6282 | } |
| 6283 | |
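|      | // Spill LR (and, with Auth, the PAC in R12) below SP around an outlined
|      | // call. Roughly, with Auth on Thumb2 this emits
|      | //   pac
|      | //   strd r12, lr, [sp, #-Align]!
|      | // and without Auth a single "str lr, [sp, #-Align]!", plus CFI when requested.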
| 6284 | void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB, |
| 6285 | MachineBasicBlock::iterator It, bool CFI, |
| 6286 | bool Auth) const { |
| 6287 | int Align = std::max(a: Subtarget.getStackAlignment().value(), b: uint64_t(8)); |
| 6288 | unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0; |
| 6289 | assert(Align >= 8 && Align <= 256); |
| 6290 | if (Auth) { |
| 6291 | assert(Subtarget.isThumb2()); |
| 6292 | // Compute PAC in R12. Outlining ensures R12 is dead across the outlined |
| 6293 | // sequence. |
| 6294 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2PAC)).setMIFlags(MIFlags); |
| 6295 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2STRD_PRE), DestReg: ARM::SP) |
| 6296 | .addReg(RegNo: ARM::R12, flags: RegState::Kill) |
| 6297 | .addReg(RegNo: ARM::LR, flags: RegState::Kill) |
| 6298 | .addReg(RegNo: ARM::SP) |
| 6299 | .addImm(Val: -Align) |
| 6300 | .add(MOs: predOps(Pred: ARMCC::AL)) |
| 6301 | .setMIFlags(MIFlags); |
| 6302 | } else { |
| 6303 | unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM; |
| 6304 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: Opc), DestReg: ARM::SP) |
| 6305 | .addReg(RegNo: ARM::LR, flags: RegState::Kill) |
| 6306 | .addReg(RegNo: ARM::SP) |
| 6307 | .addImm(Val: -Align) |
| 6308 | .add(MOs: predOps(Pred: ARMCC::AL)) |
| 6309 | .setMIFlags(MIFlags); |
| 6310 | } |
| 6311 | |
| 6312 | if (!CFI) |
| 6313 | return; |
| 6314 | |
| 6315 | // Add a CFI, saying CFA is offset by Align bytes from SP. |
| 6316 | CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup); |
| 6317 | CFIBuilder.buildDefCFAOffset(Offset: Align); |
| 6318 | |
| 6319 | // Add a CFI saying that the LR that we want to find is now higher than |
| 6320 | // before. |
| 6321 | int LROffset = Auth ? Align - 4 : Align; |
| 6322 | CFIBuilder.buildOffset(Reg: ARM::LR, Offset: -LROffset); |
| 6323 | if (Auth) { |
| 6324 | // Add a CFI for the location of the return address PAC.
| 6325 | CFIBuilder.buildOffset(Reg: ARM::RA_AUTH_CODE, Offset: -Align); |
| 6326 | } |
| 6327 | } |
| 6328 | |
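|      | // Mirror of saveLROnStack: with Auth this reloads the PAC and LR via
|      | // "ldrd r12, lr, [sp], #Align" and then authenticates with "aut"; otherwise
|      | // it is a single post-indexed "ldr lr, [sp], #Align". CFI is emitted only
|      | // when requested.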
| 6329 | void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB, |
| 6330 | MachineBasicBlock::iterator It, |
| 6331 | bool CFI, bool Auth) const { |
| 6332 | int Align = Subtarget.getStackAlignment().value(); |
| 6333 | unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0; |
| 6334 | if (Auth) { |
| 6335 | assert(Subtarget.isThumb2()); |
| 6336 | // Restore return address PAC and LR. |
| 6337 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2LDRD_POST)) |
| 6338 | .addReg(RegNo: ARM::R12, flags: RegState::Define) |
| 6339 | .addReg(RegNo: ARM::LR, flags: RegState::Define) |
| 6340 | .addReg(RegNo: ARM::SP, flags: RegState::Define) |
| 6341 | .addReg(RegNo: ARM::SP) |
| 6342 | .addImm(Val: Align) |
| 6343 | .add(MOs: predOps(Pred: ARMCC::AL)) |
| 6344 | .setMIFlags(MIFlags); |
| 6345 | // LR authentication is after the CFI instructions, below. |
| 6346 | } else { |
| 6347 | unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; |
| 6348 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: Opc), DestReg: ARM::LR) |
| 6349 | .addReg(RegNo: ARM::SP, flags: RegState::Define) |
| 6350 | .addReg(RegNo: ARM::SP); |
| 6351 | if (!Subtarget.isThumb()) |
| 6352 | MIB.addReg(RegNo: 0); |
| 6353 | MIB.addImm(Val: Subtarget.getStackAlignment().value()) |
| 6354 | .add(MOs: predOps(Pred: ARMCC::AL)) |
| 6355 | .setMIFlags(MIFlags); |
| 6356 | } |
| 6357 | |
| 6358 | if (CFI) { |
| 6359 | // Now stack has moved back up and we have restored LR. |
| 6360 | CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameDestroy); |
| 6361 | CFIBuilder.buildDefCFAOffset(Offset: 0); |
| 6362 | CFIBuilder.buildRestore(Reg: ARM::LR); |
| 6363 | if (Auth) |
| 6364 | CFIBuilder.buildUndefined(Reg: ARM::RA_AUTH_CODE); |
| 6365 | } |
| 6366 | |
| 6367 | if (Auth) |
| 6368 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2AUT)); |
| 6369 | } |
| 6370 | |
| 6371 | void ARMBaseInstrInfo::buildOutlinedFrame( |
| 6372 | MachineBasicBlock &MBB, MachineFunction &MF, |
| 6373 | const outliner::OutlinedFunction &OF) const { |
| 6374 | // For thunk outlining, rewrite the last instruction from a call to a |
| 6375 | // tail-call. |
| 6376 | if (OF.FrameConstructionID == MachineOutlinerThunk) { |
| 6377 | MachineInstr *Call = &*--MBB.instr_end(); |
| 6378 | bool isThumb = Subtarget.isThumb(); |
| 6379 | unsigned FuncOp = isThumb ? 2 : 0; |
| 6380 | unsigned Opc = Call->getOperand(i: FuncOp).isReg() |
| 6381 | ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr |
| 6382 | : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd |
| 6383 | : ARM::tTAILJMPdND |
| 6384 | : ARM::TAILJMPd; |
| 6385 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: get(Opcode: Opc)) |
| 6386 | .add(MO: Call->getOperand(i: FuncOp)); |
| 6387 | if (isThumb && !Call->getOperand(i: FuncOp).isReg()) |
| 6388 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
| 6389 | Call->eraseFromParent(); |
| 6390 | } |
| 6391 | |
| 6392 | // Is there a call in the outlined range? |
| 6393 | auto IsNonTailCall = [](MachineInstr &MI) { |
| 6394 | return MI.isCall() && !MI.isReturn(); |
| 6395 | }; |
| 6396 | if (llvm::any_of(Range: MBB.instrs(), P: IsNonTailCall)) { |
| 6397 | MachineBasicBlock::iterator It = MBB.begin(); |
| 6398 | MachineBasicBlock::iterator Et = MBB.end(); |
| 6399 | |
| 6400 | if (OF.FrameConstructionID == MachineOutlinerTailCall || |
| 6401 | OF.FrameConstructionID == MachineOutlinerThunk) |
| 6402 | Et = std::prev(x: MBB.end()); |
| 6403 | |
| 6404 | // We have to save and restore LR, so we need to add it to the liveins if it
| 6405 | // is not already part of the set. This is sufficient since outlined
| 6406 | // functions only have one block. |
| 6407 | if (!MBB.isLiveIn(Reg: ARM::LR)) |
| 6408 | MBB.addLiveIn(PhysReg: ARM::LR); |
| 6409 | |
| 6410 | // Insert a save before the outlined region |
| 6411 | bool Auth = MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(SpillsLR: true); |
| 6412 | saveLROnStack(MBB, It, CFI: true, Auth); |
| 6413 | |
| 6414 | // Fix up the instructions in the range, since we're going to modify the |
| 6415 | // stack. |
| 6416 | assert(OF.FrameConstructionID != MachineOutlinerDefault && |
| 6417 | "Can only fix up stack references once" ); |
| 6418 | fixupPostOutline(MBB); |
| 6419 | |
| 6420 | // Insert a restore before the terminator for the function. Restore LR. |
| 6421 | restoreLRFromStack(MBB, It: Et, CFI: true, Auth); |
| 6422 | } |
| 6423 | |
| 6424 | // If this is a tail call outlined function, then there's already a return. |
| 6425 | if (OF.FrameConstructionID == MachineOutlinerTailCall || |
| 6426 | OF.FrameConstructionID == MachineOutlinerThunk) |
| 6427 | return; |
| 6428 | |
| 6429 | // Here we have to insert the return ourselves. Get the correct opcode from |
| 6430 | // current feature set. |
| 6431 | BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: get(Opcode: Subtarget.getReturnOpcode())) |
| 6432 | .add(MOs: predOps(Pred: ARMCC::AL)); |
| 6433 | |
| 6434 | // Did we have to modify the stack by saving the link register? |
| 6435 | if (OF.FrameConstructionID != MachineOutlinerDefault && |
| 6436 | OF.Candidates[0].CallConstructionID != MachineOutlinerDefault) |
| 6437 | return; |
| 6438 | |
| 6439 | // We modified the stack. |
| 6440 | // Walk over the basic block and fix up all the stack accesses. |
| 6441 | fixupPostOutline(MBB); |
| 6442 | } |
| 6443 | |
| 6444 | MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( |
| 6445 | Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, |
| 6446 | MachineFunction &MF, outliner::Candidate &C) const { |
| 6447 | MachineInstrBuilder MIB; |
| 6448 | MachineBasicBlock::iterator CallPt; |
| 6449 | unsigned Opc; |
| 6450 | bool isThumb = Subtarget.isThumb(); |
| 6451 | |
| 6452 | // Are we tail calling? |
| 6453 | if (C.CallConstructionID == MachineOutlinerTailCall) { |
| 6454 | // If yes, then we can just branch to the label. |
| 6455 | Opc = isThumb |
| 6456 | ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND |
| 6457 | : ARM::TAILJMPd; |
| 6458 | MIB = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: Opc)) |
| 6459 | .addGlobalAddress(GV: M.getNamedValue(Name: MF.getName())); |
| 6460 | if (isThumb) |
| 6461 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
| 6462 | It = MBB.insert(I: It, MI: MIB); |
| 6463 | return It; |
| 6464 | } |
| 6465 | |
| 6466 | // Create the call instruction. |
| 6467 | Opc = isThumb ? ARM::tBL : ARM::BL; |
| 6468 | MachineInstrBuilder CallMIB = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: Opc)); |
| 6469 | if (isThumb) |
| 6470 | CallMIB.add(MOs: predOps(Pred: ARMCC::AL)); |
| 6471 | CallMIB.addGlobalAddress(GV: M.getNamedValue(Name: MF.getName())); |
| 6472 | |
| 6473 | if (C.CallConstructionID == MachineOutlinerNoLRSave || |
| 6474 | C.CallConstructionID == MachineOutlinerThunk) { |
| 6475 | // No, so just insert the call. |
| 6476 | It = MBB.insert(I: It, MI: CallMIB); |
| 6477 | return It; |
| 6478 | } |
| 6479 | |
| 6480 | const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); |
| 6481 | // Can we save to a register? |
| 6482 | if (C.CallConstructionID == MachineOutlinerRegSave) { |
| 6483 | Register Reg = findRegisterToSaveLRTo(C); |
| 6484 | assert(Reg != 0 && "No callee-saved register available?" ); |
| 6485 | |
| 6486 | // Save and restore LR from that register. |
| 6487 | copyPhysReg(MBB, I: It, DL: DebugLoc(), DestReg: Reg, SrcReg: ARM::LR, KillSrc: true); |
| 6488 | if (!AFI.isLRSpilled()) |
| 6489 | CFIInstBuilder(MBB, It, MachineInstr::FrameSetup) |
| 6490 | .buildRegister(Reg1: ARM::LR, Reg2: Reg); |
| 6491 | CallPt = MBB.insert(I: It, MI: CallMIB); |
| 6492 | copyPhysReg(MBB, I: It, DL: DebugLoc(), DestReg: ARM::LR, SrcReg: Reg, KillSrc: true); |
| 6493 | if (!AFI.isLRSpilled()) |
| 6494 | CFIInstBuilder(MBB, It, MachineInstr::FrameDestroy).buildRestore(Reg: ARM::LR); |
| 6495 | It--; |
| 6496 | return CallPt; |
| 6497 | } |
| 6498 | // We have the default case. Save and restore from SP. |
| 6499 | if (!MBB.isLiveIn(Reg: ARM::LR)) |
| 6500 | MBB.addLiveIn(PhysReg: ARM::LR); |
| 6501 | bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(SpillsLR: true); |
| 6502 | saveLROnStack(MBB, It, CFI: !AFI.isLRSpilled(), Auth); |
| 6503 | CallPt = MBB.insert(I: It, MI: CallMIB); |
| 6504 | restoreLRFromStack(MBB, It, CFI: !AFI.isLRSpilled(), Auth); |
| 6505 | It--; |
| 6506 | return CallPt; |
| 6507 | } |
| 6508 | |
| 6509 | bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault( |
| 6510 | MachineFunction &MF) const { |
| 6511 | return Subtarget.isMClass() && MF.getFunction().hasMinSize(); |
| 6512 | } |
| 6513 | |
| 6514 | bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable( |
| 6515 | const MachineInstr &MI) const { |
| 6516 | // Try hard to rematerialize any VCTPs because if we spill P0, it will block |
| 6517 | // the tail predication conversion. This means that the element count |
| 6518 | // register has to be live for longer, but that has to be better than |
| 6519 | // spill/restore and VPT predication. |
| 6520 | return (isVCTP(&MI) && !isPredicated(MI)) ||
| 6521 | TargetInstrInfo::isReallyTriviallyReMaterializable(MI); |
| 6522 | } |
| 6523 | |
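| | // Call-opcode helpers: when SLS (straight-line speculation) hardening of BLR
| | // is enabled, the *_noip pseudo variants are selected instead.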
| 6524 | unsigned llvm::getBLXOpcode(const MachineFunction &MF) { |
| 6525 | return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip |
| 6526 | : ARM::BLX; |
| 6527 | } |
| 6528 | |
| 6529 | unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) { |
| 6530 | return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip |
| 6531 | : ARM::tBLXr; |
| 6532 | } |
| 6533 | |
| 6534 | unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) { |
| 6535 | return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip |
| 6536 | : ARM::BLX_pred; |
| 6537 | } |
| 6538 | |
| 6539 | namespace { |
| 6540 | class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { |
| 6541 | MachineInstr *EndLoop, *LoopCount; |
| 6542 | MachineFunction *MF; |
| 6543 | const TargetInstrInfo *TII; |
| 6544 | |
| 6545 | // Bitset[0 .. MAX_STAGES-1] ... iterations needed |
| 6546 | // [LAST_IS_USE] : last reference to register in schedule is a use |
| 6547 | // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live |
| 6548 | static int constexpr MAX_STAGES = 30; |
| 6549 | static int constexpr LAST_IS_USE = MAX_STAGES; |
| 6550 | static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1; |
| 6551 | typedef std::bitset<MAX_STAGES + 2> IterNeed; |
| 6552 | typedef std::map<unsigned, IterNeed> IterNeeds; |
| 6553 | |
| 6554 | void bumpCrossIterationPressure(RegPressureTracker &RPT, |
| 6555 | const IterNeeds &CIN); |
| 6556 | bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS); |
| 6557 | |
| 6558 | // Meaning of EndLoop and LoopCount for each loop type:
| 6559 | // t2Bcc: |
| 6560 | // EndLoop = branch at end of original BB that will become a kernel |
| 6561 | // LoopCount = CC setter live into branch |
| 6562 | // t2LoopEnd: |
| 6563 | // EndLoop = branch at end of original BB |
| 6564 | // LoopCount = t2LoopDec |
| 6565 | public: |
| 6566 | ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount) |
| 6567 | : EndLoop(EndLoop), LoopCount(LoopCount), |
| 6568 | MF(EndLoop->getParent()->getParent()), |
| 6569 | TII(MF->getSubtarget().getInstrInfo()) {} |
| 6570 | |
| 6571 | bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { |
| 6572 | // Ignore only the loop-closing branch and the loop-count instruction.
| 6573 | return MI == EndLoop || MI == LoopCount; |
| 6574 | } |
| 6575 | |
| 6576 | bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override { |
| 6577 | if (tooMuchRegisterPressure(SSD, SMS)) |
| 6578 | return false; |
| 6579 | |
| 6580 | return true; |
| 6581 | } |
| 6582 | |
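| | // Returning an empty optional signals that the trip-count test cannot be
| | // resolved statically; the caller emits a conditional branch using Cond.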
| 6583 | std::optional<bool> createTripCountGreaterCondition( |
| 6584 | int TC, MachineBasicBlock &MBB, |
| 6585 | SmallVectorImpl<MachineOperand> &Cond) override { |
| 6586 | |
| 6587 | if (isCondBranchOpcode(EndLoop->getOpcode())) {
| 6588 | Cond.push_back(EndLoop->getOperand(1));
| 6589 | Cond.push_back(EndLoop->getOperand(2));
| 6590 | if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
| 6591 | TII->reverseBranchCondition(Cond); |
| 6592 | } |
| 6593 | return {}; |
| 6594 | } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) { |
| 6595 | // General case just lets the unrolled t2LoopDec do the subtraction and |
| 6596 | // therefore just needs to check if zero has been reached. |
| 6597 | MachineInstr *LoopDec = nullptr; |
| 6598 | for (auto &I : MBB.instrs()) |
| 6599 | if (I.getOpcode() == ARM::t2LoopDec) |
| 6600 | LoopDec = &I; |
| 6601 | assert(LoopDec && "Unable to find copied LoopDec");
| 6602 | // Check if we're done with the loop. |
| 6603 | BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
| 6604 | .addReg(LoopDec->getOperand(0).getReg())
| 6605 | .addImm(0)
| 6606 | .addImm(ARMCC::AL)
| 6607 | .addReg(ARM::NoRegister);
| 6608 | Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ));
| 6609 | Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
| 6610 | return {}; |
| 6611 | } else |
| 6612 | llvm_unreachable("Unknown EndLoop");
| 6613 | } |
| 6614 | |
| 6615 | void setPreheader(MachineBasicBlock *NewPreheader) override {}
| 6616 | |
| 6617 | void adjustTripCount(int TripCountAdjust) override {} |
| 6618 | }; |
| 6619 | |
| 6620 | void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT, |
| 6621 | const IterNeeds &CIN) { |
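| | // Temporarily raise and then lower the pressure contributed by every
| | // cross-iteration value, so the tracker's MaxSetPressure accounts for them
| | // without permanently changing the running pressure.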
| 6622 | // Increase pressure by the amounts in CrossIterationNeeds |
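| | // Cnt is the number of iterations the value stays live; when the register is
| | // already SEEN_AS_LIVE, drop the flag bit itself plus the one copy the normal
| | // pressure algorithm already counts.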
| 6623 | for (const auto &N : CIN) { |
| 6624 | int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2; |
| 6625 | for (int I = 0; I < Cnt; ++I) |
| 6626 | RPT.increaseRegPressure(Register(N.first), LaneBitmask::getNone(),
| 6627 | LaneBitmask::getAll());
| 6628 | } |
| 6629 | // Decrease pressure by the amounts in CrossIterationNeeds |
| 6630 | for (const auto &N : CIN) { |
| 6631 | int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2; |
| 6632 | for (int I = 0; I < Cnt; ++I) |
| 6633 | RPT.decreaseRegPressure(Register(N.first), LaneBitmask::getAll(),
| 6634 | LaneBitmask::getNone());
| 6635 | } |
| 6636 | } |
| 6637 | |
| 6638 | bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD, |
| 6639 | SMSchedule &SMS) { |
| 6640 | IterNeeds CrossIterationNeeds; |
| 6641 | |
| 6642 | // Determine which values will be loop-carried after the schedule is |
| 6643 | // applied |
| 6644 | |
| 6645 | for (auto &SU : SSD.SUnits) { |
| 6646 | const MachineInstr *MI = SU.getInstr(); |
| 6647 | int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
| 6648 | for (auto &S : SU.Succs) |
| 6649 | if (MI->isPHI() && S.getKind() == SDep::Anti) { |
| 6650 | Register Reg = S.getReg(); |
| 6651 | if (Reg.isVirtual()) |
| 6652 | CrossIterationNeeds[Reg.id()].set(0);
| 6653 | } else if (S.isAssignedRegDep()) {
| 6654 | int OStg = SMS.stageScheduled(S.getSUnit());
| 6655 | if (OStg >= 0 && OStg != Stg) { |
| 6656 | Register Reg = S.getReg(); |
| 6657 | if (Reg.isVirtual()) |
| 6658 | CrossIterationNeeds[Reg.id()] |= ((1 << (OStg - Stg)) - 1); |
| 6659 | } |
| 6660 | } |
| 6661 | } |
| 6662 | |
| 6663 | // Determine more-or-less what the proposed schedule (reversed) is going to |
| 6664 | // be; it might not be quite the same because the within-cycle ordering |
| 6665 | // created by SMSchedule depends upon changes to help with address offsets and |
| 6666 | // the like. |
| 6667 | std::vector<SUnit *> ProposedSchedule; |
| 6668 | for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle) |
| 6669 | for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd; |
| 6670 | ++Stage) { |
| 6671 | std::deque<SUnit *> Instrs = |
| 6672 | SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
| 6673 | std::sort(Instrs.begin(), Instrs.end(),
| 6674 | [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
| 6675 | llvm::append_range(ProposedSchedule, Instrs);
| 6676 | } |
| 6677 | |
| 6678 | // Learn whether the last use/def of each cross-iteration register is a use or |
| 6679 | // def. If it is a def, RegisterPressure will implicitly increase max pressure |
| 6680 | // and we do not have to add the pressure. |
| 6681 | for (auto *SU : ProposedSchedule) |
| 6682 | for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid(); |
| 6683 | ++OperI) { |
| 6684 | auto MO = *OperI; |
| 6685 | if (!MO.isReg() || !MO.getReg()) |
| 6686 | continue; |
| 6687 | Register Reg = MO.getReg(); |
| 6688 | auto CIter = CrossIterationNeeds.find(Reg.id());
| 6689 | if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] || |
| 6690 | CIter->second[SEEN_AS_LIVE]) |
| 6691 | continue; |
| 6692 | if (MO.isDef() && !MO.isDead()) |
| 6693 | CIter->second.set(SEEN_AS_LIVE);
| 6694 | else if (MO.isUse())
| 6695 | CIter->second.set(LAST_IS_USE);
| 6696 | } |
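| | // LAST_IS_USE was only needed during the scan above; clear it so it does not
| | // leak into the pressure counts computed below.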
| 6697 | for (auto &CI : CrossIterationNeeds) |
| 6698 | CI.second.reset(LAST_IS_USE);
| 6699 | |
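| | // Replay the reversed schedule bottom-up through a pressure tracker anchored
| | // at the end of the loop block, re-applying the cross-iteration contribution
| | // after every instruction so MaxSetPressure reflects loop-carried values.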
| 6700 | RegionPressure RecRegPressure; |
| 6701 | RegPressureTracker RPTracker(RecRegPressure); |
| 6702 | RegisterClassInfo RegClassInfo; |
| 6703 | RegClassInfo.runOnMachineFunction(*MF);
| 6704 | RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
| 6705 | EndLoop->getParent()->end(), false, false);
| 6706 | |
| 6707 | bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
| 6708 | |
| 6709 | for (auto *SU : ProposedSchedule) { |
| 6710 | MachineBasicBlock::const_iterator CurInstI = SU->getInstr(); |
| 6711 | RPTracker.setPos(std::next(CurInstI));
| 6712 | RPTracker.recede(); |
| 6713 | |
| 6714 | // Track what cross-iteration registers would be seen as live |
| 6715 | for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) { |
| 6716 | auto MO = *OperI; |
| 6717 | if (!MO.isReg() || !MO.getReg()) |
| 6718 | continue; |
| 6719 | Register Reg = MO.getReg(); |
| 6720 | if (MO.isDef() && !MO.isDead()) { |
| 6721 | auto CIter = CrossIterationNeeds.find(Reg.id());
| 6722 | if (CIter != CrossIterationNeeds.end()) {
| 6723 | CIter->second.reset(0);
| 6724 | CIter->second.reset(SEEN_AS_LIVE);
| 6725 | } |
| 6726 | } |
| 6727 | } |
| 6728 | for (auto &S : SU->Preds) { |
| 6729 | auto Stg = SMS.stageScheduled(SU); |
| 6730 | if (S.isAssignedRegDep()) { |
| 6731 | Register Reg = S.getReg(); |
| 6732 | auto CIter = CrossIterationNeeds.find(Reg.id());
| 6733 | if (CIter != CrossIterationNeeds.end()) {
| 6734 | auto Stg2 = SMS.stageScheduled(const_cast<SUnit *>(S.getSUnit()));
| 6735 | assert(Stg2 <= Stg && "Data dependence upon earlier stage");
| 6736 | if (Stg - Stg2 < MAX_STAGES)
| 6737 | CIter->second.set(Stg - Stg2);
| 6738 | CIter->second.set(SEEN_AS_LIVE);
| 6739 | } |
| 6740 | } |
| 6741 | } |
| 6742 | |
| 6743 | bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
| 6744 | } |
| 6745 | |
| 6746 | auto &P = RPTracker.getPressure().MaxSetPressure; |
| 6747 | for (unsigned I = 0, E = P.size(); I < E; ++I) { |
| 6748 | // Exclude some Neon register classes. |
| 6749 | if (I == ARM::DQuad_with_ssub_0 || I == ARM::DTripleSpc_with_ssub_0 || |
| 6750 | I == ARM::DTriple_with_qsub_0_in_QPR) |
| 6751 | continue; |
| 6752 | |
| 6753 | if (P[I] > RegClassInfo.getRegPressureSetLimit(I)) {
| 6754 | return true; |
| 6755 | } |
| 6756 | } |
| 6757 | return false; |
| 6758 | } |
| 6759 | |
| 6760 | } // namespace |
| 6761 | |
| 6762 | std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> |
| 6763 | ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { |
| 6764 | MachineBasicBlock::iterator I = LoopBB->getFirstTerminator(); |
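| | // A single-block pipelined loop is expected to have two predecessors, the
| | // preheader and the block itself (its own latch); pick whichever predecessor
| | // is not the loop block.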
| 6765 | MachineBasicBlock *Preheader = *LoopBB->pred_begin();
| 6766 | if (Preheader == LoopBB)
| 6767 | Preheader = *std::next(LoopBB->pred_begin());
| 6768 | |
| 6769 | if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) { |
| 6770 | // If the branch is a Bcc, then the CPSR should be set somewhere within the |
| 6771 | // block. We need to determine the reaching definition of CPSR so that |
| 6772 | // it can be marked as non-pipelineable, allowing the pipeliner to force |
| 6773 | // it into stage 0 or give up if it cannot or will not do so. |
| 6774 | MachineInstr *CCSetter = nullptr; |
| 6775 | for (auto &L : LoopBB->instrs()) { |
| 6776 | if (L.isCall()) |
| 6777 | return nullptr; |
| 6778 | if (isCPSRDefined(L))
| 6779 | CCSetter = &L; |
| 6780 | } |
| 6781 | if (CCSetter) |
| 6782 | return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
| 6783 | else |
| 6784 | return nullptr; // Unable to find the CC setter, so unable to guarantee |
| 6785 | // that pipeline will work |
| 6786 | } |
| 6787 | |
| 6788 | // Recognize: |
| 6789 | // preheader: |
| 6790 | // %1 = t2DoLoopStart %0
| 6791 | // loop: |
| 6792 | // %2 = phi %1, <not loop>, %..., %loop |
| 6793 | // %3 = t2LoopDec %2, <imm> |
| 6794 | // t2LoopEnd %3, %loop |
| 6795 | |
| 6796 | if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) { |
| 6797 | for (auto &L : LoopBB->instrs()) |
| 6798 | if (L.isCall()) |
| 6799 | return nullptr; |
| 6800 | else if (isVCTP(&L))
| 6801 | return nullptr; |
| 6802 | Register LoopDecResult = I->getOperand(0).getReg();
| 6803 | MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
| 6804 | MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
| 6805 | if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) |
| 6806 | return nullptr; |
| 6807 | MachineInstr *LoopStart = nullptr; |
| 6808 | for (auto &J : Preheader->instrs()) |
| 6809 | if (J.getOpcode() == ARM::t2DoLoopStart) |
| 6810 | LoopStart = &J; |
| 6811 | if (!LoopStart) |
| 6812 | return nullptr; |
| 6813 | return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
| 6814 | } |
| 6815 | return nullptr; |
| 6816 | } |
| 6817 | |