1 | //===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the Base ARM implementation of the TargetInstrInfo class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "ARMBaseInstrInfo.h" |
14 | #include "ARMBaseRegisterInfo.h" |
15 | #include "ARMConstantPoolValue.h" |
16 | #include "ARMFeatures.h" |
17 | #include "ARMHazardRecognizer.h" |
18 | #include "ARMMachineFunctionInfo.h" |
19 | #include "ARMSubtarget.h" |
20 | #include "MCTargetDesc/ARMAddressingModes.h" |
21 | #include "MCTargetDesc/ARMBaseInfo.h" |
22 | #include "MVETailPredUtils.h" |
23 | #include "llvm/ADT/DenseMap.h" |
24 | #include "llvm/ADT/STLExtras.h" |
25 | #include "llvm/ADT/SmallSet.h" |
26 | #include "llvm/ADT/SmallVector.h" |
27 | #include "llvm/CodeGen/DFAPacketizer.h" |
28 | #include "llvm/CodeGen/LiveVariables.h" |
29 | #include "llvm/CodeGen/MachineBasicBlock.h" |
30 | #include "llvm/CodeGen/MachineConstantPool.h" |
31 | #include "llvm/CodeGen/MachineFrameInfo.h" |
32 | #include "llvm/CodeGen/MachineFunction.h" |
33 | #include "llvm/CodeGen/MachineInstr.h" |
34 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
35 | #include "llvm/CodeGen/MachineMemOperand.h" |
36 | #include "llvm/CodeGen/MachineModuleInfo.h" |
37 | #include "llvm/CodeGen/MachineOperand.h" |
38 | #include "llvm/CodeGen/MachinePipeliner.h" |
39 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
40 | #include "llvm/CodeGen/MachineScheduler.h" |
41 | #include "llvm/CodeGen/MultiHazardRecognizer.h" |
42 | #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" |
43 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
44 | #include "llvm/CodeGen/TargetInstrInfo.h" |
45 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
46 | #include "llvm/CodeGen/TargetSchedule.h" |
47 | #include "llvm/IR/Attributes.h" |
48 | #include "llvm/IR/Constants.h" |
49 | #include "llvm/IR/DebugLoc.h" |
50 | #include "llvm/IR/Function.h" |
51 | #include "llvm/IR/GlobalValue.h" |
52 | #include "llvm/IR/Module.h" |
53 | #include "llvm/MC/MCAsmInfo.h" |
54 | #include "llvm/MC/MCInstrDesc.h" |
55 | #include "llvm/MC/MCInstrItineraries.h" |
56 | #include "llvm/Support/BranchProbability.h" |
57 | #include "llvm/Support/Casting.h" |
58 | #include "llvm/Support/CommandLine.h" |
59 | #include "llvm/Support/Compiler.h" |
60 | #include "llvm/Support/Debug.h" |
61 | #include "llvm/Support/ErrorHandling.h" |
62 | #include "llvm/Support/raw_ostream.h" |
63 | #include "llvm/Target/TargetMachine.h" |
64 | #include "llvm/TargetParser/Triple.h" |
65 | #include <algorithm> |
66 | #include <cassert> |
67 | #include <cstdint> |
68 | #include <iterator> |
69 | #include <new> |
70 | #include <utility> |
71 | #include <vector> |
72 | |
73 | using namespace llvm; |
74 | |
75 | #define DEBUG_TYPE "arm-instrinfo" |
76 | |
77 | #define GET_INSTRINFO_CTOR_DTOR |
78 | #include "ARMGenInstrInfo.inc" |
79 | |
80 | static cl::opt<bool> |
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));
83 | |
84 | /// ARM_MLxEntry - Record information about MLA / MLS instructions. |
85 | struct ARM_MLxEntry { |
86 | uint16_t MLxOpc; // MLA / MLS opcode |
87 | uint16_t MulOpc; // Expanded multiplication opcode |
88 | uint16_t AddSubOpc; // Expanded add / sub opcode |
89 | bool NegAcc; // True if the acc is negated before the add / sub. |
90 | bool HasLane; // True if instruction has an extra "lane" operand. |
91 | }; |
92 | |
93 | static const ARM_MLxEntry ARM_MLxTable[] = { |
94 | // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane |
95 | // fp scalar ops |
  { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,      false,  false },
  { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
  { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
  { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
  { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
  { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
  { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
  { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,      true,   false },

  // fp SIMD ops
  { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,     false,  false },
  { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,     false,  false },
  { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,     false,  false },
  { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,     false,  false },
  { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,     false,  true  },
  { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,     false,  true  },
  { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,     false,  true  },
  { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,     false,  true  },
114 | }; |
115 | |
116 | ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) |
117 | : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), |
118 | Subtarget(STI) { |
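  // Build a map from each MLA/MLS opcode to its index in ARM_MLxTable, and
  // record the expanded multiply and add/sub opcodes that can participate in
  // an FP MLx hazard.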
119 | for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) { |
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
      llvm_unreachable("Duplicated entries?");
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
124 | } |
125 | } |
126 | |
127 | // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl |
128 | // currently defaults to no prepass hazard recognizer. |
129 | ScheduleHazardRecognizer * |
130 | ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, |
131 | const ScheduleDAG *DAG) const { |
132 | if (usePreRAHazardRecognizer()) { |
133 | const InstrItineraryData *II = |
134 | static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); |
    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
136 | } |
137 | return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); |
138 | } |
139 | |
140 | // Called during: |
141 | // - pre-RA scheduling |
142 | // - post-RA scheduling when FeatureUseMISched is set |
143 | ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer( |
144 | const InstrItineraryData *II, const ScheduleDAGMI *DAG) const { |
145 | MultiHazardRecognizer *MHR = new MultiHazardRecognizer(); |
146 | |
147 | // We would like to restrict this hazard recognizer to only |
148 | // post-RA scheduling; we can tell that we're post-RA because we don't |
149 | // track VRegLiveness. |
150 | // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM |
151 | // banks banked on bit 2. Assume that TCMs are in use. |
152 | if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness()) |
153 | MHR->AddHazardRecognizer( |
        std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));
155 | |
156 | // Not inserting ARMHazardRecognizerFPMLx because that would change |
157 | // legacy behavior |
158 | |
159 | auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG); |
160 | MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR)); |
161 | return MHR; |
162 | } |
163 | |
164 | // Called during post-RA scheduling when FeatureUseMISched is not set |
165 | ScheduleHazardRecognizer *ARMBaseInstrInfo:: |
166 | CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, |
167 | const ScheduleDAG *DAG) const { |
168 | MultiHazardRecognizer *MHR = new MultiHazardRecognizer(); |
169 | |
170 | if (Subtarget.isThumb2() || Subtarget.hasVFP2Base()) |
171 | MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>()); |
172 | |
173 | auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); |
174 | if (BHR) |
175 | MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR)); |
176 | return MHR; |
177 | } |
178 | |
179 | MachineInstr * |
180 | ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, |
181 | LiveIntervals *LIS) const { |
182 | // FIXME: Thumb2 support. |
183 | |
184 | if (!EnableARM3Addr) |
185 | return nullptr; |
186 | |
187 | MachineFunction &MF = *MI.getParent()->getParent(); |
188 | uint64_t TSFlags = MI.getDesc().TSFlags; |
189 | bool isPre = false; |
190 | switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { |
191 | default: return nullptr; |
192 | case ARMII::IndexModePre: |
193 | isPre = true; |
194 | break; |
195 | case ARMII::IndexModePost: |
196 | break; |
197 | } |
198 | |
199 | // Try splitting an indexed load/store to an un-indexed one plus an add/sub |
200 | // operation. |
  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
202 | if (MemOpc == 0) |
203 | return nullptr; |
204 | |
205 | MachineInstr *UpdateMI = nullptr; |
206 | MachineInstr *MemMI = nullptr; |
207 | unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); |
208 | const MCInstrDesc &MCID = MI.getDesc(); |
209 | unsigned NumOps = MCID.getNumOperands(); |
210 | bool isLoad = !MI.mayStore(); |
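  // The operands are read positionally below: the writeback register (operand
  // 1 for loads, operand 0 for stores), the base register (operand 2), then
  // the offset register, the encoded offset immediate and the condition code
  // from the end of the operand list.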
  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(2);
  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
  Register WBReg = WB.getReg();
  Register BaseReg = Base.getReg();
  Register OffReg = Offset.getReg();
  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
219 | switch (AddrMode) { |
  default: llvm_unreachable("Unknown indexed op!");
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        return nullptr;
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
                     .addReg(BaseReg)
                     .addImm(Amt)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
                     .addReg(BaseReg)
                     .addReg(OffReg)
                     .addReg(0)
                     .addImm(SOOpc)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    } else
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
                     .addReg(BaseReg)
                     .addReg(OffReg)
                     .add(predOps(Pred))
                     .add(condCodeOp());
253 | break; |
254 | } |
255 | case ARMII::AddrMode3 : { |
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
                     .addReg(BaseReg)
                     .addImm(Amt)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    else
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
                     .addReg(BaseReg)
                     .addReg(OffReg)
                     .add(predOps(Pred))
                     .add(condCodeOp());
273 | break; |
274 | } |
275 | } |
276 | |
277 | std::vector<MachineInstr*> NewMIs; |
278 | if (isPre) { |
279 | if (isLoad) |
280 | MemMI = |
          BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
              .addReg(WBReg)
              .addImm(0)
              .addImm(Pred);
    else
      MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
                  .addReg(MI.getOperand(1).getReg())
                  .addReg(WBReg)
                  .addReg(0)
                  .addImm(0)
                  .addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
294 | } else { |
295 | if (isLoad) |
296 | MemMI = |
          BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
              .addReg(BaseReg)
              .addImm(0)
              .addImm(Pred);
    else
      MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
                  .addReg(MI.getOperand(1).getReg())
                  .addReg(BaseReg)
                  .addReg(0)
                  .addImm(0)
                  .addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
312 | } |
313 | |
314 | // Transfer LiveVariables states, kill / dead info. |
315 | if (LV) { |
316 | for (const MachineOperand &MO : MI.operands()) { |
317 | if (MO.isReg() && MO.getReg().isVirtual()) { |
318 | Register Reg = MO.getReg(); |
319 | |
320 | LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); |
321 | if (MO.isDef()) { |
322 | MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; |
323 | if (MO.isDead()) |
            LV->addVirtualRegisterDead(Reg, *NewMI);
325 | } |
326 | if (MO.isUse() && MO.isKill()) { |
327 | for (unsigned j = 0; j < 2; ++j) { |
328 | // Look at the two new MI's in reverse order. |
329 | MachineInstr *NewMI = NewMIs[j]; |
330 | if (!NewMI->readsRegister(Reg, /*TRI=*/nullptr)) |
331 | continue; |
            LV->addVirtualRegisterKilled(Reg, *NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
335 | break; |
336 | } |
337 | } |
338 | } |
339 | } |
340 | } |
341 | |
342 | MachineBasicBlock &MBB = *MI.getParent(); |
  MBB.insert(MI, NewMIs[1]);
  MBB.insert(MI, NewMIs[0]);
345 | return NewMIs[0]; |
346 | } |
347 | |
348 | // Branch analysis. |
349 | // Cond vector output format: |
350 | // 0 elements indicates an unconditional branch |
351 | // 2 elements indicates a conditional branch; the elements are |
352 | // the condition to check and the CPSR. |
353 | // 3 elements indicates a hardware loop end; the elements |
354 | // are the opcode, the operand value to test, and a dummy |
355 | // operand used to pad out to 3 operands. |
356 | bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB, |
357 | MachineBasicBlock *&TBB, |
358 | MachineBasicBlock *&FBB, |
359 | SmallVectorImpl<MachineOperand> &Cond, |
360 | bool AllowModify) const { |
361 | TBB = nullptr; |
362 | FBB = nullptr; |
363 | |
364 | MachineBasicBlock::instr_iterator I = MBB.instr_end(); |
365 | if (I == MBB.instr_begin()) |
366 | return false; // Empty blocks are easy. |
367 | --I; |
368 | |
369 | // Walk backwards from the end of the basic block until the branch is |
370 | // analyzed or we give up. |
  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
372 | // Flag to be raised on unanalyzeable instructions. This is useful in cases |
373 | // where we want to clean up on the end of the basic block before we bail |
374 | // out. |
375 | bool CantAnalyze = false; |
376 | |
377 | // Skip over DEBUG values, predicated nonterminators and speculation |
378 | // barrier terminators. |
379 | while (I->isDebugInstr() || !I->isTerminator() || |
           isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
           I->getOpcode() == ARM::t2DoLoopStartTP) {
382 | if (I == MBB.instr_begin()) |
383 | return false; |
384 | --I; |
385 | } |
386 | |
    if (isIndirectBranchOpcode(I->getOpcode()) ||
        isJumpTableBranchOpcode(I->getOpcode())) {
389 | // Indirect branches and jump tables can't be analyzed, but we still want |
390 | // to clean up any instructions at the tail of the basic block. |
391 | CantAnalyze = true; |
    } else if (isUncondBranchOpcode(I->getOpcode())) {
      TBB = I->getOperand(0).getMBB();
    } else if (isCondBranchOpcode(I->getOpcode())) {
395 | // Bail out if we encounter multiple conditional branches. |
396 | if (!Cond.empty()) |
397 | return true; |
398 | |
      assert(!FBB && "FBB should have been null.");
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(I->getOperand(1));
      Cond.push_back(I->getOperand(2));
404 | } else if (I->isReturn()) { |
405 | // Returns can't be analyzed, but we should run cleanup. |
406 | CantAnalyze = true; |
407 | } else if (I->getOpcode() == ARM::t2LoopEnd && |
408 | MBB.getParent() |
409 | ->getSubtarget<ARMSubtarget>() |
410 | .enableMachinePipeliner()) { |
411 | if (!Cond.empty()) |
412 | return true; |
413 | FBB = TBB; |
      TBB = I->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
      Cond.push_back(I->getOperand(0));
      Cond.push_back(MachineOperand::CreateImm(0));
418 | } else { |
      // We encountered an unrecognized terminator. Bail out immediately.
420 | return true; |
421 | } |
422 | |
423 | // Cleanup code - to be run for unpredicated unconditional branches and |
424 | // returns. |
    if (!isPredicated(*I) &&
        (isUncondBranchOpcode(I->getOpcode()) ||
         isIndirectBranchOpcode(I->getOpcode()) ||
         isJumpTableBranchOpcode(I->getOpcode()) ||
429 | I->isReturn())) { |
      // Forget any previous conditional branch information - it no longer
      // applies.
431 | Cond.clear(); |
432 | FBB = nullptr; |
433 | |
434 | // If we can modify the function, delete everything below this |
435 | // unconditional branch. |
436 | if (AllowModify) { |
        MachineBasicBlock::iterator DI = std::next(I);
438 | while (DI != MBB.instr_end()) { |
439 | MachineInstr &InstToDelete = *DI; |
440 | ++DI; |
441 | // Speculation barriers must not be deleted. |
          if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
443 | continue; |
444 | InstToDelete.eraseFromParent(); |
445 | } |
446 | } |
447 | } |
448 | |
449 | if (CantAnalyze) { |
450 | // We may not be able to analyze the block, but we could still have |
451 | // an unconditional branch as the last instruction in the block, which |
452 | // just branches to layout successor. If this is the case, then just |
453 | // remove it if we're allowed to make modifications. |
      if (AllowModify && !isPredicated(MBB.back()) &&
          isUncondBranchOpcode(MBB.back().getOpcode()) &&
          TBB && MBB.isLayoutSuccessor(TBB))
457 | removeBranch(MBB); |
458 | return true; |
459 | } |
460 | |
461 | if (I == MBB.instr_begin()) |
462 | return false; |
463 | |
464 | --I; |
465 | } |
466 | |
467 | // We made it past the terminators without bailing out - we must have |
468 | // analyzed this branch successfully. |
469 | return false; |
470 | } |
471 | |
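// Remove the trailing branch instructions of MBB, if any: first an
// unconditional or conditional branch (or t2LoopEnd), then any conditional
// branch (or t2LoopEnd) preceding it. Returns the number of branches removed
// (0, 1 or 2).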
472 | unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB, |
473 | int *BytesRemoved) const { |
  assert(!BytesRemoved && "code size not handled");
475 | |
476 | MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); |
477 | if (I == MBB.end()) |
478 | return 0; |
479 | |
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
482 | return 0; |
483 | |
484 | // Remove the branch. |
485 | I->eraseFromParent(); |
486 | |
487 | I = MBB.end(); |
488 | |
489 | if (I == MBB.begin()) return 1; |
490 | --I; |
  if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
492 | return 1; |
493 | |
494 | // Remove the branch. |
495 | I->eraseFromParent(); |
496 | return 2; |
497 | } |
498 | |
499 | unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB, |
500 | MachineBasicBlock *TBB, |
501 | MachineBasicBlock *FBB, |
502 | ArrayRef<MachineOperand> Cond, |
503 | const DebugLoc &DL, |
504 | int *BytesAdded) const { |
  assert(!BytesAdded && "code size not handled");
506 | ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); |
507 | int BOpc = !AFI->isThumbFunction() |
508 | ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); |
509 | int BccOpc = !AFI->isThumbFunction() |
510 | ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); |
511 | bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); |
512 | |
513 | // Shouldn't be a fall through. |
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
         "ARM branch conditions have two or three components!");
517 | |
518 | // For conditional branches, we use addOperand to preserve CPSR flags. |
519 | |
520 | if (!FBB) { |
521 | if (Cond.empty()) { // Unconditional branch? |
522 | if (isThumb) |
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
      else
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    } else if (Cond.size() == 2) {
      BuildMI(&MBB, DL, get(BccOpc))
          .addMBB(TBB)
          .addImm(Cond[0].getImm())
          .add(Cond[1]);
    } else
      BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
533 | return 1; |
534 | } |
535 | |
536 | // Two-way conditional branch. |
537 | if (Cond.size() == 2) |
    BuildMI(&MBB, DL, get(BccOpc))
        .addMBB(TBB)
        .addImm(Cond[0].getImm())
        .add(Cond[1]);
  else if (Cond.size() == 3)
    BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
  if (isThumb)
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
  else
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
548 | return 2; |
549 | } |
550 | |
551 | bool ARMBaseInstrInfo:: |
552 | reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { |
553 | if (Cond.size() == 2) { |
554 | ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); |
555 | Cond[0].setImm(ARMCC::getOppositeCondition(CC)); |
556 | return false; |
557 | } |
558 | return true; |
559 | } |
560 | |
561 | bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const { |
562 | if (MI.isBundle()) { |
563 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
564 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
565 | while (++I != E && I->isInsideBundle()) { |
566 | int PIdx = I->findFirstPredOperandIdx(); |
      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
568 | return true; |
569 | } |
570 | return false; |
571 | } |
572 | |
573 | int PIdx = MI.findFirstPredOperandIdx(); |
  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
575 | } |
576 | |
577 | std::string ARMBaseInstrInfo::createMIROperandComment( |
578 | const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, |
579 | const TargetRegisterInfo *TRI) const { |
580 | |
581 | // First, let's see if there is a generic comment for this operand |
  std::string GenericComment =
583 | TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI); |
584 | if (!GenericComment.empty()) |
585 | return GenericComment; |
586 | |
587 | // If not, check if we have an immediate operand. |
588 | if (!Op.isImm()) |
589 | return std::string(); |
590 | |
591 | // And print its corresponding condition code if the immediate is a |
592 | // predicate. |
593 | int FirstPredOp = MI.findFirstPredOperandIdx(); |
594 | if (FirstPredOp != (int) OpIdx) |
595 | return std::string(); |
596 | |
  std::string CC = "CC::";
  CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
599 | return CC; |
600 | } |
601 | |
602 | bool ARMBaseInstrInfo::PredicateInstruction( |
603 | MachineInstr &MI, ArrayRef<MachineOperand> Pred) const { |
604 | unsigned Opc = MI.getOpcode(); |
605 | if (isUncondBranchOpcode(Opc)) { |
    MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Pred[0].getImm())
        .addReg(Pred[1].getReg());
610 | return true; |
611 | } |
612 | |
613 | int PIdx = MI.findFirstPredOperandIdx(); |
614 | if (PIdx != -1) { |
    MachineOperand &PMO = MI.getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI.getOperand(PIdx+1).setReg(Pred[1].getReg());
618 | |
619 | // Thumb 1 arithmetic instructions do not set CPSR when executed inside an |
620 | // IT block. This affects how they are printed. |
621 | const MCInstrDesc &MCID = MI.getDesc(); |
622 | if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) { |
      assert(MCID.operands()[1].isOptionalDef() &&
             "CPSR def isn't expected operand");
      assert((MI.getOperand(1).isDead() ||
              MI.getOperand(1).getReg() != ARM::CPSR) &&
             "if conversion tried to stop defining used CPSR");
      MI.getOperand(1).setReg(ARM::NoRegister);
629 | } |
630 | |
631 | return true; |
632 | } |
633 | return false; |
634 | } |
635 | |
636 | bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, |
637 | ArrayRef<MachineOperand> Pred2) const { |
638 | if (Pred1.size() > 2 || Pred2.size() > 2) |
639 | return false; |
640 | |
641 | ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); |
642 | ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); |
643 | if (CC1 == CC2) |
644 | return true; |
645 | |
646 | switch (CC1) { |
647 | default: |
648 | return false; |
649 | case ARMCC::AL: |
650 | return true; |
651 | case ARMCC::HS: |
652 | return CC2 == ARMCC::HI; |
653 | case ARMCC::LS: |
654 | return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; |
655 | case ARMCC::GE: |
656 | return CC2 == ARMCC::GT; |
657 | case ARMCC::LE: |
658 | return CC2 == ARMCC::LT; |
659 | } |
660 | } |
661 | |
662 | bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI, |
663 | std::vector<MachineOperand> &Pred, |
664 | bool SkipDead) const { |
665 | bool Found = false; |
666 | for (const MachineOperand &MO : MI.operands()) { |
    bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
668 | bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR; |
669 | if (ClobbersCPSR || IsCPSR) { |
670 | |
      // Filter out T1 instructions that have a dead CPSR; this allows IT
      // blocks to be generated that contain T1 instructions.
673 | const MCInstrDesc &MCID = MI.getDesc(); |
674 | if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() && |
675 | SkipDead) |
676 | continue; |
677 | |
      Pred.push_back(MO);
679 | Found = true; |
680 | } |
681 | } |
682 | |
683 | return Found; |
684 | } |
685 | |
686 | bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) { |
687 | for (const auto &MO : MI.operands()) |
688 | if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) |
689 | return true; |
690 | return false; |
691 | } |
692 | |
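// Return true if this instruction may be placed inside an IT block. Thumb1
// flag-setting arithmetic opcodes are only eligible when their CPSR
// definition is dead.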
693 | static bool isEligibleForITBlock(const MachineInstr *MI) { |
694 | switch (MI->getOpcode()) { |
695 | default: return true; |
696 | case ARM::tADC: // ADC (register) T1 |
697 | case ARM::tADDi3: // ADD (immediate) T1 |
698 | case ARM::tADDi8: // ADD (immediate) T2 |
699 | case ARM::tADDrr: // ADD (register) T1 |
700 | case ARM::tAND: // AND (register) T1 |
701 | case ARM::tASRri: // ASR (immediate) T1 |
702 | case ARM::tASRrr: // ASR (register) T1 |
703 | case ARM::tBIC: // BIC (register) T1 |
704 | case ARM::tEOR: // EOR (register) T1 |
705 | case ARM::tLSLri: // LSL (immediate) T1 |
706 | case ARM::tLSLrr: // LSL (register) T1 |
707 | case ARM::tLSRri: // LSR (immediate) T1 |
708 | case ARM::tLSRrr: // LSR (register) T1 |
709 | case ARM::tMUL: // MUL T1 |
710 | case ARM::tMVN: // MVN (register) T1 |
711 | case ARM::tORR: // ORR (register) T1 |
712 | case ARM::tROR: // ROR (register) T1 |
713 | case ARM::tRSB: // RSB (immediate) T1 |
714 | case ARM::tSBC: // SBC (register) T1 |
715 | case ARM::tSUBi3: // SUB (immediate) T1 |
716 | case ARM::tSUBi8: // SUB (immediate) T2 |
717 | case ARM::tSUBrr: // SUB (register) T1 |
    return !ARMBaseInstrInfo::isCPSRDefined(*MI);
719 | } |
720 | } |
721 | |
722 | /// isPredicable - Return true if the specified instruction can be predicated. |
723 | /// By default, this returns true for every instruction with a |
724 | /// PredicateOperand. |
725 | bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { |
726 | if (!MI.isPredicable()) |
727 | return false; |
728 | |
729 | if (MI.isBundle()) |
730 | return false; |
731 | |
  if (!isEligibleForITBlock(&MI))
733 | return false; |
734 | |
735 | const MachineFunction *MF = MI.getParent()->getParent(); |
736 | const ARMFunctionInfo *AFI = |
737 | MF->getInfo<ARMFunctionInfo>(); |
738 | |
739 | // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM. |
740 | // In their ARM encoding, they can't be encoded in a conditional form. |
741 | if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) |
742 | return false; |
743 | |
744 | // Make indirect control flow changes unpredicable when SLS mitigation is |
745 | // enabled. |
746 | const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>(); |
747 | if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI)) |
748 | return false; |
749 | if (ST.hardenSlsBlr() && isIndirectCall(MI)) |
750 | return false; |
751 | |
752 | if (AFI->isThumb2Function()) { |
753 | if (getSubtarget().restrictIT()) |
      return isV8EligibleForIT(&MI);
755 | } |
756 | |
757 | return true; |
758 | } |
759 | |
760 | namespace llvm { |
761 | |
762 | template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) { |
763 | for (const MachineOperand &MO : MI->operands()) { |
764 | if (!MO.isReg() || MO.isUndef() || MO.isUse()) |
765 | continue; |
766 | if (MO.getReg() != ARM::CPSR) |
767 | continue; |
768 | if (!MO.isDead()) |
769 | return false; |
770 | } |
771 | // all definitions of CPSR are dead |
772 | return true; |
773 | } |
774 | |
775 | } // end namespace llvm |
776 | |
777 | /// GetInstSize - Return the size of the specified MachineInstr. |
778 | /// |
779 | unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { |
780 | const MachineBasicBlock &MBB = *MI.getParent(); |
781 | const MachineFunction *MF = MBB.getParent(); |
782 | const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); |
783 | |
784 | const MCInstrDesc &MCID = MI.getDesc(); |
785 | |
786 | switch (MI.getOpcode()) { |
787 | default: |
    // Return the size specified in the .td file. If there's none, return 0,
    // as we can't define a default size (Thumb1 instructions are 2 bytes,
    // Thumb2 instructions are 2-4 bytes, and ARM instructions are 4 bytes),
    // in contrast to AArch64 instructions, which have a default size of
    // 4 bytes.
793 | return MCID.getSize(); |
794 | case TargetOpcode::BUNDLE: |
795 | return getInstBundleLength(MI); |
796 | case ARM::CONSTPOOL_ENTRY: |
797 | case ARM::JUMPTABLE_INSTS: |
798 | case ARM::JUMPTABLE_ADDRS: |
799 | case ARM::JUMPTABLE_TBB: |
800 | case ARM::JUMPTABLE_TBH: |
801 | // If this machine instr is a constant pool entry, its size is recorded as |
802 | // operand #2. |
    return MI.getOperand(2).getImm();
804 | case ARM::SPACE: |
    return MI.getOperand(1).getImm();
806 | case ARM::INLINEASM: |
807 | case ARM::INLINEASM_BR: { |
808 | // If this machine instr is an inline asm, measure it. |
    unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
    if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
      Size = alignTo(Size, 4);
812 | return Size; |
813 | } |
814 | } |
815 | } |
816 | |
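// Sum the sizes of the instructions inside a bundle, excluding the BUNDLE
// header itself.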
817 | unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const { |
818 | unsigned Size = 0; |
819 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
820 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
821 | while (++I != E && I->isInsideBundle()) { |
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
824 | } |
825 | return Size; |
826 | } |
827 | |
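// Emit an MRS (or the M-class t2MRS variant) that copies the CPSR/APSR flags
// into DestReg.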
828 | void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB, |
829 | MachineBasicBlock::iterator I, |
830 | unsigned DestReg, bool KillSrc, |
831 | const ARMSubtarget &Subtarget) const { |
832 | unsigned Opc = Subtarget.isThumb() |
833 | ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR) |
834 | : ARM::MRS; |
835 | |
836 | MachineInstrBuilder MIB = |
      BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);
838 | |
839 | // There is only 1 A/R class MRS instruction, and it always refers to |
840 | // APSR. However, there are lots of other possibilities on M-class cores. |
841 | if (Subtarget.isMClass()) |
    MIB.addImm(0x800);
843 | |
  MIB.add(predOps(ARMCC::AL))
      .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
846 | } |
847 | |
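// Emit an MSR (or the M-class t2MSR variant) that copies SrcReg into the
// CPSR/APSR flags.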
848 | void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, |
849 | MachineBasicBlock::iterator I, |
850 | unsigned SrcReg, bool KillSrc, |
851 | const ARMSubtarget &Subtarget) const { |
852 | unsigned Opc = Subtarget.isThumb() |
853 | ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR) |
854 | : ARM::MSR; |
855 | |
  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));
857 | |
858 | if (Subtarget.isMClass()) |
    MIB.addImm(0x800);
  else
    MIB.addImm(8);
862 | |
  MIB.addReg(SrcReg, getKillRegState(KillSrc))
      .add(predOps(ARMCC::AL))
      .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
866 | } |
867 | |
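// Helpers for appending the MVE VPT-block predicate operands (the vpred_n and
// vpred_r operand forms) to an instruction being built.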
868 | void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) { |
  MIB.addImm(ARMVCC::None);
  MIB.addReg(0);
  MIB.addReg(0); // tp_reg
872 | } |
873 | |
874 | void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, |
875 | Register DestReg) { |
876 | addUnpredicatedMveVpredNOp(MIB); |
  MIB.addReg(DestReg, RegState::Undef);
878 | } |
879 | |
880 | void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) { |
  MIB.addImm(Cond);
  MIB.addReg(ARM::VPR, RegState::Implicit);
  MIB.addReg(0); // tp_reg
884 | } |
885 | |
886 | void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB, |
887 | unsigned Cond, unsigned Inactive) { |
888 | addPredicatedMveVpredNOp(MIB, Cond); |
  MIB.addReg(Inactive);
890 | } |
891 | |
892 | void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, |
893 | MachineBasicBlock::iterator I, |
894 | const DebugLoc &DL, MCRegister DestReg, |
895 | MCRegister SrcReg, bool KillSrc) const { |
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
898 | |
899 | if (GPRDest && GPRSrc) { |
    BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
904 | return; |
905 | } |
906 | |
  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
909 | |
910 | unsigned Opc = 0; |
911 | if (SPRDest && SPRSrc) |
912 | Opc = ARM::VMOVS; |
913 | else if (GPRDest && SPRSrc) |
914 | Opc = ARM::VMOVRS; |
915 | else if (SPRDest && GPRSrc) |
916 | Opc = ARM::VMOVSR; |
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
920 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy; |
921 | |
922 | if (Opc) { |
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::MVE_VORR)
      addUnpredicatedMveVpredROp(MIB, DestReg);
    else if (Opc != ARM::MQPRCopy)
      MIB.add(predOps(ARMCC::AL));
931 | return; |
932 | } |
933 | |
934 | // Handle register classes that require multiple instructions. |
935 | unsigned BeginIdx = 0; |
936 | unsigned SubRegs = 0; |
937 | int Spacing = 1; |
938 | |
939 | // Use VORRq when possible. |
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
941 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR; |
942 | BeginIdx = ARM::qsub_0; |
943 | SubRegs = 2; |
  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
945 | Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR; |
946 | BeginIdx = ARM::qsub_0; |
947 | SubRegs = 4; |
948 | // Fall back to VMOVD. |
  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
950 | Opc = ARM::VMOVD; |
951 | BeginIdx = ARM::dsub_0; |
952 | SubRegs = 2; |
  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
954 | Opc = ARM::VMOVD; |
955 | BeginIdx = ARM::dsub_0; |
956 | SubRegs = 3; |
  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
958 | Opc = ARM::VMOVD; |
959 | BeginIdx = ARM::dsub_0; |
960 | SubRegs = 4; |
  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
962 | Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr; |
963 | BeginIdx = ARM::gsub_0; |
964 | SubRegs = 2; |
  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
966 | Opc = ARM::VMOVD; |
967 | BeginIdx = ARM::dsub_0; |
968 | SubRegs = 2; |
969 | Spacing = 2; |
  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
971 | Opc = ARM::VMOVD; |
972 | BeginIdx = ARM::dsub_0; |
973 | SubRegs = 3; |
974 | Spacing = 2; |
  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
976 | Opc = ARM::VMOVD; |
977 | BeginIdx = ARM::dsub_0; |
978 | SubRegs = 4; |
979 | Spacing = 2; |
  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
981 | !Subtarget.hasFP64()) { |
982 | Opc = ARM::VMOVS; |
983 | BeginIdx = ARM::ssub_0; |
984 | SubRegs = 2; |
985 | } else if (SrcReg == ARM::CPSR) { |
986 | copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget); |
987 | return; |
988 | } else if (DestReg == ARM::CPSR) { |
989 | copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget); |
990 | return; |
991 | } else if (DestReg == ARM::VPR) { |
992 | assert(ARM::GPRRegClass.contains(SrcReg)); |
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
996 | return; |
997 | } else if (SrcReg == ARM::VPR) { |
998 | assert(ARM::GPRRegClass.contains(DestReg)); |
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
1002 | return; |
1003 | } else if (DestReg == ARM::FPSCR_NZCV) { |
1004 | assert(ARM::GPRRegClass.contains(SrcReg)); |
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
1008 | return; |
1009 | } else if (SrcReg == ARM::FPSCR_NZCV) { |
1010 | assert(ARM::GPRRegClass.contains(DestReg)); |
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
1014 | return; |
1015 | } |
1016 | |
  assert(Opc && "Impossible reg-to-reg copy");
1018 | |
1019 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
1020 | MachineInstrBuilder Mov; |
1021 | |
1022 | // Copy register tuples backward when the first Dest reg overlaps with SrcReg. |
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
1024 | BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing); |
1025 | Spacing = -Spacing; |
1026 | } |
1027 | #ifndef NDEBUG |
1028 | SmallSet<unsigned, 4> DstRegs; |
1029 | #endif |
1030 | for (unsigned i = 0; i != SubRegs; ++i) { |
    Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
    Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
    assert(Dst && Src && "Bad sub-register");
1034 | #ifndef NDEBUG |
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);
1037 | #endif |
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
1039 | // VORR (NEON or MVE) takes two source operands. |
1040 | if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) { |
      Mov.addReg(Src);
1042 | } |
1043 | // MVE VORR takes predicate operands in place of an ordinary condition. |
1044 | if (Opc == ARM::MVE_VORR) |
      addUnpredicatedMveVpredROp(Mov, Dst);
1046 | else |
      Mov = Mov.add(predOps(ARMCC::AL));
1048 | // MOVr can set CC. |
1049 | if (Opc == ARM::MOVr) |
      Mov = Mov.add(condCodeOp());
1051 | } |
1052 | // Add implicit super-register defs and kills to the last instruction. |
  Mov->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    Mov->addRegisterKilled(SrcReg, TRI);
1056 | } |
1057 | |
1058 | std::optional<DestSourcePair> |
1059 | ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { |
  // VMOVRRD is also a copy instruction, but it requires special handling: it
  // is a more complex form of copy, so we do not consider it here. To
  // recognize such instructions, the isExtractSubregLike MI interface
  // function could be used.
  // VORRq is considered a move only if its two inputs are the same register.
1067 | if (!MI.isMoveReg() || |
1068 | (MI.getOpcode() == ARM::VORRq && |
       MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
1070 | return std::nullopt; |
  return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1072 | } |
1073 | |
1074 | std::optional<ParamLoadedValue> |
1075 | ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI, |
1076 | Register Reg) const { |
1077 | if (auto DstSrcPair = isCopyInstrImpl(MI)) { |
1078 | Register DstReg = DstSrcPair->Destination->getReg(); |
1079 | |
1080 | // TODO: We don't handle cases where the forwarding reg is narrower/wider |
1081 | // than the copy registers. Consider for example: |
1082 | // |
1083 | // s16 = VMOVS s0 |
1084 | // s17 = VMOVS s1 |
1085 | // call @callee(d0) |
1086 | // |
1087 | // We'd like to describe the call site value of d0 as d8, but this requires |
1088 | // gathering and merging the descriptions for the two VMOVS instructions. |
1089 | // |
1090 | // We also don't handle the reverse situation, where the forwarding reg is |
1091 | // narrower than the copy destination: |
1092 | // |
1093 | // d8 = VMOVD d0 |
1094 | // call @callee(s1) |
1095 | // |
1096 | // We need to produce a fragment description (the call site value of s1 is |
1097 | // /not/ just d8). |
1098 | if (DstReg != Reg) |
1099 | return std::nullopt; |
1100 | } |
1101 | return TargetInstrInfo::describeLoadedValue(MI, Reg); |
1102 | } |
1103 | |
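/// AddDReg - Add the register Reg (with sub-register index SubIdx, if any) as
/// an operand to MIB, resolving the sub-register explicitly when Reg is a
/// physical register.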
1104 | const MachineInstrBuilder & |
1105 | ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg, |
1106 | unsigned SubIdx, unsigned State, |
1107 | const TargetRegisterInfo *TRI) const { |
1108 | if (!SubIdx) |
    return MIB.addReg(Reg, State);
1110 | |
1111 | if (Register::isPhysicalRegister(Reg)) |
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
1114 | } |
1115 | |
1116 | void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, |
1117 | MachineBasicBlock::iterator I, |
1118 | Register SrcReg, bool isKill, int FI, |
1119 | const TargetRegisterClass *RC, |
1120 | const TargetRegisterInfo *TRI, |
1121 | Register VReg) const { |
1122 | MachineFunction &MF = *MBB.getParent(); |
1123 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
  Align Alignment = MFI.getObjectAlign(FI);
1125 | |
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
      MFI.getObjectSize(FI), Alignment);
1129 | |
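  // Pick a store instruction based on the spill size of the register class.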
  switch (TRI->getSpillSize(*RC)) {
1131 | case 2: |
1132 | if (ARM::HPRRegClass.hasSubClassEq(RC)) { |
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
1141 | break; |
1142 | case 4: |
1143 | if (ARM::GPRRegClass.hasSubClassEq(RC)) { |
      BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
1166 | break; |
1167 | case 8: |
1168 | if (ARM::DPRRegClass.hasSubClassEq(RC)) { |
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      if (Subtarget.hasV5TEOps()) {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
           .add(predOps(ARMCC::AL));
      } else {
        // Fallback to STM instruction, which has existed since the dawn of
        // time.
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
                                      .addFrameIndex(FI)
                                      .addMemOperand(MMO)
                                      .add(predOps(ARMCC::AL));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
1194 | break; |
1195 | case 16: |
1196 | if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) { |
1197 | // Use aligned spills if the stack can be realigned. |
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
            .addReg(SrcReg, getKillRegState(isKill))
            .addFrameIndex(FI)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      }
    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
               Subtarget.hasMVEIntegerOps()) {
      auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
      MIB.addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      addUnpredicatedMveVpredNOp(MIB);
    } else
      llvm_unreachable("Unknown reg class!");
1222 | break; |
1223 | case 24: |
1224 | if (ARM::DTripleRegClass.hasSubClassEq(RC)) { |
1225 | // Use aligned spills if the stack can be realigned. |
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
                                          get(ARM::VSTMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
1246 | break; |
1247 | case 32: |
1248 | if (ARM::QQPRRegClass.hasSubClassEq(RC) || |
1249 | ARM::MQQPRRegClass.hasSubClassEq(RC) || |
1250 | ARM::DQuadRegClass.hasSubClassEq(RC)) { |
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        // FIXME: It's possible to only store part of the QQ register if the
        // spilled def has a sub-register index.
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else if (Subtarget.hasMVEIntegerOps()) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
            .addReg(SrcReg, getKillRegState(isKill))
            .addFrameIndex(FI)
            .addMemOperand(MMO);
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
                                          get(ARM::VSTMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
1279 | break; |
1280 | case 64: |
1281 | if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) && |
1282 | Subtarget.hasMVEIntegerOps()) { |
      BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addMemOperand(MMO);
    } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
                                    .addFrameIndex(FI)
                                    .add(predOps(ARMCC::AL))
                                    .addMemOperand(MMO);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
      AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown reg class!");
1305 | } |
1306 | } |
1307 | |
1308 | Register ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI, |
1309 | int &FrameIndex) const { |
1310 | switch (MI.getOpcode()) { |
1311 | default: break; |
1312 | case ARM::STRrs: |
1313 | case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. |
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
        MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
        MI.getOperand(3).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
1319 | } |
1320 | break; |
1321 | case ARM::STRi12: |
1322 | case ARM::t2STRi12: |
1323 | case ARM::tSTRspi: |
1324 | case ARM::VSTRD: |
1325 | case ARM::VSTRS: |
1326 | case ARM::VSTR_P0_off: |
1327 | case ARM::MVE_VSTRWU32: |
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
1332 | } |
1333 | break; |
1334 | case ARM::VST1q64: |
1335 | case ARM::VST1d64TPseudo: |
1336 | case ARM::VST1d64QPseudo: |
    if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
      FrameIndex = MI.getOperand(0).getIndex();
      return MI.getOperand(2).getReg();
1340 | } |
1341 | break; |
1342 | case ARM::VSTMQIA: |
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
1346 | } |
1347 | break; |
1348 | case ARM::MQQPRStore: |
1349 | case ARM::MQQQQPRStore: |
    if (MI.getOperand(1).isFI()) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
1353 | } |
1354 | break; |
1355 | } |
1356 | |
1357 | return 0; |
1358 | } |
1359 | |
1360 | Register ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, |
1361 | int &FrameIndex) const { |
1362 | SmallVector<const MachineMemOperand *, 1> Accesses; |
1363 | if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) && |
1364 | Accesses.size() == 1) { |
1365 | FrameIndex = |
1366 | cast<FixedStackPseudoSourceValue>(Val: Accesses.front()->getPseudoValue()) |
1367 | ->getFrameIndex(); |
1368 | return true; |
1369 | } |
1370 | return false; |
1371 | } |
1372 | |
1373 | void ARMBaseInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, |
1374 | MachineBasicBlock::iterator I, |
1375 | Register DestReg, int FI, |
1376 | const TargetRegisterClass *RC, |
1377 | const TargetRegisterInfo *TRI, |
1378 | Register VReg) const { |
1379 | DebugLoc DL; |
1380 | if (I != MBB.end()) DL = I->getDebugLoc(); |
1381 | MachineFunction &MF = *MBB.getParent(); |
1382 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1383 | const Align Alignment = MFI.getObjectAlign(ObjectIdx: FI); |
1384 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
1385 | PtrInfo: MachinePointerInfo::getFixedStack(MF, FI), F: MachineMemOperand::MOLoad, |
1386 | Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: Alignment); |
1387 | |
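|   // Pick a reload opcode based on the spill size (in bytes) of the register |
|   // class being restored. |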
1388 | switch (TRI->getSpillSize(RC: *RC)) { |
1389 | case 2: |
1390 | if (ARM::HPRRegClass.hasSubClassEq(RC)) { |
1391 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDRH), DestReg) |
1392 | .addFrameIndex(Idx: FI) |
1393 | .addImm(Val: 0) |
1394 | .addMemOperand(MMO) |
1395 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1396 | } else |
1397 | llvm_unreachable("Unknown reg class!" ); |
1398 | break; |
1399 | case 4: |
1400 | if (ARM::GPRRegClass.hasSubClassEq(RC)) { |
1401 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::LDRi12), DestReg) |
1402 | .addFrameIndex(Idx: FI) |
1403 | .addImm(Val: 0) |
1404 | .addMemOperand(MMO) |
1405 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1406 | } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { |
1407 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDRS), DestReg) |
1408 | .addFrameIndex(Idx: FI) |
1409 | .addImm(Val: 0) |
1410 | .addMemOperand(MMO) |
1411 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1412 | } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) { |
1413 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDR_P0_off), DestReg) |
1414 | .addFrameIndex(Idx: FI) |
1415 | .addImm(Val: 0) |
1416 | .addMemOperand(MMO) |
1417 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1418 | } else |
1419 | llvm_unreachable("Unknown reg class!" ); |
1420 | break; |
1421 | case 8: |
1422 | if (ARM::DPRRegClass.hasSubClassEq(RC)) { |
1423 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDRD), DestReg) |
1424 | .addFrameIndex(Idx: FI) |
1425 | .addImm(Val: 0) |
1426 | .addMemOperand(MMO) |
1427 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1428 | } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) { |
1429 | MachineInstrBuilder MIB; |
1430 | |
1431 | if (Subtarget.hasV5TEOps()) { |
1432 | MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::LDRD)); |
1433 | AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_0, State: RegState::DefineNoRead, TRI); |
1434 | AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_1, State: RegState::DefineNoRead, TRI); |
1435 | MIB.addFrameIndex(Idx: FI).addReg(RegNo: 0).addImm(Val: 0).addMemOperand(MMO) |
1436 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1437 | } else { |
1438 |         // Fall back to the LDM instruction, which has existed since the |
1439 |         // dawn of time. |
1440 | MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::LDMIA)) |
1441 | .addFrameIndex(Idx: FI) |
1442 | .addMemOperand(MMO) |
1443 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1444 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_0, State: RegState::DefineNoRead, TRI); |
1445 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_1, State: RegState::DefineNoRead, TRI); |
1446 | } |
1447 | |
1448 | if (DestReg.isPhysical()) |
1449 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
1450 | } else |
1451 | llvm_unreachable("Unknown reg class!" ); |
1452 | break; |
1453 | case 16: |
1454 | if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) { |
1455 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) { |
1456 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLD1q64), DestReg) |
1457 | .addFrameIndex(Idx: FI) |
1458 | .addImm(Val: 16) |
1459 | .addMemOperand(MMO) |
1460 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1461 | } else { |
1462 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDMQIA), DestReg) |
1463 | .addFrameIndex(Idx: FI) |
1464 | .addMemOperand(MMO) |
1465 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1466 | } |
1467 | } else if (ARM::QPRRegClass.hasSubClassEq(RC) && |
1468 | Subtarget.hasMVEIntegerOps()) { |
1469 | auto MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::MVE_VLDRWU32), DestReg); |
1470 | MIB.addFrameIndex(Idx: FI) |
1471 | .addImm(Val: 0) |
1472 | .addMemOperand(MMO); |
1473 | addUnpredicatedMveVpredNOp(MIB); |
1474 | } else |
1475 | llvm_unreachable("Unknown reg class!" ); |
1476 | break; |
1477 | case 24: |
1478 | if (ARM::DTripleRegClass.hasSubClassEq(RC)) { |
1479 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && |
1480 | Subtarget.hasNEON()) { |
1481 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLD1d64TPseudo), DestReg) |
1482 | .addFrameIndex(Idx: FI) |
1483 | .addImm(Val: 16) |
1484 | .addMemOperand(MMO) |
1485 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1486 | } else { |
1487 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDMDIA)) |
1488 | .addFrameIndex(Idx: FI) |
1489 | .addMemOperand(MMO) |
1490 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1491 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_0, State: RegState::DefineNoRead, TRI); |
1492 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_1, State: RegState::DefineNoRead, TRI); |
1493 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_2, State: RegState::DefineNoRead, TRI); |
1494 | if (DestReg.isPhysical()) |
1495 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
1496 | } |
1497 | } else |
1498 | llvm_unreachable("Unknown reg class!" ); |
1499 | break; |
1500 | case 32: |
1501 | if (ARM::QQPRRegClass.hasSubClassEq(RC) || |
1502 | ARM::MQQPRRegClass.hasSubClassEq(RC) || |
1503 | ARM::DQuadRegClass.hasSubClassEq(RC)) { |
1504 | if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) && |
1505 | Subtarget.hasNEON()) { |
1506 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLD1d64QPseudo), DestReg) |
1507 | .addFrameIndex(Idx: FI) |
1508 | .addImm(Val: 16) |
1509 | .addMemOperand(MMO) |
1510 | .add(MOs: predOps(Pred: ARMCC::AL)); |
1511 | } else if (Subtarget.hasMVEIntegerOps()) { |
1512 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::MQQPRLoad), DestReg) |
1513 | .addFrameIndex(Idx: FI) |
1514 | .addMemOperand(MMO); |
1515 | } else { |
1516 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDMDIA)) |
1517 | .addFrameIndex(Idx: FI) |
1518 | .add(MOs: predOps(Pred: ARMCC::AL)) |
1519 | .addMemOperand(MMO); |
1520 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_0, State: RegState::DefineNoRead, TRI); |
1521 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_1, State: RegState::DefineNoRead, TRI); |
1522 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_2, State: RegState::DefineNoRead, TRI); |
1523 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_3, State: RegState::DefineNoRead, TRI); |
1524 | if (DestReg.isPhysical()) |
1525 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
1526 | } |
1527 | } else |
1528 | llvm_unreachable("Unknown reg class!" ); |
1529 | break; |
1530 | case 64: |
1531 | if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) && |
1532 | Subtarget.hasMVEIntegerOps()) { |
1533 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::MQQQQPRLoad), DestReg) |
1534 | .addFrameIndex(Idx: FI) |
1535 | .addMemOperand(MMO); |
1536 | } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { |
1537 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::VLDMDIA)) |
1538 | .addFrameIndex(Idx: FI) |
1539 | .add(MOs: predOps(Pred: ARMCC::AL)) |
1540 | .addMemOperand(MMO); |
1541 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_0, State: RegState::DefineNoRead, TRI); |
1542 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_1, State: RegState::DefineNoRead, TRI); |
1543 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_2, State: RegState::DefineNoRead, TRI); |
1544 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_3, State: RegState::DefineNoRead, TRI); |
1545 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_4, State: RegState::DefineNoRead, TRI); |
1546 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_5, State: RegState::DefineNoRead, TRI); |
1547 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_6, State: RegState::DefineNoRead, TRI); |
1548 | MIB = AddDReg(MIB, Reg: DestReg, SubIdx: ARM::dsub_7, State: RegState::DefineNoRead, TRI); |
1549 | if (DestReg.isPhysical()) |
1550 | MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine); |
1551 | } else |
1552 | llvm_unreachable("Unknown reg class!" ); |
1553 | break; |
1554 | default: |
1555 | llvm_unreachable("Unknown regclass!" ); |
1556 | } |
1557 | } |
1558 | |
1559 | Register ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, |
1560 | int &FrameIndex) const { |
1561 | switch (MI.getOpcode()) { |
1562 | default: break; |
1563 | case ARM::LDRrs: |
1564 | case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. |
1565 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isReg() && |
1566 | MI.getOperand(i: 3).isImm() && MI.getOperand(i: 2).getReg() == 0 && |
1567 | MI.getOperand(i: 3).getImm() == 0) { |
1568 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1569 | return MI.getOperand(i: 0).getReg(); |
1570 | } |
1571 | break; |
1572 | case ARM::LDRi12: |
1573 | case ARM::t2LDRi12: |
1574 | case ARM::tLDRspi: |
1575 | case ARM::VLDRD: |
1576 | case ARM::VLDRS: |
1577 | case ARM::VLDR_P0_off: |
1578 | case ARM::MVE_VLDRWU32: |
1579 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() && |
1580 | MI.getOperand(i: 2).getImm() == 0) { |
1581 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1582 | return MI.getOperand(i: 0).getReg(); |
1583 | } |
1584 | break; |
1585 | case ARM::VLD1q64: |
1586 | case ARM::VLD1d8TPseudo: |
1587 | case ARM::VLD1d16TPseudo: |
1588 | case ARM::VLD1d32TPseudo: |
1589 | case ARM::VLD1d64TPseudo: |
1590 | case ARM::VLD1d8QPseudo: |
1591 | case ARM::VLD1d16QPseudo: |
1592 | case ARM::VLD1d32QPseudo: |
1593 | case ARM::VLD1d64QPseudo: |
1594 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 0).getSubReg() == 0) { |
1595 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1596 | return MI.getOperand(i: 0).getReg(); |
1597 | } |
1598 | break; |
1599 | case ARM::VLDMQIA: |
1600 | if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 0).getSubReg() == 0) { |
1601 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1602 | return MI.getOperand(i: 0).getReg(); |
1603 | } |
1604 | break; |
1605 | case ARM::MQQPRLoad: |
1606 | case ARM::MQQQQPRLoad: |
1607 | if (MI.getOperand(i: 1).isFI()) { |
1608 | FrameIndex = MI.getOperand(i: 1).getIndex(); |
1609 | return MI.getOperand(i: 0).getReg(); |
1610 | } |
1611 | break; |
1612 | } |
1613 | |
1614 | return 0; |
1615 | } |
1616 | |
1617 | Register ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI, |
1618 | int &FrameIndex) const { |
1619 | SmallVector<const MachineMemOperand *, 1> Accesses; |
1620 | if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) && |
1621 | Accesses.size() == 1) { |
1622 | FrameIndex = |
1623 | cast<FixedStackPseudoSourceValue>(Val: Accesses.front()->getPseudoValue()) |
1624 | ->getFrameIndex(); |
1625 | return true; |
1626 | } |
1627 | return false; |
1628 | } |
1629 | |
1630 | /// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD |
1631 | /// depending on whether the result is used. |
1632 | void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { |
1633 | bool isThumb1 = Subtarget.isThumb1Only(); |
1634 | bool isThumb2 = Subtarget.isThumb2(); |
1635 | const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo(); |
1636 | |
1637 | DebugLoc dl = MI->getDebugLoc(); |
1638 | MachineBasicBlock *BB = MI->getParent(); |
1639 | |
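|   // As used below, the MEMCPY pseudo's operands are: 0 = updated store base |
|   // (def), 1 = updated load base (def), 2 = store base, 3 = load base, and |
|   // the operands from index 5 onwards are the scratch registers. |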
1640 | MachineInstrBuilder LDM, STM; |
1641 | if (isThumb1 || !MI->getOperand(i: 1).isDead()) { |
1642 | MachineOperand LDWb(MI->getOperand(i: 1)); |
1643 | LDM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2LDMIA_UPD |
1644 | : isThumb1 ? ARM::tLDMIA_UPD |
1645 | : ARM::LDMIA_UPD)) |
1646 | .add(MO: LDWb); |
1647 | } else { |
1648 | LDM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2LDMIA : ARM::LDMIA)); |
1649 | } |
1650 | |
1651 | if (isThumb1 || !MI->getOperand(i: 0).isDead()) { |
1652 | MachineOperand STWb(MI->getOperand(i: 0)); |
1653 | STM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2STMIA_UPD |
1654 | : isThumb1 ? ARM::tSTMIA_UPD |
1655 | : ARM::STMIA_UPD)) |
1656 | .add(MO: STWb); |
1657 | } else { |
1658 | STM = BuildMI(BB&: *BB, I: MI, MIMD: dl, MCID: TII->get(Opcode: isThumb2 ? ARM::t2STMIA : ARM::STMIA)); |
1659 | } |
1660 | |
1661 | MachineOperand LDBase(MI->getOperand(i: 3)); |
1662 | LDM.add(MO: LDBase).add(MOs: predOps(Pred: ARMCC::AL)); |
1663 | |
1664 | MachineOperand STBase(MI->getOperand(i: 2)); |
1665 | STM.add(MO: STBase).add(MOs: predOps(Pred: ARMCC::AL)); |
1666 | |
1667 | // Sort the scratch registers into ascending order. |
1668 | const TargetRegisterInfo &TRI = getRegisterInfo(); |
1669 | SmallVector<unsigned, 6> ScratchRegs; |
1670 | for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands(), N: 5)) |
1671 | ScratchRegs.push_back(Elt: MO.getReg()); |
1672 | llvm::sort(C&: ScratchRegs, |
1673 | Comp: [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool { |
1674 | return TRI.getEncodingValue(RegNo: Reg1) < |
1675 | TRI.getEncodingValue(RegNo: Reg2); |
1676 | }); |
1677 | |
1678 | for (const auto &Reg : ScratchRegs) { |
1679 | LDM.addReg(RegNo: Reg, flags: RegState::Define); |
1680 | STM.addReg(RegNo: Reg, flags: RegState::Kill); |
1681 | } |
1682 | |
1683 | BB->erase(I: MI); |
1684 | } |
1685 | |
1686 | bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { |
1687 | if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { |
1688 | expandLoadStackGuard(MI); |
1689 | MI.getParent()->erase(I: MI); |
1690 | return true; |
1691 | } |
1692 | |
1693 | if (MI.getOpcode() == ARM::MEMCPY) { |
1694 | expandMEMCPY(MI); |
1695 | return true; |
1696 | } |
1697 | |
1698 | // This hook gets to expand COPY instructions before they become |
1699 | // copyPhysReg() calls. Look for VMOVS instructions that can legally be |
1700 | // widened to VMOVD. We prefer the VMOVD when possible because it may be |
1701 | // changed into a VORR that can go down the NEON pipeline. |
1702 | if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64()) |
1703 | return false; |
1704 | |
1705 | // Look for a copy between even S-registers. That is where we keep floats |
1706 | // when using NEON v2f32 instructions for f32 arithmetic. |
1707 | Register DstRegS = MI.getOperand(i: 0).getReg(); |
1708 | Register SrcRegS = MI.getOperand(i: 1).getReg(); |
1709 | if (!ARM::SPRRegClass.contains(Reg1: DstRegS, Reg2: SrcRegS)) |
1710 | return false; |
1711 | |
1712 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
1713 | unsigned DstRegD = TRI->getMatchingSuperReg(Reg: DstRegS, SubIdx: ARM::ssub_0, |
1714 | RC: &ARM::DPRRegClass); |
1715 | unsigned SrcRegD = TRI->getMatchingSuperReg(Reg: SrcRegS, SubIdx: ARM::ssub_0, |
1716 | RC: &ARM::DPRRegClass); |
1717 | if (!DstRegD || !SrcRegD) |
1718 | return false; |
1719 | |
1720 | // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only |
1721 | // legal if the COPY already defines the full DstRegD, and it isn't a |
1722 | // sub-register insertion. |
1723 | if (!MI.definesRegister(Reg: DstRegD, TRI) || MI.readsRegister(Reg: DstRegD, TRI)) |
1724 | return false; |
1725 | |
1726 | // A dead copy shouldn't show up here, but reject it just in case. |
1727 | if (MI.getOperand(i: 0).isDead()) |
1728 | return false; |
1729 | |
1730 | // All clear, widen the COPY. |
1731 | LLVM_DEBUG(dbgs() << "widening: " << MI); |
1732 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); |
1733 | |
1734 | // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg |
1735 | // or some other super-register. |
1736 | int ImpDefIdx = MI.findRegisterDefOperandIdx(Reg: DstRegD, /*TRI=*/nullptr); |
1737 | if (ImpDefIdx != -1) |
1738 | MI.removeOperand(OpNo: ImpDefIdx); |
1739 | |
1740 | // Change the opcode and operands. |
1741 | MI.setDesc(get(Opcode: ARM::VMOVD)); |
1742 | MI.getOperand(i: 0).setReg(DstRegD); |
1743 | MI.getOperand(i: 1).setReg(SrcRegD); |
1744 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
1745 | |
1746 | // We are now reading SrcRegD instead of SrcRegS. This may upset the |
1747 | // register scavenger and machine verifier, so we need to indicate that we |
1748 | // are reading an undefined value from SrcRegD, but a proper value from |
1749 | // SrcRegS. |
1750 | MI.getOperand(i: 1).setIsUndef(); |
1751 | MIB.addReg(RegNo: SrcRegS, flags: RegState::Implicit); |
1752 | |
1753 | // SrcRegD may actually contain an unrelated value in the ssub_1 |
1754 | // sub-register. Don't kill it. Only kill the ssub_0 sub-register. |
1755 | if (MI.getOperand(i: 1).isKill()) { |
1756 | MI.getOperand(i: 1).setIsKill(false); |
1757 | MI.addRegisterKilled(IncomingReg: SrcRegS, RegInfo: TRI, AddIfNotFound: true); |
1758 | } |
1759 | |
1760 | LLVM_DEBUG(dbgs() << "replaced by: " << MI); |
1761 | return true; |
1762 | } |
1763 | |
1764 | /// Create a copy of a const pool value. Update CPI to the new index and return |
1765 | /// the label UID. |
1766 | static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { |
1767 | MachineConstantPool *MCP = MF.getConstantPool(); |
1768 | ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); |
1769 | |
1770 | const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; |
1771 | assert(MCPE.isMachineConstantPoolEntry() && |
1772 | "Expecting a machine constantpool entry!" ); |
1773 | ARMConstantPoolValue *ACPV = |
1774 | static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); |
1775 | |
1776 | unsigned PCLabelId = AFI->createPICLabelUId(); |
1777 | ARMConstantPoolValue *NewCPV = nullptr; |
1778 | |
1779 | // FIXME: The below assumes PIC relocation model and that the function |
1780 | // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and |
1781 |   // zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR |
1782 | // instructions, so that's probably OK, but is PIC always correct when |
1783 | // we get here? |
1784 | if (ACPV->isGlobalValue()) |
1785 | NewCPV = ARMConstantPoolConstant::Create( |
1786 | C: cast<ARMConstantPoolConstant>(Val: ACPV)->getGV(), ID: PCLabelId, Kind: ARMCP::CPValue, |
1787 | PCAdj: 4, Modifier: ACPV->getModifier(), AddCurrentAddress: ACPV->mustAddCurrentAddress()); |
1788 | else if (ACPV->isExtSymbol()) |
1789 | NewCPV = ARMConstantPoolSymbol:: |
1790 | Create(C&: MF.getFunction().getContext(), |
1791 | s: cast<ARMConstantPoolSymbol>(Val: ACPV)->getSymbol(), ID: PCLabelId, PCAdj: 4); |
1792 | else if (ACPV->isBlockAddress()) |
1793 | NewCPV = ARMConstantPoolConstant:: |
1794 | Create(C: cast<ARMConstantPoolConstant>(Val: ACPV)->getBlockAddress(), ID: PCLabelId, |
1795 | Kind: ARMCP::CPBlockAddress, PCAdj: 4); |
1796 | else if (ACPV->isLSDA()) |
1797 | NewCPV = ARMConstantPoolConstant::Create(C: &MF.getFunction(), ID: PCLabelId, |
1798 | Kind: ARMCP::CPLSDA, PCAdj: 4); |
1799 | else if (ACPV->isMachineBasicBlock()) |
1800 | NewCPV = ARMConstantPoolMBB:: |
1801 | Create(C&: MF.getFunction().getContext(), |
1802 | mbb: cast<ARMConstantPoolMBB>(Val: ACPV)->getMBB(), ID: PCLabelId, PCAdj: 4); |
1803 | else |
1804 | llvm_unreachable("Unexpected ARM constantpool value type!!" ); |
1805 | CPI = MCP->getConstantPoolIndex(V: NewCPV, Alignment: MCPE.getAlign()); |
1806 | return PCLabelId; |
1807 | } |
1808 | |
1809 | void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB, |
1810 | MachineBasicBlock::iterator I, |
1811 | Register DestReg, unsigned SubIdx, |
1812 | const MachineInstr &Orig, |
1813 | const TargetRegisterInfo &TRI) const { |
1814 | unsigned Opcode = Orig.getOpcode(); |
1815 | switch (Opcode) { |
1816 | default: { |
1817 | MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig: &Orig); |
1818 | MI->substituteRegister(FromReg: Orig.getOperand(i: 0).getReg(), ToReg: DestReg, SubIdx, RegInfo: TRI); |
1819 | MBB.insert(I, MI); |
1820 | break; |
1821 | } |
1822 | case ARM::tLDRpci_pic: |
1823 | case ARM::t2LDRpci_pic: { |
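|     // The constant-pool entry of a PIC load encodes a per-use PC label, so |
|     // each rematerialized copy gets its own clone of the entry with a fresh |
|     // label via duplicateCPV above. |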
1824 | MachineFunction &MF = *MBB.getParent(); |
1825 | unsigned CPI = Orig.getOperand(i: 1).getIndex(); |
1826 | unsigned PCLabelId = duplicateCPV(MF, CPI); |
1827 | BuildMI(BB&: MBB, I, MIMD: Orig.getDebugLoc(), MCID: get(Opcode), DestReg) |
1828 | .addConstantPoolIndex(Idx: CPI) |
1829 | .addImm(Val: PCLabelId) |
1830 | .cloneMemRefs(OtherMI: Orig); |
1831 | break; |
1832 | } |
1833 | } |
1834 | } |
1835 | |
1836 | MachineInstr & |
1837 | ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB, |
1838 | MachineBasicBlock::iterator InsertBefore, |
1839 | const MachineInstr &Orig) const { |
1840 | MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig); |
1841 | MachineBasicBlock::instr_iterator I = Cloned.getIterator(); |
1842 | for (;;) { |
1843 | switch (I->getOpcode()) { |
1844 | case ARM::tLDRpci_pic: |
1845 | case ARM::t2LDRpci_pic: { |
1846 | MachineFunction &MF = *MBB.getParent(); |
1847 | unsigned CPI = I->getOperand(i: 1).getIndex(); |
1848 | unsigned PCLabelId = duplicateCPV(MF, CPI); |
1849 | I->getOperand(i: 1).setIndex(CPI); |
1850 | I->getOperand(i: 2).setImm(PCLabelId); |
1851 | break; |
1852 | } |
1853 | } |
1854 | if (!I->isBundledWithSucc()) |
1855 | break; |
1856 | ++I; |
1857 | } |
1858 | return Cloned; |
1859 | } |
1860 | |
1861 | bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, |
1862 | const MachineInstr &MI1, |
1863 | const MachineRegisterInfo *MRI) const { |
1864 | unsigned Opcode = MI0.getOpcode(); |
1865 | if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic || |
1866 | Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic || |
1867 | Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr || |
1868 | Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel || |
1869 | Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || |
1870 | Opcode == ARM::t2MOV_ga_pcrel) { |
1871 | if (MI1.getOpcode() != Opcode) |
1872 | return false; |
1873 | if (MI0.getNumOperands() != MI1.getNumOperands()) |
1874 | return false; |
1875 | |
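|     // Operand 1 is either a constant-pool index or a global-address operand; |
|     // compare the underlying value rather than the PC label, which differs |
|     // between otherwise-identical copies. |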
1876 | const MachineOperand &MO0 = MI0.getOperand(i: 1); |
1877 | const MachineOperand &MO1 = MI1.getOperand(i: 1); |
1878 | if (MO0.getOffset() != MO1.getOffset()) |
1879 | return false; |
1880 | |
1881 | if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr || |
1882 | Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel || |
1883 | Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr || |
1884 | Opcode == ARM::t2MOV_ga_pcrel) |
1885 | // Ignore the PC labels. |
1886 | return MO0.getGlobal() == MO1.getGlobal(); |
1887 | |
1888 | const MachineFunction *MF = MI0.getParent()->getParent(); |
1889 | const MachineConstantPool *MCP = MF->getConstantPool(); |
1890 | int CPI0 = MO0.getIndex(); |
1891 | int CPI1 = MO1.getIndex(); |
1892 | const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; |
1893 | const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; |
1894 | bool isARMCP0 = MCPE0.isMachineConstantPoolEntry(); |
1895 | bool isARMCP1 = MCPE1.isMachineConstantPoolEntry(); |
1896 | if (isARMCP0 && isARMCP1) { |
1897 | ARMConstantPoolValue *ACPV0 = |
1898 | static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); |
1899 | ARMConstantPoolValue *ACPV1 = |
1900 | static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); |
1901 | return ACPV0->hasSameValue(ACPV: ACPV1); |
1902 | } else if (!isARMCP0 && !isARMCP1) { |
1903 | return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal; |
1904 | } |
1905 | return false; |
1906 | } else if (Opcode == ARM::PICLDR) { |
1907 | if (MI1.getOpcode() != Opcode) |
1908 | return false; |
1909 | if (MI0.getNumOperands() != MI1.getNumOperands()) |
1910 | return false; |
1911 | |
1912 | Register Addr0 = MI0.getOperand(i: 1).getReg(); |
1913 | Register Addr1 = MI1.getOperand(i: 1).getReg(); |
1914 | if (Addr0 != Addr1) { |
1915 | if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual()) |
1916 | return false; |
1917 | |
1918 | // This assumes SSA form. |
1919 | MachineInstr *Def0 = MRI->getVRegDef(Reg: Addr0); |
1920 | MachineInstr *Def1 = MRI->getVRegDef(Reg: Addr1); |
1921 |       // Check if the loaded value, e.g. a constant-pool entry or a global |
1922 |       // address, is the same. |
1923 | if (!produceSameValue(MI0: *Def0, MI1: *Def1, MRI)) |
1924 | return false; |
1925 | } |
1926 | |
1927 | for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) { |
1928 | // %12 = PICLDR %11, 0, 14, %noreg |
1929 | const MachineOperand &MO0 = MI0.getOperand(i); |
1930 | const MachineOperand &MO1 = MI1.getOperand(i); |
1931 | if (!MO0.isIdenticalTo(Other: MO1)) |
1932 | return false; |
1933 | } |
1934 | return true; |
1935 | } |
1936 | |
1937 | return MI0.isIdenticalTo(Other: MI1, Check: MachineInstr::IgnoreVRegDefs); |
1938 | } |
1939 | |
1940 | /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to |
1941 | /// determine if two loads are loading from the same base address. It should |
1942 | /// only return true if the base pointers are the same and the only differences |
1943 | /// between the two addresses is the offset. It also returns the offsets by |
1944 | /// reference. |
1945 | /// |
1946 | /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched |
1947 | /// is permanently disabled. |
1948 | bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, |
1949 | int64_t &Offset1, |
1950 | int64_t &Offset2) const { |
1951 | // Don't worry about Thumb: just ARM and Thumb2. |
1952 | if (Subtarget.isThumb1Only()) return false; |
1953 | |
1954 | if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) |
1955 | return false; |
1956 | |
1957 | auto IsLoadOpcode = [&](unsigned Opcode) { |
1958 | switch (Opcode) { |
1959 | default: |
1960 | return false; |
1961 | case ARM::LDRi12: |
1962 | case ARM::LDRBi12: |
1963 | case ARM::LDRD: |
1964 | case ARM::LDRH: |
1965 | case ARM::LDRSB: |
1966 | case ARM::LDRSH: |
1967 | case ARM::VLDRD: |
1968 | case ARM::VLDRS: |
1969 | case ARM::t2LDRi8: |
1970 | case ARM::t2LDRBi8: |
1971 | case ARM::t2LDRDi8: |
1972 | case ARM::t2LDRSHi8: |
1973 | case ARM::t2LDRi12: |
1974 | case ARM::t2LDRBi12: |
1975 | case ARM::t2LDRSHi12: |
1976 | return true; |
1977 | } |
1978 | }; |
1979 | |
1980 | if (!IsLoadOpcode(Load1->getMachineOpcode()) || |
1981 | !IsLoadOpcode(Load2->getMachineOpcode())) |
1982 | return false; |
1983 | |
1984 | // Check if base addresses and chain operands match. |
1985 | if (Load1->getOperand(Num: 0) != Load2->getOperand(Num: 0) || |
1986 | Load1->getOperand(Num: 4) != Load2->getOperand(Num: 4)) |
1987 | return false; |
1988 | |
1989 | // Index should be Reg0. |
1990 | if (Load1->getOperand(Num: 3) != Load2->getOperand(Num: 3)) |
1991 | return false; |
1992 | |
1993 | // Determine the offsets. |
1994 | if (isa<ConstantSDNode>(Val: Load1->getOperand(Num: 1)) && |
1995 | isa<ConstantSDNode>(Val: Load2->getOperand(Num: 1))) { |
1996 | Offset1 = cast<ConstantSDNode>(Val: Load1->getOperand(Num: 1))->getSExtValue(); |
1997 | Offset2 = cast<ConstantSDNode>(Val: Load2->getOperand(Num: 1))->getSExtValue(); |
1998 | return true; |
1999 | } |
2000 | |
2001 | return false; |
2002 | } |
2003 | |
2004 | /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to |
2005 | /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should |
2006 | /// be scheduled together. On some targets if two loads are loading from |
2007 | /// addresses in the same cache line, it's better if they are scheduled |
2008 | /// together. This function takes two integers that represent the load offsets |
2009 | /// from the common base address. It returns true if it decides it's desirable |
2010 | /// to schedule the two loads together. "NumLoads" is the number of loads that |
2011 | /// have already been scheduled after Load1. |
2012 | /// |
2013 | /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched |
2014 | /// is permanently disabled. |
2015 | bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, |
2016 | int64_t Offset1, int64_t Offset2, |
2017 | unsigned NumLoads) const { |
2018 | // Don't worry about Thumb: just ARM and Thumb2. |
2019 | if (Subtarget.isThumb1Only()) return false; |
2020 | |
2021 | assert(Offset2 > Offset1); |
2022 | |
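|   // Don't bother if the two loads are far apart (more than roughly 512 |
|   // bytes); they are unlikely to be in the same cache line anyway. |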
2023 | if ((Offset2 - Offset1) / 8 > 64) |
2024 | return false; |
2025 | |
2026 |   // Check whether the machine opcodes are different. If they differ, the |
2027 |   // loads are considered not to share a base address, EXCEPT in the case of |
2028 |   // Thumb2 byte loads where one is t2LDRBi8 and the other t2LDRBi12. Those |
2029 |   // are treated as the same because they are merely different encoding |
2030 |   // forms of the same basic instruction. |
2031 | if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) && |
2032 | !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && |
2033 | Load2->getMachineOpcode() == ARM::t2LDRBi12) || |
2034 | (Load1->getMachineOpcode() == ARM::t2LDRBi12 && |
2035 | Load2->getMachineOpcode() == ARM::t2LDRBi8))) |
2036 | return false; // FIXME: overly conservative? |
2037 | |
2038 | // Four loads in a row should be sufficient. |
2039 | if (NumLoads >= 3) |
2040 | return false; |
2041 | |
2042 | return true; |
2043 | } |
2044 | |
2045 | bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI, |
2046 | const MachineBasicBlock *MBB, |
2047 | const MachineFunction &MF) const { |
2048 | // Debug info is never a scheduling boundary. It's necessary to be explicit |
2049 | // due to the special treatment of IT instructions below, otherwise a |
2050 | // dbg_value followed by an IT will result in the IT instruction being |
2051 | // considered a scheduling hazard, which is wrong. It should be the actual |
2052 | // instruction preceding the dbg_value instruction(s), just like it is |
2053 | // when debug info is not present. |
2054 | if (MI.isDebugInstr()) |
2055 | return false; |
2056 | |
2057 | // Terminators and labels can't be scheduled around. |
2058 | if (MI.isTerminator() || MI.isPosition()) |
2059 | return true; |
2060 | |
2061 | // INLINEASM_BR can jump to another block |
2062 | if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) |
2063 | return true; |
2064 | |
2065 | if (isSEHInstruction(MI)) |
2066 | return true; |
2067 | |
2068 | // Treat the start of the IT block as a scheduling boundary, but schedule |
2069 | // t2IT along with all instructions following it. |
2070 | // FIXME: This is a big hammer. But the alternative is to add all potential |
2071 | // true and anti dependencies to IT block instructions as implicit operands |
2072 | // to the t2IT instruction. The added compile time and complexity does not |
2073 | // seem worth it. |
2074 | MachineBasicBlock::const_iterator I = MI; |
2075 | // Make sure to skip any debug instructions |
2076 | while (++I != MBB->end() && I->isDebugInstr()) |
2077 | ; |
2078 | if (I != MBB->end() && I->getOpcode() == ARM::t2IT) |
2079 | return true; |
2080 | |
2081 | // Don't attempt to schedule around any instruction that defines |
2082 | // a stack-oriented pointer, as it's unlikely to be profitable. This |
2083 | // saves compile time, because it doesn't require every single |
2084 | // stack slot reference to depend on the instruction that does the |
2085 | // modification. |
2086 | // Calls don't actually change the stack pointer, even if they have imp-defs. |
2087 | // No ARM calling conventions change the stack pointer. (X86 calling |
2088 | // conventions sometimes do). |
2089 | if (!MI.isCall() && MI.definesRegister(Reg: ARM::SP, /*TRI=*/nullptr)) |
2090 | return true; |
2091 | |
2092 | return false; |
2093 | } |
2094 | |
2095 | bool ARMBaseInstrInfo:: |
2096 | isProfitableToIfCvt(MachineBasicBlock &MBB, |
2097 |                     unsigned NumCycles, unsigned ExtraPredCycles, |
2098 | BranchProbability Probability) const { |
2099 | if (!NumCycles) |
2100 | return false; |
2101 | |
2102 | // If we are optimizing for size, see if the branch in the predecessor can be |
2103 | // lowered to cbn?z by the constant island lowering pass, and return false if |
2104 | // so. This results in a shorter instruction sequence. |
2105 | if (MBB.getParent()->getFunction().hasOptSize()) { |
2106 | MachineBasicBlock *Pred = *MBB.pred_begin(); |
2107 | if (!Pred->empty()) { |
2108 | MachineInstr *LastMI = &*Pred->rbegin(); |
2109 | if (LastMI->getOpcode() == ARM::t2Bcc) { |
2110 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
2111 | MachineInstr *CmpMI = findCMPToFoldIntoCBZ(Br: LastMI, TRI); |
2112 | if (CmpMI) |
2113 | return false; |
2114 | } |
2115 | } |
2116 | } |
2117 | return isProfitableToIfCvt(TMBB&: MBB, NumT: NumCycles, ExtraT: ExtraPredCycles, |
2118 | FMBB&: MBB, NumF: 0, ExtraF: 0, Probability); |
2119 | } |
2120 | |
2121 | bool ARMBaseInstrInfo:: |
2122 | isProfitableToIfCvt(MachineBasicBlock &TBB, |
2123 |                     unsigned TCycles, unsigned TExtra, |
2124 |                     MachineBasicBlock &FBB, |
2125 |                     unsigned FCycles, unsigned FExtra, |
2126 | BranchProbability Probability) const { |
2127 | if (!TCycles) |
2128 | return false; |
2129 | |
2130 |   // In Thumb code we often end up trading one branch for an IT block, and |
2131 |   // if we end up cloning instructions this can increase code size. Prevent |
2132 |   // blocks with multiple predecessors from being if-converted, to avoid |
2133 |   // this cloning. |
2134 | if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) { |
2135 | if (TBB.pred_size() != 1 || FBB.pred_size() != 1) |
2136 | return false; |
2137 | } |
2138 | |
2139 | // Attempt to estimate the relative costs of predication versus branching. |
2140 |   // Here we scale up each component of UnpredCost to avoid precision issues |
2141 |   // when scaling TCycles/FCycles by Probability. |
2142 | const unsigned ScalingUpFactor = 1024; |
2143 | |
2144 | unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor; |
2145 | unsigned UnpredCost; |
2146 | if (!Subtarget.hasBranchPredictor()) { |
2147 | // When we don't have a branch predictor it's always cheaper to not take a |
2148 | // branch than take it, so we have to take that into account. |
2149 | unsigned NotTakenBranchCost = 1; |
2150 | unsigned TakenBranchCost = Subtarget.getMispredictionPenalty(); |
2151 | unsigned TUnpredCycles, FUnpredCycles; |
2152 | if (!FCycles) { |
2153 | // Triangle: TBB is the fallthrough |
2154 | TUnpredCycles = TCycles + NotTakenBranchCost; |
2155 | FUnpredCycles = TakenBranchCost; |
2156 | } else { |
2157 | // Diamond: TBB is the block that is branched to, FBB is the fallthrough |
2158 | TUnpredCycles = TCycles + TakenBranchCost; |
2159 | FUnpredCycles = FCycles + NotTakenBranchCost; |
2160 | // The branch at the end of FBB will disappear when it's predicated, so |
2161 | // discount it from PredCost. |
2162 | PredCost -= 1 * ScalingUpFactor; |
2163 | } |
2164 |     // The total cost is the cost of each path scaled by their probabilities. |
2165 | unsigned TUnpredCost = Probability.scale(Num: TUnpredCycles * ScalingUpFactor); |
2166 | unsigned FUnpredCost = Probability.getCompl().scale(Num: FUnpredCycles * ScalingUpFactor); |
2167 | UnpredCost = TUnpredCost + FUnpredCost; |
2168 |     // When predicating, assume that the first IT can be folded away but |
2169 |     // later ones cost one cycle each. |
2170 | if (Subtarget.isThumb2() && TCycles + FCycles > 4) { |
2171 | PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor; |
2172 | } |
2173 | } else { |
2174 | unsigned TUnpredCost = Probability.scale(Num: TCycles * ScalingUpFactor); |
2175 | unsigned FUnpredCost = |
2176 | Probability.getCompl().scale(Num: FCycles * ScalingUpFactor); |
2177 | UnpredCost = TUnpredCost + FUnpredCost; |
2178 | UnpredCost += 1 * ScalingUpFactor; // The branch itself |
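|     // Heuristically charge one tenth of the misprediction penalty to the |
|     // unpredicated cost, since the branch is only sometimes mispredicted. |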
2179 | UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; |
2180 | } |
2181 | |
2182 | return PredCost <= UnpredCost; |
2183 | } |
2184 | |
2185 | unsigned |
2186 | ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF, |
2187 |                                                    unsigned NumInsts) const { |
2188 | // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions. |
2189 | // ARM has a condition code field in every predicable instruction, using it |
2190 | // doesn't change code size. |
2191 | if (!Subtarget.isThumb2()) |
2192 | return 0; |
2193 | |
2194 |   // The size of an IT block may be restricted to a single instruction. |
2195 | unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4; |
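|   // Each IT instruction is 2 bytes, so e.g. predicating 6 instructions with |
|   // MaxInsts == 4 takes two ITs, i.e. 4 extra bytes. |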
2196 | return divideCeil(Numerator: NumInsts, Denominator: MaxInsts) * 2; |
2197 | } |
2198 | |
2199 | unsigned |
2200 | ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const { |
2201 | // If this branch is likely to be folded into the comparison to form a |
2202 | // CB(N)Z, then removing it won't reduce code size at all, because that will |
2203 | // just replace the CB(N)Z with a CMP. |
2204 | if (MI.getOpcode() == ARM::t2Bcc && |
2205 | findCMPToFoldIntoCBZ(Br: &MI, TRI: &getRegisterInfo())) |
2206 | return 0; |
2207 | |
2208 | unsigned Size = getInstSizeInBytes(MI); |
2209 | |
2210 | // For Thumb2, all branches are 32-bit instructions during the if conversion |
2211 | // pass, but may be replaced with 16-bit instructions during size reduction. |
2212 | // Since the branches considered by if conversion tend to be forward branches |
2213 | // over small basic blocks, they are very likely to be in range for the |
2214 | // narrow instructions, so we assume the final code size will be half what it |
2215 | // currently is. |
2216 | if (Subtarget.isThumb2()) |
2217 | Size /= 2; |
2218 | |
2219 | return Size; |
2220 | } |
2221 | |
2222 | bool |
2223 | ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, |
2224 | MachineBasicBlock &FMBB) const { |
2225 | // Reduce false anti-dependencies to let the target's out-of-order execution |
2226 | // engine do its thing. |
2227 | return Subtarget.isProfitableToUnpredicate(); |
2228 | } |
2229 | |
2230 | /// getInstrPredicate - If instruction is predicated, returns its predicate |
2231 | /// condition, otherwise returns AL. It also returns the condition code |
2232 | /// register by reference. |
2233 | ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI, |
2234 | Register &PredReg) { |
2235 | int PIdx = MI.findFirstPredOperandIdx(); |
2236 | if (PIdx == -1) { |
2237 | PredReg = 0; |
2238 | return ARMCC::AL; |
2239 | } |
2240 | |
2241 | PredReg = MI.getOperand(i: PIdx+1).getReg(); |
2242 | return (ARMCC::CondCodes)MI.getOperand(i: PIdx).getImm(); |
2243 | } |
2244 | |
2245 | unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) { |
2246 | if (Opc == ARM::B) |
2247 | return ARM::Bcc; |
2248 | if (Opc == ARM::tB) |
2249 | return ARM::tBcc; |
2250 | if (Opc == ARM::t2B) |
2251 | return ARM::t2Bcc; |
2252 | |
2253 | llvm_unreachable("Unknown unconditional branch opcode!" ); |
2254 | } |
2255 | |
2256 | MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI, |
2257 | bool NewMI, |
2258 | unsigned OpIdx1, |
2259 | unsigned OpIdx2) const { |
2260 | switch (MI.getOpcode()) { |
2261 | case ARM::MOVCCr: |
2262 | case ARM::t2MOVCCr: { |
2263 | // MOVCC can be commuted by inverting the condition. |
2264 | Register PredReg; |
2265 | ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); |
2266 | // MOVCC AL can't be inverted. Shouldn't happen. |
2267 | if (CC == ARMCC::AL || PredReg != ARM::CPSR) |
2268 | return nullptr; |
2269 | MachineInstr *CommutedMI = |
2270 | TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
2271 | if (!CommutedMI) |
2272 | return nullptr; |
2273 | // After swapping the MOVCC operands, also invert the condition. |
2274 | CommutedMI->getOperand(i: CommutedMI->findFirstPredOperandIdx()) |
2275 | .setImm(ARMCC::getOppositeCondition(CC)); |
2276 | return CommutedMI; |
2277 | } |
2278 | } |
2279 | return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
2280 | } |
2281 | |
2282 | /// Identify instructions that can be folded into a MOVCC instruction, and |
2283 | /// return the defining instruction. |
2284 | MachineInstr * |
2285 | ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI, |
2286 | const TargetInstrInfo *TII) const { |
2287 | if (!Reg.isVirtual()) |
2288 | return nullptr; |
2289 | if (!MRI.hasOneNonDBGUse(RegNo: Reg)) |
2290 | return nullptr; |
2291 | MachineInstr *MI = MRI.getVRegDef(Reg); |
2292 | if (!MI) |
2293 | return nullptr; |
2294 | // Check if MI can be predicated and folded into the MOVCC. |
2295 | if (!isPredicable(MI: *MI)) |
2296 | return nullptr; |
2297 | // Check if MI has any non-dead defs or physreg uses. This also detects |
2298 | // predicated instructions which will be reading CPSR. |
2299 | for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands(), N: 1)) { |
2300 | // Reject frame index operands, PEI can't handle the predicated pseudos. |
2301 |     // Reject frame index operands; PEI can't handle the predicated pseudos. |
2302 | return nullptr; |
2303 | if (!MO.isReg()) |
2304 | continue; |
2305 | // MI can't have any tied operands, that would conflict with predication. |
2306 | if (MO.isTied()) |
2307 | return nullptr; |
2308 | if (MO.getReg().isPhysical()) |
2309 | return nullptr; |
2310 | if (MO.isDef() && !MO.isDead()) |
2311 | return nullptr; |
2312 | } |
2313 | bool DontMoveAcrossStores = true; |
2314 | if (!MI->isSafeToMove(/* AliasAnalysis = */ AA: nullptr, SawStore&: DontMoveAcrossStores)) |
2315 | return nullptr; |
2316 | return MI; |
2317 | } |
2318 | |
2319 | bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI, |
2320 | SmallVectorImpl<MachineOperand> &Cond, |
2321 | unsigned &TrueOp, unsigned &FalseOp, |
2322 | bool &Optimizable) const { |
2323 | assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && |
2324 | "Unknown select instruction" ); |
2325 | // MOVCC operands: |
2326 | // 0: Def. |
2327 | // 1: True use. |
2328 | // 2: False use. |
2329 | // 3: Condition code. |
2330 | // 4: CPSR use. |
2331 | TrueOp = 1; |
2332 | FalseOp = 2; |
2333 | Cond.push_back(Elt: MI.getOperand(i: 3)); |
2334 | Cond.push_back(Elt: MI.getOperand(i: 4)); |
2335 | // We can always fold a def. |
2336 | Optimizable = true; |
2337 | return false; |
2338 | } |
2339 | |
2340 | MachineInstr * |
2341 | ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI, |
2342 | SmallPtrSetImpl<MachineInstr *> &SeenMIs, |
2343 | bool PreferFalse) const { |
2344 | assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) && |
2345 | "Unknown select instruction" ); |
2346 | MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); |
2347 | MachineInstr *DefMI = canFoldIntoMOVCC(Reg: MI.getOperand(i: 2).getReg(), MRI, TII: this); |
2348 | bool Invert = !DefMI; |
2349 | if (!DefMI) |
2350 | DefMI = canFoldIntoMOVCC(Reg: MI.getOperand(i: 1).getReg(), MRI, TII: this); |
2351 | if (!DefMI) |
2352 | return nullptr; |
2353 | |
2354 | // Find new register class to use. |
2355 | MachineOperand FalseReg = MI.getOperand(i: Invert ? 2 : 1); |
2356 | MachineOperand TrueReg = MI.getOperand(i: Invert ? 1 : 2); |
2357 | Register DestReg = MI.getOperand(i: 0).getReg(); |
2358 | const TargetRegisterClass *FalseClass = MRI.getRegClass(Reg: FalseReg.getReg()); |
2359 | const TargetRegisterClass *TrueClass = MRI.getRegClass(Reg: TrueReg.getReg()); |
2360 | if (!MRI.constrainRegClass(Reg: DestReg, RC: FalseClass)) |
2361 | return nullptr; |
2362 | if (!MRI.constrainRegClass(Reg: DestReg, RC: TrueClass)) |
2363 | return nullptr; |
2364 | |
2365 | // Create a new predicated version of DefMI. |
2366 | // Rfalse is the first use. |
2367 | MachineInstrBuilder NewMI = |
2368 | BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(), MCID: DefMI->getDesc(), DestReg); |
2369 | |
2370 | // Copy all the DefMI operands, excluding its (null) predicate. |
2371 | const MCInstrDesc &DefDesc = DefMI->getDesc(); |
2372 | for (unsigned i = 1, e = DefDesc.getNumOperands(); |
2373 | i != e && !DefDesc.operands()[i].isPredicate(); ++i) |
2374 | NewMI.add(MO: DefMI->getOperand(i)); |
2375 | |
2376 | unsigned CondCode = MI.getOperand(i: 3).getImm(); |
2377 | if (Invert) |
2378 | NewMI.addImm(Val: ARMCC::getOppositeCondition(CC: ARMCC::CondCodes(CondCode))); |
2379 | else |
2380 | NewMI.addImm(Val: CondCode); |
2381 | NewMI.add(MO: MI.getOperand(i: 4)); |
2382 | |
2383 | // DefMI is not the -S version that sets CPSR, so add an optional %noreg. |
2384 | if (NewMI->hasOptionalDef()) |
2385 | NewMI.add(MO: condCodeOp()); |
2386 | |
2387 | // The output register value when the predicate is false is an implicit |
2388 | // register operand tied to the first def. |
2389 | // The tie makes the register allocator ensure the FalseReg is allocated the |
2390 | // same register as operand 0. |
2391 | FalseReg.setImplicit(); |
2392 | NewMI.add(MO: FalseReg); |
2393 | NewMI->tieOperands(DefIdx: 0, UseIdx: NewMI->getNumOperands() - 1); |
2394 | |
2395 | // Update SeenMIs set: register newly created MI and erase removed DefMI. |
2396 | SeenMIs.insert(Ptr: NewMI); |
2397 | SeenMIs.erase(Ptr: DefMI); |
2398 | |
2399 | // If MI is inside a loop, and DefMI is outside the loop, then kill flags on |
2400 |   // DefMI would be invalid when transferred inside the loop. Checking for a |
2401 | // loop is expensive, but at least remove kill flags if they are in different |
2402 | // BBs. |
2403 | if (DefMI->getParent() != MI.getParent()) |
2404 | NewMI->clearKillInfo(); |
2405 | |
2406 | // The caller will erase MI, but not DefMI. |
2407 | DefMI->eraseFromParent(); |
2408 | return NewMI; |
2409 | } |
2410 | |
2411 | /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the |
2412 | /// instruction is encoded with an 'S' bit is determined by the optional CPSR |
2413 | /// def operand. |
2414 | /// |
2415 | /// This will go away once we can teach tblgen how to set the optional CPSR def |
2416 | /// operand itself. |
2417 | struct AddSubFlagsOpcodePair { |
2418 | uint16_t PseudoOpc; |
2419 | uint16_t MachineOpc; |
2420 | }; |
2421 | |
2422 | static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { |
2423 | {.PseudoOpc: ARM::ADDSri, .MachineOpc: ARM::ADDri}, |
2424 | {.PseudoOpc: ARM::ADDSrr, .MachineOpc: ARM::ADDrr}, |
2425 | {.PseudoOpc: ARM::ADDSrsi, .MachineOpc: ARM::ADDrsi}, |
2426 | {.PseudoOpc: ARM::ADDSrsr, .MachineOpc: ARM::ADDrsr}, |
2427 | |
2428 | {.PseudoOpc: ARM::SUBSri, .MachineOpc: ARM::SUBri}, |
2429 | {.PseudoOpc: ARM::SUBSrr, .MachineOpc: ARM::SUBrr}, |
2430 | {.PseudoOpc: ARM::SUBSrsi, .MachineOpc: ARM::SUBrsi}, |
2431 | {.PseudoOpc: ARM::SUBSrsr, .MachineOpc: ARM::SUBrsr}, |
2432 | |
2433 | {.PseudoOpc: ARM::RSBSri, .MachineOpc: ARM::RSBri}, |
2434 | {.PseudoOpc: ARM::RSBSrsi, .MachineOpc: ARM::RSBrsi}, |
2435 | {.PseudoOpc: ARM::RSBSrsr, .MachineOpc: ARM::RSBrsr}, |
2436 | |
2437 | {.PseudoOpc: ARM::tADDSi3, .MachineOpc: ARM::tADDi3}, |
2438 | {.PseudoOpc: ARM::tADDSi8, .MachineOpc: ARM::tADDi8}, |
2439 | {.PseudoOpc: ARM::tADDSrr, .MachineOpc: ARM::tADDrr}, |
2440 | {.PseudoOpc: ARM::tADCS, .MachineOpc: ARM::tADC}, |
2441 | |
2442 | {.PseudoOpc: ARM::tSUBSi3, .MachineOpc: ARM::tSUBi3}, |
2443 | {.PseudoOpc: ARM::tSUBSi8, .MachineOpc: ARM::tSUBi8}, |
2444 | {.PseudoOpc: ARM::tSUBSrr, .MachineOpc: ARM::tSUBrr}, |
2445 | {.PseudoOpc: ARM::tSBCS, .MachineOpc: ARM::tSBC}, |
2446 | {.PseudoOpc: ARM::tRSBS, .MachineOpc: ARM::tRSB}, |
2447 | {.PseudoOpc: ARM::tLSLSri, .MachineOpc: ARM::tLSLri}, |
2448 | |
2449 | {.PseudoOpc: ARM::t2ADDSri, .MachineOpc: ARM::t2ADDri}, |
2450 | {.PseudoOpc: ARM::t2ADDSrr, .MachineOpc: ARM::t2ADDrr}, |
2451 | {.PseudoOpc: ARM::t2ADDSrs, .MachineOpc: ARM::t2ADDrs}, |
2452 | |
2453 | {.PseudoOpc: ARM::t2SUBSri, .MachineOpc: ARM::t2SUBri}, |
2454 | {.PseudoOpc: ARM::t2SUBSrr, .MachineOpc: ARM::t2SUBrr}, |
2455 | {.PseudoOpc: ARM::t2SUBSrs, .MachineOpc: ARM::t2SUBrs}, |
2456 | |
2457 | {.PseudoOpc: ARM::t2RSBSri, .MachineOpc: ARM::t2RSBri}, |
2458 | {.PseudoOpc: ARM::t2RSBSrs, .MachineOpc: ARM::t2RSBrs}, |
2459 | }; |
2460 | |
2461 | unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { |
2462 | for (const auto &Entry : AddSubFlagsOpcodeMap) |
2463 | if (OldOpc == Entry.PseudoOpc) |
2464 | return Entry.MachineOpc; |
2465 | return 0; |
2466 | } |
2467 | |
2468 | void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, |
2469 | MachineBasicBlock::iterator &MBBI, |
2470 | const DebugLoc &dl, Register DestReg, |
2471 | Register BaseReg, int NumBytes, |
2472 | ARMCC::CondCodes Pred, Register PredReg, |
2473 | const ARMBaseInstrInfo &TII, |
2474 | unsigned MIFlags) { |
2475 | if (NumBytes == 0 && DestReg != BaseReg) { |
2476 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::MOVr), DestReg) |
2477 | .addReg(RegNo: BaseReg, flags: RegState::Kill) |
2478 | .add(MOs: predOps(Pred, PredReg)) |
2479 | .add(MO: condCodeOp()) |
2480 | .setMIFlags(MIFlags); |
2481 | return; |
2482 | } |
2483 | |
2484 | bool isSub = NumBytes < 0; |
2485 | if (isSub) NumBytes = -NumBytes; |
2486 | |
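|   // Peel off one shifter-operand-encodable chunk of the immediate per |
|   // iteration, emitting a separate ADDri/SUBri for each chunk. |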
2487 | while (NumBytes) { |
2488 | unsigned RotAmt = ARM_AM::getSOImmValRotate(Imm: NumBytes); |
2489 | unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(V: 0xFF, R: RotAmt); |
2490 | assert(ThisVal && "Didn't extract field correctly" ); |
2491 | |
2492 | // We will handle these bits from offset, clear them. |
2493 | NumBytes &= ~ThisVal; |
2494 | |
2495 | assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?" ); |
2496 | |
2497 | // Build the new ADD / SUB. |
2498 | unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; |
2499 | BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: Opc), DestReg) |
2500 | .addReg(RegNo: BaseReg, flags: RegState::Kill) |
2501 | .addImm(Val: ThisVal) |
2502 | .add(MOs: predOps(Pred, PredReg)) |
2503 | .add(MO: condCodeOp()) |
2504 | .setMIFlags(MIFlags); |
2505 | BaseReg = DestReg; |
2506 | } |
2507 | } |
2508 | |
2509 | bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, |
2510 | MachineFunction &MF, MachineInstr *MI, |
2511 | unsigned NumBytes) { |
2512 |   // This optimisation potentially adds lots of load and store |
2513 |   // micro-operations, so it's only really beneficial for code size. |
2514 | if (!Subtarget.hasMinSize()) |
2515 | return false; |
2516 | |
2517 | // If only one register is pushed/popped, LLVM can use an LDR/STR |
2518 | // instead. We can't modify those so make sure we're dealing with an |
2519 | // instruction we understand. |
2520 | bool IsPop = isPopOpcode(Opc: MI->getOpcode()); |
2521 | bool IsPush = isPushOpcode(Opc: MI->getOpcode()); |
2522 | if (!IsPush && !IsPop) |
2523 | return false; |
2524 | |
2525 | bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD || |
2526 | MI->getOpcode() == ARM::VLDMDIA_UPD; |
2527 | bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH || |
2528 | MI->getOpcode() == ARM::tPOP || |
2529 | MI->getOpcode() == ARM::tPOP_RET; |
2530 | |
2531 | assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP && |
2532 | MI->getOperand(1).getReg() == ARM::SP)) && |
2533 | "trying to fold sp update into non-sp-updating push/pop" ); |
2534 | |
2535 |   // The VFP push & pop act on D-registers, so we can only correctly fold an |
2536 |   // adjustment that is a multiple of 8 bytes. Similarly, GPRs are 4 bytes |
2537 |   // each. Don't try the fold if this is violated. |
2538 | if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0) |
2539 | return false; |
2540 | |
2541 | // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ |
2542 | // pred) so the list starts at 4. Thumb1 starts after the predicate. |
2543 | int RegListIdx = IsT1PushPop ? 2 : 4; |
2544 | |
2545 | // Calculate the space we'll need in terms of registers. |
2546 | unsigned RegsNeeded; |
2547 | const TargetRegisterClass *RegClass; |
2548 | if (IsVFPPushPop) { |
2549 | RegsNeeded = NumBytes / 8; |
2550 | RegClass = &ARM::DPRRegClass; |
2551 | } else { |
2552 | RegsNeeded = NumBytes / 4; |
2553 | RegClass = &ARM::GPRRegClass; |
2554 | } |
2555 | |
2556 | // We're going to have to strip all list operands off before |
2557 | // re-adding them since the order matters, so save the existing ones |
2558 | // for later. |
2559 | SmallVector<MachineOperand, 4> RegList; |
2560 | |
2561 | // We're also going to need the first register transferred by this |
2562 | // instruction, which won't necessarily be the first register in the list. |
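|   // FirstRegEnc starts at the largest unsigned value (-1 wraps around), so |
|   // any real register encoding found in the list below compares smaller. |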
2563 | unsigned FirstRegEnc = -1; |
2564 | |
2565 | const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); |
2566 | for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) { |
2567 | MachineOperand &MO = MI->getOperand(i); |
2568 | RegList.push_back(Elt: MO); |
2569 | |
2570 | if (MO.isReg() && !MO.isImplicit() && |
2571 | TRI->getEncodingValue(RegNo: MO.getReg()) < FirstRegEnc) |
2572 | FirstRegEnc = TRI->getEncodingValue(RegNo: MO.getReg()); |
2573 | } |
2574 | |
2575 | const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(MF: &MF); |
2576 | |
2577 | // Now try to find enough space in the reglist to allocate NumBytes. |
2578 | for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded; |
2579 | --CurRegEnc) { |
2580 | unsigned CurReg = RegClass->getRegister(i: CurRegEnc); |
2581 | if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(RegNo: ARM::R7)) |
2582 | continue; |
2583 | if (!IsPop) { |
2584 |       // Pushing any register is completely harmless; mark the register |
2585 |       // involved as undef since we don't care about its value and must not |
2586 |       // restore it during stack unwinding. |
2587 | RegList.push_back(Elt: MachineOperand::CreateReg(Reg: CurReg, isDef: false, isImp: false, |
2588 | isKill: false, isDead: false, isUndef: true)); |
2589 | --RegsNeeded; |
2590 | continue; |
2591 | } |
2592 | |
2593 | // However, we can only pop an extra register if it's not live. For |
2594 | // registers live within the function we might clobber a return value |
2595 | // register; the other way a register can be live here is if it's |
2596 | // callee-saved. |
2597 | if (isCalleeSavedRegister(Reg: CurReg, CSRegs) || |
2598 | MI->getParent()->computeRegisterLiveness(TRI, Reg: CurReg, Before: MI) != |
2599 | MachineBasicBlock::LQR_Dead) { |
2600 | // VFP pops don't allow holes in the register list, so any skip is fatal |
2601 | // for our transformation. GPR pops do, so we should just keep looking. |
2602 | if (IsVFPPushPop) |
2603 | return false; |
2604 | else |
2605 | continue; |
2606 | } |
2607 | |
2608 | // Mark the unimportant registers as <def,dead> in the POP. |
2609 | RegList.push_back(Elt: MachineOperand::CreateReg(Reg: CurReg, isDef: true, isImp: false, isKill: false, |
2610 | isDead: true)); |
2611 | --RegsNeeded; |
2612 | } |
2613 | |
2614 | if (RegsNeeded > 0) |
2615 | return false; |
2616 | |
2617 | // Finally we know we can profitably perform the optimisation so go |
2618 | // ahead: strip all existing registers off and add them back again |
2619 | // in the right order. |
2620 | for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) |
2621 | MI->removeOperand(OpNo: i); |
2622 | |
2623 | // Add the complete list back in. |
2624 | MachineInstrBuilder MIB(MF, &*MI); |
2625 | for (const MachineOperand &MO : llvm::reverse(C&: RegList)) |
2626 | MIB.add(MO); |
2627 | |
2628 | return true; |
2629 | } |
2630 | |
2631 | bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, |
2632 | Register FrameReg, int &Offset, |
2633 | const ARMBaseInstrInfo &TII) { |
2634 | unsigned Opcode = MI.getOpcode(); |
2635 | const MCInstrDesc &Desc = MI.getDesc(); |
2636 | unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); |
2637 | bool isSub = false; |
2638 | |
2639 | // Memory operands in inline assembly always use AddrMode2. |
2640 | if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR) |
2641 | AddrMode = ARMII::AddrMode2; |
2642 | |
2643 | if (Opcode == ARM::ADDri) { |
2644 | Offset += MI.getOperand(i: FrameRegIdx+1).getImm(); |
2645 | if (Offset == 0) { |
2646 | // Turn it into a move. |
2647 | MI.setDesc(TII.get(Opcode: ARM::MOVr)); |
2648 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
2649 | MI.removeOperand(OpNo: FrameRegIdx+1); |
2650 | Offset = 0; |
2651 | return true; |
2652 | } else if (Offset < 0) { |
2653 | Offset = -Offset; |
2654 | isSub = true; |
2655 | MI.setDesc(TII.get(Opcode: ARM::SUBri)); |
2656 | } |
2657 | |
2658 | // Common case: small offset, fits into instruction. |
2659 | if (ARM_AM::getSOImmVal(Arg: Offset) != -1) { |
2660 | // Replace the FrameIndex with sp / fp |
2661 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
2662 | MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: Offset); |
2663 | Offset = 0; |
2664 | return true; |
2665 | } |
2666 | |
2667 | // Otherwise, pull as much of the immediate into this ADDri/SUBri |
2668 | // as possible. |
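// For example, with Offset == 0x1FF00 the rotate amount is 24, the chunk
// 0xFF00 is encoded into this instruction, and 0x10000 is left in Offset
// for the caller to materialize separately.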
2669 | unsigned RotAmt = ARM_AM::getSOImmValRotate(Imm: Offset); |
2670 | unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(V: 0xFF, R: RotAmt); |
2671 | |
2672 | // We will handle these bits from offset, clear them. |
2673 | Offset &= ~ThisImmVal; |
2674 | |
2675 | // Get the properly encoded SOImmVal field. |
2676 | assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 && |
2677 | "Bit extraction didn't work?" ); |
2678 | MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: ThisImmVal); |
2679 | } else { |
2680 | unsigned ImmIdx = 0; |
2681 | int InstrOffs = 0; |
2682 | unsigned NumBits = 0; |
2683 | unsigned Scale = 1; |
2684 | switch (AddrMode) { |
2685 | case ARMII::AddrMode_i12: |
2686 | ImmIdx = FrameRegIdx + 1; |
2687 | InstrOffs = MI.getOperand(i: ImmIdx).getImm(); |
2688 | NumBits = 12; |
2689 | break; |
2690 | case ARMII::AddrMode2: |
2691 | ImmIdx = FrameRegIdx+2; |
2692 | InstrOffs = ARM_AM::getAM2Offset(AM2Opc: MI.getOperand(i: ImmIdx).getImm()); |
2693 | if (ARM_AM::getAM2Op(AM2Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
2694 | InstrOffs *= -1; |
2695 | NumBits = 12; |
2696 | break; |
2697 | case ARMII::AddrMode3: |
2698 | ImmIdx = FrameRegIdx+2; |
2699 | InstrOffs = ARM_AM::getAM3Offset(AM3Opc: MI.getOperand(i: ImmIdx).getImm()); |
2700 | if (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
2701 | InstrOffs *= -1; |
2702 | NumBits = 8; |
2703 | break; |
2704 | case ARMII::AddrMode4: |
2705 | case ARMII::AddrMode6: |
2706 | // Can't fold any offset even if it's zero. |
2707 | return false; |
2708 | case ARMII::AddrMode5: |
2709 | ImmIdx = FrameRegIdx+1; |
2710 | InstrOffs = ARM_AM::getAM5Offset(AM5Opc: MI.getOperand(i: ImmIdx).getImm()); |
2711 | if (ARM_AM::getAM5Op(AM5Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
2712 | InstrOffs *= -1; |
2713 | NumBits = 8; |
2714 | Scale = 4; |
2715 | break; |
2716 | case ARMII::AddrMode5FP16: |
2717 | ImmIdx = FrameRegIdx+1; |
2718 | InstrOffs = ARM_AM::getAM5Offset(AM5Opc: MI.getOperand(i: ImmIdx).getImm()); |
2719 | if (ARM_AM::getAM5Op(AM5Opc: MI.getOperand(i: ImmIdx).getImm()) == ARM_AM::sub) |
2720 | InstrOffs *= -1; |
2721 | NumBits = 8; |
2722 | Scale = 2; |
2723 | break; |
2724 | case ARMII::AddrModeT2_i7: |
2725 | case ARMII::AddrModeT2_i7s2: |
2726 | case ARMII::AddrModeT2_i7s4: |
2727 | ImmIdx = FrameRegIdx+1; |
2728 | InstrOffs = MI.getOperand(i: ImmIdx).getImm(); |
2729 | NumBits = 7; |
2730 | Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 : |
2731 | AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1); |
2732 | break; |
2733 | default: |
2734 | llvm_unreachable("Unsupported addressing mode!" ); |
2735 | } |
2736 | |
2737 | Offset += InstrOffs * Scale; |
2738 | assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!" ); |
2739 | if (Offset < 0) { |
2740 | Offset = -Offset; |
2741 | isSub = true; |
2742 | } |
2743 | |
2744 | // Attempt to fold the address computation if the opcode has offset bits. |
2745 | if (NumBits > 0) { |
2746 | // Common case: small offset, fits into instruction. |
2747 | MachineOperand &ImmOp = MI.getOperand(i: ImmIdx); |
2748 | int ImmedOffset = Offset / Scale; |
2749 | unsigned Mask = (1 << NumBits) - 1; |
2750 | if ((unsigned)Offset <= Mask * Scale) { |
2751 | // Replace the FrameIndex with sp |
2752 | MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false); |
2753 | // FIXME: When addrmode2 goes away, this will simplify (like the |
2754 | // T2 version), as the LDR.i12 versions don't need the encoding |
2755 | // tricks for the offset value. |
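// For example, for AddrMode2/3 a subtracted offset of 4 is encoded below
// as (1 << NumBits) | 4, using the extra bit as the subtract flag, whereas
// AddrMode_i12 simply uses a negative immediate.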
2756 | if (isSub) { |
2757 | if (AddrMode == ARMII::AddrMode_i12) |
2758 | ImmedOffset = -ImmedOffset; |
2759 | else |
2760 | ImmedOffset |= 1 << NumBits; |
2761 | } |
2762 | ImmOp.ChangeToImmediate(ImmVal: ImmedOffset); |
2763 | Offset = 0; |
2764 | return true; |
2765 | } |
2766 | |
2767 | // Otherwise, it didn't fit. Pull in what we can to simplify the immed. |
2768 | ImmedOffset = ImmedOffset & Mask; |
2769 | if (isSub) { |
2770 | if (AddrMode == ARMII::AddrMode_i12) |
2771 | ImmedOffset = -ImmedOffset; |
2772 | else |
2773 | ImmedOffset |= 1 << NumBits; |
2774 | } |
2775 | ImmOp.ChangeToImmediate(ImmVal: ImmedOffset); |
2776 | Offset &= ~(Mask*Scale); |
2777 | } |
2778 | } |
2779 | |
2780 | Offset = (isSub) ? -Offset : Offset; |
2781 | return Offset == 0; |
2782 | } |
2783 | |
2784 | /// analyzeCompare - For a comparison instruction, return the source registers |
2785 | /// in SrcReg and SrcReg2 if having two register operands, and the value it |
2786 | /// compares against in CmpValue. Return true if the comparison instruction |
2787 | /// can be analyzed. |
2788 | bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg, |
2789 | Register &SrcReg2, int64_t &CmpMask, |
2790 | int64_t &CmpValue) const { |
2791 | switch (MI.getOpcode()) { |
2792 | default: break; |
2793 | case ARM::CMPri: |
2794 | case ARM::t2CMPri: |
2795 | case ARM::tCMPi8: |
2796 | SrcReg = MI.getOperand(i: 0).getReg(); |
2797 | SrcReg2 = 0; |
2798 | CmpMask = ~0; |
2799 | CmpValue = MI.getOperand(i: 1).getImm(); |
2800 | return true; |
2801 | case ARM::CMPrr: |
2802 | case ARM::t2CMPrr: |
2803 | case ARM::tCMPr: |
2804 | SrcReg = MI.getOperand(i: 0).getReg(); |
2805 | SrcReg2 = MI.getOperand(i: 1).getReg(); |
2806 | CmpMask = ~0; |
2807 | CmpValue = 0; |
2808 | return true; |
2809 | case ARM::TSTri: |
2810 | case ARM::t2TSTri: |
2811 | SrcReg = MI.getOperand(i: 0).getReg(); |
2812 | SrcReg2 = 0; |
2813 | CmpMask = MI.getOperand(i: 1).getImm(); |
2814 | CmpValue = 0; |
2815 | return true; |
2816 | } |
2817 | |
2818 | return false; |
2819 | } |
2820 | |
2821 | /// isSuitableForMask - Identify a suitable 'and' instruction that |
2822 | /// operates on the given source register and applies the same mask |
2823 | /// as a 'tst' instruction. Provide a limited look-through for copies. |
2824 | /// When successful, MI will hold the found instruction. |
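/// For example, given "tst r1, #7", an "and r0, r1, #7" applies the same
/// mask to r1, so the TST can reuse its flags once the AND is made
/// flag-setting.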
2825 | static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg, |
2826 | int CmpMask, bool CommonUse) { |
2827 | switch (MI->getOpcode()) { |
2828 | case ARM::ANDri: |
2829 | case ARM::t2ANDri: |
2830 | if (CmpMask != MI->getOperand(i: 2).getImm()) |
2831 | return false; |
2832 | if (SrcReg == MI->getOperand(i: CommonUse ? 1 : 0).getReg()) |
2833 | return true; |
2834 | break; |
2835 | } |
2836 | |
2837 | return false; |
2838 | } |
2839 | |
2840 | /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return |
2841 | /// the condition code if we modify the instructions such that flags are |
2842 | /// set by ADD(a,b,X). |
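/// For example, with ADD(a,b,X) computing a = b + X, CMP(a,b) sets carry
/// exactly when a >= b (unsigned), while the ADD sets carry exactly when
/// b + X wraps, i.e. when a < b; hence HS and LO are exchanged, while the
/// overflow-based conditions keep their meaning.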
2843 | inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) { |
2844 | switch (CC) { |
2845 | default: return ARMCC::AL; |
2846 | case ARMCC::HS: return ARMCC::LO; |
2847 | case ARMCC::LO: return ARMCC::HS; |
2848 | case ARMCC::VS: return ARMCC::VS; |
2849 | case ARMCC::VC: return ARMCC::VC; |
2850 | } |
2851 | } |
2852 | |
2853 | /// isRedundantFlagInstr - check whether the first instruction, whose only |
2854 | /// purpose is to update flags, can be made redundant. |
2855 | /// CMPrr can be made redundant by SUBrr if the operands are the same. |
2856 | /// CMPri can be made redundant by SUBri if the operands are the same. |
2857 | /// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X). |
2858 | /// This function can be extended later on. |
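/// For example, "sub r2, r0, r1" followed by "cmp r0, r1" makes the CMP
/// redundant once the SUB is turned into a flag-setting SUBS.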
2859 | inline static bool isRedundantFlagInstr(const MachineInstr *CmpI, |
2860 | Register SrcReg, Register SrcReg2, |
2861 | int64_t ImmValue, |
2862 | const MachineInstr *OI, |
2863 | bool &IsThumb1) { |
2864 | if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && |
2865 | (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) && |
2866 | ((OI->getOperand(i: 1).getReg() == SrcReg && |
2867 | OI->getOperand(i: 2).getReg() == SrcReg2) || |
2868 | (OI->getOperand(i: 1).getReg() == SrcReg2 && |
2869 | OI->getOperand(i: 2).getReg() == SrcReg))) { |
2870 | IsThumb1 = false; |
2871 | return true; |
2872 | } |
2873 | |
2874 | if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr && |
2875 | ((OI->getOperand(i: 2).getReg() == SrcReg && |
2876 | OI->getOperand(i: 3).getReg() == SrcReg2) || |
2877 | (OI->getOperand(i: 2).getReg() == SrcReg2 && |
2878 | OI->getOperand(i: 3).getReg() == SrcReg))) { |
2879 | IsThumb1 = true; |
2880 | return true; |
2881 | } |
2882 | |
2883 | if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) && |
2884 | (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) && |
2885 | OI->getOperand(i: 1).getReg() == SrcReg && |
2886 | OI->getOperand(i: 2).getImm() == ImmValue) { |
2887 | IsThumb1 = false; |
2888 | return true; |
2889 | } |
2890 | |
2891 | if (CmpI->getOpcode() == ARM::tCMPi8 && |
2892 | (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) && |
2893 | OI->getOperand(i: 2).getReg() == SrcReg && |
2894 | OI->getOperand(i: 3).getImm() == ImmValue) { |
2895 | IsThumb1 = true; |
2896 | return true; |
2897 | } |
2898 | |
2899 | if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) && |
2900 | (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr || |
2901 | OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) && |
2902 | OI->getOperand(i: 0).isReg() && OI->getOperand(i: 1).isReg() && |
2903 | OI->getOperand(i: 0).getReg() == SrcReg && |
2904 | OI->getOperand(i: 1).getReg() == SrcReg2) { |
2905 | IsThumb1 = false; |
2906 | return true; |
2907 | } |
2908 | |
2909 | if (CmpI->getOpcode() == ARM::tCMPr && |
2910 | (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 || |
2911 | OI->getOpcode() == ARM::tADDrr) && |
2912 | OI->getOperand(i: 0).getReg() == SrcReg && |
2913 | OI->getOperand(i: 2).getReg() == SrcReg2) { |
2914 | IsThumb1 = true; |
2915 | return true; |
2916 | } |
2917 | |
2918 | return false; |
2919 | } |
2920 | |
2921 | static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) { |
2922 | switch (MI->getOpcode()) { |
2923 | default: return false; |
2924 | case ARM::tLSLri: |
2925 | case ARM::tLSRri: |
2926 | case ARM::tLSLrr: |
2927 | case ARM::tLSRrr: |
2928 | case ARM::tSUBrr: |
2929 | case ARM::tADDrr: |
2930 | case ARM::tADDi3: |
2931 | case ARM::tADDi8: |
2932 | case ARM::tSUBi3: |
2933 | case ARM::tSUBi8: |
2934 | case ARM::tMUL: |
2935 | case ARM::tADC: |
2936 | case ARM::tSBC: |
2937 | case ARM::tRSB: |
2938 | case ARM::tAND: |
2939 | case ARM::tORR: |
2940 | case ARM::tEOR: |
2941 | case ARM::tBIC: |
2942 | case ARM::tMVN: |
2943 | case ARM::tASRri: |
2944 | case ARM::tASRrr: |
2945 | case ARM::tROR: |
2946 | IsThumb1 = true; |
2947 | [[fallthrough]]; |
2948 | case ARM::RSBrr: |
2949 | case ARM::RSBri: |
2950 | case ARM::RSCrr: |
2951 | case ARM::RSCri: |
2952 | case ARM::ADDrr: |
2953 | case ARM::ADDri: |
2954 | case ARM::ADCrr: |
2955 | case ARM::ADCri: |
2956 | case ARM::SUBrr: |
2957 | case ARM::SUBri: |
2958 | case ARM::SBCrr: |
2959 | case ARM::SBCri: |
2960 | case ARM::t2RSBri: |
2961 | case ARM::t2ADDrr: |
2962 | case ARM::t2ADDri: |
2963 | case ARM::t2ADCrr: |
2964 | case ARM::t2ADCri: |
2965 | case ARM::t2SUBrr: |
2966 | case ARM::t2SUBri: |
2967 | case ARM::t2SBCrr: |
2968 | case ARM::t2SBCri: |
2969 | case ARM::ANDrr: |
2970 | case ARM::ANDri: |
2971 | case ARM::ANDrsr: |
2972 | case ARM::ANDrsi: |
2973 | case ARM::t2ANDrr: |
2974 | case ARM::t2ANDri: |
2975 | case ARM::t2ANDrs: |
2976 | case ARM::ORRrr: |
2977 | case ARM::ORRri: |
2978 | case ARM::ORRrsr: |
2979 | case ARM::ORRrsi: |
2980 | case ARM::t2ORRrr: |
2981 | case ARM::t2ORRri: |
2982 | case ARM::t2ORRrs: |
2983 | case ARM::EORrr: |
2984 | case ARM::EORri: |
2985 | case ARM::EORrsr: |
2986 | case ARM::EORrsi: |
2987 | case ARM::t2EORrr: |
2988 | case ARM::t2EORri: |
2989 | case ARM::t2EORrs: |
2990 | case ARM::BICri: |
2991 | case ARM::BICrr: |
2992 | case ARM::BICrsi: |
2993 | case ARM::BICrsr: |
2994 | case ARM::t2BICri: |
2995 | case ARM::t2BICrr: |
2996 | case ARM::t2BICrs: |
2997 | case ARM::t2LSRri: |
2998 | case ARM::t2LSRrr: |
2999 | case ARM::t2LSLri: |
3000 | case ARM::t2LSLrr: |
3001 | case ARM::MOVsr: |
3002 | case ARM::MOVsi: |
3003 | return true; |
3004 | } |
3005 | } |
3006 | |
3007 | /// optimizeCompareInstr - Convert the instruction supplying the argument to the |
3008 | /// comparison into one that sets the zero bit in the flags register; |
3009 | /// Remove a redundant Compare instruction if an earlier instruction can set the |
3010 | /// flags in the same way as Compare. |
3011 | /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two |
3012 | /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the |
3013 | /// condition code of instructions which use the flags. |
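/// For example, "sub r0, r2, r1; cmp r1, r2; bge L" can become
/// "subs r0, r2, r1; ble L": the CMP's operands are swapped relative to
/// the SUB, so the user's condition code is swapped as well.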
3014 | bool ARMBaseInstrInfo::optimizeCompareInstr( |
3015 | MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, |
3016 | int64_t CmpValue, const MachineRegisterInfo *MRI) const { |
3017 | // Get the unique definition of SrcReg. |
3018 | MachineInstr *MI = MRI->getUniqueVRegDef(Reg: SrcReg); |
3019 | if (!MI) return false; |
3020 | |
3021 | // Masked compares sometimes use the same register as the corresponding 'and'. |
3022 | if (CmpMask != ~0) { |
3023 | if (!isSuitableForMask(MI, SrcReg, CmpMask, CommonUse: false) || isPredicated(MI: *MI)) { |
3024 | MI = nullptr; |
3025 | for (MachineRegisterInfo::use_instr_iterator |
3026 | UI = MRI->use_instr_begin(RegNo: SrcReg), UE = MRI->use_instr_end(); |
3027 | UI != UE; ++UI) { |
3028 | if (UI->getParent() != CmpInstr.getParent()) |
3029 | continue; |
3030 | MachineInstr *PotentialAND = &*UI; |
3031 | if (!isSuitableForMask(MI&: PotentialAND, SrcReg, CmpMask, CommonUse: true) || |
3032 | isPredicated(MI: *PotentialAND)) |
3033 | continue; |
3034 | MI = PotentialAND; |
3035 | break; |
3036 | } |
3037 | if (!MI) return false; |
3038 | } |
3039 | } |
3040 | |
3041 | // Get ready to iterate backward from CmpInstr. |
3042 | MachineBasicBlock::iterator I = CmpInstr, E = MI, |
3043 | B = CmpInstr.getParent()->begin(); |
3044 | |
3045 | // Early exit if CmpInstr is at the beginning of the BB. |
3046 | if (I == B) return false; |
3047 | |
3048 | // There are two possible candidates which can be changed to set CPSR: |
3049 | // One is MI, the other is a SUB or ADD instruction. |
3050 | // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or |
3051 | // ADDr[ri](r1, r2, X). |
3052 | // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). |
3053 | MachineInstr *SubAdd = nullptr; |
3054 | if (SrcReg2 != 0) |
3055 | // MI is not a candidate for CMPrr. |
3056 | MI = nullptr; |
3057 | else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) { |
3058 | // Conservatively refuse to convert an instruction which isn't in the same |
3059 | // BB as the comparison. |
3060 | // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate. |
3061 | // Thus we cannot return here. |
3062 | if (CmpInstr.getOpcode() == ARM::CMPri || |
3063 | CmpInstr.getOpcode() == ARM::t2CMPri || |
3064 | CmpInstr.getOpcode() == ARM::tCMPi8) |
3065 | MI = nullptr; |
3066 | else |
3067 | return false; |
3068 | } |
3069 | |
3070 | bool IsThumb1 = false; |
3071 | if (MI && !isOptimizeCompareCandidate(MI, IsThumb1)) |
3072 | return false; |
3073 | |
3074 | // We also want to do this peephole for cases like this: if (a*b == 0), |
3075 | // and optimise away the CMP instruction from the generated code sequence: |
3076 | // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values |
3077 | // resulting from the select instruction, but on Thumb1 (V6M) these MOVS |
3078 | // instructions are flag setting and thus prevent this optimisation. |
3079 | // However, if we only have MOVS instructions in between the CMP and the |
3080 | // other instruction (the MULS in this example), then the CPSR is dead so we |
3081 | // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this |
3082 | // reordering and then continue the analysis hoping we can eliminate the |
3083 | // CMP. This peephole works on the vregs, so is still in SSA form. As a |
3084 | // consequence, the movs won't redefine/kill the MUL operands which would |
3085 | // make this reordering illegal. |
3086 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
3087 | if (MI && IsThumb1) { |
3088 | --I; |
3089 | if (I != E && !MI->readsRegister(Reg: ARM::CPSR, TRI)) { |
3090 | bool CanReorder = true; |
3091 | for (; I != E; --I) { |
3092 | if (I->getOpcode() != ARM::tMOVi8) { |
3093 | CanReorder = false; |
3094 | break; |
3095 | } |
3096 | } |
3097 | if (CanReorder) { |
3098 | MI = MI->removeFromParent(); |
3099 | E = CmpInstr; |
3100 | CmpInstr.getParent()->insert(I: E, MI); |
3101 | } |
3102 | } |
3103 | I = CmpInstr; |
3104 | E = MI; |
3105 | } |
3106 | |
3107 | // Check that CPSR isn't set between the comparison instruction and the one we |
3108 | // want to change. At the same time, search for SubAdd. |
3109 | bool SubAddIsThumb1 = false; |
3110 | do { |
3111 | const MachineInstr &Instr = *--I; |
3112 | |
3113 | // Check whether CmpInstr can be made redundant by the current instruction. |
3114 | if (isRedundantFlagInstr(CmpI: &CmpInstr, SrcReg, SrcReg2, ImmValue: CmpValue, OI: &Instr, |
3115 | IsThumb1&: SubAddIsThumb1)) { |
3116 | SubAdd = &*I; |
3117 | break; |
3118 | } |
3119 | |
3120 | // Allow E (which was initially MI) to be SubAdd but do not search before E. |
3121 | if (I == E) |
3122 | break; |
3123 | |
3124 | if (Instr.modifiesRegister(Reg: ARM::CPSR, TRI) || |
3125 | Instr.readsRegister(Reg: ARM::CPSR, TRI)) |
3126 | // This instruction modifies or uses CPSR after the one we want to |
3127 | // change. We can't do this transformation. |
3128 | return false; |
3129 | |
3130 | if (I == B) { |
3131 | // In some cases, we scan the use-list of an instruction for an AND; |
3132 | // that AND is in the same BB, but may not be scheduled before the |
3133 | // corresponding TST. In that case, bail out. |
3134 | // |
3135 | // FIXME: We could try to reschedule the AND. |
3136 | return false; |
3137 | } |
3138 | } while (true); |
3139 | |
3140 | // Return false if no candidates exist. |
3141 | if (!MI && !SubAdd) |
3142 | return false; |
3143 | |
3144 | // If we found a SubAdd, use it as it will be closer to the CMP |
3145 | if (SubAdd) { |
3146 | MI = SubAdd; |
3147 | IsThumb1 = SubAddIsThumb1; |
3148 | } |
3149 | |
3150 | // We can't use a predicated instruction - it doesn't always write the flags. |
3151 | if (isPredicated(MI: *MI)) |
3152 | return false; |
3153 | |
3154 | // Scan forward for the use of CPSR |
3155 | // When checking against MI: if it's a conditional code that requires |
3156 | // checking of the V bit or C bit, then this is not safe to do. |
3157 | // It is safe to remove CmpInstr if CPSR is redefined or killed. |
3158 | // If we are done with the basic block, we need to check whether CPSR is |
3159 | // live-out. |
3160 | SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4> |
3161 | OperandsToUpdate; |
3162 | bool isSafe = false; |
3163 | I = CmpInstr; |
3164 | E = CmpInstr.getParent()->end(); |
3165 | while (!isSafe && ++I != E) { |
3166 | const MachineInstr &Instr = *I; |
3167 | for (unsigned IO = 0, EO = Instr.getNumOperands(); |
3168 | !isSafe && IO != EO; ++IO) { |
3169 | const MachineOperand &MO = Instr.getOperand(i: IO); |
3170 | if (MO.isRegMask() && MO.clobbersPhysReg(PhysReg: ARM::CPSR)) { |
3171 | isSafe = true; |
3172 | break; |
3173 | } |
3174 | if (!MO.isReg() || MO.getReg() != ARM::CPSR) |
3175 | continue; |
3176 | if (MO.isDef()) { |
3177 | isSafe = true; |
3178 | break; |
3179 | } |
3180 | // Condition code is after the operand before CPSR except for VSELs. |
3181 | ARMCC::CondCodes CC; |
3182 | bool IsInstrVSel = true; |
3183 | switch (Instr.getOpcode()) { |
3184 | default: |
3185 | IsInstrVSel = false; |
3186 | CC = (ARMCC::CondCodes)Instr.getOperand(i: IO - 1).getImm(); |
3187 | break; |
3188 | case ARM::VSELEQD: |
3189 | case ARM::VSELEQS: |
3190 | case ARM::VSELEQH: |
3191 | CC = ARMCC::EQ; |
3192 | break; |
3193 | case ARM::VSELGTD: |
3194 | case ARM::VSELGTS: |
3195 | case ARM::VSELGTH: |
3196 | CC = ARMCC::GT; |
3197 | break; |
3198 | case ARM::VSELGED: |
3199 | case ARM::VSELGES: |
3200 | case ARM::VSELGEH: |
3201 | CC = ARMCC::GE; |
3202 | break; |
3203 | case ARM::VSELVSD: |
3204 | case ARM::VSELVSS: |
3205 | case ARM::VSELVSH: |
3206 | CC = ARMCC::VS; |
3207 | break; |
3208 | } |
3209 | |
3210 | if (SubAdd) { |
3211 | // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based |
3212 | // on CMP needs to be updated to be based on SUB. |
3213 | // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also |
3214 | // needs to be modified. |
3215 | // Push the condition code operands to OperandsToUpdate. |
3216 | // If it is safe to remove CmpInstr, the condition code of these |
3217 | // operands will be modified. |
3218 | unsigned Opc = SubAdd->getOpcode(); |
3219 | bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr || |
3220 | Opc == ARM::SUBri || Opc == ARM::t2SUBri || |
3221 | Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 || |
3222 | Opc == ARM::tSUBi8; |
3223 | unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2; |
3224 | if (!IsSub || |
3225 | (SrcReg2 != 0 && SubAdd->getOperand(i: OpI).getReg() == SrcReg2 && |
3226 | SubAdd->getOperand(i: OpI + 1).getReg() == SrcReg)) { |
3227 | // VSel doesn't support condition code update. |
3228 | if (IsInstrVSel) |
3229 | return false; |
3230 | // Ensure we can swap the condition. |
3231 | ARMCC::CondCodes NewCC = (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC)); |
3232 | if (NewCC == ARMCC::AL) |
3233 | return false; |
3234 | OperandsToUpdate.push_back( |
3235 | Elt: std::make_pair(x: &((*I).getOperand(i: IO - 1)), y&: NewCC)); |
3236 | } |
3237 | } else { |
3238 | // No SubAdd, so this is x = <op> y, z; cmp x, 0. |
3239 | switch (CC) { |
3240 | case ARMCC::EQ: // Z |
3241 | case ARMCC::NE: // Z |
3242 | case ARMCC::MI: // N |
3243 | case ARMCC::PL: // N |
3244 | case ARMCC::AL: // none |
3245 | // CPSR can be used multiple times, we should continue. |
3246 | break; |
3247 | case ARMCC::HS: // C |
3248 | case ARMCC::LO: // C |
3249 | case ARMCC::VS: // V |
3250 | case ARMCC::VC: // V |
3251 | case ARMCC::HI: // C Z |
3252 | case ARMCC::LS: // C Z |
3253 | case ARMCC::GE: // N V |
3254 | case ARMCC::LT: // N V |
3255 | case ARMCC::GT: // Z N V |
3256 | case ARMCC::LE: // Z N V |
3257 | // The instruction uses the V bit or C bit which is not safe. |
3258 | return false; |
3259 | } |
3260 | } |
3261 | } |
3262 | } |
3263 | |
3264 | // If CPSR is not killed nor re-defined, we should check whether it is |
3265 | // live-out. If it is live-out, do not optimize. |
3266 | if (!isSafe) { |
3267 | MachineBasicBlock *MBB = CmpInstr.getParent(); |
3268 | for (MachineBasicBlock *Succ : MBB->successors()) |
3269 | if (Succ->isLiveIn(Reg: ARM::CPSR)) |
3270 | return false; |
3271 | } |
3272 | |
3273 | // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always |
3274 | // set CPSR so this is represented as an explicit output) |
3275 | if (!IsThumb1) { |
3276 | unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1; |
3277 | MI->getOperand(i: CPSRRegNum).setReg(ARM::CPSR); |
3278 | MI->getOperand(i: CPSRRegNum).setIsDef(true); |
3279 | } |
3280 | assert(!isPredicated(*MI) && "Can't use flags from predicated instruction" ); |
3281 | CmpInstr.eraseFromParent(); |
3282 | |
3283 | // Modify the condition code of operands in OperandsToUpdate. |
3284 | // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to |
3285 | // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. |
3286 | for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) |
3287 | OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); |
3288 | |
3289 | MI->clearRegisterDeads(Reg: ARM::CPSR); |
3290 | |
3291 | return true; |
3292 | } |
3293 | |
3294 | bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const { |
3295 | // Do not sink MI if it might be used to optimize a redundant compare. |
3296 | // We heuristically only look at the instruction immediately following MI to |
3297 | // avoid potentially searching the entire basic block. |
3298 | if (isPredicated(MI)) |
3299 | return true; |
3300 | MachineBasicBlock::const_iterator Next = &MI; |
3301 | ++Next; |
3302 | Register SrcReg, SrcReg2; |
3303 | int64_t CmpMask, CmpValue; |
3304 | bool IsThumb1; |
3305 | if (Next != MI.getParent()->end() && |
3306 | analyzeCompare(MI: *Next, SrcReg, SrcReg2, CmpMask, CmpValue) && |
3307 | isRedundantFlagInstr(CmpI: &*Next, SrcReg, SrcReg2, ImmValue: CmpValue, OI: &MI, IsThumb1)) |
3308 | return false; |
3309 | return true; |
3310 | } |
3311 | |
3312 | bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, |
3313 | Register Reg, |
3314 | MachineRegisterInfo *MRI) const { |
3315 | // Fold large immediates into add, sub, or, xor. |
3316 | unsigned DefOpc = DefMI.getOpcode(); |
3317 | if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm && |
3318 | DefOpc != ARM::tMOVi32imm) |
3319 | return false; |
3320 | if (!DefMI.getOperand(i: 1).isImm()) |
3321 | // Could be t2MOVi32imm @xx |
3322 | return false; |
3323 | |
3324 | if (!MRI->hasOneNonDBGUse(RegNo: Reg)) |
3325 | return false; |
3326 | |
3327 | const MCInstrDesc &DefMCID = DefMI.getDesc(); |
3328 | if (DefMCID.hasOptionalDef()) { |
3329 | unsigned NumOps = DefMCID.getNumOperands(); |
3330 | const MachineOperand &MO = DefMI.getOperand(i: NumOps - 1); |
3331 | if (MO.getReg() == ARM::CPSR && !MO.isDead()) |
3332 | // If DefMI defines CPSR and it is not dead, it's obviously not safe |
3333 | // to delete DefMI. |
3334 | return false; |
3335 | } |
3336 | |
3337 | const MCInstrDesc &UseMCID = UseMI.getDesc(); |
3338 | if (UseMCID.hasOptionalDef()) { |
3339 | unsigned NumOps = UseMCID.getNumOperands(); |
3340 | if (UseMI.getOperand(i: NumOps - 1).getReg() == ARM::CPSR) |
3341 | // If the instruction sets the flag, do not attempt this optimization |
3342 | // since it may change the semantics of the code. |
3343 | return false; |
3344 | } |
3345 | |
3346 | unsigned UseOpc = UseMI.getOpcode(); |
3347 | unsigned NewUseOpc = 0; |
3348 | uint32_t ImmVal = (uint32_t)DefMI.getOperand(i: 1).getImm(); |
3349 | uint32_t SOImmValV1 = 0, SOImmValV2 = 0; |
3350 | bool Commute = false; |
3351 | switch (UseOpc) { |
3352 | default: return false; |
3353 | case ARM::SUBrr: |
3354 | case ARM::ADDrr: |
3355 | case ARM::ORRrr: |
3356 | case ARM::EORrr: |
3357 | case ARM::t2SUBrr: |
3358 | case ARM::t2ADDrr: |
3359 | case ARM::t2ORRrr: |
3360 | case ARM::t2EORrr: { |
3361 | Commute = UseMI.getOperand(i: 2).getReg() != Reg; |
3362 | switch (UseOpc) { |
3363 | default: break; |
3364 | case ARM::ADDrr: |
3365 | case ARM::SUBrr: |
3366 | if (UseOpc == ARM::SUBrr && Commute) |
3367 | return false; |
3368 | |
3369 | // ADD/SUB are special because they're essentially the same operation, so |
3370 | // we can handle a larger range of immediates. |
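    // For example, an immediate of 257 (0x101) lets an ADDrr be rewritten as
    // two ADDri instructions adding #0x1 and #0x100; an immediate of -257
    // uses two SUBri instructions with the same values instead.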
3371 | if (ARM_AM::isSOImmTwoPartVal(V: ImmVal)) |
3372 | NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri; |
3373 | else if (ARM_AM::isSOImmTwoPartVal(V: -ImmVal)) { |
3374 | ImmVal = -ImmVal; |
3375 | NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri; |
3376 | } else |
3377 | return false; |
3378 | SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(V: ImmVal); |
3379 | SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(V: ImmVal); |
3380 | break; |
3381 | case ARM::ORRrr: |
3382 | case ARM::EORrr: |
3383 | if (!ARM_AM::isSOImmTwoPartVal(V: ImmVal)) |
3384 | return false; |
3385 | SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(V: ImmVal); |
3386 | SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(V: ImmVal); |
3387 | switch (UseOpc) { |
3388 | default: break; |
3389 | case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; |
3390 | case ARM::EORrr: NewUseOpc = ARM::EORri; break; |
3391 | } |
3392 | break; |
3393 | case ARM::t2ADDrr: |
3394 | case ARM::t2SUBrr: { |
3395 | if (UseOpc == ARM::t2SUBrr && Commute) |
3396 | return false; |
3397 | |
3398 | // ADD/SUB are special because they're essentially the same operation, so |
3399 | // we can handle a larger range of immediates. |
3400 | const bool ToSP = DefMI.getOperand(i: 0).getReg() == ARM::SP; |
3401 | const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri; |
3402 | const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri; |
3403 | if (ARM_AM::isT2SOImmTwoPartVal(Imm: ImmVal)) |
3404 | NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB; |
3405 | else if (ARM_AM::isT2SOImmTwoPartVal(Imm: -ImmVal)) { |
3406 | ImmVal = -ImmVal; |
3407 | NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD; |
3408 | } else |
3409 | return false; |
3410 | SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(Imm: ImmVal); |
3411 | SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(Imm: ImmVal); |
3412 | break; |
3413 | } |
3414 | case ARM::t2ORRrr: |
3415 | case ARM::t2EORrr: |
3416 | if (!ARM_AM::isT2SOImmTwoPartVal(Imm: ImmVal)) |
3417 | return false; |
3418 | SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(Imm: ImmVal); |
3419 | SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(Imm: ImmVal); |
3420 | switch (UseOpc) { |
3421 | default: break; |
3422 | case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; |
3423 | case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; |
3424 | } |
3425 | break; |
3426 | } |
3427 | } |
3428 | } |
3429 | |
3430 | unsigned OpIdx = Commute ? 2 : 1; |
3431 | Register Reg1 = UseMI.getOperand(i: OpIdx).getReg(); |
3432 | bool isKill = UseMI.getOperand(i: OpIdx).isKill(); |
3433 | const TargetRegisterClass *TRC = MRI->getRegClass(Reg); |
3434 | Register NewReg = MRI->createVirtualRegister(RegClass: TRC); |
3435 | BuildMI(BB&: *UseMI.getParent(), I&: UseMI, MIMD: UseMI.getDebugLoc(), MCID: get(Opcode: NewUseOpc), |
3436 | DestReg: NewReg) |
3437 | .addReg(RegNo: Reg1, flags: getKillRegState(B: isKill)) |
3438 | .addImm(Val: SOImmValV1) |
3439 | .add(MOs: predOps(Pred: ARMCC::AL)) |
3440 | .add(MO: condCodeOp()); |
3441 | UseMI.setDesc(get(Opcode: NewUseOpc)); |
3442 | UseMI.getOperand(i: 1).setReg(NewReg); |
3443 | UseMI.getOperand(i: 1).setIsKill(); |
3444 | UseMI.getOperand(i: 2).ChangeToImmediate(ImmVal: SOImmValV2); |
3445 | DefMI.eraseFromParent(); |
3446 | // FIXME: t2ADDrr should be split, as different rules apply when writing to SP, |
3447 | // just as t2ADDri was split into [t2ADDri, t2ADDspImm]. |
3448 | // Then the below code will not be needed, as the input/output register |
3449 | // classes will be rgpr or gprSP. |
3450 | // For now, we fix the UseMI operand explicitly here: |
3451 | switch(NewUseOpc){ |
3452 | case ARM::t2ADDspImm: |
3453 | case ARM::t2SUBspImm: |
3454 | case ARM::t2ADDri: |
3455 | case ARM::t2SUBri: |
3456 | MRI->constrainRegClass(Reg: UseMI.getOperand(i: 0).getReg(), RC: TRC); |
3457 | } |
3458 | return true; |
3459 | } |
3460 | |
3461 | static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, |
3462 | const MachineInstr &MI) { |
3463 | switch (MI.getOpcode()) { |
3464 | default: { |
3465 | const MCInstrDesc &Desc = MI.getDesc(); |
3466 | int UOps = ItinData->getNumMicroOps(ItinClassIndx: Desc.getSchedClass()); |
3467 | assert(UOps >= 0 && "bad # UOps" ); |
3468 | return UOps; |
3469 | } |
3470 | |
3471 | case ARM::LDRrs: |
3472 | case ARM::LDRBrs: |
3473 | case ARM::STRrs: |
3474 | case ARM::STRBrs: { |
3475 | unsigned ShOpVal = MI.getOperand(i: 3).getImm(); |
3476 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3477 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3478 | if (!isSub && |
3479 | (ShImm == 0 || |
3480 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3481 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3482 | return 1; |
3483 | return 2; |
3484 | } |
3485 | |
3486 | case ARM::LDRH: |
3487 | case ARM::STRH: { |
3488 | if (!MI.getOperand(i: 2).getReg()) |
3489 | return 1; |
3490 | |
3491 | unsigned ShOpVal = MI.getOperand(i: 3).getImm(); |
3492 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3493 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3494 | if (!isSub && |
3495 | (ShImm == 0 || |
3496 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3497 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3498 | return 1; |
3499 | return 2; |
3500 | } |
3501 | |
3502 | case ARM::LDRSB: |
3503 | case ARM::LDRSH: |
3504 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 3).getImm()) == ARM_AM::sub) ? 3 : 2; |
3505 | |
3506 | case ARM::LDRSB_POST: |
3507 | case ARM::LDRSH_POST: { |
3508 | Register Rt = MI.getOperand(i: 0).getReg(); |
3509 | Register Rm = MI.getOperand(i: 3).getReg(); |
3510 | return (Rt == Rm) ? 4 : 3; |
3511 | } |
3512 | |
3513 | case ARM::LDR_PRE_REG: |
3514 | case ARM::LDRB_PRE_REG: { |
3515 | Register Rt = MI.getOperand(i: 0).getReg(); |
3516 | Register Rm = MI.getOperand(i: 3).getReg(); |
3517 | if (Rt == Rm) |
3518 | return 3; |
3519 | unsigned ShOpVal = MI.getOperand(i: 4).getImm(); |
3520 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3521 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3522 | if (!isSub && |
3523 | (ShImm == 0 || |
3524 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3525 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3526 | return 2; |
3527 | return 3; |
3528 | } |
3529 | |
3530 | case ARM::STR_PRE_REG: |
3531 | case ARM::STRB_PRE_REG: { |
3532 | unsigned ShOpVal = MI.getOperand(i: 4).getImm(); |
3533 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3534 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3535 | if (!isSub && |
3536 | (ShImm == 0 || |
3537 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3538 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3539 | return 2; |
3540 | return 3; |
3541 | } |
3542 | |
3543 | case ARM::LDRH_PRE: |
3544 | case ARM::STRH_PRE: { |
3545 | Register Rt = MI.getOperand(i: 0).getReg(); |
3546 | Register Rm = MI.getOperand(i: 3).getReg(); |
3547 | if (!Rm) |
3548 | return 2; |
3549 | if (Rt == Rm) |
3550 | return 3; |
3551 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 3 : 2; |
3552 | } |
3553 | |
3554 | case ARM::LDR_POST_REG: |
3555 | case ARM::LDRB_POST_REG: |
3556 | case ARM::LDRH_POST: { |
3557 | Register Rt = MI.getOperand(i: 0).getReg(); |
3558 | Register Rm = MI.getOperand(i: 3).getReg(); |
3559 | return (Rt == Rm) ? 3 : 2; |
3560 | } |
3561 | |
3562 | case ARM::LDR_PRE_IMM: |
3563 | case ARM::LDRB_PRE_IMM: |
3564 | case ARM::LDR_POST_IMM: |
3565 | case ARM::LDRB_POST_IMM: |
3566 | case ARM::STRB_POST_IMM: |
3567 | case ARM::STRB_POST_REG: |
3568 | case ARM::STRB_PRE_IMM: |
3569 | case ARM::STRH_POST: |
3570 | case ARM::STR_POST_IMM: |
3571 | case ARM::STR_POST_REG: |
3572 | case ARM::STR_PRE_IMM: |
3573 | return 2; |
3574 | |
3575 | case ARM::LDRSB_PRE: |
3576 | case ARM::LDRSH_PRE: { |
3577 | Register Rm = MI.getOperand(i: 3).getReg(); |
3578 | if (Rm == 0) |
3579 | return 3; |
3580 | Register Rt = MI.getOperand(i: 0).getReg(); |
3581 | if (Rt == Rm) |
3582 | return 4; |
3583 | unsigned ShOpVal = MI.getOperand(i: 4).getImm(); |
3584 | bool isSub = ARM_AM::getAM2Op(AM2Opc: ShOpVal) == ARM_AM::sub; |
3585 | unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc: ShOpVal); |
3586 | if (!isSub && |
3587 | (ShImm == 0 || |
3588 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
3589 | ARM_AM::getAM2ShiftOpc(AM2Opc: ShOpVal) == ARM_AM::lsl))) |
3590 | return 3; |
3591 | return 4; |
3592 | } |
3593 | |
3594 | case ARM::LDRD: { |
3595 | Register Rt = MI.getOperand(i: 0).getReg(); |
3596 | Register Rn = MI.getOperand(i: 2).getReg(); |
3597 | Register Rm = MI.getOperand(i: 3).getReg(); |
3598 | if (Rm) |
3599 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 4 |
3600 | : 3; |
3601 | return (Rt == Rn) ? 3 : 2; |
3602 | } |
3603 | |
3604 | case ARM::STRD: { |
3605 | Register Rm = MI.getOperand(i: 3).getReg(); |
3606 | if (Rm) |
3607 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 4).getImm()) == ARM_AM::sub) ? 4 |
3608 | : 3; |
3609 | return 2; |
3610 | } |
3611 | |
3612 | case ARM::LDRD_POST: |
3613 | case ARM::t2LDRD_POST: |
3614 | return 3; |
3615 | |
3616 | case ARM::STRD_POST: |
3617 | case ARM::t2STRD_POST: |
3618 | return 4; |
3619 | |
3620 | case ARM::LDRD_PRE: { |
3621 | Register Rt = MI.getOperand(i: 0).getReg(); |
3622 | Register Rn = MI.getOperand(i: 3).getReg(); |
3623 | Register Rm = MI.getOperand(i: 4).getReg(); |
3624 | if (Rm) |
3625 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 5).getImm()) == ARM_AM::sub) ? 5 |
3626 | : 4; |
3627 | return (Rt == Rn) ? 4 : 3; |
3628 | } |
3629 | |
3630 | case ARM::t2LDRD_PRE: { |
3631 | Register Rt = MI.getOperand(i: 0).getReg(); |
3632 | Register Rn = MI.getOperand(i: 3).getReg(); |
3633 | return (Rt == Rn) ? 4 : 3; |
3634 | } |
3635 | |
3636 | case ARM::STRD_PRE: { |
3637 | Register Rm = MI.getOperand(i: 4).getReg(); |
3638 | if (Rm) |
3639 | return (ARM_AM::getAM3Op(AM3Opc: MI.getOperand(i: 5).getImm()) == ARM_AM::sub) ? 5 |
3640 | : 4; |
3641 | return 3; |
3642 | } |
3643 | |
3644 | case ARM::t2STRD_PRE: |
3645 | return 3; |
3646 | |
3647 | case ARM::t2LDR_POST: |
3648 | case ARM::t2LDRB_POST: |
3649 | case ARM::t2LDRB_PRE: |
3650 | case ARM::t2LDRSBi12: |
3651 | case ARM::t2LDRSBi8: |
3652 | case ARM::t2LDRSBpci: |
3653 | case ARM::t2LDRSBs: |
3654 | case ARM::t2LDRH_POST: |
3655 | case ARM::t2LDRH_PRE: |
3656 | case ARM::t2LDRSBT: |
3657 | case ARM::t2LDRSB_POST: |
3658 | case ARM::t2LDRSB_PRE: |
3659 | case ARM::t2LDRSH_POST: |
3660 | case ARM::t2LDRSH_PRE: |
3661 | case ARM::t2LDRSHi12: |
3662 | case ARM::t2LDRSHi8: |
3663 | case ARM::t2LDRSHpci: |
3664 | case ARM::t2LDRSHs: |
3665 | return 2; |
3666 | |
3667 | case ARM::t2LDRDi8: { |
3668 | Register Rt = MI.getOperand(i: 0).getReg(); |
3669 | Register Rn = MI.getOperand(i: 2).getReg(); |
3670 | return (Rt == Rn) ? 3 : 2; |
3671 | } |
3672 | |
3673 | case ARM::t2STRB_POST: |
3674 | case ARM::t2STRB_PRE: |
3675 | case ARM::t2STRBs: |
3676 | case ARM::t2STRDi8: |
3677 | case ARM::t2STRH_POST: |
3678 | case ARM::t2STRH_PRE: |
3679 | case ARM::t2STRHs: |
3680 | case ARM::t2STR_POST: |
3681 | case ARM::t2STR_PRE: |
3682 | case ARM::t2STRs: |
3683 | return 2; |
3684 | } |
3685 | } |
3686 | |
3687 | // Return the number of 32-bit words loaded by LDM or stored by STM. If this |
3688 | // can't be easily determined return 0 (missing MachineMemOperand). |
3689 | // |
3690 | // FIXME: The current MachineInstr design does not support relying on machine |
3691 | // mem operands to determine the width of a memory access. Instead, we expect |
3692 | // the target to provide this information based on the instruction opcode and |
3693 | // operands. However, using MachineMemOperand is the best solution now for |
3694 | // two reasons: |
3695 | // |
3696 | // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI |
3697 | // operands. This is much more dangerous than using the MachineMemOperand |
3698 | // sizes because CodeGen passes can insert/remove optional machine operands. In |
3699 | // fact, it's totally incorrect for preRA passes and appears to be wrong for |
3700 | // postRA passes as well. |
3701 | // |
3702 | // 2) getNumLDMAddresses is only used by the scheduling machine model and any |
3703 | // machine model that calls this should handle the unknown (zero size) case. |
3704 | // |
3705 | // Long term, we should require a target hook that verifies MachineMemOperand |
3706 | // sizes during MC lowering. That target hook should be local to MC lowering |
3707 | // because we can't ensure that it is aware of other MI forms. Doing this will |
3708 | // ensure that MachineMemOperands are correctly propagated through all passes. |
3709 | unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const { |
3710 | unsigned Size = 0; |
3711 | for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), |
3712 | E = MI.memoperands_end(); |
3713 | I != E; ++I) { |
3714 | Size += (*I)->getSize().getValue(); |
3715 | } |
3716 | // FIXME: The scheduler currently can't handle values larger than 16. But |
3717 | // the values can actually go up to 32 for floating-point load/store |
3718 | // multiple (VLDMIA etc.). Also, the way this code is reasoning about memory |
3719 | // operations isn't right; we could end up with "extra" memory operands for |
3720 | // various reasons, like tail merge merging two memory operations. |
3721 | return std::min(a: Size / 4, b: 16U); |
3722 | } |
3723 | |
3724 | static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc, |
3725 | unsigned NumRegs) { |
3726 | unsigned UOps = 1 + NumRegs; // 1 for address computation. |
3727 | switch (Opc) { |
3728 | default: |
3729 | break; |
3730 | case ARM::VLDMDIA_UPD: |
3731 | case ARM::VLDMDDB_UPD: |
3732 | case ARM::VLDMSIA_UPD: |
3733 | case ARM::VLDMSDB_UPD: |
3734 | case ARM::VSTMDIA_UPD: |
3735 | case ARM::VSTMDDB_UPD: |
3736 | case ARM::VSTMSIA_UPD: |
3737 | case ARM::VSTMSDB_UPD: |
3738 | case ARM::LDMIA_UPD: |
3739 | case ARM::LDMDA_UPD: |
3740 | case ARM::LDMDB_UPD: |
3741 | case ARM::LDMIB_UPD: |
3742 | case ARM::STMIA_UPD: |
3743 | case ARM::STMDA_UPD: |
3744 | case ARM::STMDB_UPD: |
3745 | case ARM::STMIB_UPD: |
3746 | case ARM::tLDMIA_UPD: |
3747 | case ARM::tSTMIA_UPD: |
3748 | case ARM::t2LDMIA_UPD: |
3749 | case ARM::t2LDMDB_UPD: |
3750 | case ARM::t2STMIA_UPD: |
3751 | case ARM::t2STMDB_UPD: |
3752 | ++UOps; // One for base register writeback. |
3753 | break; |
3754 | case ARM::LDMIA_RET: |
3755 | case ARM::tPOP_RET: |
3756 | case ARM::t2LDMIA_RET: |
3757 | UOps += 2; // One for base reg wb, one for write to pc. |
3758 | break; |
3759 | } |
3760 | return UOps; |
3761 | } |
3762 | |
3763 | unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, |
3764 | const MachineInstr &MI) const { |
3765 | if (!ItinData || ItinData->isEmpty()) |
3766 | return 1; |
3767 | |
3768 | const MCInstrDesc &Desc = MI.getDesc(); |
3769 | unsigned Class = Desc.getSchedClass(); |
3770 | int ItinUOps = ItinData->getNumMicroOps(ItinClassIndx: Class); |
3771 | if (ItinUOps >= 0) { |
3772 | if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore())) |
3773 | return getNumMicroOpsSwiftLdSt(ItinData, MI); |
3774 | |
3775 | return ItinUOps; |
3776 | } |
3777 | |
3778 | unsigned Opc = MI.getOpcode(); |
3779 | switch (Opc) { |
3780 | default: |
3781 | llvm_unreachable("Unexpected multi-uops instruction!" ); |
3782 | case ARM::VLDMQIA: |
3783 | case ARM::VSTMQIA: |
3784 | return 2; |
3785 | |
3786 | // The number of uOps for load / store multiple is determined by the number |
3787 | // of registers. |
3788 | // |
3789 | // On Cortex-A8, each pair of register loads / stores can be scheduled on the |
3790 | // same cycle. The scheduling for the first load / store must be done |
3791 | // separately by assuming the address is not 64-bit aligned. |
3792 | // |
3793 | // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address |
3794 | // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON |
3795 | // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1. |
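  // For example, a VLDMDIA loading five D registers takes
  // 5/2 + 5%2 + 1 = 4 uops under the Cortex-A9 formula above.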
3796 | case ARM::VLDMDIA: |
3797 | case ARM::VLDMDIA_UPD: |
3798 | case ARM::VLDMDDB_UPD: |
3799 | case ARM::VLDMSIA: |
3800 | case ARM::VLDMSIA_UPD: |
3801 | case ARM::VLDMSDB_UPD: |
3802 | case ARM::VSTMDIA: |
3803 | case ARM::VSTMDIA_UPD: |
3804 | case ARM::VSTMDDB_UPD: |
3805 | case ARM::VSTMSIA: |
3806 | case ARM::VSTMSIA_UPD: |
3807 | case ARM::VSTMSDB_UPD: { |
3808 | unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands(); |
3809 | return (NumRegs / 2) + (NumRegs % 2) + 1; |
3810 | } |
3811 | |
3812 | case ARM::LDMIA_RET: |
3813 | case ARM::LDMIA: |
3814 | case ARM::LDMDA: |
3815 | case ARM::LDMDB: |
3816 | case ARM::LDMIB: |
3817 | case ARM::LDMIA_UPD: |
3818 | case ARM::LDMDA_UPD: |
3819 | case ARM::LDMDB_UPD: |
3820 | case ARM::LDMIB_UPD: |
3821 | case ARM::STMIA: |
3822 | case ARM::STMDA: |
3823 | case ARM::STMDB: |
3824 | case ARM::STMIB: |
3825 | case ARM::STMIA_UPD: |
3826 | case ARM::STMDA_UPD: |
3827 | case ARM::STMDB_UPD: |
3828 | case ARM::STMIB_UPD: |
3829 | case ARM::tLDMIA: |
3830 | case ARM::tLDMIA_UPD: |
3831 | case ARM::tSTMIA_UPD: |
3832 | case ARM::tPOP_RET: |
3833 | case ARM::tPOP: |
3834 | case ARM::tPUSH: |
3835 | case ARM::t2LDMIA_RET: |
3836 | case ARM::t2LDMIA: |
3837 | case ARM::t2LDMDB: |
3838 | case ARM::t2LDMIA_UPD: |
3839 | case ARM::t2LDMDB_UPD: |
3840 | case ARM::t2STMIA: |
3841 | case ARM::t2STMDB: |
3842 | case ARM::t2STMIA_UPD: |
3843 | case ARM::t2STMDB_UPD: { |
3844 | unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1; |
3845 | switch (Subtarget.getLdStMultipleTiming()) { |
3846 | case ARMSubtarget::SingleIssuePlusExtras: |
3847 | return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs); |
3848 | case ARMSubtarget::SingleIssue: |
3849 | // Assume the worst. |
3850 | return NumRegs; |
3851 | case ARMSubtarget::DoubleIssue: { |
3852 | if (NumRegs < 4) |
3853 | return 2; |
3854 | // 4 registers would be issued: 2, 2. |
3855 | // 5 registers would be issued: 2, 2, 1. |
3856 | unsigned UOps = (NumRegs / 2); |
3857 | if (NumRegs % 2) |
3858 | ++UOps; |
3859 | return UOps; |
3860 | } |
3861 | case ARMSubtarget::DoubleIssueCheckUnalignedAccess: { |
3862 | unsigned UOps = (NumRegs / 2); |
3863 | // If there is an odd number of registers or if it's not 64-bit aligned, |
3864 | // then it takes an extra AGU (Address Generation Unit) cycle. |
3865 | if ((NumRegs % 2) || !MI.hasOneMemOperand() || |
3866 | (*MI.memoperands_begin())->getAlign() < Align(8)) |
3867 | ++UOps; |
3868 | return UOps; |
3869 | } |
3870 | } |
3871 | } |
3872 | } |
3873 | llvm_unreachable("Didn't find the number of microops" ); |
3874 | } |
3875 | |
3876 | std::optional<unsigned> |
3877 | ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, |
3878 | const MCInstrDesc &DefMCID, unsigned DefClass, |
3879 | unsigned DefIdx, unsigned DefAlign) const { |
3880 | int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; |
3881 | if (RegNo <= 0) |
3882 | // Def is the address writeback. |
3883 | return ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx); |
3884 | |
3885 | unsigned DefCycle; |
3886 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
3887 | // (regno / 2) + (regno % 2) + 1 |
3888 | DefCycle = RegNo / 2 + 1; |
3889 | if (RegNo % 2) |
3890 | ++DefCycle; |
3891 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
3892 | DefCycle = RegNo; |
3893 | bool isSLoad = false; |
3894 | |
3895 | switch (DefMCID.getOpcode()) { |
3896 | default: break; |
3897 | case ARM::VLDMSIA: |
3898 | case ARM::VLDMSIA_UPD: |
3899 | case ARM::VLDMSDB_UPD: |
3900 | isSLoad = true; |
3901 | break; |
3902 | } |
3903 | |
3904 | // If there is an odd number of 'S' registers or if it's not 64-bit aligned, |
3905 | // then it takes an extra cycle. |
3906 | if ((isSLoad && (RegNo % 2)) || DefAlign < 8) |
3907 | ++DefCycle; |
3908 | } else { |
3909 | // Assume the worst. |
3910 | DefCycle = RegNo + 2; |
3911 | } |
3912 | |
3913 | return DefCycle; |
3914 | } |
3915 | |
3916 | std::optional<unsigned> |
3917 | ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, |
3918 | const MCInstrDesc &DefMCID, unsigned DefClass, |
3919 | unsigned DefIdx, unsigned DefAlign) const { |
3920 | int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; |
3921 | if (RegNo <= 0) |
3922 | // Def is the address writeback. |
3923 | return ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx); |
3924 | |
3925 | unsigned DefCycle; |
3926 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
3927 | // 4 registers would be issued: 1, 2, 1. |
3928 | // 5 registers would be issued: 1, 2, 2. |
3929 | DefCycle = RegNo / 2; |
3930 | if (DefCycle < 1) |
3931 | DefCycle = 1; |
3932 | // Result latency is issue cycle + 2: E2. |
3933 | DefCycle += 2; |
3934 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
3935 | DefCycle = (RegNo / 2); |
3936 | // If there is an odd number of registers or if it's not 64-bit aligned, |
3937 | // then it takes an extra AGU (Address Generation Unit) cycle. |
3938 | if ((RegNo % 2) || DefAlign < 8) |
3939 | ++DefCycle; |
3940 | // Result latency is AGU cycles + 2. |
3941 | DefCycle += 2; |
3942 | } else { |
3943 | // Assume the worst. |
3944 | DefCycle = RegNo + 2; |
3945 | } |
3946 | |
3947 | return DefCycle; |
3948 | } |
3949 | |
3950 | std::optional<unsigned> |
3951 | ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, |
3952 | const MCInstrDesc &UseMCID, unsigned UseClass, |
3953 | unsigned UseIdx, unsigned UseAlign) const { |
3954 | int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; |
3955 | if (RegNo <= 0) |
3956 | return ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx); |
3957 | |
3958 | unsigned UseCycle; |
3959 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
3960 | // (regno / 2) + (regno % 2) + 1 |
3961 | UseCycle = RegNo / 2 + 1; |
3962 | if (RegNo % 2) |
3963 | ++UseCycle; |
3964 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
3965 | UseCycle = RegNo; |
3966 | bool isSStore = false; |
3967 | |
3968 | switch (UseMCID.getOpcode()) { |
3969 | default: break; |
3970 | case ARM::VSTMSIA: |
3971 | case ARM::VSTMSIA_UPD: |
3972 | case ARM::VSTMSDB_UPD: |
3973 | isSStore = true; |
3974 | break; |
3975 | } |
3976 | |
3977 | // If there is an odd number of 'S' registers or if it's not 64-bit aligned, |
3978 | // then it takes an extra cycle. |
3979 | if ((isSStore && (RegNo % 2)) || UseAlign < 8) |
3980 | ++UseCycle; |
3981 | } else { |
3982 | // Assume the worst. |
3983 | UseCycle = RegNo + 2; |
3984 | } |
3985 | |
3986 | return UseCycle; |
3987 | } |
3988 | |
3989 | std::optional<unsigned> |
3990 | ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, |
3991 | const MCInstrDesc &UseMCID, unsigned UseClass, |
3992 | unsigned UseIdx, unsigned UseAlign) const { |
3993 | int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; |
3994 | if (RegNo <= 0) |
3995 | return ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx); |
3996 | |
3997 | unsigned UseCycle; |
3998 | if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) { |
3999 | UseCycle = RegNo / 2; |
4000 | if (UseCycle < 2) |
4001 | UseCycle = 2; |
4002 | // Read in E3. |
4003 | UseCycle += 2; |
4004 | } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) { |
4005 | UseCycle = (RegNo / 2); |
4006 | // If there is an odd number of registers or if it's not 64-bit aligned, |
4007 | // then it takes an extra AGU (Address Generation Unit) cycle. |
4008 | if ((RegNo % 2) || UseAlign < 8) |
4009 | ++UseCycle; |
4010 | } else { |
4011 | // Assume the worst. |
4012 | UseCycle = 1; |
4013 | } |
4014 | return UseCycle; |
4015 | } |
4016 | |
4017 | std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency( |
4018 | const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID, |
4019 | unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID, |
4020 | unsigned UseIdx, unsigned UseAlign) const { |
4021 | unsigned DefClass = DefMCID.getSchedClass(); |
4022 | unsigned UseClass = UseMCID.getSchedClass(); |
4023 | |
4024 | if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands()) |
4025 | return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); |
4026 | |
4027 | // This may be a def / use of a variable_ops instruction; the operand |
4028 | // latency might be determinable dynamically. Let the target try to |
4029 | // figure it out. |
4030 | std::optional<unsigned> DefCycle; |
4031 | bool LdmBypass = false; |
4032 | switch (DefMCID.getOpcode()) { |
4033 | default: |
4034 | DefCycle = ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: DefIdx); |
4035 | break; |
4036 | |
4037 | case ARM::VLDMDIA: |
4038 | case ARM::VLDMDIA_UPD: |
4039 | case ARM::VLDMDDB_UPD: |
4040 | case ARM::VLDMSIA: |
4041 | case ARM::VLDMSIA_UPD: |
4042 | case ARM::VLDMSDB_UPD: |
4043 | DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); |
4044 | break; |
4045 | |
4046 | case ARM::LDMIA_RET: |
4047 | case ARM::LDMIA: |
4048 | case ARM::LDMDA: |
4049 | case ARM::LDMDB: |
4050 | case ARM::LDMIB: |
4051 | case ARM::LDMIA_UPD: |
4052 | case ARM::LDMDA_UPD: |
4053 | case ARM::LDMDB_UPD: |
4054 | case ARM::LDMIB_UPD: |
4055 | case ARM::tLDMIA: |
4056 | case ARM::tLDMIA_UPD: |
4057 | case ARM::tPUSH: |
4058 | case ARM::t2LDMIA_RET: |
4059 | case ARM::t2LDMIA: |
4060 | case ARM::t2LDMDB: |
4061 | case ARM::t2LDMIA_UPD: |
4062 | case ARM::t2LDMDB_UPD: |
4063 | LdmBypass = true; |
4064 | DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); |
4065 | break; |
4066 | } |
4067 | |
4068 | if (!DefCycle) |
4069 | // We can't seem to determine the result latency of the def, assume it's 2. |
4070 | DefCycle = 2; |
4071 | |
4072 | std::optional<unsigned> UseCycle; |
4073 | switch (UseMCID.getOpcode()) { |
4074 | default: |
4075 | UseCycle = ItinData->getOperandCycle(ItinClassIndx: UseClass, OperandIdx: UseIdx); |
4076 | break; |
4077 | |
4078 | case ARM::VSTMDIA: |
4079 | case ARM::VSTMDIA_UPD: |
4080 | case ARM::VSTMDDB_UPD: |
4081 | case ARM::VSTMSIA: |
4082 | case ARM::VSTMSIA_UPD: |
4083 | case ARM::VSTMSDB_UPD: |
4084 | UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); |
4085 | break; |
4086 | |
4087 | case ARM::STMIA: |
4088 | case ARM::STMDA: |
4089 | case ARM::STMDB: |
4090 | case ARM::STMIB: |
4091 | case ARM::STMIA_UPD: |
4092 | case ARM::STMDA_UPD: |
4093 | case ARM::STMDB_UPD: |
4094 | case ARM::STMIB_UPD: |
4095 | case ARM::tSTMIA_UPD: |
4096 | case ARM::tPOP_RET: |
4097 | case ARM::tPOP: |
4098 | case ARM::t2STMIA: |
4099 | case ARM::t2STMDB: |
4100 | case ARM::t2STMIA_UPD: |
4101 | case ARM::t2STMDB_UPD: |
4102 | UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); |
4103 | break; |
4104 | } |
4105 | |
4106 | if (!UseCycle) |
4107 | // Assume it's read in the first stage. |
4108 | UseCycle = 1; |
4109 | |
4110 | if (UseCycle > *DefCycle + 1) |
4111 | return std::nullopt; |
4112 | |
4113 | UseCycle = *DefCycle - *UseCycle + 1; |
4114 | if (UseCycle > 0u) { |
4115 | if (LdmBypass) { |
4116 | // It's a variable_ops instruction so we can't use DefIdx here. Just use |
4117 | // first def operand. |
4118 | if (ItinData->hasPipelineForwarding(DefClass, DefIdx: DefMCID.getNumOperands()-1, |
4119 | UseClass, UseIdx)) |
4120 | UseCycle = *UseCycle - 1; |
4121 | } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, |
4122 | UseClass, UseIdx)) { |
4123 | UseCycle = *UseCycle - 1; |
4124 | } |
4125 | } |
4126 | |
4127 | return UseCycle; |
4128 | } |
4129 | |
4130 | static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, |
4131 | const MachineInstr *MI, unsigned Reg, |
4132 | unsigned &DefIdx, unsigned &Dist) { |
4133 | Dist = 0; |
4134 | |
4135 | MachineBasicBlock::const_iterator I = MI; ++I; |
4136 | MachineBasicBlock::const_instr_iterator II = std::prev(x: I.getInstrIterator()); |
4137 | assert(II->isInsideBundle() && "Empty bundle?" ); |
4138 | |
4139 | int Idx = -1; |
4140 | while (II->isInsideBundle()) { |
4141 | Idx = II->findRegisterDefOperandIdx(Reg, TRI, isDead: false, Overlap: true); |
4142 | if (Idx != -1) |
4143 | break; |
4144 | --II; |
4145 | ++Dist; |
4146 | } |
4147 | |
4148 | assert(Idx != -1 && "Cannot find bundled definition!" ); |
4149 | DefIdx = Idx; |
4150 | return &*II; |
4151 | } |
4152 | |
4153 | static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, |
4154 | const MachineInstr &MI, unsigned Reg, |
4155 | unsigned &UseIdx, unsigned &Dist) { |
4156 | Dist = 0; |
4157 | |
4158 | MachineBasicBlock::const_instr_iterator II = ++MI.getIterator(); |
  assert(II->isInsideBundle() && "Empty bundle?");
4160 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
4161 | |
4162 | // FIXME: This doesn't properly handle multiple uses. |
4163 | int Idx = -1; |
4164 | while (II != E && II->isInsideBundle()) { |
    Idx = II->findRegisterUseOperandIdx(Reg, TRI, /*isKill=*/false);
4166 | if (Idx != -1) |
4167 | break; |
4168 | if (II->getOpcode() != ARM::t2IT) |
4169 | ++Dist; |
4170 | ++II; |
4171 | } |
4172 | |
4173 | if (Idx == -1) { |
4174 | Dist = 0; |
4175 | return nullptr; |
4176 | } |
4177 | |
4178 | UseIdx = Idx; |
4179 | return &*II; |
4180 | } |
4181 | |
4182 | /// Return the number of cycles to add to (or subtract from) the static |
4183 | /// itinerary based on the def opcode and alignment. The caller will ensure that |
4184 | /// adjusted latency is at least one cycle. |
4185 | static int adjustDefLatency(const ARMSubtarget &Subtarget, |
4186 | const MachineInstr &DefMI, |
4187 | const MCInstrDesc &DefMCID, unsigned DefAlign) { |
4188 | int Adjust = 0; |
4189 | if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) { |
4190 | // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] |
4191 | // variants are one cycle cheaper. |
4192 | switch (DefMCID.getOpcode()) { |
4193 | default: break; |
4194 | case ARM::LDRrs: |
4195 | case ARM::LDRBrs: { |
      unsigned ShOpVal = DefMI.getOperand(3).getImm();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4200 | --Adjust; |
4201 | break; |
4202 | } |
4203 | case ARM::t2LDRs: |
4204 | case ARM::t2LDRBs: |
4205 | case ARM::t2LDRHs: |
4206 | case ARM::t2LDRSHs: { |
4207 | // Thumb2 mode: lsl only. |
      unsigned ShAmt = DefMI.getOperand(3).getImm();
4209 | if (ShAmt == 0 || ShAmt == 2) |
4210 | --Adjust; |
4211 | break; |
4212 | } |
4213 | } |
4214 | } else if (Subtarget.isSwift()) { |
4215 | // FIXME: Properly handle all of the latency adjustments for address |
4216 | // writeback. |
4217 | switch (DefMCID.getOpcode()) { |
4218 | default: break; |
4219 | case ARM::LDRrs: |
4220 | case ARM::LDRBrs: { |
      unsigned ShOpVal = DefMI.getOperand(3).getImm();
      bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4224 | if (!isSub && |
4225 | (ShImm == 0 || |
4226 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
4228 | Adjust -= 2; |
4229 | else if (!isSub && |
               ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4231 | --Adjust; |
4232 | break; |
4233 | } |
4234 | case ARM::t2LDRs: |
4235 | case ARM::t2LDRBs: |
4236 | case ARM::t2LDRHs: |
4237 | case ARM::t2LDRSHs: { |
4238 | // Thumb2 mode: lsl only. |
      unsigned ShAmt = DefMI.getOperand(3).getImm();
4240 | if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3) |
4241 | Adjust -= 2; |
4242 | break; |
4243 | } |
4244 | } |
4245 | } |
4246 | |
4247 | if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) { |
4248 | switch (DefMCID.getOpcode()) { |
4249 | default: break; |
4250 | case ARM::VLD1q8: |
4251 | case ARM::VLD1q16: |
4252 | case ARM::VLD1q32: |
4253 | case ARM::VLD1q64: |
4254 | case ARM::VLD1q8wb_fixed: |
4255 | case ARM::VLD1q16wb_fixed: |
4256 | case ARM::VLD1q32wb_fixed: |
4257 | case ARM::VLD1q64wb_fixed: |
4258 | case ARM::VLD1q8wb_register: |
4259 | case ARM::VLD1q16wb_register: |
4260 | case ARM::VLD1q32wb_register: |
4261 | case ARM::VLD1q64wb_register: |
4262 | case ARM::VLD2d8: |
4263 | case ARM::VLD2d16: |
4264 | case ARM::VLD2d32: |
4265 | case ARM::VLD2q8: |
4266 | case ARM::VLD2q16: |
4267 | case ARM::VLD2q32: |
4268 | case ARM::VLD2d8wb_fixed: |
4269 | case ARM::VLD2d16wb_fixed: |
4270 | case ARM::VLD2d32wb_fixed: |
4271 | case ARM::VLD2q8wb_fixed: |
4272 | case ARM::VLD2q16wb_fixed: |
4273 | case ARM::VLD2q32wb_fixed: |
4274 | case ARM::VLD2d8wb_register: |
4275 | case ARM::VLD2d16wb_register: |
4276 | case ARM::VLD2d32wb_register: |
4277 | case ARM::VLD2q8wb_register: |
4278 | case ARM::VLD2q16wb_register: |
4279 | case ARM::VLD2q32wb_register: |
4280 | case ARM::VLD3d8: |
4281 | case ARM::VLD3d16: |
4282 | case ARM::VLD3d32: |
4283 | case ARM::VLD1d64T: |
4284 | case ARM::VLD3d8_UPD: |
4285 | case ARM::VLD3d16_UPD: |
4286 | case ARM::VLD3d32_UPD: |
4287 | case ARM::VLD1d64Twb_fixed: |
4288 | case ARM::VLD1d64Twb_register: |
4289 | case ARM::VLD3q8_UPD: |
4290 | case ARM::VLD3q16_UPD: |
4291 | case ARM::VLD3q32_UPD: |
4292 | case ARM::VLD4d8: |
4293 | case ARM::VLD4d16: |
4294 | case ARM::VLD4d32: |
4295 | case ARM::VLD1d64Q: |
4296 | case ARM::VLD4d8_UPD: |
4297 | case ARM::VLD4d16_UPD: |
4298 | case ARM::VLD4d32_UPD: |
4299 | case ARM::VLD1d64Qwb_fixed: |
4300 | case ARM::VLD1d64Qwb_register: |
4301 | case ARM::VLD4q8_UPD: |
4302 | case ARM::VLD4q16_UPD: |
4303 | case ARM::VLD4q32_UPD: |
4304 | case ARM::VLD1DUPq8: |
4305 | case ARM::VLD1DUPq16: |
4306 | case ARM::VLD1DUPq32: |
4307 | case ARM::VLD1DUPq8wb_fixed: |
4308 | case ARM::VLD1DUPq16wb_fixed: |
4309 | case ARM::VLD1DUPq32wb_fixed: |
4310 | case ARM::VLD1DUPq8wb_register: |
4311 | case ARM::VLD1DUPq16wb_register: |
4312 | case ARM::VLD1DUPq32wb_register: |
4313 | case ARM::VLD2DUPd8: |
4314 | case ARM::VLD2DUPd16: |
4315 | case ARM::VLD2DUPd32: |
4316 | case ARM::VLD2DUPd8wb_fixed: |
4317 | case ARM::VLD2DUPd16wb_fixed: |
4318 | case ARM::VLD2DUPd32wb_fixed: |
4319 | case ARM::VLD2DUPd8wb_register: |
4320 | case ARM::VLD2DUPd16wb_register: |
4321 | case ARM::VLD2DUPd32wb_register: |
4322 | case ARM::VLD4DUPd8: |
4323 | case ARM::VLD4DUPd16: |
4324 | case ARM::VLD4DUPd32: |
4325 | case ARM::VLD4DUPd8_UPD: |
4326 | case ARM::VLD4DUPd16_UPD: |
4327 | case ARM::VLD4DUPd32_UPD: |
4328 | case ARM::VLD1LNd8: |
4329 | case ARM::VLD1LNd16: |
4330 | case ARM::VLD1LNd32: |
4331 | case ARM::VLD1LNd8_UPD: |
4332 | case ARM::VLD1LNd16_UPD: |
4333 | case ARM::VLD1LNd32_UPD: |
4334 | case ARM::VLD2LNd8: |
4335 | case ARM::VLD2LNd16: |
4336 | case ARM::VLD2LNd32: |
4337 | case ARM::VLD2LNq16: |
4338 | case ARM::VLD2LNq32: |
4339 | case ARM::VLD2LNd8_UPD: |
4340 | case ARM::VLD2LNd16_UPD: |
4341 | case ARM::VLD2LNd32_UPD: |
4342 | case ARM::VLD2LNq16_UPD: |
4343 | case ARM::VLD2LNq32_UPD: |
4344 | case ARM::VLD4LNd8: |
4345 | case ARM::VLD4LNd16: |
4346 | case ARM::VLD4LNd32: |
4347 | case ARM::VLD4LNq16: |
4348 | case ARM::VLD4LNq32: |
4349 | case ARM::VLD4LNd8_UPD: |
4350 | case ARM::VLD4LNd16_UPD: |
4351 | case ARM::VLD4LNd32_UPD: |
4352 | case ARM::VLD4LNq16_UPD: |
4353 | case ARM::VLD4LNq32_UPD: |
      // If the address is not 64-bit aligned, the latencies of these
      // instructions increase by one.
4356 | ++Adjust; |
4357 | break; |
4358 | } |
4359 | } |
4360 | return Adjust; |
4361 | } |
4362 | |
4363 | std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency( |
4364 | const InstrItineraryData *ItinData, const MachineInstr &DefMI, |
4365 | unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const { |
4366 | // No operand latency. The caller may fall back to getInstrLatency. |
4367 | if (!ItinData || ItinData->isEmpty()) |
4368 | return std::nullopt; |
4369 | |
  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
4371 | Register Reg = DefMO.getReg(); |
4372 | |
4373 | const MachineInstr *ResolvedDefMI = &DefMI; |
4374 | unsigned DefAdj = 0; |
4375 | if (DefMI.isBundle()) |
    ResolvedDefMI =
        getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);
4378 | if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() || |
4379 | ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) { |
4380 | return 1; |
4381 | } |
4382 | |
4383 | const MachineInstr *ResolvedUseMI = &UseMI; |
4384 | unsigned UseAdj = 0; |
4385 | if (UseMI.isBundle()) { |
    ResolvedUseMI =
        getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);
4388 | if (!ResolvedUseMI) |
4389 | return std::nullopt; |
4390 | } |
4391 | |
  return getOperandLatencyImpl(
      ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,
      Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);
4395 | } |
4396 | |
4397 | std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl( |
4398 | const InstrItineraryData *ItinData, const MachineInstr &DefMI, |
4399 | unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj, |
4400 | const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI, |
4401 | unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const { |
4402 | if (Reg == ARM::CPSR) { |
4403 | if (DefMI.getOpcode() == ARM::FMSTAT) { |
4404 | // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) |
4405 | return Subtarget.isLikeA9() ? 1 : 20; |
4406 | } |
4407 | |
4408 | // CPSR set and branch can be paired in the same cycle. |
4409 | if (UseMI.isBranch()) |
4410 | return 0; |
4411 | |
4412 | // Otherwise it takes the instruction latency (generally one). |
    unsigned Latency = getInstrLatency(ItinData, DefMI);
4414 | |
4415 | // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to |
4416 | // its uses. Instructions which are otherwise scheduled between them may |
4417 | // incur a code size penalty (not able to use the CPSR setting 16-bit |
4418 | // instructions). |
4419 | if (Latency > 0 && Subtarget.isThumb2()) { |
4420 | const MachineFunction *MF = DefMI.getParent()->getParent(); |
4421 | // FIXME: Use Function::hasOptSize(). |
      if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))
4423 | --Latency; |
4424 | } |
4425 | return Latency; |
4426 | } |
4427 | |
  if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())
4429 | return std::nullopt; |
4430 | |
4431 | unsigned DefAlign = DefMI.hasOneMemOperand() |
4432 | ? (*DefMI.memoperands_begin())->getAlign().value() |
4433 | : 0; |
4434 | unsigned UseAlign = UseMI.hasOneMemOperand() |
4435 | ? (*UseMI.memoperands_begin())->getAlign().value() |
4436 | : 0; |
4437 | |
4438 | // Get the itinerary's latency if possible, and handle variable_ops. |
4439 | std::optional<unsigned> Latency = getOperandLatency( |
4440 | ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign); |
4441 | // Unable to find operand latency. The caller may resort to getInstrLatency. |
4442 | if (!Latency) |
4443 | return std::nullopt; |
4444 | |
4445 | // Adjust for IT block position. |
4446 | int Adj = DefAdj + UseAdj; |
4447 | |
4448 | // Adjust for dynamic def-side opcode variants not captured by the itinerary. |
4449 | Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign); |
4450 | if (Adj >= 0 || (int)*Latency > -Adj) { |
4451 | return *Latency + Adj; |
4452 | } |
4453 | // Return the itinerary latency, which may be zero but not less than zero. |
4454 | return Latency; |
4455 | } |
4456 | |
4457 | std::optional<unsigned> |
4458 | ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, |
4459 | SDNode *DefNode, unsigned DefIdx, |
4460 | SDNode *UseNode, unsigned UseIdx) const { |
4461 | if (!DefNode->isMachineOpcode()) |
4462 | return 1; |
4463 | |
  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
4465 | |
  if (isZeroCost(DefMCID.Opcode))
4467 | return 0; |
4468 | |
4469 | if (!ItinData || ItinData->isEmpty()) |
4470 | return DefMCID.mayLoad() ? 3 : 1; |
4471 | |
4472 | if (!UseNode->isMachineOpcode()) { |
    std::optional<unsigned> Latency =
        ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
4475 | int Adj = Subtarget.getPreISelOperandLatencyAdjustment(); |
4476 | int Threshold = 1 + Adj; |
4477 | return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj; |
4478 | } |
4479 | |
  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
  auto *DefMN = cast<MachineSDNode>(DefNode);
4482 | unsigned DefAlign = !DefMN->memoperands_empty() |
4483 | ? (*DefMN->memoperands_begin())->getAlign().value() |
4484 | : 0; |
  auto *UseMN = cast<MachineSDNode>(UseNode);
4486 | unsigned UseAlign = !UseMN->memoperands_empty() |
4487 | ? (*UseMN->memoperands_begin())->getAlign().value() |
4488 | : 0; |
4489 | std::optional<unsigned> Latency = getOperandLatency( |
4490 | ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign); |
4491 | if (!Latency) |
4492 | return std::nullopt; |
4493 | |
4494 | if (Latency > 1U && |
4495 | (Subtarget.isCortexA8() || Subtarget.isLikeA9() || |
4496 | Subtarget.isCortexA7())) { |
4497 | // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] |
4498 | // variants are one cycle cheaper. |
4499 | switch (DefMCID.getOpcode()) { |
4500 | default: break; |
4501 | case ARM::LDRrs: |
4502 | case ARM::LDRBrs: { |
      unsigned ShOpVal = DefNode->getConstantOperandVal(2);
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4507 | Latency = *Latency - 1; |
4508 | break; |
4509 | } |
4510 | case ARM::t2LDRs: |
4511 | case ARM::t2LDRBs: |
4512 | case ARM::t2LDRHs: |
4513 | case ARM::t2LDRSHs: { |
4514 | // Thumb2 mode: lsl only. |
      unsigned ShAmt = DefNode->getConstantOperandVal(2);
4516 | if (ShAmt == 0 || ShAmt == 2) |
4517 | Latency = *Latency - 1; |
4518 | break; |
4519 | } |
4520 | } |
4521 | } else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) { |
4522 | // FIXME: Properly handle all of the latency adjustments for address |
4523 | // writeback. |
4524 | switch (DefMCID.getOpcode()) { |
4525 | default: break; |
4526 | case ARM::LDRrs: |
4527 | case ARM::LDRBrs: { |
      unsigned ShOpVal = DefNode->getConstantOperandVal(2);
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
4530 | if (ShImm == 0 || |
4531 | ((ShImm == 1 || ShImm == 2 || ShImm == 3) && |
           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
4533 | Latency = *Latency - 2; |
      else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
4535 | Latency = *Latency - 1; |
4536 | break; |
4537 | } |
4538 | case ARM::t2LDRs: |
4539 | case ARM::t2LDRBs: |
4540 | case ARM::t2LDRHs: |
4541 | case ARM::t2LDRSHs: |
4542 | // Thumb2 mode: lsl 0-3 only. |
4543 | Latency = *Latency - 2; |
4544 | break; |
4545 | } |
4546 | } |
4547 | |
4548 | if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) |
4549 | switch (DefMCID.getOpcode()) { |
4550 | default: break; |
4551 | case ARM::VLD1q8: |
4552 | case ARM::VLD1q16: |
4553 | case ARM::VLD1q32: |
4554 | case ARM::VLD1q64: |
4555 | case ARM::VLD1q8wb_register: |
4556 | case ARM::VLD1q16wb_register: |
4557 | case ARM::VLD1q32wb_register: |
4558 | case ARM::VLD1q64wb_register: |
4559 | case ARM::VLD1q8wb_fixed: |
4560 | case ARM::VLD1q16wb_fixed: |
4561 | case ARM::VLD1q32wb_fixed: |
4562 | case ARM::VLD1q64wb_fixed: |
4563 | case ARM::VLD2d8: |
4564 | case ARM::VLD2d16: |
4565 | case ARM::VLD2d32: |
4566 | case ARM::VLD2q8Pseudo: |
4567 | case ARM::VLD2q16Pseudo: |
4568 | case ARM::VLD2q32Pseudo: |
4569 | case ARM::VLD2d8wb_fixed: |
4570 | case ARM::VLD2d16wb_fixed: |
4571 | case ARM::VLD2d32wb_fixed: |
4572 | case ARM::VLD2q8PseudoWB_fixed: |
4573 | case ARM::VLD2q16PseudoWB_fixed: |
4574 | case ARM::VLD2q32PseudoWB_fixed: |
4575 | case ARM::VLD2d8wb_register: |
4576 | case ARM::VLD2d16wb_register: |
4577 | case ARM::VLD2d32wb_register: |
4578 | case ARM::VLD2q8PseudoWB_register: |
4579 | case ARM::VLD2q16PseudoWB_register: |
4580 | case ARM::VLD2q32PseudoWB_register: |
4581 | case ARM::VLD3d8Pseudo: |
4582 | case ARM::VLD3d16Pseudo: |
4583 | case ARM::VLD3d32Pseudo: |
4584 | case ARM::VLD1d8TPseudo: |
4585 | case ARM::VLD1d16TPseudo: |
4586 | case ARM::VLD1d32TPseudo: |
4587 | case ARM::VLD1d64TPseudo: |
4588 | case ARM::VLD1d64TPseudoWB_fixed: |
4589 | case ARM::VLD1d64TPseudoWB_register: |
4590 | case ARM::VLD3d8Pseudo_UPD: |
4591 | case ARM::VLD3d16Pseudo_UPD: |
4592 | case ARM::VLD3d32Pseudo_UPD: |
4593 | case ARM::VLD3q8Pseudo_UPD: |
4594 | case ARM::VLD3q16Pseudo_UPD: |
4595 | case ARM::VLD3q32Pseudo_UPD: |
4596 | case ARM::VLD3q8oddPseudo: |
4597 | case ARM::VLD3q16oddPseudo: |
4598 | case ARM::VLD3q32oddPseudo: |
4599 | case ARM::VLD3q8oddPseudo_UPD: |
4600 | case ARM::VLD3q16oddPseudo_UPD: |
4601 | case ARM::VLD3q32oddPseudo_UPD: |
4602 | case ARM::VLD4d8Pseudo: |
4603 | case ARM::VLD4d16Pseudo: |
4604 | case ARM::VLD4d32Pseudo: |
4605 | case ARM::VLD1d8QPseudo: |
4606 | case ARM::VLD1d16QPseudo: |
4607 | case ARM::VLD1d32QPseudo: |
4608 | case ARM::VLD1d64QPseudo: |
4609 | case ARM::VLD1d64QPseudoWB_fixed: |
4610 | case ARM::VLD1d64QPseudoWB_register: |
4611 | case ARM::VLD1q8HighQPseudo: |
4612 | case ARM::VLD1q8LowQPseudo_UPD: |
4613 | case ARM::VLD1q8HighTPseudo: |
4614 | case ARM::VLD1q8LowTPseudo_UPD: |
4615 | case ARM::VLD1q16HighQPseudo: |
4616 | case ARM::VLD1q16LowQPseudo_UPD: |
4617 | case ARM::VLD1q16HighTPseudo: |
4618 | case ARM::VLD1q16LowTPseudo_UPD: |
4619 | case ARM::VLD1q32HighQPseudo: |
4620 | case ARM::VLD1q32LowQPseudo_UPD: |
4621 | case ARM::VLD1q32HighTPseudo: |
4622 | case ARM::VLD1q32LowTPseudo_UPD: |
4623 | case ARM::VLD1q64HighQPseudo: |
4624 | case ARM::VLD1q64LowQPseudo_UPD: |
4625 | case ARM::VLD1q64HighTPseudo: |
4626 | case ARM::VLD1q64LowTPseudo_UPD: |
4627 | case ARM::VLD4d8Pseudo_UPD: |
4628 | case ARM::VLD4d16Pseudo_UPD: |
4629 | case ARM::VLD4d32Pseudo_UPD: |
4630 | case ARM::VLD4q8Pseudo_UPD: |
4631 | case ARM::VLD4q16Pseudo_UPD: |
4632 | case ARM::VLD4q32Pseudo_UPD: |
4633 | case ARM::VLD4q8oddPseudo: |
4634 | case ARM::VLD4q16oddPseudo: |
4635 | case ARM::VLD4q32oddPseudo: |
4636 | case ARM::VLD4q8oddPseudo_UPD: |
4637 | case ARM::VLD4q16oddPseudo_UPD: |
4638 | case ARM::VLD4q32oddPseudo_UPD: |
4639 | case ARM::VLD1DUPq8: |
4640 | case ARM::VLD1DUPq16: |
4641 | case ARM::VLD1DUPq32: |
4642 | case ARM::VLD1DUPq8wb_fixed: |
4643 | case ARM::VLD1DUPq16wb_fixed: |
4644 | case ARM::VLD1DUPq32wb_fixed: |
4645 | case ARM::VLD1DUPq8wb_register: |
4646 | case ARM::VLD1DUPq16wb_register: |
4647 | case ARM::VLD1DUPq32wb_register: |
4648 | case ARM::VLD2DUPd8: |
4649 | case ARM::VLD2DUPd16: |
4650 | case ARM::VLD2DUPd32: |
4651 | case ARM::VLD2DUPd8wb_fixed: |
4652 | case ARM::VLD2DUPd16wb_fixed: |
4653 | case ARM::VLD2DUPd32wb_fixed: |
4654 | case ARM::VLD2DUPd8wb_register: |
4655 | case ARM::VLD2DUPd16wb_register: |
4656 | case ARM::VLD2DUPd32wb_register: |
4657 | case ARM::VLD2DUPq8EvenPseudo: |
4658 | case ARM::VLD2DUPq8OddPseudo: |
4659 | case ARM::VLD2DUPq16EvenPseudo: |
4660 | case ARM::VLD2DUPq16OddPseudo: |
4661 | case ARM::VLD2DUPq32EvenPseudo: |
4662 | case ARM::VLD2DUPq32OddPseudo: |
4663 | case ARM::VLD3DUPq8EvenPseudo: |
4664 | case ARM::VLD3DUPq8OddPseudo: |
4665 | case ARM::VLD3DUPq16EvenPseudo: |
4666 | case ARM::VLD3DUPq16OddPseudo: |
4667 | case ARM::VLD3DUPq32EvenPseudo: |
4668 | case ARM::VLD3DUPq32OddPseudo: |
4669 | case ARM::VLD4DUPd8Pseudo: |
4670 | case ARM::VLD4DUPd16Pseudo: |
4671 | case ARM::VLD4DUPd32Pseudo: |
4672 | case ARM::VLD4DUPd8Pseudo_UPD: |
4673 | case ARM::VLD4DUPd16Pseudo_UPD: |
4674 | case ARM::VLD4DUPd32Pseudo_UPD: |
4675 | case ARM::VLD4DUPq8EvenPseudo: |
4676 | case ARM::VLD4DUPq8OddPseudo: |
4677 | case ARM::VLD4DUPq16EvenPseudo: |
4678 | case ARM::VLD4DUPq16OddPseudo: |
4679 | case ARM::VLD4DUPq32EvenPseudo: |
4680 | case ARM::VLD4DUPq32OddPseudo: |
4681 | case ARM::VLD1LNq8Pseudo: |
4682 | case ARM::VLD1LNq16Pseudo: |
4683 | case ARM::VLD1LNq32Pseudo: |
4684 | case ARM::VLD1LNq8Pseudo_UPD: |
4685 | case ARM::VLD1LNq16Pseudo_UPD: |
4686 | case ARM::VLD1LNq32Pseudo_UPD: |
4687 | case ARM::VLD2LNd8Pseudo: |
4688 | case ARM::VLD2LNd16Pseudo: |
4689 | case ARM::VLD2LNd32Pseudo: |
4690 | case ARM::VLD2LNq16Pseudo: |
4691 | case ARM::VLD2LNq32Pseudo: |
4692 | case ARM::VLD2LNd8Pseudo_UPD: |
4693 | case ARM::VLD2LNd16Pseudo_UPD: |
4694 | case ARM::VLD2LNd32Pseudo_UPD: |
4695 | case ARM::VLD2LNq16Pseudo_UPD: |
4696 | case ARM::VLD2LNq32Pseudo_UPD: |
4697 | case ARM::VLD4LNd8Pseudo: |
4698 | case ARM::VLD4LNd16Pseudo: |
4699 | case ARM::VLD4LNd32Pseudo: |
4700 | case ARM::VLD4LNq16Pseudo: |
4701 | case ARM::VLD4LNq32Pseudo: |
4702 | case ARM::VLD4LNd8Pseudo_UPD: |
4703 | case ARM::VLD4LNd16Pseudo_UPD: |
4704 | case ARM::VLD4LNd32Pseudo_UPD: |
4705 | case ARM::VLD4LNq16Pseudo_UPD: |
4706 | case ARM::VLD4LNq32Pseudo_UPD: |
      // If the address is not 64-bit aligned, the latencies of these
      // instructions increase by one.
4709 | Latency = *Latency + 1; |
4710 | break; |
4711 | } |
4712 | |
4713 | return Latency; |
4714 | } |
4715 | |
4716 | unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const { |
4717 | if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || |
4718 | MI.isImplicitDef()) |
4719 | return 0; |
4720 | |
4721 | if (MI.isBundle()) |
4722 | return 0; |
4723 | |
4724 | const MCInstrDesc &MCID = MI.getDesc(); |
4725 | |
  if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
                        !Subtarget.cheapPredicableCPSRDef())) {
    // When predicated, CPSR is an additional source operand for CPSR updating
    // instructions; this apparently increases their latencies.
4730 | return 1; |
4731 | } |
4732 | return 0; |
4733 | } |
4734 | |
4735 | unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
4736 | const MachineInstr &MI, |
4737 | unsigned *PredCost) const { |
4738 | if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() || |
4739 | MI.isImplicitDef()) |
4740 | return 1; |
4741 | |
  // An instruction scheduler typically runs on unbundled instructions; however,
  // other passes may query the latency of a bundled instruction.
4744 | if (MI.isBundle()) { |
4745 | unsigned Latency = 0; |
4746 | MachineBasicBlock::const_instr_iterator I = MI.getIterator(); |
4747 | MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); |
4748 | while (++I != E && I->isInsideBundle()) { |
4749 | if (I->getOpcode() != ARM::t2IT) |
        Latency += getInstrLatency(ItinData, *I, PredCost);
4751 | } |
4752 | return Latency; |
4753 | } |
4754 | |
4755 | const MCInstrDesc &MCID = MI.getDesc(); |
  if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
                                     !Subtarget.cheapPredicableCPSRDef()))) {
    // When predicated, CPSR is an additional source operand for CPSR updating
    // instructions; this apparently increases their latencies.
4760 | *PredCost = 1; |
4761 | } |
4762 | // Be sure to call getStageLatency for an empty itinerary in case it has a |
4763 | // valid MinLatency property. |
4764 | if (!ItinData) |
4765 | return MI.mayLoad() ? 3 : 1; |
4766 | |
4767 | unsigned Class = MCID.getSchedClass(); |
4768 | |
4769 | // For instructions with variable uops, use uops as latency. |
  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
4771 | return getNumMicroOps(ItinData, MI); |
4772 | |
4773 | // For the common case, fall back on the itinerary's latency. |
  unsigned Latency = ItinData->getStageLatency(Class);
4775 | |
4776 | // Adjust for dynamic def-side opcode variants not captured by the itinerary. |
4777 | unsigned DefAlign = |
4778 | MI.hasOneMemOperand() ? (*MI.memoperands_begin())->getAlign().value() : 0; |
  int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);
4780 | if (Adj >= 0 || (int)Latency > -Adj) { |
4781 | return Latency + Adj; |
4782 | } |
4783 | return Latency; |
4784 | } |
4785 | |
4786 | unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
4787 | SDNode *Node) const { |
4788 | if (!Node->isMachineOpcode()) |
4789 | return 1; |
4790 | |
4791 | if (!ItinData || ItinData->isEmpty()) |
4792 | return 1; |
4793 | |
4794 | unsigned Opcode = Node->getMachineOpcode(); |
4795 | switch (Opcode) { |
4796 | default: |
    return ItinData->getStageLatency(get(Opcode).getSchedClass());
4798 | case ARM::VLDMQIA: |
4799 | case ARM::VSTMQIA: |
4800 | return 2; |
4801 | } |
4802 | } |
4803 | |
4804 | bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel, |
4805 | const MachineRegisterInfo *MRI, |
4806 | const MachineInstr &DefMI, |
4807 | unsigned DefIdx, |
4808 | const MachineInstr &UseMI, |
4809 | unsigned UseIdx) const { |
4810 | unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; |
4811 | unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask; |
4812 | if (Subtarget.nonpipelinedVFP() && |
4813 | (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) |
4814 | return true; |
4815 | |
4816 | // Hoist VFP / NEON instructions with 4 or higher latency. |
  unsigned Latency =
      SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);
4819 | if (Latency <= 3) |
4820 | return false; |
4821 | return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || |
4822 | UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; |
4823 | } |
4824 | |
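// A reading of the checks below: a def counts as "low latency" only when it
// is a general-domain (integer) instruction whose itinerary reports the
// result ready within two cycles; VFP and NEON defs never qualify.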
4825 | bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, |
4826 | const MachineInstr &DefMI, |
4827 | unsigned DefIdx) const { |
4828 | const InstrItineraryData *ItinData = SchedModel.getInstrItineraries(); |
4829 | if (!ItinData || ItinData->isEmpty()) |
4830 | return false; |
4831 | |
4832 | unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask; |
4833 | if (DDomain == ARMII::DomainGeneral) { |
4834 | unsigned DefClass = DefMI.getDesc().getSchedClass(); |
    std::optional<unsigned> DefCycle =
        ItinData->getOperandCycle(DefClass, DefIdx);
4837 | return DefCycle && DefCycle <= 2U; |
4838 | } |
4839 | return false; |
4840 | } |
4841 | |
4842 | bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI, |
4843 | StringRef &ErrInfo) const { |
  if (convertAddSubFlagsOpcode(MI.getOpcode())) {
    ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
4846 | return false; |
4847 | } |
4848 | if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) { |
4849 | // Make sure we don't generate a lo-lo mov that isn't supported. |
    if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&
        !ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {
      ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";
4853 | return false; |
4854 | } |
4855 | } |
4856 | if (MI.getOpcode() == ARM::tPUSH || |
4857 | MI.getOpcode() == ARM::tPOP || |
4858 | MI.getOpcode() == ARM::tPOP_RET) { |
    for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
4860 | if (MO.isImplicit() || !MO.isReg()) |
4861 | continue; |
4862 | Register Reg = MO.getReg(); |
4863 | if (Reg < ARM::R0 || Reg > ARM::R7) { |
4864 | if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) && |
4865 | !(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) { |
          ErrInfo = "Unsupported register in Thumb1 push/pop";
4867 | return false; |
4868 | } |
4869 | } |
4870 | } |
4871 | } |
4872 | if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) { |
4873 | assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm()); |
    if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||
        MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {
      ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";
4877 | return false; |
4878 | } |
4879 | } |
4880 | |
4881 | // Check the address model by taking the first Imm operand and checking it is |
4882 | // legal for that addressing mode. |
4883 | ARMII::AddrMode AddrMode = |
4884 | (ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask); |
4885 | switch (AddrMode) { |
4886 | default: |
4887 | break; |
4888 | case ARMII::AddrModeT2_i7: |
4889 | case ARMII::AddrModeT2_i7s2: |
4890 | case ARMII::AddrModeT2_i7s4: |
4891 | case ARMII::AddrModeT2_i8: |
4892 | case ARMII::AddrModeT2_i8pos: |
4893 | case ARMII::AddrModeT2_i8neg: |
4894 | case ARMII::AddrModeT2_i8s4: |
4895 | case ARMII::AddrModeT2_i12: { |
4896 | uint32_t Imm = 0; |
4897 | for (auto Op : MI.operands()) { |
4898 | if (Op.isImm()) { |
4899 | Imm = Op.getImm(); |
4900 | break; |
4901 | } |
4902 | } |
    if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {
      ErrInfo = "Incorrect AddrMode Imm for instruction";
4905 | return false; |
4906 | } |
4907 | break; |
4908 | } |
4909 | } |
4910 | return true; |
4911 | } |
4912 | |
4913 | void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, |
4914 | unsigned LoadImmOpc, |
4915 | unsigned LoadOpc) const { |
  assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
         "ROPI/RWPI not currently supported with stack guard");
4918 | |
4919 | MachineBasicBlock &MBB = *MI->getParent(); |
4920 | DebugLoc DL = MI->getDebugLoc(); |
  Register Reg = MI->getOperand(0).getReg();
4922 | MachineInstrBuilder MIB; |
4923 | unsigned int Offset = 0; |
4924 | |
4925 | if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) { |
    assert(!Subtarget.isReadTPSoft() &&
           "TLS stack protector requires hardware TLS register");
4928 | |
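    // The immediates below encode "mrc p15, #0, Reg, c13, c0, #3", i.e. a
    // read of the user read-only thread pointer (TPIDRURO); the stack guard
    // is then loaded from that base at the configured offset. (Interpretive
    // note on the builder call that follows.)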
    BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
        .addImm(15)
        .addImm(0)
        .addImm(13)
        .addImm(0)
        .addImm(3)
        .add(predOps(ARMCC::AL));
4936 | |
4937 | Module &M = *MBB.getParent()->getFunction().getParent(); |
4938 | Offset = M.getStackProtectorGuardOffset(); |
4939 | if (Offset & ~0xfffU) { |
4940 | // The offset won't fit in the LDR's 12-bit immediate field, so emit an |
4941 | // extra ADD to cover the delta. This gives us a guaranteed 8 additional |
4942 | // bits, resulting in a range of 0 to +1 MiB for the guard offset. |
4943 | unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri; |
      BuildMI(MBB, MI, DL, get(AddOpc), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(Offset & ~0xfffU)
          .add(predOps(ARMCC::AL))
          .addReg(0);
4949 | Offset &= 0xfffU; |
4950 | } |
4951 | } else { |
    const GlobalValue *GV =
        cast<GlobalValue>((*MI->memoperands_begin())->getValue());
4954 | bool IsIndirect = Subtarget.isGVIndirectSymbol(GV); |
4955 | |
4956 | unsigned TargetFlags = ARMII::MO_NO_FLAG; |
4957 | if (Subtarget.isTargetMachO()) { |
4958 | TargetFlags |= ARMII::MO_NONLAZY; |
4959 | } else if (Subtarget.isTargetCOFF()) { |
4960 | if (GV->hasDLLImportStorageClass()) |
4961 | TargetFlags |= ARMII::MO_DLLIMPORT; |
4962 | else if (IsIndirect) |
4963 | TargetFlags |= ARMII::MO_COFFSTUB; |
4964 | } else if (IsIndirect) { |
4965 | TargetFlags |= ARMII::MO_GOT; |
4966 | } |
4967 | |
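    // Note on the execute-only path below: materializing the 32-bit address
    // with tMOVi32imm may clobber the flags, so APSR_nzcvq is saved to a
    // scratch register and restored around it (an interpretation of the
    // MRS/MSR pair that follows).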
4968 | if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only |
4969 | Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register |
      auto APSREncoding =
          ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;
      BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)
          .addImm(APSREncoding)
          .add(predOps(ARMCC::AL));
      BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
          .addGlobalAddress(GV, 0, TargetFlags);
      BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))
          .addImm(APSREncoding)
          .addReg(CPSRSaveReg, RegState::Kill)
          .add(predOps(ARMCC::AL));
4981 | } else { |
      BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
          .addGlobalAddress(GV, 0, TargetFlags);
4984 | } |
4985 | |
4986 | if (IsIndirect) { |
      MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
      MIB.addReg(Reg, RegState::Kill).addImm(0);
      auto Flags = MachineMemOperand::MOLoad |
                   MachineMemOperand::MODereferenceable |
                   MachineMemOperand::MOInvariant;
      MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
          MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
      MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
4995 | } |
4996 | } |
4997 | |
  MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
  MIB.addReg(Reg, RegState::Kill)
      .addImm(Offset)
      .cloneMemRefs(*MI)
      .add(predOps(ARMCC::AL));
5003 | } |
5004 | |
5005 | bool |
5006 | ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, |
5007 | unsigned &AddSubOpc, |
5008 | bool &NegAcc, bool &HasLane) const { |
  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
5010 | if (I == MLxEntryMap.end()) |
5011 | return false; |
5012 | |
5013 | const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; |
5014 | MulOpc = Entry.MulOpc; |
5015 | AddSubOpc = Entry.AddSubOpc; |
5016 | NegAcc = Entry.NegAcc; |
5017 | HasLane = Entry.HasLane; |
5018 | return true; |
5019 | } |
5020 | |
5021 | //===----------------------------------------------------------------------===// |
5022 | // Execution domains. |
5023 | //===----------------------------------------------------------------------===// |
5024 | // |
5025 | // Some instructions go down the NEON pipeline, some go down the VFP pipeline, |
5026 | // and some can go down both. The vmov instructions go down the VFP pipeline, |
5027 | // but they can be changed to vorr equivalents that are executed by the NEON |
5028 | // pipeline. |
5029 | // |
5030 | // We use the following execution domain numbering: |
5031 | // |
5032 | enum ARMExeDomain { |
5033 | ExeGeneric = 0, |
5034 | ExeVFP = 1, |
5035 | ExeNEON = 2 |
5036 | }; |
5037 | |
5038 | // |
5039 | // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h |
5040 | // |
5041 | std::pair<uint16_t, uint16_t> |
5042 | ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const { |
5043 | // If we don't have access to NEON instructions then we won't be able |
5044 | // to swizzle anything to the NEON domain. Check to make sure. |
5045 | if (Subtarget.hasNEON()) { |
5046 | // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON |
5047 | // if they are not predicated. |
5048 | if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI)) |
      return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5050 | |
5051 | // CortexA9 is particularly picky about mixing the two and wants these |
5052 | // converted. |
5053 | if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) && |
5054 | (MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR || |
5055 | MI.getOpcode() == ARM::VMOVS)) |
      return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
5057 | } |
5058 | // No other instructions can be swizzled, so just determine their domain. |
5059 | unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask; |
5060 | |
5061 | if (Domain & ARMII::DomainNEON) |
    return std::make_pair(ExeNEON, 0);
5063 | |
5064 | // Certain instructions can go either way on Cortex-A8. |
5065 | // Treat them as NEON instructions. |
5066 | if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8()) |
    return std::make_pair(ExeNEON, 0);
5068 | |
5069 | if (Domain & ARMII::DomainVFP) |
    return std::make_pair(ExeVFP, 0);
5071 | |
  return std::make_pair(ExeGeneric, 0);
5073 | } |
5074 | |
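// Map an S-register to the D-register that contains it, reporting via Lane
// whether it sits in the low (0) or high (1) half of that D-register.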
5075 | static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI, |
5076 | unsigned SReg, unsigned &Lane) { |
  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
5078 | Lane = 0; |
5079 | |
5080 | if (DReg != ARM::NoRegister) |
5081 | return DReg; |
5082 | |
5083 | Lane = 1; |
  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
5085 | |
  assert(DReg && "S-register with no D super-register?");
5087 | return DReg; |
5088 | } |
5089 | |
5090 | /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane, |
5091 | /// set ImplicitSReg to a register number that must be marked as implicit-use or |
5092 | /// zero if no register needs to be defined as implicit-use. |
5093 | /// |
5094 | /// If the function cannot determine if an SPR should be marked implicit use or |
5095 | /// not, it returns false. |
5096 | /// |
5097 | /// This function handles cases where an instruction is being modified from taking |
5098 | /// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict |
5099 | /// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other |
5100 | /// lane of the DPR). |
5101 | /// |
/// If the other SPR is defined, an implicit-use of it should be added.
/// Otherwise (including the case where the DPR itself is defined), it should
/// not.
5104 | /// |
5105 | static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI, |
5106 | MachineInstr &MI, unsigned DReg, |
5107 | unsigned Lane, unsigned &ImplicitSReg) { |
5108 | // If the DPR is defined or used already, the other SPR lane will be chained |
5109 | // correctly, so there is nothing to be done. |
  if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {
5111 | ImplicitSReg = 0; |
5112 | return true; |
5113 | } |
5114 | |
5115 | // Otherwise we need to go searching to see if the SPR is set explicitly. |
  ImplicitSReg = TRI->getSubReg(DReg,
                                (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
5118 | MachineBasicBlock::LivenessQueryResult LQR = |
      MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
5120 | |
5121 | if (LQR == MachineBasicBlock::LQR_Live) |
5122 | return true; |
5123 | else if (LQR == MachineBasicBlock::LQR_Unknown) |
5124 | return false; |
5125 | |
5126 | // If the register is known not to be live, there is no need to add an |
5127 | // implicit-use. |
5128 | ImplicitSReg = 0; |
5129 | return true; |
5130 | } |
5131 | |
5132 | void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, |
5133 | unsigned Domain) const { |
5134 | unsigned DstReg, SrcReg, DReg; |
5135 | unsigned Lane; |
5136 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); |
5137 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
5138 | switch (MI.getOpcode()) { |
5139 | default: |
    llvm_unreachable("cannot handle opcode!");
5141 | break; |
5142 | case ARM::VMOVD: |
5143 | if (Domain != ExeNEON) |
5144 | break; |
5145 | |
5146 | // Zap the predicate operands. |
    assert(!isPredicated(MI) && "Cannot predicate a VORRd");
5148 | |
5149 | // Make sure we've got NEON instructions. |
    assert(Subtarget.hasNEON() && "VORRd requires NEON");
5151 | |
5152 | // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits) |
    DstReg = MI.getOperand(0).getReg();
    SrcReg = MI.getOperand(1).getReg();
5155 | |
5156 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
      MI.removeOperand(i - 1);
5158 | |
5159 | // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits) |
    MI.setDesc(get(ARM::VORRd));
    MIB.addReg(DstReg, RegState::Define)
        .addReg(SrcReg)
        .addReg(SrcReg)
        .add(predOps(ARMCC::AL));
5165 | break; |
5166 | case ARM::VMOVRS: |
5167 | if (Domain != ExeNEON) |
5168 | break; |
    assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
5170 | |
5171 | // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits) |
    DstReg = MI.getOperand(0).getReg();
    SrcReg = MI.getOperand(1).getReg();
5174 | |
5175 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
      MI.removeOperand(i - 1);
5177 | |
    DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
5179 | |
5180 | // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps) |
5181 | // Note that DSrc has been widened and the other lane may be undef, which |
5182 | // contaminates the entire register. |
    MI.setDesc(get(ARM::VGETLNi32));
    MIB.addReg(DstReg, RegState::Define)
        .addReg(DReg, RegState::Undef)
        .addImm(Lane)
        .add(predOps(ARMCC::AL));
5188 | |
5189 | // The old source should be an implicit use, otherwise we might think it |
5190 | // was dead before here. |
    MIB.addReg(SrcReg, RegState::Implicit);
5192 | break; |
5193 | case ARM::VMOVSR: { |
5194 | if (Domain != ExeNEON) |
5195 | break; |
    assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
5197 | |
5198 | // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits) |
    DstReg = MI.getOperand(0).getReg();
    SrcReg = MI.getOperand(1).getReg();
5201 | |
    DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
5203 | |
5204 | unsigned ImplicitSReg; |
5205 | if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg)) |
5206 | break; |
5207 | |
5208 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
      MI.removeOperand(i - 1);
5210 | |
5211 | // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps) |
5212 | // Again DDst may be undefined at the beginning of this instruction. |
    MI.setDesc(get(ARM::VSETLNi32));
    MIB.addReg(DReg, RegState::Define)
        .addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
        .addReg(SrcReg)
        .addImm(Lane)
        .add(predOps(ARMCC::AL));
5219 | |
5220 | // The narrower destination must be marked as set to keep previous chains |
5221 | // in place. |
    MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
    if (ImplicitSReg != 0)
      MIB.addReg(ImplicitSReg, RegState::Implicit);
5225 | break; |
5226 | } |
5227 | case ARM::VMOVS: { |
5228 | if (Domain != ExeNEON) |
5229 | break; |
5230 | |
5231 | // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits) |
    DstReg = MI.getOperand(0).getReg();
    SrcReg = MI.getOperand(1).getReg();
5234 | |
5235 | unsigned DstLane = 0, SrcLane = 0, DDst, DSrc; |
    DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
    DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
5238 | |
5239 | unsigned ImplicitSReg; |
    if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
5241 | break; |
5242 | |
5243 | for (unsigned i = MI.getDesc().getNumOperands(); i; --i) |
      MI.removeOperand(i - 1);
5245 | |
5246 | if (DSrc == DDst) { |
5247 | // Destination can be: |
5248 | // %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits) |
      MI.setDesc(get(ARM::VDUPLN32d));
      MIB.addReg(DDst, RegState::Define)
          .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
          .addImm(SrcLane)
          .add(predOps(ARMCC::AL));
5254 | |
      // Neither the source nor the destination is naturally represented any
      // more, so add them in manually.
      MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
      MIB.addReg(SrcReg, RegState::Implicit);
      if (ImplicitSReg != 0)
        MIB.addReg(ImplicitSReg, RegState::Implicit);
5261 | break; |
5262 | } |
5263 | |
5264 | // In general there's no single instruction that can perform an S <-> S |
5265 | // move in NEON space, but a pair of VEXT instructions *can* do the |
5266 | // job. It turns out that the VEXTs needed will only use DSrc once, with |
5267 | // the position based purely on the combination of lane-0 and lane-1 |
5268 | // involved. For example |
5269 | // vmov s0, s2 -> vext.32 d0, d0, d1, #1 vext.32 d0, d0, d0, #1 |
5270 | // vmov s1, s3 -> vext.32 d0, d1, d0, #1 vext.32 d0, d0, d0, #1 |
5271 | // vmov s0, s3 -> vext.32 d0, d0, d0, #1 vext.32 d0, d1, d0, #1 |
5272 | // vmov s1, s2 -> vext.32 d0, d0, d0, #1 vext.32 d0, d0, d1, #1 |
5273 | // |
5274 | // Pattern of the MachineInstrs is: |
5275 | // %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits) |
5276 | MachineInstrBuilder NewMIB; |
    NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
                     DDst);
5279 | |
5280 | // On the first instruction, both DSrc and DDst may be undef if present. |
5281 | // Specifically when the original instruction didn't have them as an |
5282 | // <imp-use>. |
5283 | unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; |
    bool CurUndef = !MI.readsRegister(CurReg, TRI);
    NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
5286 | |
5287 | CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst; |
    CurUndef = !MI.readsRegister(CurReg, TRI);
    NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
        .addImm(1)
        .add(predOps(ARMCC::AL));
5292 | |
5293 | if (SrcLane == DstLane) |
      NewMIB.addReg(SrcReg, RegState::Implicit);
5295 | |
    MI.setDesc(get(ARM::VEXTd32));
    MIB.addReg(DDst, RegState::Define);
5298 | |
5299 | // On the second instruction, DDst has definitely been defined above, so |
5300 | // it is not undef. DSrc, if present, can be undef as above. |
5301 | CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; |
    CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
    MIB.addReg(CurReg, getUndefRegState(CurUndef));
5304 | |
5305 | CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst; |
    CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
    MIB.addReg(CurReg, getUndefRegState(CurUndef))
        .addImm(1)
        .add(predOps(ARMCC::AL));
5310 | |
5311 | if (SrcLane != DstLane) |
      MIB.addReg(SrcReg, RegState::Implicit);
5313 | |
5314 | // As before, the original destination is no longer represented, add it |
5315 | // implicitly. |
    MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
    if (ImplicitSReg != 0)
      MIB.addReg(ImplicitSReg, RegState::Implicit);
5319 | break; |
5320 | } |
5321 | } |
5322 | } |
5323 | |
5324 | //===----------------------------------------------------------------------===// |
5325 | // Partial register updates |
5326 | //===----------------------------------------------------------------------===// |
5327 | // |
5328 | // Swift renames NEON registers with 64-bit granularity. That means any |
5329 | // instruction writing an S-reg implicitly reads the containing D-reg. The |
5330 | // problem is mostly avoided by translating f32 operations to v2f32 operations |
5331 | // on D-registers, but f32 loads are still a problem. |
5332 | // |
5333 | // These instructions can load an f32 into a NEON register: |
5334 | // |
5335 | // VLDRS - Only writes S, partial D update. |
5336 | // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops. |
5337 | // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. |
5338 | // |
5339 | // FCONSTD can be used as a dependency-breaking instruction. |
5340 | unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( |
5341 | const MachineInstr &MI, unsigned OpNum, |
5342 | const TargetRegisterInfo *TRI) const { |
5343 | auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance(); |
5344 | if (!PartialUpdateClearance) |
5345 | return 0; |
5346 | |
  assert(TRI && "Need TRI instance");
5348 | |
  const MachineOperand &MO = MI.getOperand(OpNum);
5350 | if (MO.readsReg()) |
5351 | return 0; |
5352 | Register Reg = MO.getReg(); |
5353 | int UseOp = -1; |
5354 | |
5355 | switch (MI.getOpcode()) { |
5356 | // Normal instructions writing only an S-register. |
5357 | case ARM::VLDRS: |
5358 | case ARM::FCONSTS: |
5359 | case ARM::VMOVSR: |
5360 | case ARM::VMOVv8i8: |
5361 | case ARM::VMOVv4i16: |
5362 | case ARM::VMOVv2i32: |
5363 | case ARM::VMOVv2f32: |
5364 | case ARM::VMOVv1i64: |
    UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, /*isKill=*/false);
5366 | break; |
5367 | |
5368 | // Explicitly reads the dependency. |
5369 | case ARM::VLD1LNd32: |
5370 | UseOp = 3; |
5371 | break; |
5372 | default: |
5373 | return 0; |
5374 | } |
5375 | |
5376 | // If this instruction actually reads a value from Reg, there is no unwanted |
5377 | // dependency. |
  if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
5379 | return 0; |
5380 | |
5381 | // We must be able to clobber the whole D-reg. |
5382 | if (Reg.isVirtual()) { |
5383 | // Virtual register must be a def undef foo:ssub_0 operand. |
5384 | if (!MO.getSubReg() || MI.readsVirtualRegister(Reg)) |
5385 | return 0; |
5386 | } else if (ARM::SPRRegClass.contains(Reg)) { |
5387 | // Physical register: MI must define the full D-reg. |
    unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
                                             &ARM::DPRRegClass);
    if (!DReg || !MI.definesRegister(DReg, TRI))
5391 | return 0; |
5392 | } |
5393 | |
5394 | // MI has an unwanted D-register dependency. |
  // Avoid defs in the previous N instructions.
5396 | return PartialUpdateClearance; |
5397 | } |
5398 | |
5399 | // Break a partial register dependency after getPartialRegUpdateClearance |
5400 | // returned non-zero. |
5401 | void ARMBaseInstrInfo::breakPartialRegDependency( |
5402 | MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { |
  assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
  assert(TRI && "Need TRI instance");
5405 | |
  const MachineOperand &MO = MI.getOperand(OpNum);
  Register Reg = MO.getReg();
  assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
5409 | unsigned DReg = Reg; |
5410 | |
5411 | // If MI defines an S-reg, find the corresponding D super-register. |
5412 | if (ARM::SPRRegClass.contains(Reg)) { |
5413 | DReg = ARM::D0 + (Reg - ARM::S0) / 2; |
    assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
5415 | } |
5416 | |
  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
  assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
5419 | |
5420 | // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines |
5421 | // the full D-register by loading the same value to both lanes. The |
5422 | // instruction is micro-coded with 2 uops, so don't do this until we can |
5423 | // properly schedule micro-coded instructions. The dispatcher stalls cause |
5424 | // too big regressions. |
5425 | |
5426 | // Insert the dependency-breaking FCONSTD before MI. |
5427 | // 96 is the encoding of 0.5, but the actual value doesn't matter here. |
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
      .addImm(96)
      .add(predOps(ARMCC::AL));
  MI.addRegisterKilled(DReg, TRI, true);
5432 | } |
5433 | |
5434 | bool ARMBaseInstrInfo::hasNOP() const { |
  return Subtarget.hasFeature(ARM::HasV6KOps);
5436 | } |
5437 | |
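// Returns true when MI either has no shifter operand or its shift is one of
// the forms Swift handles in its fast path (lsl #1, lsl #2 or lsr #1); see
// the comment inside for the supported shifts.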
5438 | bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { |
5439 | if (MI->getNumOperands() < 4) |
5440 | return true; |
  unsigned ShOpVal = MI->getOperand(3).getImm();
  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
5443 | // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. |
  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
      ((ShImm == 1 || ShImm == 2) &&
       ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
5447 | return true; |
5448 | |
5449 | return false; |
5450 | } |
5451 | |
5452 | bool ARMBaseInstrInfo::getRegSequenceLikeInputs( |
5453 | const MachineInstr &MI, unsigned DefIdx, |
5454 | SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { |
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isRegSequenceLike() && "Invalid kind of instruction");
5457 | |
5458 | switch (MI.getOpcode()) { |
5459 | case ARM::VMOVDRR: |
5460 | // dX = VMOVDRR rY, rZ |
5461 | // is the same as: |
5462 | // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1 |
5463 | // Populate the InputRegs accordingly. |
5464 | // rY |
    const MachineOperand *MOReg = &MI.getOperand(1);
    if (!MOReg->isUndef())
      InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
                                              MOReg->getSubReg(), ARM::ssub_0));
    // rZ
    MOReg = &MI.getOperand(2);
    if (!MOReg->isUndef())
      InputRegs.push_back(RegSubRegPairAndIdx(MOReg->getReg(),
                                              MOReg->getSubReg(), ARM::ssub_1));
5474 | return true; |
5475 | } |
  llvm_unreachable("Target dependent opcode missing");
5477 | } |
5478 | |
5479 | bool ARMBaseInstrInfo::getExtractSubregLikeInputs( |
5480 | const MachineInstr &MI, unsigned DefIdx, |
5481 | RegSubRegPairAndIdx &InputReg) const { |
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isExtractSubregLike() && "Invalid kind of instruction");
5484 | |
5485 | switch (MI.getOpcode()) { |
5486 | case ARM::VMOVRRD: |
5487 | // rX, rY = VMOVRRD dZ |
5488 | // is the same as: |
5489 | // rX = EXTRACT_SUBREG dZ, ssub_0 |
5490 | // rY = EXTRACT_SUBREG dZ, ssub_1 |
    const MachineOperand &MOReg = MI.getOperand(2);
5492 | if (MOReg.isUndef()) |
5493 | return false; |
5494 | InputReg.Reg = MOReg.getReg(); |
5495 | InputReg.SubReg = MOReg.getSubReg(); |
5496 | InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1; |
5497 | return true; |
5498 | } |
  llvm_unreachable("Target dependent opcode missing");
5500 | } |
5501 | |
5502 | bool ARMBaseInstrInfo::getInsertSubregLikeInputs( |
5503 | const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, |
5504 | RegSubRegPairAndIdx &InsertedReg) const { |
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isInsertSubregLike() && "Invalid kind of instruction");
5507 | |
5508 | switch (MI.getOpcode()) { |
5509 | case ARM::VSETLNi32: |
5510 | case ARM::MVE_VMOV_to_lane_32: |
5511 | // dX = VSETLNi32 dY, rZ, imm |
5512 | // qX = MVE_VMOV_to_lane_32 qY, rZ, imm |
    const MachineOperand &MOBaseReg = MI.getOperand(1);
    const MachineOperand &MOInsertedReg = MI.getOperand(2);
5515 | if (MOInsertedReg.isUndef()) |
5516 | return false; |
    const MachineOperand &MOIndex = MI.getOperand(3);
5518 | BaseReg.Reg = MOBaseReg.getReg(); |
5519 | BaseReg.SubReg = MOBaseReg.getSubReg(); |
5520 | |
5521 | InsertedReg.Reg = MOInsertedReg.getReg(); |
5522 | InsertedReg.SubReg = MOInsertedReg.getSubReg(); |
5523 | InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm(); |
5524 | return true; |
5525 | } |
  llvm_unreachable("Target dependent opcode missing");
5527 | } |
5528 | |
5529 | std::pair<unsigned, unsigned> |
5530 | ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { |
5531 | const unsigned Mask = ARMII::MO_OPTION_MASK; |
  return std::make_pair(TF & Mask, TF & ~Mask);
5533 | } |
5534 | |
5535 | ArrayRef<std::pair<unsigned, const char *>> |
5536 | ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { |
5537 | using namespace ARMII; |
5538 | |
5539 | static const std::pair<unsigned, const char *> TargetFlags[] = { |
      {MO_LO16, "arm-lo16"},       {MO_HI16, "arm-hi16"},
      {MO_LO_0_7, "arm-lo-0-7"},   {MO_HI_0_7, "arm-hi-0-7"},
      {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
5543 | }; |
5544 | return ArrayRef(TargetFlags); |
5545 | } |
5546 | |
5547 | ArrayRef<std::pair<unsigned, const char *>> |
5548 | ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { |
5549 | using namespace ARMII; |
5550 | |
5551 | static const std::pair<unsigned, const char *> TargetFlags[] = { |
5552 | {MO_COFFSTUB, "arm-coffstub" }, |
5553 | {MO_GOT, "arm-got" }, |
5554 | {MO_SBREL, "arm-sbrel" }, |
5555 | {MO_DLLIMPORT, "arm-dllimport" }, |
5556 | {MO_SECREL, "arm-secrel" }, |
5557 | {MO_NONLAZY, "arm-nonlazy" }}; |
5558 | return ArrayRef(TargetFlags); |
5559 | } |
5560 | |
5561 | std::optional<RegImmPair> |
5562 | ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const { |
5563 | int Sign = 1; |
5564 | unsigned Opcode = MI.getOpcode(); |
5565 | int64_t Offset = 0; |
5566 | |
5567 | // TODO: Handle cases where Reg is a super- or sub-register of the |
5568 | // destination register. |
5569 | const MachineOperand &Op0 = MI.getOperand(i: 0); |
5570 | if (!Op0.isReg() || Reg != Op0.getReg()) |
5571 | return std::nullopt; |
5572 | |
5573 | // We describe SUBri or ADDri instructions. |
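// For example, "%r0 = ADDri %sp, 8" yields {%sp, +8} and "%r0 = SUBri %sp, 8"
// yields {%sp, -8}.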
5574 | if (Opcode == ARM::SUBri) |
5575 | Sign = -1; |
5576 | else if (Opcode != ARM::ADDri) |
5577 | return std::nullopt; |
5578 | |
5579 | // TODO: Third operand can be global address (usually some string). Since |
5580 | // strings can be relocated we cannot calculate their offsets for |
5581 | // now. |
5582 | if (!MI.getOperand(i: 1).isReg() || !MI.getOperand(i: 2).isImm()) |
5583 | return std::nullopt; |
5584 | |
5585 | Offset = MI.getOperand(i: 2).getImm() * Sign; |
5586 | return RegImmPair{MI.getOperand(i: 1).getReg(), Offset}; |
5587 | } |
5588 | |
5589 | bool llvm::registerDefinedBetween(unsigned Reg, |
5590 | MachineBasicBlock::iterator From, |
5591 | MachineBasicBlock::iterator To, |
5592 | const TargetRegisterInfo *TRI) { |
5593 | for (auto I = From; I != To; ++I) |
5594 | if (I->modifiesRegister(Reg, TRI)) |
5595 | return true; |
5596 | return false; |
5597 | } |
5598 | |
5599 | MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br, |
5600 | const TargetRegisterInfo *TRI) { |
// Search backwards to the instruction that defines CPSR. This may or may
// not be a CMP; we check that after this loop. If we find another
// instruction that reads CPSR, we return nullptr.
5604 | MachineBasicBlock::iterator CmpMI = Br; |
5605 | while (CmpMI != Br->getParent()->begin()) { |
5606 | --CmpMI; |
5607 | if (CmpMI->modifiesRegister(Reg: ARM::CPSR, TRI)) |
5608 | break; |
5609 | if (CmpMI->readsRegister(Reg: ARM::CPSR, TRI)) |
5610 | break; |
5611 | } |
5612 | |
5613 | // Check that this inst is a CMP r[0-7], #0 and that the register |
5614 | // is not redefined between the cmp and the br. |
5615 | if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri) |
5616 | return nullptr; |
5617 | Register Reg = CmpMI->getOperand(i: 0).getReg(); |
5618 | Register PredReg; |
5619 | ARMCC::CondCodes Pred = getInstrPredicate(MI: *CmpMI, PredReg); |
5620 | if (Pred != ARMCC::AL || CmpMI->getOperand(i: 1).getImm() != 0) |
5621 | return nullptr; |
5622 | if (!isARMLowRegister(Reg)) |
5623 | return nullptr; |
5624 | if (registerDefinedBetween(Reg, From: CmpMI->getNextNode(), To: Br, TRI)) |
5625 | return nullptr; |
5626 | |
5627 | return &*CmpMI; |
5628 | } |
5629 | |
5630 | unsigned llvm::ConstantMaterializationCost(unsigned Val, |
5631 | const ARMSubtarget *Subtarget, |
5632 | bool ForCodesize) { |
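// The result is a size in bytes when ForCodesize is set and an approximate
// instruction count otherwise. For example, in Thumb mode Val == 0xFF is a
// single 16-bit MOV (2 bytes / 1 instruction), while a value like 0x12345678
// falls through to MOVW+MOVT when available (8 bytes / 2 instructions) or a
// literal pool load otherwise.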
5633 | if (Subtarget->isThumb()) { |
5634 | if (Val <= 255) // MOV |
5635 | return ForCodesize ? 2 : 1; |
5636 | if (Subtarget->hasV6T2Ops() && (Val <= 0xffff || // MOV |
5637 | ARM_AM::getT2SOImmVal(Arg: Val) != -1 || // MOVW |
5638 | ARM_AM::getT2SOImmVal(Arg: ~Val) != -1)) // MVN |
5639 | return ForCodesize ? 4 : 1; |
5640 | if (Val <= 510) // MOV + ADDi8 |
5641 | return ForCodesize ? 4 : 2; |
5642 | if (~Val <= 255) // MOV + MVN |
5643 | return ForCodesize ? 4 : 2; |
5644 | if (ARM_AM::isThumbImmShiftedVal(V: Val)) // MOV + LSL |
5645 | return ForCodesize ? 4 : 2; |
5646 | } else { |
5647 | if (ARM_AM::getSOImmVal(Arg: Val) != -1) // MOV |
5648 | return ForCodesize ? 4 : 1; |
5649 | if (ARM_AM::getSOImmVal(Arg: ~Val) != -1) // MVN |
5650 | return ForCodesize ? 4 : 1; |
5651 | if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW |
5652 | return ForCodesize ? 4 : 1; |
5653 | if (ARM_AM::isSOImmTwoPartVal(V: Val)) // two instrs |
5654 | return ForCodesize ? 8 : 2; |
5655 | if (ARM_AM::isSOImmTwoPartValNeg(V: Val)) // two instrs |
5656 | return ForCodesize ? 8 : 2; |
5657 | } |
5658 | if (Subtarget->useMovt()) // MOVW + MOVT |
5659 | return ForCodesize ? 8 : 2; |
5660 | return ForCodesize ? 8 : 3; // Literal pool load |
5661 | } |
5662 | |
5663 | bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, |
5664 | const ARMSubtarget *Subtarget, |
5665 | bool ForCodesize) { |
5666 | // Check with ForCodesize |
5667 | unsigned Cost1 = ConstantMaterializationCost(Val: Val1, Subtarget, ForCodesize); |
5668 | unsigned Cost2 = ConstantMaterializationCost(Val: Val2, Subtarget, ForCodesize); |
5669 | if (Cost1 < Cost2) |
5670 | return true; |
5671 | if (Cost1 > Cost2) |
5672 | return false; |
5673 | |
5674 | // If they are equal, try with !ForCodesize |
5675 | return ConstantMaterializationCost(Val: Val1, Subtarget, ForCodesize: !ForCodesize) < |
5676 | ConstantMaterializationCost(Val: Val2, Subtarget, ForCodesize: !ForCodesize); |
5677 | } |
5678 | |
5679 | /// Constants defining how certain sequences should be outlined. |
5680 | /// This encompasses how an outlined function should be called, and what kind of |
5681 | /// frame should be emitted for that outlined function. |
5682 | /// |
5683 | /// \p MachineOutlinerTailCall implies that the function is being created from |
5684 | /// a sequence of instructions ending in a return. |
5685 | /// |
5686 | /// That is, |
5687 | /// |
5688 | /// I1 OUTLINED_FUNCTION: |
5689 | /// I2 --> B OUTLINED_FUNCTION I1 |
5690 | /// BX LR I2 |
5691 | /// BX LR |
5692 | /// |
5693 | /// +-------------------------+--------+-----+ |
5694 | /// | | Thumb2 | ARM | |
5695 | /// +-------------------------+--------+-----+ |
5696 | /// | Call overhead in Bytes | 4 | 4 | |
5697 | /// | Frame overhead in Bytes | 0 | 0 | |
5698 | /// | Stack fixup required | No | No | |
5699 | /// +-------------------------+--------+-----+ |
5700 | /// |
5701 | /// \p MachineOutlinerThunk implies that the function is being created from |
5702 | /// a sequence of instructions ending in a call. The outlined function is |
5703 | /// called with a BL instruction, and the outlined function tail-calls the |
5704 | /// original call destination. |
5705 | /// |
5706 | /// That is, |
5707 | /// |
5708 | /// I1 OUTLINED_FUNCTION: |
5709 | /// I2 --> BL OUTLINED_FUNCTION I1 |
5710 | /// BL f I2 |
5711 | /// B f |
5712 | /// |
5713 | /// +-------------------------+--------+-----+ |
5714 | /// | | Thumb2 | ARM | |
5715 | /// +-------------------------+--------+-----+ |
5716 | /// | Call overhead in Bytes | 4 | 4 | |
5717 | /// | Frame overhead in Bytes | 0 | 0 | |
5718 | /// | Stack fixup required | No | No | |
5719 | /// +-------------------------+--------+-----+ |
5720 | /// |
5721 | /// \p MachineOutlinerNoLRSave implies that the function should be called using |
5722 | /// a BL instruction, but doesn't require LR to be saved and restored. This |
5723 | /// happens when LR is known to be dead. |
5724 | /// |
5725 | /// That is, |
5726 | /// |
5727 | /// I1 OUTLINED_FUNCTION: |
5728 | /// I2 --> BL OUTLINED_FUNCTION I1 |
5729 | /// I3 I2 |
5730 | /// I3 |
5731 | /// BX LR |
5732 | /// |
5733 | /// +-------------------------+--------+-----+ |
5734 | /// | | Thumb2 | ARM | |
5735 | /// +-------------------------+--------+-----+ |
5736 | /// | Call overhead in Bytes | 4 | 4 | |
5737 | /// | Frame overhead in Bytes | 2 | 4 | |
5738 | /// | Stack fixup required | No | No | |
5739 | /// +-------------------------+--------+-----+ |
5740 | /// |
5741 | /// \p MachineOutlinerRegSave implies that the function should be called with a |
5742 | /// save and restore of LR to an available register. This allows us to avoid |
5743 | /// stack fixups. Note that this outlining variant is compatible with the |
5744 | /// NoLRSave case. |
5745 | /// |
5746 | /// That is, |
5747 | /// |
5748 | /// I1 Save LR OUTLINED_FUNCTION: |
5749 | /// I2 --> BL OUTLINED_FUNCTION I1 |
5750 | /// I3 Restore LR I2 |
5751 | /// I3 |
5752 | /// BX LR |
5753 | /// |
5754 | /// +-------------------------+--------+-----+ |
5755 | /// | | Thumb2 | ARM | |
5756 | /// +-------------------------+--------+-----+ |
5757 | /// | Call overhead in Bytes | 8 | 12 | |
5758 | /// | Frame overhead in Bytes | 2 | 4 | |
5759 | /// | Stack fixup required | No | No | |
5760 | /// +-------------------------+--------+-----+ |
5761 | /// |
5762 | /// \p MachineOutlinerDefault implies that the function should be called with |
5763 | /// a save and restore of LR to the stack. |
5764 | /// |
5765 | /// That is, |
5766 | /// |
5767 | /// I1 Save LR OUTLINED_FUNCTION: |
5768 | /// I2 --> BL OUTLINED_FUNCTION I1 |
5769 | /// I3 Restore LR I2 |
5770 | /// I3 |
5771 | /// BX LR |
5772 | /// |
5773 | /// +-------------------------+--------+-----+ |
5774 | /// | | Thumb2 | ARM | |
5775 | /// +-------------------------+--------+-----+ |
5776 | /// | Call overhead in Bytes | 8 | 12 | |
5777 | /// | Frame overhead in Bytes | 2 | 4 | |
5778 | /// | Stack fixup required | Yes | Yes | |
5779 | /// +-------------------------+--------+-----+ |
5780 | |
5781 | enum MachineOutlinerClass { |
5782 | MachineOutlinerTailCall, |
5783 | MachineOutlinerThunk, |
5784 | MachineOutlinerNoLRSave, |
5785 | MachineOutlinerRegSave, |
5786 | MachineOutlinerDefault |
5787 | }; |
5788 | |
5789 | enum MachineOutlinerMBBFlags { |
5790 | LRUnavailableSomewhere = 0x2, |
5791 | HasCalls = 0x4, |
5792 | UnsafeRegsDead = 0x8 |
5793 | }; |
5794 | |
5795 | struct OutlinerCosts { |
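// All values are code-size overheads in bytes, matching the tables in the
// MachineOutlinerClass description above.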
5796 | int CallTailCall; |
5797 | int FrameTailCall; |
5798 | int CallThunk; |
5799 | int FrameThunk; |
5800 | int CallNoLRSave; |
5801 | int FrameNoLRSave; |
5802 | int CallRegSave; |
5803 | int FrameRegSave; |
5804 | int CallDefault; |
5805 | int FrameDefault; |
5806 | int SaveRestoreLROnStack; |
5807 | |
5808 | OutlinerCosts(const ARMSubtarget &target) |
5809 | : CallTailCall(target.isThumb() ? 4 : 4), |
5810 | FrameTailCall(target.isThumb() ? 0 : 0), |
5811 | CallThunk(target.isThumb() ? 4 : 4), |
5812 | FrameThunk(target.isThumb() ? 0 : 0), |
5813 | CallNoLRSave(target.isThumb() ? 4 : 4), |
5814 | FrameNoLRSave(target.isThumb() ? 2 : 4), |
5815 | CallRegSave(target.isThumb() ? 8 : 12), |
5816 | FrameRegSave(target.isThumb() ? 2 : 4), |
5817 | CallDefault(target.isThumb() ? 8 : 12), |
5818 | FrameDefault(target.isThumb() ? 2 : 4), |
5819 | SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {} |
5820 | }; |
5821 | |
5822 | Register |
5823 | ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const { |
5824 | MachineFunction *MF = C.getMF(); |
5825 | const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); |
5826 | const ARMBaseRegisterInfo *ARI = |
5827 | static_cast<const ARMBaseRegisterInfo *>(&TRI); |
5828 | |
5829 | BitVector regsReserved = ARI->getReservedRegs(MF: *MF); |
5830 | // Check if there is an available register across the sequence that we can |
5831 | // use. |
5832 | for (Register Reg : ARM::rGPRRegClass) { |
5833 | if (!(Reg < regsReserved.size() && regsReserved.test(Idx: Reg)) && |
5834 | Reg != ARM::LR && // LR is not reserved, but don't use it. |
5835 | Reg != ARM::R12 && // R12 is not guaranteed to be preserved. |
5836 | C.isAvailableAcrossAndOutOfSeq(Reg, TRI) && |
5837 | C.isAvailableInsideSeq(Reg, TRI)) |
5838 | return Reg; |
5839 | } |
5840 | return Register(); |
5841 | } |
5842 | |
5843 | // Compute liveness of LR at the point after the interval [I, E), which |
5844 | // denotes a *backward* iteration through instructions. Used only for return |
5845 | // basic blocks, which do not end with a tail call. |
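// For example, in a block ending with "BX LR", LR is considered live from the
// return back to the nearest preceding instruction that redefines LR.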
5846 | static bool isLRAvailable(const TargetRegisterInfo &TRI, |
5847 | MachineBasicBlock::reverse_iterator I, |
5848 | MachineBasicBlock::reverse_iterator E) { |
// At the end of the function, LR is dead.
5850 | bool Live = false; |
5851 | for (; I != E; ++I) { |
5852 | const MachineInstr &MI = *I; |
5853 | |
5854 | // Check defs of LR. |
5855 | if (MI.modifiesRegister(Reg: ARM::LR, TRI: &TRI)) |
5856 | Live = false; |
5857 | |
5858 | // Check uses of LR. |
5859 | unsigned Opcode = MI.getOpcode(); |
5860 | if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR || |
5861 | Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET || |
5862 | Opcode == ARM::tBXNS_RET) { |
5863 | // These instructions use LR, but it's not an (explicit or implicit) |
5864 | // operand. |
5865 | Live = true; |
5866 | continue; |
5867 | } |
5868 | if (MI.readsRegister(Reg: ARM::LR, TRI: &TRI)) |
5869 | Live = true; |
5870 | } |
5871 | return !Live; |
5872 | } |
5873 | |
5874 | std::optional<outliner::OutlinedFunction> |
5875 | ARMBaseInstrInfo::getOutliningCandidateInfo( |
5876 | std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { |
5877 | unsigned SequenceSize = 0; |
5878 | for (auto &MI : RepeatedSequenceLocs[0]) |
5879 | SequenceSize += getInstSizeInBytes(MI); |
5880 | |
5881 | // Properties about candidate MBBs that hold for all of them. |
5882 | unsigned FlagsSetInAll = 0xF; |
5883 | |
5884 | // Compute liveness information for each candidate, and set FlagsSetInAll. |
5885 | const TargetRegisterInfo &TRI = getRegisterInfo(); |
5886 | for (outliner::Candidate &C : RepeatedSequenceLocs) |
5887 | FlagsSetInAll &= C.Flags; |
5888 | |
5889 | // According to the ARM Procedure Call Standard, the following are |
5890 | // undefined on entry/exit from a function call: |
5891 | // |
5892 | // * Register R12(IP), |
5893 | // * Condition codes (and thus the CPSR register) |
5894 | // |
5895 | // Since we control the instructions which are part of the outlined regions |
5896 | // we don't need to be fully compliant with the AAPCS, but we have to |
5897 | // guarantee that if a veneer is inserted at link time the code is still |
5898 | // correct. Because of this, we can't outline any sequence of instructions |
5899 | // where one of these registers is live into/across it. Thus, we need to |
5900 | // delete those candidates. |
5901 | auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) { |
5902 | // If the unsafe registers in this block are all dead, then we don't need |
5903 | // to compute liveness here. |
5904 | if (C.Flags & UnsafeRegsDead) |
5905 | return false; |
5906 | return C.isAnyUnavailableAcrossOrOutOfSeq(Regs: {ARM::R12, ARM::CPSR}, TRI); |
5907 | }; |
5908 | |
5909 | // Are there any candidates where those registers are live? |
5910 | if (!(FlagsSetInAll & UnsafeRegsDead)) { |
// Erase every candidate that violates the restrictions above. (It could be
// true that we have viable candidates, so it's not worth bailing out in
// the case that, say, 1 out of 20 candidates violates the restrictions.)
5914 | llvm::erase_if(C&: RepeatedSequenceLocs, P: CantGuaranteeValueAcrossCall); |
5915 | |
5916 | // If the sequence doesn't have enough candidates left, then we're done. |
5917 | if (RepeatedSequenceLocs.size() < 2) |
5918 | return std::nullopt; |
5919 | } |
5920 | |
// We expect the majority of the outlining candidates to be in consensus with
// regard to return address signing and authentication, and branch target
// enforcement. In other words, partitioning according to all four possible
// combinations of PAC-RET and BTI is going to yield one big subset and three
// small (likely empty) subsets. That allows us to cull incompatible
// candidates separately for PAC-RET and BTI.
5927 | |
// Partition the candidates into two sets: one with BTI enabled and one with
// BTI disabled. Remove the candidates from the smaller set. If both sets have
// the same number of candidates, prefer the non-BTI ones for outlining, since
// they have less overhead.
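// For example, if 7 of 9 candidates come from BTI-enabled functions, the
// other 2 are erased; on a tie, the BTI-enabled candidates are erased instead.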
5932 | auto NoBTI = |
5933 | llvm::partition(Range&: RepeatedSequenceLocs, P: [](const outliner::Candidate &C) { |
5934 | const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); |
5935 | return AFI.branchTargetEnforcement(); |
5936 | }); |
5937 | if (std::distance(first: RepeatedSequenceLocs.begin(), last: NoBTI) > |
5938 | std::distance(first: NoBTI, last: RepeatedSequenceLocs.end())) |
5939 | RepeatedSequenceLocs.erase(first: NoBTI, last: RepeatedSequenceLocs.end()); |
5940 | else |
5941 | RepeatedSequenceLocs.erase(first: RepeatedSequenceLocs.begin(), last: NoBTI); |
5942 | |
5943 | if (RepeatedSequenceLocs.size() < 2) |
5944 | return std::nullopt; |
5945 | |
5946 | // Likewise, partition the candidates according to PAC-RET enablement. |
5947 | auto NoPAC = |
5948 | llvm::partition(Range&: RepeatedSequenceLocs, P: [](const outliner::Candidate &C) { |
5949 | const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); |
5950 | // If the function happens to not spill the LR, do not disqualify it |
5951 | // from the outlining. |
5952 | return AFI.shouldSignReturnAddress(SpillsLR: true); |
5953 | }); |
5954 | if (std::distance(first: RepeatedSequenceLocs.begin(), last: NoPAC) > |
5955 | std::distance(first: NoPAC, last: RepeatedSequenceLocs.end())) |
5956 | RepeatedSequenceLocs.erase(first: NoPAC, last: RepeatedSequenceLocs.end()); |
5957 | else |
5958 | RepeatedSequenceLocs.erase(first: RepeatedSequenceLocs.begin(), last: NoPAC); |
5959 | |
5960 | if (RepeatedSequenceLocs.size() < 2) |
5961 | return std::nullopt; |
5962 | |
5963 | // At this point, we have only "safe" candidates to outline. Figure out |
5964 | // frame + call instruction information. |
5965 | |
5966 | unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode(); |
5967 | |
5968 | // Helper lambda which sets call information for every candidate. |
5969 | auto SetCandidateCallInfo = |
5970 | [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) { |
5971 | for (outliner::Candidate &C : RepeatedSequenceLocs) |
5972 | C.setCallInfo(CID: CallID, CO: NumBytesForCall); |
5973 | }; |
5974 | |
5975 | OutlinerCosts Costs(Subtarget); |
5976 | |
5977 | const auto &SomeMFI = |
5978 | *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>(); |
5979 | // Adjust costs to account for the BTI instructions. |
5980 | if (SomeMFI.branchTargetEnforcement()) { |
5981 | Costs.FrameDefault += 4; |
5982 | Costs.FrameNoLRSave += 4; |
5983 | Costs.FrameRegSave += 4; |
5984 | Costs.FrameTailCall += 4; |
5985 | Costs.FrameThunk += 4; |
5986 | } |
5987 | |
5988 | // Adjust costs to account for sign and authentication instructions. |
5989 | if (SomeMFI.shouldSignReturnAddress(SpillsLR: true)) { |
5990 | Costs.CallDefault += 8; // +PAC instr, +AUT instr |
5991 | Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr |
5992 | } |
5993 | |
5994 | unsigned FrameID = MachineOutlinerDefault; |
5995 | unsigned NumBytesToCreateFrame = Costs.FrameDefault; |
5996 | |
5997 | // If the last instruction in any candidate is a terminator, then we should |
5998 | // tail call all of the candidates. |
5999 | if (RepeatedSequenceLocs[0].back().isTerminator()) { |
6000 | FrameID = MachineOutlinerTailCall; |
6001 | NumBytesToCreateFrame = Costs.FrameTailCall; |
6002 | SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall); |
6003 | } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX || |
6004 | LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL || |
6005 | LastInstrOpcode == ARM::tBLXr || |
6006 | LastInstrOpcode == ARM::tBLXr_noip || |
6007 | LastInstrOpcode == ARM::tBLXi) { |
6008 | FrameID = MachineOutlinerThunk; |
6009 | NumBytesToCreateFrame = Costs.FrameThunk; |
6010 | SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk); |
6011 | } else { |
6012 | // We need to decide how to emit calls + frames. We can always emit the same |
6013 | // frame if we don't need to save to the stack. If we have to save to the |
6014 | // stack, then we need a different frame. |
6015 | unsigned NumBytesNoStackCalls = 0; |
6016 | std::vector<outliner::Candidate> CandidatesWithoutStackFixups; |
6017 | |
6018 | for (outliner::Candidate &C : RepeatedSequenceLocs) { |
6019 | // LR liveness is overestimated in return blocks, unless they end with a |
6020 | // tail call. |
6021 | const auto Last = C.getMBB()->rbegin(); |
6022 | const bool LRIsAvailable = |
6023 | C.getMBB()->isReturnBlock() && !Last->isCall() |
6024 | ? isLRAvailable(TRI, I: Last, |
6025 | E: (MachineBasicBlock::reverse_iterator)C.begin()) |
6026 | : C.isAvailableAcrossAndOutOfSeq(Reg: ARM::LR, TRI); |
6027 | if (LRIsAvailable) { |
6028 | FrameID = MachineOutlinerNoLRSave; |
6029 | NumBytesNoStackCalls += Costs.CallNoLRSave; |
6030 | C.setCallInfo(CID: MachineOutlinerNoLRSave, CO: Costs.CallNoLRSave); |
6031 | CandidatesWithoutStackFixups.push_back(x: C); |
6032 | } |
6033 | |
6034 | // Is an unused register available? If so, we won't modify the stack, so |
6035 | // we can outline with the same frame type as those that don't save LR. |
6036 | else if (findRegisterToSaveLRTo(C)) { |
6037 | FrameID = MachineOutlinerRegSave; |
6038 | NumBytesNoStackCalls += Costs.CallRegSave; |
6039 | C.setCallInfo(CID: MachineOutlinerRegSave, CO: Costs.CallRegSave); |
6040 | CandidatesWithoutStackFixups.push_back(x: C); |
6041 | } |
6042 | |
6043 | // Is SP used in the sequence at all? If not, we don't have to modify |
6044 | // the stack, so we are guaranteed to get the same frame. |
6045 | else if (C.isAvailableInsideSeq(Reg: ARM::SP, TRI)) { |
6046 | NumBytesNoStackCalls += Costs.CallDefault; |
6047 | C.setCallInfo(CID: MachineOutlinerDefault, CO: Costs.CallDefault); |
6048 | CandidatesWithoutStackFixups.push_back(x: C); |
6049 | } |
6050 | |
6051 | // If we outline this, we need to modify the stack. Pretend we don't |
6052 | // outline this by saving all of its bytes. |
6053 | else |
6054 | NumBytesNoStackCalls += SequenceSize; |
6055 | } |
6056 | |
// If there are no places where we have to save LR, then note that we don't
// have to update the stack. Otherwise, give every candidate the default call
// type.
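// For example, with 3 candidates, the no-fixup variants are kept only if
// their combined cost (counting a candidate's full sequence size when it
// would need a fixup) does not exceed 3 * CallDefault.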
6060 | if (NumBytesNoStackCalls <= |
6061 | RepeatedSequenceLocs.size() * Costs.CallDefault) { |
6062 | RepeatedSequenceLocs = CandidatesWithoutStackFixups; |
6063 | FrameID = MachineOutlinerNoLRSave; |
6064 | if (RepeatedSequenceLocs.size() < 2) |
6065 | return std::nullopt; |
6066 | } else |
6067 | SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault); |
6068 | } |
6069 | |
6070 | // Does every candidate's MBB contain a call? If so, then we might have a |
6071 | // call in the range. |
6072 | if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) { |
// Check if the range contains a call. These require a save + restore of the
// link register.
6075 | outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; |
6076 | if (std::any_of(first: FirstCand.begin(), last: std::prev(x: FirstCand.end()), |
6077 | pred: [](const MachineInstr &MI) { return MI.isCall(); })) |
6078 | NumBytesToCreateFrame += Costs.SaveRestoreLROnStack; |
6079 | |
// Handle the last instruction separately. If it is a tail call, then the
// last instruction is a call and we don't want to save + restore in this
// case. However, it could be possible that the last instruction is a call
// without it being valid to tail call this sequence. We should consider
// this as well.
6085 | else if (FrameID != MachineOutlinerThunk && |
6086 | FrameID != MachineOutlinerTailCall && FirstCand.back().isCall()) |
6087 | NumBytesToCreateFrame += Costs.SaveRestoreLROnStack; |
6088 | } |
6089 | |
6090 | return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, |
6091 | NumBytesToCreateFrame, FrameID); |
6092 | } |
6093 | |
6094 | bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, |
6095 | int64_t Fixup, |
6096 | bool Updt) const { |
6097 | int SPIdx = MI->findRegisterUseOperandIdx(Reg: ARM::SP, /*TRI=*/nullptr); |
6098 | unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask); |
6099 | if (SPIdx < 0) |
6100 | // No SP operand |
6101 | return true; |
6102 | else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2)) |
6103 | // If SP is not the base register we can't do much |
6104 | return false; |
6105 | |
6106 | // Stack might be involved but addressing mode doesn't handle any offset. |
6107 | // Rq: AddrModeT1_[1|2|4] don't operate on SP |
6108 | if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions |
6109 | AddrMode == ARMII::AddrMode4 || // Load/Store Multiple |
6110 | AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple |
AddrMode == ARMII::AddrModeT2_so || // SP can't be used as base register
6112 | AddrMode == ARMII::AddrModeT2_pc || // PCrel access |
6113 | AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST |
6114 | AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE |
6115 | AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE |
6116 | AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR |
6117 | AddrMode == ARMII::AddrModeNone || |
6118 | AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions |
6119 | AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm |
6120 | return false; |
6121 | |
6122 | unsigned NumOps = MI->getDesc().getNumOperands(); |
6123 | unsigned ImmIdx = NumOps - 3; |
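// The immediate offset is the third-from-last operand: the load/store
// instructions that reach this point end with the two predicate operands
// (condition code and CPSR/noreg).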
6124 | |
6125 | const MachineOperand &Offset = MI->getOperand(i: ImmIdx); |
6126 | assert(Offset.isImm() && "Is not an immediate" ); |
6127 | int64_t OffVal = Offset.getImm(); |
6128 | |
6129 | if (OffVal < 0) |
// Don't overwrite data if it is below SP.
6131 | return false; |
6132 | |
6133 | unsigned NumBits = 0; |
6134 | unsigned Scale = 1; |
6135 | |
6136 | switch (AddrMode) { |
6137 | case ARMII::AddrMode3: |
6138 | if (ARM_AM::getAM3Op(AM3Opc: OffVal) == ARM_AM::sub) |
6139 | return false; |
6140 | OffVal = ARM_AM::getAM3Offset(AM3Opc: OffVal); |
6141 | NumBits = 8; |
6142 | break; |
6143 | case ARMII::AddrMode5: |
6144 | if (ARM_AM::getAM5Op(AM5Opc: OffVal) == ARM_AM::sub) |
6145 | return false; |
6146 | OffVal = ARM_AM::getAM5Offset(AM5Opc: OffVal); |
6147 | NumBits = 8; |
6148 | Scale = 4; |
6149 | break; |
6150 | case ARMII::AddrMode5FP16: |
6151 | if (ARM_AM::getAM5FP16Op(AM5Opc: OffVal) == ARM_AM::sub) |
6152 | return false; |
6153 | OffVal = ARM_AM::getAM5FP16Offset(AM5Opc: OffVal); |
6154 | NumBits = 8; |
6155 | Scale = 2; |
6156 | break; |
6157 | case ARMII::AddrModeT2_i8pos: |
6158 | NumBits = 8; |
6159 | break; |
6160 | case ARMII::AddrModeT2_i8s4: |
6161 | // FIXME: Values are already scaled in this addressing mode. |
6162 | assert((Fixup & 3) == 0 && "Can't encode this offset!" ); |
6163 | NumBits = 10; |
6164 | break; |
6165 | case ARMII::AddrModeT2_ldrex: |
6166 | NumBits = 8; |
6167 | Scale = 4; |
6168 | break; |
6169 | case ARMII::AddrModeT2_i12: |
6170 | case ARMII::AddrMode_i12: |
6171 | NumBits = 12; |
6172 | break; |
6173 | case ARMII::AddrModeT1_s: // SP-relative LD/ST |
6174 | NumBits = 8; |
6175 | Scale = 4; |
6176 | break; |
6177 | default: |
6178 | llvm_unreachable("Unsupported addressing mode!" ); |
6179 | } |
6180 | // Make sure the offset is encodable for instructions that scale the |
6181 | // immediate. |
6182 | assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 && |
6183 | "Can't encode this offset!" ); |
6184 | OffVal += Fixup / Scale; |
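// For example, an AddrMode5 VLDR with an encoded offset of 3 (12 bytes) and
// an 8-byte fixup becomes an encoded offset of 5 (20 bytes), which still fits
// in the 8-bit field.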
6185 | |
6186 | unsigned Mask = (1 << NumBits) - 1; |
6187 | |
6188 | if (OffVal <= Mask) { |
6189 | if (Updt) |
6190 | MI->getOperand(i: ImmIdx).setImm(OffVal); |
6191 | return true; |
6192 | } |
6193 | |
6194 | return false; |
6195 | } |
6196 | |
6197 | void ARMBaseInstrInfo::mergeOutliningCandidateAttributes( |
6198 | Function &F, std::vector<outliner::Candidate> &Candidates) const { |
6199 | outliner::Candidate &C = Candidates.front(); |
6200 | // branch-target-enforcement is guaranteed to be consistent between all |
6201 | // candidates, so we only need to look at one. |
6202 | const Function &CFn = C.getMF()->getFunction(); |
6203 | if (CFn.hasFnAttribute(Kind: "branch-target-enforcement" )) |
6204 | F.addFnAttr(Attr: CFn.getFnAttribute(Kind: "branch-target-enforcement" )); |
6205 | |
6206 | ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates); |
6207 | } |
6208 | |
6209 | bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom( |
6210 | MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { |
6211 | const Function &F = MF.getFunction(); |
6212 | |
6213 | // Can F be deduplicated by the linker? If it can, don't outline from it. |
6214 | if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) |
6215 | return false; |
6216 | |
6217 | // Don't outline from functions with section markings; the program could |
6218 | // expect that all the code is in the named section. |
6219 | // FIXME: Allow outlining from multiple functions with the same section |
6220 | // marking. |
6221 | if (F.hasSection()) |
6222 | return false; |
6223 | |
6224 | // FIXME: Thumb1 outlining is not handled |
6225 | if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction()) |
6226 | return false; |
6227 | |
6228 | // It's safe to outline from MF. |
6229 | return true; |
6230 | } |
6231 | |
6232 | bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, |
6233 | unsigned &Flags) const { |
6234 | // Check if LR is available through all of the MBB. If it's not, then set |
6235 | // a flag. |
6236 | assert(MBB.getParent()->getRegInfo().tracksLiveness() && |
6237 | "Suitable Machine Function for outlining must track liveness" ); |
6238 | |
6239 | LiveRegUnits LRU(getRegisterInfo()); |
6240 | |
6241 | for (MachineInstr &MI : llvm::reverse(C&: MBB)) |
6242 | LRU.accumulate(MI); |
6243 | |
6244 | // Check if each of the unsafe registers are available... |
6245 | bool R12AvailableInBlock = LRU.available(Reg: ARM::R12); |
6246 | bool CPSRAvailableInBlock = LRU.available(Reg: ARM::CPSR); |
6247 | |
6248 | // If all of these are dead (and not live out), we know we don't have to check |
6249 | // them later. |
6250 | if (R12AvailableInBlock && CPSRAvailableInBlock) |
6251 | Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; |
6252 | |
6253 | // Now, add the live outs to the set. |
6254 | LRU.addLiveOuts(MBB); |
6255 | |
6256 | // If any of these registers is available in the MBB, but also a live out of |
6257 | // the block, then we know outlining is unsafe. |
6258 | if (R12AvailableInBlock && !LRU.available(Reg: ARM::R12)) |
6259 | return false; |
6260 | if (CPSRAvailableInBlock && !LRU.available(Reg: ARM::CPSR)) |
6261 | return false; |
6262 | |
6263 | // Check if there's a call inside this MachineBasicBlock. If there is, then |
6264 | // set a flag. |
6265 | if (any_of(Range&: MBB, P: [](MachineInstr &MI) { return MI.isCall(); })) |
6266 | Flags |= MachineOutlinerMBBFlags::HasCalls; |
6267 | |
6268 | // LR liveness is overestimated in return blocks. |
6269 | |
6270 | bool LRIsAvailable = |
6271 | MBB.isReturnBlock() && !MBB.back().isCall() |
6272 | ? isLRAvailable(TRI: getRegisterInfo(), I: MBB.rbegin(), E: MBB.rend()) |
6273 | : LRU.available(Reg: ARM::LR); |
6274 | if (!LRIsAvailable) |
6275 | Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; |
6276 | |
6277 | return true; |
6278 | } |
6279 | |
6280 | outliner::InstrType |
6281 | ARMBaseInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, |
6282 | unsigned Flags) const { |
6283 | MachineInstr &MI = *MIT; |
6284 | const TargetRegisterInfo *TRI = &getRegisterInfo(); |
6285 | |
// PIC instructions contain labels; outlining them would break offset
// computation.
unsigned Opc = MI.getOpcode();
6289 | if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR || |
6290 | Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR || |
6291 | Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB || |
6292 | Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic || |
6293 | Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel || |
6294 | Opc == ARM::t2MOV_ga_pcrel) |
6295 | return outliner::InstrType::Illegal; |
6296 | |
6297 | // Be conservative with ARMv8.1 MVE instructions. |
6298 | if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart || |
6299 | Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart || |
6300 | Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP || |
6301 | Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd || |
6302 | Opc == ARM::t2LoopEndDec) |
6303 | return outliner::InstrType::Illegal; |
6304 | |
6305 | const MCInstrDesc &MCID = MI.getDesc(); |
6306 | uint64_t MIFlags = MCID.TSFlags; |
6307 | if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE) |
6308 | return outliner::InstrType::Illegal; |
6309 | |
6310 | // Is this a terminator for a basic block? |
6311 | if (MI.isTerminator()) |
6312 | // TargetInstrInfo::getOutliningType has already filtered out anything |
6313 | // that would break this, so we can allow it here. |
6314 | return outliner::InstrType::Legal; |
6315 | |
6316 | // Don't outline if link register or program counter value are used. |
6317 | if (MI.readsRegister(Reg: ARM::LR, TRI) || MI.readsRegister(Reg: ARM::PC, TRI)) |
6318 | return outliner::InstrType::Illegal; |
6319 | |
6320 | if (MI.isCall()) { |
// Get the function associated with the call. Look at each operand and find
// the one that represents the callee and get its name.
6323 | const Function *Callee = nullptr; |
6324 | for (const MachineOperand &MOP : MI.operands()) { |
6325 | if (MOP.isGlobal()) { |
6326 | Callee = dyn_cast<Function>(Val: MOP.getGlobal()); |
6327 | break; |
6328 | } |
6329 | } |
6330 | |
// Don't outline calls to "mcount"-like functions; in particular, Linux
// kernel function tracing relies on them.
6333 | if (Callee && |
6334 | (Callee->getName() == "\01__gnu_mcount_nc" || |
6335 | Callee->getName() == "\01mcount" || Callee->getName() == "__mcount" )) |
6336 | return outliner::InstrType::Illegal; |
6337 | |
6338 | // If we don't know anything about the callee, assume it depends on the |
6339 | // stack layout of the caller. In that case, it's only legal to outline |
6340 | // as a tail-call. Explicitly list the call instructions we know about so |
6341 | // we don't get unexpected results with call pseudo-instructions. |
6342 | auto UnknownCallOutlineType = outliner::InstrType::Illegal; |
6343 | if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX || |
6344 | Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip || |
6345 | Opc == ARM::tBLXi) |
6346 | UnknownCallOutlineType = outliner::InstrType::LegalTerminator; |
6347 | |
6348 | if (!Callee) |
6349 | return UnknownCallOutlineType; |
6350 | |
6351 | // We have a function we have information about. Check if it's something we |
6352 | // can safely outline. |
6353 | MachineFunction *MF = MI.getParent()->getParent(); |
6354 | MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(F: *Callee); |
6355 | |
6356 | // We don't know what's going on with the callee at all. Don't touch it. |
6357 | if (!CalleeMF) |
6358 | return UnknownCallOutlineType; |
6359 | |
6360 | // Check if we know anything about the callee saves on the function. If we |
6361 | // don't, then don't touch it, since that implies that we haven't computed |
6362 | // anything about its stack frame yet. |
6363 | MachineFrameInfo &MFI = CalleeMF->getFrameInfo(); |
6364 | if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 || |
6365 | MFI.getNumObjects() > 0) |
6366 | return UnknownCallOutlineType; |
6367 | |
6368 | // At this point, we can say that CalleeMF ought to not pass anything on the |
6369 | // stack. Therefore, we can outline it. |
6370 | return outliner::InstrType::Legal; |
6371 | } |
6372 | |
6373 | // Since calls are handled, don't touch LR or PC |
6374 | if (MI.modifiesRegister(Reg: ARM::LR, TRI) || MI.modifiesRegister(Reg: ARM::PC, TRI)) |
6375 | return outliner::InstrType::Illegal; |
6376 | |
6377 | // Does this use the stack? |
6378 | if (MI.modifiesRegister(Reg: ARM::SP, TRI) || MI.readsRegister(Reg: ARM::SP, TRI)) { |
6379 | // True if there is no chance that any outlined candidate from this range |
6380 | // could require stack fixups. That is, both |
6381 | // * LR is available in the range (No save/restore around call) |
6382 | // * The range doesn't include calls (No save/restore in outlined frame) |
6383 | // are true. |
6384 | // These conditions also ensure correctness of the return address |
6385 | // authentication - we insert sign and authentication instructions only if |
6386 | // we save/restore LR on stack, but then this condition ensures that the |
6387 | // outlined range does not modify the SP, therefore the SP value used for |
6388 | // signing is the same as the one used for authentication. |
6389 | // FIXME: This is very restrictive; the flags check the whole block, |
6390 | // not just the bit we will try to outline. |
6391 | bool MightNeedStackFixUp = |
6392 | (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere | |
6393 | MachineOutlinerMBBFlags::HasCalls)); |
6394 | |
6395 | if (!MightNeedStackFixUp) |
6396 | return outliner::InstrType::Legal; |
6397 | |
6398 | // Any modification of SP will break our code to save/restore LR. |
6399 | // FIXME: We could handle some instructions which add a constant offset to |
6400 | // SP, with a bit more work. |
6401 | if (MI.modifiesRegister(Reg: ARM::SP, TRI)) |
6402 | return outliner::InstrType::Illegal; |
6403 | |
// At this point, we have a stack instruction that we might need to fix up.
// We'll handle it if it's a load or store.
6406 | if (checkAndUpdateStackOffset(MI: &MI, Fixup: Subtarget.getStackAlignment().value(), |
6407 | Updt: false)) |
6408 | return outliner::InstrType::Legal; |
6409 | |
6410 | // We can't fix it up, so don't outline it. |
6411 | return outliner::InstrType::Illegal; |
6412 | } |
6413 | |
6414 | // Be conservative with IT blocks. |
6415 | if (MI.readsRegister(Reg: ARM::ITSTATE, TRI) || |
6416 | MI.modifiesRegister(Reg: ARM::ITSTATE, TRI)) |
6417 | return outliner::InstrType::Illegal; |
6418 | |
6419 | // Don't outline CFI instructions. |
6420 | if (MI.isCFIInstruction()) |
6421 | return outliner::InstrType::Illegal; |
6422 | |
6423 | return outliner::InstrType::Legal; |
6424 | } |
6425 | |
6426 | void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { |
6427 | for (MachineInstr &MI : MBB) { |
6428 | checkAndUpdateStackOffset(MI: &MI, Fixup: Subtarget.getStackAlignment().value(), Updt: true); |
6429 | } |
6430 | } |
6431 | |
6432 | void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB, |
6433 | MachineBasicBlock::iterator It, bool CFI, |
6434 | bool Auth) const { |
6435 | int Align = std::max(a: Subtarget.getStackAlignment().value(), b: uint64_t(8)); |
6436 | unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0; |
6437 | assert(Align >= 8 && Align <= 256); |
6438 | if (Auth) { |
6439 | assert(Subtarget.isThumb2()); |
6440 | // Compute PAC in R12. Outlining ensures R12 is dead across the outlined |
6441 | // sequence. |
6442 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2PAC)).setMIFlags(MIFlags); |
6443 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2STRD_PRE), DestReg: ARM::SP) |
6444 | .addReg(RegNo: ARM::R12, flags: RegState::Kill) |
6445 | .addReg(RegNo: ARM::LR, flags: RegState::Kill) |
6446 | .addReg(RegNo: ARM::SP) |
6447 | .addImm(Val: -Align) |
6448 | .add(MOs: predOps(Pred: ARMCC::AL)) |
6449 | .setMIFlags(MIFlags); |
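// This is roughly "strd r12, lr, [sp, #-Align]!", so the PAC ends up just
// below the saved LR.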
6450 | } else { |
6451 | unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM; |
6452 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: Opc), DestReg: ARM::SP) |
6453 | .addReg(RegNo: ARM::LR, flags: RegState::Kill) |
6454 | .addReg(RegNo: ARM::SP) |
6455 | .addImm(Val: -Align) |
6456 | .add(MOs: predOps(Pred: ARMCC::AL)) |
6457 | .setMIFlags(MIFlags); |
6458 | } |
6459 | |
6460 | if (!CFI) |
6461 | return; |
6462 | |
6463 | MachineFunction &MF = *MBB.getParent(); |
6464 | |
6465 | // Add a CFI, saying CFA is offset by Align bytes from SP. |
6466 | int64_t StackPosEntry = |
6467 | MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: Align)); |
6468 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::CFI_INSTRUCTION)) |
6469 | .addCFIIndex(CFIIndex: StackPosEntry) |
6470 | .setMIFlags(MachineInstr::FrameSetup); |
6471 | |
6472 | // Add a CFI saying that the LR that we want to find is now higher than |
6473 | // before. |
6474 | int LROffset = Auth ? Align - 4 : Align; |
6475 | const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); |
6476 | unsigned DwarfLR = MRI->getDwarfRegNum(RegNum: ARM::LR, isEH: true); |
6477 | int64_t LRPosEntry = MF.addFrameInst( |
6478 | Inst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfLR, Offset: -LROffset)); |
6479 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::CFI_INSTRUCTION)) |
6480 | .addCFIIndex(CFIIndex: LRPosEntry) |
6481 | .setMIFlags(MachineInstr::FrameSetup); |
6482 | if (Auth) { |
// Add a CFI for the location of the return address PAC.
6484 | unsigned DwarfRAC = MRI->getDwarfRegNum(RegNum: ARM::RA_AUTH_CODE, isEH: true); |
6485 | int64_t RACPosEntry = MF.addFrameInst( |
6486 | Inst: MCCFIInstruction::createOffset(L: nullptr, Register: DwarfRAC, Offset: -Align)); |
6487 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::CFI_INSTRUCTION)) |
6488 | .addCFIIndex(CFIIndex: RACPosEntry) |
6489 | .setMIFlags(MachineInstr::FrameSetup); |
6490 | } |
6491 | } |
6492 | |
6493 | void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB, |
6494 | MachineBasicBlock::iterator It, |
6495 | Register Reg) const { |
6496 | MachineFunction &MF = *MBB.getParent(); |
6497 | const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); |
6498 | unsigned DwarfLR = MRI->getDwarfRegNum(RegNum: ARM::LR, isEH: true); |
6499 | unsigned DwarfReg = MRI->getDwarfRegNum(RegNum: Reg, isEH: true); |
6500 | |
6501 | int64_t LRPosEntry = MF.addFrameInst( |
6502 | Inst: MCCFIInstruction::createRegister(L: nullptr, Register1: DwarfLR, Register2: DwarfReg)); |
6503 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::CFI_INSTRUCTION)) |
6504 | .addCFIIndex(CFIIndex: LRPosEntry) |
6505 | .setMIFlags(MachineInstr::FrameSetup); |
6506 | } |
6507 | |
6508 | void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB, |
6509 | MachineBasicBlock::iterator It, |
6510 | bool CFI, bool Auth) const { |
6511 | int Align = Subtarget.getStackAlignment().value(); |
6512 | unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0; |
6513 | if (Auth) { |
6514 | assert(Subtarget.isThumb2()); |
6515 | // Restore return address PAC and LR. |
6516 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2LDRD_POST)) |
6517 | .addReg(RegNo: ARM::R12, flags: RegState::Define) |
6518 | .addReg(RegNo: ARM::LR, flags: RegState::Define) |
6519 | .addReg(RegNo: ARM::SP, flags: RegState::Define) |
6520 | .addReg(RegNo: ARM::SP) |
6521 | .addImm(Val: Align) |
6522 | .add(MOs: predOps(Pred: ARMCC::AL)) |
6523 | .setMIFlags(MIFlags); |
6524 | // LR authentication is after the CFI instructions, below. |
6525 | } else { |
6526 | unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; |
6527 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: Opc), DestReg: ARM::LR) |
6528 | .addReg(RegNo: ARM::SP, flags: RegState::Define) |
6529 | .addReg(RegNo: ARM::SP); |
6530 | if (!Subtarget.isThumb()) |
6531 | MIB.addReg(RegNo: 0); |
6532 | MIB.addImm(Val: Subtarget.getStackAlignment().value()) |
6533 | .add(MOs: predOps(Pred: ARMCC::AL)) |
6534 | .setMIFlags(MIFlags); |
6535 | } |
6536 | |
6537 | if (CFI) { |
6538 | // Now stack has moved back up... |
6539 | MachineFunction &MF = *MBB.getParent(); |
6540 | const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); |
6541 | unsigned DwarfLR = MRI->getDwarfRegNum(RegNum: ARM::LR, isEH: true); |
6542 | int64_t StackPosEntry = |
6543 | MF.addFrameInst(Inst: MCCFIInstruction::cfiDefCfaOffset(L: nullptr, Offset: 0)); |
6544 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::CFI_INSTRUCTION)) |
6545 | .addCFIIndex(CFIIndex: StackPosEntry) |
6546 | .setMIFlags(MachineInstr::FrameDestroy); |
6547 | |
6548 | // ... and we have restored LR. |
6549 | int64_t LRPosEntry = |
6550 | MF.addFrameInst(Inst: MCCFIInstruction::createRestore(L: nullptr, Register: DwarfLR)); |
6551 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::CFI_INSTRUCTION)) |
6552 | .addCFIIndex(CFIIndex: LRPosEntry) |
6553 | .setMIFlags(MachineInstr::FrameDestroy); |
6554 | |
6555 | if (Auth) { |
6556 | unsigned DwarfRAC = MRI->getDwarfRegNum(RegNum: ARM::RA_AUTH_CODE, isEH: true); |
6557 | int64_t Entry = |
6558 | MF.addFrameInst(Inst: MCCFIInstruction::createUndefined(L: nullptr, Register: DwarfRAC)); |
6559 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::CFI_INSTRUCTION)) |
6560 | .addCFIIndex(CFIIndex: Entry) |
6561 | .setMIFlags(MachineInstr::FrameDestroy); |
6562 | } |
6563 | } |
6564 | |
6565 | if (Auth) |
6566 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::t2AUT)); |
6567 | } |
6568 | |
6569 | void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg( |
6570 | MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { |
6571 | MachineFunction &MF = *MBB.getParent(); |
6572 | const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); |
6573 | unsigned DwarfLR = MRI->getDwarfRegNum(RegNum: ARM::LR, isEH: true); |
6574 | |
6575 | int64_t LRPosEntry = |
6576 | MF.addFrameInst(Inst: MCCFIInstruction::createRestore(L: nullptr, Register: DwarfLR)); |
6577 | BuildMI(BB&: MBB, I: It, MIMD: DebugLoc(), MCID: get(Opcode: ARM::CFI_INSTRUCTION)) |
6578 | .addCFIIndex(CFIIndex: LRPosEntry) |
6579 | .setMIFlags(MachineInstr::FrameDestroy); |
6580 | } |
6581 | |
6582 | void ARMBaseInstrInfo::buildOutlinedFrame( |
6583 | MachineBasicBlock &MBB, MachineFunction &MF, |
6584 | const outliner::OutlinedFunction &OF) const { |
6585 | // For thunk outlining, rewrite the last instruction from a call to a |
6586 | // tail-call. |
6587 | if (OF.FrameConstructionID == MachineOutlinerThunk) { |
6588 | MachineInstr *Call = &*--MBB.instr_end(); |
6589 | bool isThumb = Subtarget.isThumb(); |
6590 | unsigned FuncOp = isThumb ? 2 : 0; |
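// For Thumb calls (tBL) the callee operand follows the two predicate
// operands, so it is operand 2; for ARM BL it is operand 0.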
6591 | unsigned Opc = Call->getOperand(i: FuncOp).isReg() |
6592 | ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr |
6593 | : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd |
6594 | : ARM::tTAILJMPdND |
6595 | : ARM::TAILJMPd; |
6596 | MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: get(Opcode: Opc)) |
6597 | .add(MO: Call->getOperand(i: FuncOp)); |
6598 | if (isThumb && !Call->getOperand(i: FuncOp).isReg()) |
6599 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
6600 | Call->eraseFromParent(); |
6601 | } |
6602 | |
6603 | // Is there a call in the outlined range? |
6604 | auto IsNonTailCall = [](MachineInstr &MI) { |
6605 | return MI.isCall() && !MI.isReturn(); |
6606 | }; |
6607 | if (llvm::any_of(Range: MBB.instrs(), P: IsNonTailCall)) { |
6608 | MachineBasicBlock::iterator It = MBB.begin(); |
6609 | MachineBasicBlock::iterator Et = MBB.end(); |
6610 | |
6611 | if (OF.FrameConstructionID == MachineOutlinerTailCall || |
6612 | OF.FrameConstructionID == MachineOutlinerThunk) |
6613 | Et = std::prev(x: MBB.end()); |
6614 | |
// We have to save and restore LR, so we need to add it to the liveins if it
// is not already part of the set. This is sufficient since outlined
// functions only have one block.
6618 | if (!MBB.isLiveIn(Reg: ARM::LR)) |
6619 | MBB.addLiveIn(PhysReg: ARM::LR); |
6620 | |
6621 | // Insert a save before the outlined region |
6622 | bool Auth = OF.Candidates.front() |
6623 | .getMF() |
6624 | ->getInfo<ARMFunctionInfo>() |
6625 | ->shouldSignReturnAddress(SpillsLR: true); |
6626 | saveLROnStack(MBB, It, CFI: true, Auth); |
6627 | |
6628 | // Fix up the instructions in the range, since we're going to modify the |
6629 | // stack. |
6630 | assert(OF.FrameConstructionID != MachineOutlinerDefault && |
6631 | "Can only fix up stack references once" ); |
6632 | fixupPostOutline(MBB); |
6633 | |
6634 | // Insert a restore before the terminator for the function. Restore LR. |
6635 | restoreLRFromStack(MBB, It: Et, CFI: true, Auth); |
6636 | } |
6637 | |
6638 | // If this is a tail call outlined function, then there's already a return. |
6639 | if (OF.FrameConstructionID == MachineOutlinerTailCall || |
6640 | OF.FrameConstructionID == MachineOutlinerThunk) |
6641 | return; |
6642 | |
6643 | // Here we have to insert the return ourselves. Get the correct opcode from |
6644 | // current feature set. |
6645 | BuildMI(BB&: MBB, I: MBB.end(), MIMD: DebugLoc(), MCID: get(Opcode: Subtarget.getReturnOpcode())) |
6646 | .add(MOs: predOps(Pred: ARMCC::AL)); |
6647 | |
6648 | // Did we have to modify the stack by saving the link register? |
6649 | if (OF.FrameConstructionID != MachineOutlinerDefault && |
6650 | OF.Candidates[0].CallConstructionID != MachineOutlinerDefault) |
6651 | return; |
6652 | |
6653 | // We modified the stack. |
6654 | // Walk over the basic block and fix up all the stack accesses. |
6655 | fixupPostOutline(MBB); |
6656 | } |
6657 | |
6658 | MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( |
6659 | Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, |
6660 | MachineFunction &MF, outliner::Candidate &C) const { |
6661 | MachineInstrBuilder MIB; |
6662 | MachineBasicBlock::iterator CallPt; |
6663 | unsigned Opc; |
6664 | bool isThumb = Subtarget.isThumb(); |
6665 | |
6666 | // Are we tail calling? |
6667 | if (C.CallConstructionID == MachineOutlinerTailCall) { |
6668 | // If yes, then we can just branch to the label. |
6669 | Opc = isThumb |
6670 | ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND |
6671 | : ARM::TAILJMPd; |
6672 | MIB = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: Opc)) |
6673 | .addGlobalAddress(GV: M.getNamedValue(Name: MF.getName())); |
6674 | if (isThumb) |
6675 | MIB.add(MOs: predOps(Pred: ARMCC::AL)); |
6676 | It = MBB.insert(I: It, MI: MIB); |
6677 | return It; |
6678 | } |
6679 | |
6680 | // Create the call instruction. |
6681 | Opc = isThumb ? ARM::tBL : ARM::BL; |
6682 | MachineInstrBuilder CallMIB = BuildMI(MF, MIMD: DebugLoc(), MCID: get(Opcode: Opc)); |
6683 | if (isThumb) |
6684 | CallMIB.add(MOs: predOps(Pred: ARMCC::AL)); |
6685 | CallMIB.addGlobalAddress(GV: M.getNamedValue(Name: MF.getName())); |
6686 | |
6687 | if (C.CallConstructionID == MachineOutlinerNoLRSave || |
6688 | C.CallConstructionID == MachineOutlinerThunk) { |
6689 | // No, so just insert the call. |
6690 | It = MBB.insert(I: It, MI: CallMIB); |
6691 | return It; |
6692 | } |
6693 | |
6694 | const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>(); |
6695 | // Can we save to a register? |
6696 | if (C.CallConstructionID == MachineOutlinerRegSave) { |
6697 | Register Reg = findRegisterToSaveLRTo(C); |
6698 | assert(Reg != 0 && "No callee-saved register available?" ); |
6699 | |
6700 | // Save and restore LR from that register. |
6701 | copyPhysReg(MBB, I: It, DL: DebugLoc(), DestReg: Reg, SrcReg: ARM::LR, KillSrc: true); |
6702 | if (!AFI.isLRSpilled()) |
6703 | emitCFIForLRSaveToReg(MBB, It, Reg); |
6704 | CallPt = MBB.insert(I: It, MI: CallMIB); |
6705 | copyPhysReg(MBB, I: It, DL: DebugLoc(), DestReg: ARM::LR, SrcReg: Reg, KillSrc: true); |
6706 | if (!AFI.isLRSpilled()) |
6707 | emitCFIForLRRestoreFromReg(MBB, It); |
6708 | It--; |
6709 | return CallPt; |
6710 | } |
6711 | // We have the default case. Save and restore from SP. |
6712 | if (!MBB.isLiveIn(Reg: ARM::LR)) |
6713 | MBB.addLiveIn(PhysReg: ARM::LR); |
6714 | bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(SpillsLR: true); |
6715 | saveLROnStack(MBB, It, CFI: !AFI.isLRSpilled(), Auth); |
6716 | CallPt = MBB.insert(I: It, MI: CallMIB); |
6717 | restoreLRFromStack(MBB, It, CFI: !AFI.isLRSpilled(), Auth); |
6718 | It--; |
6719 | return CallPt; |
6720 | } |
6721 | |
6722 | bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault( |
6723 | MachineFunction &MF) const { |
6724 | return Subtarget.isMClass() && MF.getFunction().hasMinSize(); |
6725 | } |
6726 | |
6727 | bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable( |
6728 | const MachineInstr &MI) const { |
6729 | // Try hard to rematerialize any VCTPs because if we spill P0, it will block |
6730 | // the tail predication conversion. This means that the element count |
6731 | // register has to be live for longer, but that has to be better than |
6732 | // spill/restore and VPT predication. |
6733 | return (isVCTP(MI: &MI) && !isPredicated(MI)) || |
6734 | TargetInstrInfo::isReallyTriviallyReMaterializable(MI); |
6735 | } |
6736 | |
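// With SLS (straight-line speculation) hardening of indirect calls enabled,
// the _noip call variants are selected; these disallow r12 (ip) as the call
// target register.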
6737 | unsigned llvm::getBLXOpcode(const MachineFunction &MF) { |
6738 | return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip |
6739 | : ARM::BLX; |
6740 | } |
6741 | |
6742 | unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) { |
6743 | return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip |
6744 | : ARM::tBLXr; |
6745 | } |
6746 | |
6747 | unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) { |
6748 | return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip |
6749 | : ARM::BLX_pred; |
6750 | } |
6751 | |
6752 | namespace { |
6753 | class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { |
6754 | MachineInstr *EndLoop, *LoopCount; |
6755 | MachineFunction *MF; |
6756 | const TargetInstrInfo *TII; |
6757 | |
6758 | // Bitset[0 .. MAX_STAGES-1] ... iterations needed |
6759 | // [LAST_IS_USE] : last reference to register in schedule is a use |
6760 | // [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live |
6761 | static int constexpr MAX_STAGES = 30; |
6762 | static int constexpr LAST_IS_USE = MAX_STAGES; |
6763 | static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1; |
6764 | typedef std::bitset<MAX_STAGES + 2> IterNeed; |
6765 | typedef std::map<unsigned, IterNeed> IterNeeds; |
6766 | |
6767 | void bumpCrossIterationPressure(RegPressureTracker &RPT, |
6768 | const IterNeeds &CIN); |
6769 | bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS); |
6770 | |
// Meanings of EndLoop and LoopCount for the supported loop types:
6772 | // t2Bcc: |
6773 | // EndLoop = branch at end of original BB that will become a kernel |
6774 | // LoopCount = CC setter live into branch |
6775 | // t2LoopEnd: |
6776 | // EndLoop = branch at end of original BB |
6777 | // LoopCount = t2LoopDec |
6778 | public: |
6779 | ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount) |
6780 | : EndLoop(EndLoop), LoopCount(LoopCount), |
6781 | MF(EndLoop->getParent()->getParent()), |
6782 | TII(MF->getSubtarget().getInstrInfo()) {} |
6783 | |
6784 | bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { |
6785 | // Only ignore the terminator. |
6786 | return MI == EndLoop || MI == LoopCount; |
6787 | } |
6788 | |
6789 | bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override { |
6790 | if (tooMuchRegisterPressure(SSD, SMS)) |
6791 | return false; |
6792 | |
6793 | return true; |
6794 | } |
6795 | |
6796 | std::optional<bool> createTripCountGreaterCondition( |
6797 | int TC, MachineBasicBlock &MBB, |
6798 | SmallVectorImpl<MachineOperand> &Cond) override { |
6799 | |
    if (isCondBranchOpcode(EndLoop->getOpcode())) {
      Cond.push_back(EndLoop->getOperand(1));
      Cond.push_back(EndLoop->getOperand(2));
      if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
        TII->reverseBranchCondition(Cond);
      }
      return {};
    } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
      // General case just lets the unrolled t2LoopDec do the subtraction and
      // therefore just needs to check if zero has been reached.
      MachineInstr *LoopDec = nullptr;
      for (auto &I : MBB.instrs())
        if (I.getOpcode() == ARM::t2LoopDec)
          LoopDec = &I;
      assert(LoopDec && "Unable to find copied LoopDec");
      // Check if we're done with the loop.
      BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
          .addReg(LoopDec->getOperand(0).getReg())
          .addImm(0)
          .addImm(ARMCC::AL)
          .addReg(ARM::NoRegister);
      Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ));
      Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
      return {};
    } else
      llvm_unreachable("Unknown EndLoop");
6826 | } |
6827 | |
  void setPreheader(MachineBasicBlock *NewPreheader) override {}
6829 | |
6830 | void adjustTripCount(int TripCountAdjust) override {} |
6831 | |
6832 | void disposed() override {} |
6833 | }; |
6834 | |
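// Temporarily add the pressure implied by cross-iteration (loop-carried)
// values so that it is reflected in MaxSetPressure, then take it away again
// so the tracker's current pressure is unchanged. Each register contributes
// one unit per extra iteration it stays live, minus one if the normal tracker
// already counts it as live (SEEN_AS_LIVE).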
6835 | void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT, |
6836 | const IterNeeds &CIN) { |
6837 | // Increase pressure by the amounts in CrossIterationNeeds |
6838 | for (const auto &N : CIN) { |
6839 | int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2; |
6840 | for (int I = 0; I < Cnt; ++I) |
      RPT.increaseRegPressure(Register(N.first), LaneBitmask::getNone(),
                              LaneBitmask::getAll());
6843 | } |
6844 | // Decrease pressure by the amounts in CrossIterationNeeds |
6845 | for (const auto &N : CIN) { |
6846 | int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2; |
6847 | for (int I = 0; I < Cnt; ++I) |
      RPT.decreaseRegPressure(Register(N.first), LaneBitmask::getAll(),
                              LaneBitmask::getNone());
6850 | } |
6851 | } |
6852 | |
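// Conservatively estimate the register pressure of the proposed modulo
// schedule, including values that must stay live across iterations, and
// report whether any pressure set would exceed its limit.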
6853 | bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD, |
6854 | SMSchedule &SMS) { |
6855 | IterNeeds CrossIterationNeeds; |
6856 | |
6857 | // Determine which values will be loop-carried after the schedule is |
6858 | // applied |
6859 | |
6860 | for (auto &SU : SSD.SUnits) { |
6861 | const MachineInstr *MI = SU.getInstr(); |
    int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
6863 | for (auto &S : SU.Succs) |
6864 | if (MI->isPHI() && S.getKind() == SDep::Anti) { |
6865 | Register Reg = S.getReg(); |
6866 | if (Reg.isVirtual()) |
          CrossIterationNeeds.insert(std::make_pair(Reg.id(), IterNeed()))
              .first->second.set(0);
6869 | } else if (S.isAssignedRegDep()) { |
        int OStg = SMS.stageScheduled(S.getSUnit());
6871 | if (OStg >= 0 && OStg != Stg) { |
6872 | Register Reg = S.getReg(); |
6873 | if (Reg.isVirtual()) |
            CrossIterationNeeds.insert(std::make_pair(Reg.id(), IterNeed()))
                .first->second |= ((1 << (OStg - Stg)) - 1);
6876 | } |
6877 | } |
6878 | } |
6879 | |
6880 | // Determine more-or-less what the proposed schedule (reversed) is going to |
6881 | // be; it might not be quite the same because the within-cycle ordering |
6882 | // created by SMSchedule depends upon changes to help with address offsets and |
6883 | // the like. |
6884 | std::vector<SUnit *> ProposedSchedule; |
6885 | for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle) |
6886 | for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd; |
6887 | ++Stage) { |
6888 | std::deque<SUnit *> Instrs = |
          SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
      std::sort(Instrs.begin(), Instrs.end(),
                [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
6892 | for (SUnit *SU : Instrs) |
        ProposedSchedule.push_back(SU);
6894 | } |
6895 | |
6896 | // Learn whether the last use/def of each cross-iteration register is a use or |
6897 | // def. If it is a def, RegisterPressure will implicitly increase max pressure |
6898 | // and we do not have to add the pressure. |
6899 | for (auto *SU : ProposedSchedule) |
6900 | for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid(); |
6901 | ++OperI) { |
6902 | auto MO = *OperI; |
6903 | if (!MO.isReg() || !MO.getReg()) |
6904 | continue; |
6905 | Register Reg = MO.getReg(); |
      auto CIter = CrossIterationNeeds.find(Reg.id());
6907 | if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] || |
6908 | CIter->second[SEEN_AS_LIVE]) |
6909 | continue; |
6910 | if (MO.isDef() && !MO.isDead()) |
        CIter->second.set(SEEN_AS_LIVE);
6912 | else if (MO.isUse()) |
        CIter->second.set(LAST_IS_USE);
6914 | } |
6915 | for (auto &CI : CrossIterationNeeds) |
    CI.second.reset(LAST_IS_USE);
6917 | |
6918 | RegionPressure RecRegPressure; |
6919 | RegPressureTracker RPTracker(RecRegPressure); |
6920 | RegisterClassInfo RegClassInfo; |
  RegClassInfo.runOnMachineFunction(*MF);
  RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
                 EndLoop->getParent()->end(), false, false);
6924 | const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); |
6925 | |
  bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6927 | |
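  // Replay the proposed schedule bottom-up. After each instruction, re-apply
  // the (updated) cross-iteration contribution so that MaxSetPressure always
  // reflects loop-carried values as well.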
6928 | for (auto *SU : ProposedSchedule) { |
6929 | MachineBasicBlock::const_iterator CurInstI = SU->getInstr(); |
    RPTracker.setPos(std::next(CurInstI));
6931 | RPTracker.recede(); |
6932 | |
6933 | // Track what cross-iteration registers would be seen as live |
6934 | for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) { |
6935 | auto MO = *OperI; |
6936 | if (!MO.isReg() || !MO.getReg()) |
6937 | continue; |
6938 | Register Reg = MO.getReg(); |
6939 | if (MO.isDef() && !MO.isDead()) { |
        auto CIter = CrossIterationNeeds.find(Reg.id());
        if (CIter != CrossIterationNeeds.end()) {
          CIter->second.reset(0);
          CIter->second.reset(SEEN_AS_LIVE);
6944 | } |
6945 | } |
6946 | } |
6947 | for (auto &S : SU->Preds) { |
6948 | auto Stg = SMS.stageScheduled(SU); |
6949 | if (S.isAssignedRegDep()) { |
6950 | Register Reg = S.getReg(); |
        auto CIter = CrossIterationNeeds.find(Reg.id());
        if (CIter != CrossIterationNeeds.end()) {
          auto Stg2 = SMS.stageScheduled(const_cast<SUnit *>(S.getSUnit()));
          assert(Stg2 <= Stg && "Data dependence upon earlier stage");
          if (Stg - Stg2 < MAX_STAGES)
            CIter->second.set(Stg - Stg2);
          CIter->second.set(SEEN_AS_LIVE);
6958 | } |
6959 | } |
6960 | } |
6961 | |
    bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
6963 | } |
6964 | |
6965 | auto &P = RPTracker.getPressure().MaxSetPressure; |
6966 | for (unsigned I = 0, E = P.size(); I < E; ++I) |
    if (P[I] > TRI->getRegPressureSetLimit(*MF, I)) {
6968 | return true; |
6969 | } |
6970 | return false; |
6971 | } |
6972 | |
6973 | } // namespace |
6974 | |
6975 | std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> |
6976 | ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { |
6977 | MachineBasicBlock::iterator I = LoopBB->getFirstTerminator(); |
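  // LoopBB is expected to have two predecessors: itself (as the latch) and
  // the preheader; pick the one that is not the loop block.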
  MachineBasicBlock *Preheader = *LoopBB->pred_begin();
  if (Preheader == LoopBB)
    Preheader = *std::next(LoopBB->pred_begin());
6981 | |
6982 | if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) { |
6983 | // If the branch is a Bcc, then the CPSR should be set somewhere within the |
6984 | // block. We need to determine the reaching definition of CPSR so that |
6985 | // it can be marked as non-pipelineable, allowing the pipeliner to force |
6986 | // it into stage 0 or give up if it cannot or will not do so. |
6987 | MachineInstr *CCSetter = nullptr; |
6988 | for (auto &L : LoopBB->instrs()) { |
6989 | if (L.isCall()) |
6990 | return nullptr; |
      if (isCPSRDefined(L))
6992 | CCSetter = &L; |
6993 | } |
6994 | if (CCSetter) |
      return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
    else
      return nullptr; // Unable to find the CC setter, so unable to guarantee
                      // that the pipeline will work.
6999 | } |
7000 | |
7001 | // Recognize: |
7002 | // preheader: |
  //     %1 = t2DoLoopStart %0
7004 | // loop: |
7005 | // %2 = phi %1, <not loop>, %..., %loop |
7006 | // %3 = t2LoopDec %2, <imm> |
7007 | // t2LoopEnd %3, %loop |
7008 | |
7009 | if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) { |
7010 | for (auto &L : LoopBB->instrs()) |
7011 | if (L.isCall()) |
7012 | return nullptr; |
      else if (isVCTP(&L))
7014 | return nullptr; |
    Register LoopDecResult = I->getOperand(0).getReg();
7016 | MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo(); |
    MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
7018 | if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) |
7019 | return nullptr; |
7020 | MachineInstr *LoopStart = nullptr; |
7021 | for (auto &J : Preheader->instrs()) |
7022 | if (J.getOpcode() == ARM::t2DoLoopStart) |
7023 | LoopStart = &J; |
7024 | if (!LoopStart) |
7025 | return nullptr; |
    return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
7027 | } |
7028 | return nullptr; |
7029 | } |
7030 | |