1 | //===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the Thumb-2 implementation of the TargetInstrInfo class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "Thumb2InstrInfo.h" |
14 | #include "ARMMachineFunctionInfo.h" |
15 | #include "ARMSubtarget.h" |
16 | #include "MCTargetDesc/ARMAddressingModes.h" |
17 | #include "llvm/CodeGen/MachineBasicBlock.h" |
18 | #include "llvm/CodeGen/MachineFrameInfo.h" |
19 | #include "llvm/CodeGen/MachineFunction.h" |
20 | #include "llvm/CodeGen/MachineInstr.h" |
21 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
22 | #include "llvm/CodeGen/MachineMemOperand.h" |
23 | #include "llvm/CodeGen/MachineOperand.h" |
24 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
25 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
26 | #include "llvm/IR/DebugLoc.h" |
27 | #include "llvm/IR/Module.h" |
28 | #include "llvm/MC/MCInst.h" |
29 | #include "llvm/MC/MCInstBuilder.h" |
30 | #include "llvm/MC/MCInstrDesc.h" |
31 | #include "llvm/Support/CommandLine.h" |
32 | #include "llvm/Support/ErrorHandling.h" |
33 | #include "llvm/Target/TargetMachine.h" |
34 | #include <cassert> |
35 | |
36 | using namespace llvm; |
37 | |
// Escape hatch to the legacy Thumb2 if-conversion heuristics (per the flag's
// description); hidden from normal --help output. NOTE(review): not referenced
// elsewhere in this file — presumably consumed by the if-conversion code.
static cl::opt<bool>
OldT2IfCvt("old-thumb2-ifcvt" , cl::Hidden,
           cl::desc("Use old-style Thumb2 if-conversion heuristics" ),
           cl::init(Val: false));

// When set, Thumb2InstrInfo::optimizeSelect keeps the predicated move rather
// than rewriting it to a t2CSEL on 8.1-M Mainline targets.
static cl::opt<bool>
PreferNoCSEL("prefer-no-csel" , cl::Hidden,
             cl::desc("Prefer predicated Move to CSEL" ),
             cl::init(Val: false));
47 | |
// All state lives in the ARMBaseInstrInfo base; Thumb2 specializes behavior
// purely through virtual-method overrides below.
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
    : ARMBaseInstrInfo(STI) {}
50 | |
/// Return the noop instruction to use for a noop.
MCInst Thumb2InstrInfo::getNop() const {
  // "hint #0" with an always (AL) predicate and no predicate register.
  return MCInstBuilder(ARM::tHINT).addImm(Val: 0).addImm(Val: ARMCC::AL).addReg(Reg: 0);
}
55 | |
/// Map an indexed load/store opcode to its unindexed form. Not implemented for
/// Thumb2 — always returns 0 ("no unindexed form available").
unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
  // FIXME
  return 0;
}
60 | |
/// Replace the tail of the block (starting at Tail) with an unconditional
/// branch to NewDest, fixing up any IT block the removed tail belonged to.
void
Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
                                         MachineBasicBlock *NewDest) const {
  MachineBasicBlock *MBB = Tail->getParent();
  ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
  // Fast path: no IT blocks in the function, or the tail is itself a branch —
  // the generic replacement needs no extra IT fix-up.
  if (!AFI->hasITBlocks() || Tail->isBranch()) {
    TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
    return;
  }

  // If the first instruction of Tail is predicated, we may have to update
  // the IT instruction.
  Register PredReg;
  ARMCC::CondCodes CC = getInstrPredicate(MI: *Tail, PredReg);
  MachineBasicBlock::iterator MBBI = Tail;
  if (CC != ARMCC::AL)
    // Expecting at least the t2IT instruction before it.
    --MBBI;

  // Actually replace the tail.
  TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);

  // Fix up IT.
  if (CC != ARMCC::AL) {
    MachineBasicBlock::iterator E = MBB->begin();
    unsigned Count = 4; // At most 4 instructions in an IT block.
    // Walk backwards over (at most four) predicated instructions, skipping
    // debug instructions, looking for the t2IT that opened the block.
    while (Count && MBBI != E) {
      if (MBBI->isDebugInstr()) {
        --MBBI;
        continue;
      }
      if (MBBI->getOpcode() == ARM::t2IT) {
        unsigned Mask = MBBI->getOperand(i: 1).getImm();
        if (Count == 4)
          // The t2IT immediately preceded the removed tail, so the IT block
          // is now empty: delete the IT entirely.
          MBBI->eraseFromParent();
        else {
          // Truncate the IT mask so the block ends just before the position
          // the removed tail occupied.
          unsigned MaskOn = 1 << Count;
          unsigned MaskOff = ~(MaskOn - 1);
          MBBI->getOperand(i: 1).setImm((Mask & MaskOff) | MaskOn);
        }
        return;
      }
      --MBBI;
      --Count;
    }

    // Ctrl flow can reach here if branch folding is run before IT block
    // formation pass.
  }
}
111 | |
112 | bool |
113 | Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, |
114 | MachineBasicBlock::iterator MBBI) const { |
115 | while (MBBI->isDebugInstr()) { |
116 | ++MBBI; |
117 | if (MBBI == MBB.end()) |
118 | return false; |
119 | } |
120 | |
121 | Register PredReg; |
122 | return getITInstrPredicate(MI: *MBBI, PredReg) == ARMCC::AL; |
123 | } |
124 | |
/// Simplify a predicated-move select pseudo. Defers to the shared ARM
/// implementation first; if that fails on an 8.1-M Mainline subtarget (and
/// -prefer-no-csel is not set), rewrites the select into a t2CSEL.
MachineInstr *
Thumb2InstrInfo::optimizeSelect(MachineInstr &MI,
                                SmallPtrSetImpl<MachineInstr *> &SeenMIs,
                                bool PreferFalse) const {
  // Try to use the base optimizeSelect, which uses canFoldIntoMOVCC to fold the
  // MOVCC into another instruction. If that fails on 8.1-M fall back to using a
  // CSEL.
  MachineInstr *RV = ARMBaseInstrInfo::optimizeSelect(MI, SeenMIs, PreferFalse);
  if (!RV && getSubtarget().hasV8_1MMainlineOps() && !PreferNoCSEL) {
    Register DestReg = MI.getOperand(i: 0).getReg();

    // Only rewrite virtual destinations; bail out for physical registers.
    if (!DestReg.isVirtual())
      return nullptr;

    // Build the t2CSEL, taking the source operands in 2, 1, 3 order from MI.
    MachineInstrBuilder NewMI = BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(),
                                        MCID: get(Opcode: ARM::t2CSEL), DestReg)
                                    .add(MO: MI.getOperand(i: 2))
                                    .add(MO: MI.getOperand(i: 1))
                                    .add(MO: MI.getOperand(i: 3));
    // Keep the caller's visited-set consistent with the new instruction.
    SeenMIs.insert(Ptr: NewMI);
    return NewMI;
  }
  return RV;
}
149 | |
/// Emit a register-to-register copy. GPR-to-GPR copies use a single tMOVr;
/// everything else is delegated to the shared ARM implementation.
void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  const DebugLoc &DL, Register DestReg,
                                  Register SrcReg, bool KillSrc,
                                  bool RenamableDest, bool RenamableSrc) const {
  // Handle SPR, DPR, and QPR copies.
  if (!ARM::GPRRegClass.contains(Reg1: DestReg, Reg2: SrcReg))
    return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);

  // GPR <- GPR: one tMOVr, always executed (AL predicate).
  BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::tMOVr), DestReg)
      .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc))
      .add(MOs: predOps(Pred: ARMCC::AL));
}
163 | |
/// Spill SrcReg to stack slot FI. Single GPRs use t2STRi12, GPR pairs use
/// t2STRD; all other register classes fall back to the shared ARM code.
void Thumb2InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I,
                                          Register SrcReg, bool isKill, int FI,
                                          const TargetRegisterClass *RC,
                                          const TargetRegisterInfo *TRI,
                                          Register VReg,
                                          MachineInstr::MIFlag Flags) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();

  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Memory operand describing the spill slot; size/alignment come from the
  // frame object.
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo: MachinePointerInfo::getFixedStack(MF, FI), F: MachineMemOperand::MOStore,
      Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));

  // Single GPR: store with the 12-bit immediate-offset encoding.
  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
    BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::t2STRi12))
        .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
        .addFrameIndex(Idx: FI)
        .addImm(Val: 0)
        .addMemOperand(MMO)
        .add(MOs: predOps(Pred: ARMCC::AL));
    return;
  }

  if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
    // Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for
    // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
    // otherwise).
    if (SrcReg.isVirtual()) {
      MachineRegisterInfo *MRI = &MF.getRegInfo();
      MRI->constrainRegClass(Reg: SrcReg, RC: &ARM::GPRPairnospRegClass);
    }

    // Store both halves of the pair with one t2STRD.
    MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::t2STRDi8));
    AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::gsub_0, State: getKillRegState(B: isKill), TRI);
    AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::gsub_1, State: 0, TRI);
    MIB.addFrameIndex(Idx: FI).addImm(Val: 0).addMemOperand(MMO).add(MOs: predOps(Pred: ARMCC::AL));
    return;
  }

  // All other register classes (FP/vector, etc.): shared ARM handling.
  ARMBaseInstrInfo::storeRegToStackSlot(MBB, MBBI: I, SrcReg, isKill, FrameIndex: FI, RC, TRI,
                                        VReg: Register());
}
209 | |
/// Reload DestReg from stack slot FI. Mirrors storeRegToStackSlot: single
/// GPRs use t2LDRi12, GPR pairs use t2LDRD, everything else falls back to the
/// shared ARM implementation.
void Thumb2InstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
    Register VReg, MachineInstr::MIFlag Flags) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Memory operand describing the reload slot.
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo: MachinePointerInfo::getFixedStack(MF, FI), F: MachineMemOperand::MOLoad,
      Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();

  // Single GPR: load with the 12-bit immediate-offset encoding.
  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
    BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::t2LDRi12), DestReg)
        .addFrameIndex(Idx: FI)
        .addImm(Val: 0)
        .addMemOperand(MMO)
        .add(MOs: predOps(Pred: ARMCC::AL));
    return;
  }

  if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
    // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for
    // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
    // otherwise).
    if (DestReg.isVirtual()) {
      MachineRegisterInfo *MRI = &MF.getRegInfo();
      MRI->constrainRegClass(Reg: DestReg, RC: &ARM::GPRPairnospRegClass);
    }

    // Both halves are pure definitions (DefineNoRead): nothing of the old
    // value is read.
    MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::t2LDRDi8));
    AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_0, State: RegState::DefineNoRead, TRI);
    AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_1, State: RegState::DefineNoRead, TRI);
    MIB.addFrameIndex(Idx: FI).addImm(Val: 0).addMemOperand(MMO).add(MOs: predOps(Pred: ARMCC::AL));

    // For a physical pair, also mark the full pair register as implicitly
    // defined, not just its two sub-registers.
    if (DestReg.isPhysical())
      MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine);
    return;
  }

  // All other register classes: shared ARM handling.
  ARMBaseInstrInfo::loadRegFromStackSlot(MBB, MBBI: I, DestReg, FrameIndex: FI, RC, TRI,
                                         VReg: Register());
}
253 | |
/// Expand the LOAD_STACK_GUARD pseudo, choosing the Thumb2 opcodes used to
/// materialize the guard's address based on the stack-protector mode and the
/// subtarget's relocation/code-generation constraints.
void Thumb2InstrInfo::expandLoadStackGuard(
    MachineBasicBlock::iterator MI) const {
  MachineFunction &MF = *MI->getParent()->getParent();
  Module &M = *MF.getFunction().getParent();

  // TLS-based guard: read it through the coprocessor (MRC) rather than a
  // global variable.
  if (M.getStackProtectorGuard() == "tls" ) {
    expandLoadStackGuardBase(MI, LoadImmOpc: ARM::t2MRC, LoadOpc: ARM::t2LDRi12);
    return;
  }

  // Global-variable guard: pick the address-materialization opcode.
  const auto *GV = cast<GlobalValue>(Val: (*MI->memoperands_begin())->getValue());
  const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
  if (Subtarget.isTargetELF() && !GV->isDSOLocal())
    // Non-local ELF symbol: pc-relative literal load of the GOT entry.
    expandLoadStackGuardBase(MI, LoadImmOpc: ARM::t2LDRLIT_ga_pcrel, LoadOpc: ARM::t2LDRi12);
  else if (!Subtarget.useMovt())
    // No movw/movt available: absolute literal-pool load.
    expandLoadStackGuardBase(MI, LoadImmOpc: ARM::tLDRLIT_ga_abs, LoadOpc: ARM::t2LDRi12);
  else if (MF.getTarget().isPositionIndependent())
    expandLoadStackGuardBase(MI, LoadImmOpc: ARM::t2MOV_ga_pcrel, LoadOpc: ARM::t2LDRi12);
  else
    expandLoadStackGuardBase(MI, LoadImmOpc: ARM::t2MOVi32imm, LoadOpc: ARM::t2LDRi12);
}
275 | |
276 | MachineInstr *Thumb2InstrInfo::commuteInstructionImpl(MachineInstr &MI, |
277 | bool NewMI, |
278 | unsigned OpIdx1, |
279 | unsigned OpIdx2) const { |
280 | switch (MI.getOpcode()) { |
281 | case ARM::MVE_VMAXNMAf16: |
282 | case ARM::MVE_VMAXNMAf32: |
283 | case ARM::MVE_VMINNMAf16: |
284 | case ARM::MVE_VMINNMAf32: |
285 | // Don't allow predicated instructions to be commuted. |
286 | if (getVPTInstrPredicate(MI) != ARMVCC::None) |
287 | return nullptr; |
288 | } |
289 | return ARMBaseInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
290 | } |
291 | |
292 | bool Thumb2InstrInfo::isSchedulingBoundary(const MachineInstr &MI, |
293 | const MachineBasicBlock *MBB, |
294 | const MachineFunction &MF) const { |
295 | // BTI clearing instructions shall not take part in scheduling regions as |
296 | // they must stay in their intended place. Although PAC isn't BTI clearing, |
297 | // it can be transformed into PACBTI after the pre-RA Machine Scheduling |
298 | // has taken place, so its movement must also be restricted. |
299 | switch (MI.getOpcode()) { |
300 | case ARM::t2BTI: |
301 | case ARM::t2PAC: |
302 | case ARM::t2PACBTI: |
303 | case ARM::t2SG: |
304 | return true; |
305 | default: |
306 | break; |
307 | } |
308 | return ARMBaseInstrInfo::isSchedulingBoundary(MI, MBB, MF); |
309 | } |
310 | |
/// Emit Thumb2 code computing DestReg = BaseReg + NumBytes (NumBytes may be
/// negative). May emit several instructions when the offset does not fit a
/// single encoding. Pred/PredReg predicate the register-move and movw/movt
/// paths; the add/sub loop at the end always uses AL.
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator &MBBI,
                                  const DebugLoc &dl, Register DestReg,
                                  Register BaseReg, int NumBytes,
                                  ARMCC::CondCodes Pred, Register PredReg,
                                  const ARMBaseInstrInfo &TII,
                                  unsigned MIFlags) {
  // Zero offset into a different register is just a move.
  if (NumBytes == 0 && DestReg != BaseReg) {
    BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg)
      .addReg(RegNo: BaseReg, flags: RegState::Kill)
      .addImm(Val: (unsigned)Pred).addReg(RegNo: PredReg).setMIFlags(MIFlags);
    return;
  }

  // Work with the magnitude; remember the direction separately.
  bool isSub = NumBytes < 0;
  if (isSub) NumBytes = -NumBytes;

  // If profitable, use a movw or movt to materialize the offset.
  // FIXME: Use the scavenger to grab a scratch register.
  if (DestReg != ARM::SP && DestReg != BaseReg &&
      NumBytes >= 4096 &&
      ARM_AM::getT2SOImmVal(Arg: NumBytes) == -1) {
    bool Fits = false;
    if (NumBytes < 65536) {
      // Use a movw to materialize the 16-bit constant.
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi16), DestReg)
        .addImm(Val: NumBytes)
        .addImm(Val: (unsigned)Pred).addReg(RegNo: PredReg).setMIFlags(MIFlags);
      Fits = true;
    } else if ((NumBytes & 0xffff) == 0) {
      // Use a movt to materialize the 32-bit constant.
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVTi16), DestReg)
        .addReg(RegNo: DestReg)
        .addImm(Val: NumBytes >> 16)
        .addImm(Val: (unsigned)Pred).addReg(RegNo: PredReg).setMIFlags(MIFlags);
      Fits = true;
    }

    if (Fits) {
      if (isSub) {
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2SUBrr), DestReg)
            .addReg(RegNo: BaseReg)
            .addReg(RegNo: DestReg, flags: RegState::Kill)
            .add(MOs: predOps(Pred, PredReg))
            .add(MO: condCodeOp())
            .setMIFlags(MIFlags);
      } else {
        // Here we know that DestReg is not SP but we do not
        // know anything about BaseReg. t2ADDrr is an invalid
        // instruction is SP is used as the second argument, but
        // is fine if SP is the first argument. To be sure we
        // do not generate invalid encoding, put BaseReg first.
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2ADDrr), DestReg)
            .addReg(RegNo: BaseReg)
            .addReg(RegNo: DestReg, flags: RegState::Kill)
            .add(MOs: predOps(Pred, PredReg))
            .add(MO: condCodeOp())
            .setMIFlags(MIFlags);
      }
      return;
    }
  }

  // Chip away at NumBytes, one add/sub per iteration, until it is consumed.
  while (NumBytes) {
    unsigned ThisVal = NumBytes;
    unsigned Opc = 0;
    if (DestReg == ARM::SP && BaseReg != ARM::SP) {
      // mov sp, rn. Note t2MOVr cannot be used.
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg)
          .addReg(RegNo: BaseReg)
          .setMIFlags(MIFlags)
          .add(MOs: predOps(Pred: ARMCC::AL));
      BaseReg = ARM::SP;
      continue;
    }

    assert((DestReg != ARM::SP || BaseReg == ARM::SP) &&
           "Writing to SP, from other register." );

    // Try to use T1, as it smaller
    if ((DestReg == ARM::SP) && (ThisVal < ((1 << 7) - 1) * 4)) {
      assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?" );
      Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: Opc), DestReg)
          .addReg(RegNo: BaseReg)
          .addImm(Val: ThisVal / 4)
          .setMIFlags(MIFlags)
          .add(MOs: predOps(Pred: ARMCC::AL));
      break;
    }
    bool HasCCOut = true;
    int ImmIsT2SO = ARM_AM::getT2SOImmVal(Arg: ThisVal);
    bool ToSP = DestReg == ARM::SP;
    // SP-relative arithmetic uses the dedicated sp-immediate opcodes.
    unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
    unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
    unsigned t2SUBi12 = ToSP ? ARM::t2SUBspImm12 : ARM::t2SUBri12;
    unsigned t2ADDi12 = ToSP ? ARM::t2ADDspImm12 : ARM::t2ADDri12;
    Opc = isSub ? t2SUB : t2ADD;
    // Prefer T2: sub rd, rn, so_imm | sub sp, sp, so_imm
    if (ImmIsT2SO != -1) {
      NumBytes = 0;
    } else if (ThisVal < 4096) {
      // Prefer T3 if can make it in a single go: subw rd, rn, imm12 | subw sp,
      // sp, imm12
      Opc = isSub ? t2SUBi12 : t2ADDi12;
      HasCCOut = false;
      NumBytes = 0;
    } else {
      // Use one T2 instruction to reduce NumBytes
      // FIXME: Move this to ARMAddressingModes.h?
      unsigned RotAmt = llvm::countl_zero(Val: ThisVal);
      ThisVal = ThisVal & llvm::rotr<uint32_t>(V: 0xff000000U, R: RotAmt);
      NumBytes &= ~ThisVal;
      assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
             "Bit extraction didn't work?" );
    }

    // Build the new ADD / SUB.
    MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: Opc), DestReg)
                                  .addReg(RegNo: BaseReg, flags: RegState::Kill)
                                  .addImm(Val: ThisVal)
                                  .add(MOs: predOps(Pred: ARMCC::AL))
                                  .setMIFlags(MIFlags);
    if (HasCCOut)
      MIB.add(MO: condCodeOp());

    // Later iterations accumulate on top of the partial result.
    BaseReg = DestReg;
  }
}
440 | |
441 | static unsigned |
442 | negativeOffsetOpcode(unsigned opcode) |
443 | { |
444 | switch (opcode) { |
445 | case ARM::t2LDRi12: return ARM::t2LDRi8; |
446 | case ARM::t2LDRHi12: return ARM::t2LDRHi8; |
447 | case ARM::t2LDRBi12: return ARM::t2LDRBi8; |
448 | case ARM::t2LDRSHi12: return ARM::t2LDRSHi8; |
449 | case ARM::t2LDRSBi12: return ARM::t2LDRSBi8; |
450 | case ARM::t2STRi12: return ARM::t2STRi8; |
451 | case ARM::t2STRBi12: return ARM::t2STRBi8; |
452 | case ARM::t2STRHi12: return ARM::t2STRHi8; |
453 | case ARM::t2PLDi12: return ARM::t2PLDi8; |
454 | case ARM::t2PLDWi12: return ARM::t2PLDWi8; |
455 | case ARM::t2PLIi12: return ARM::t2PLIi8; |
456 | |
457 | case ARM::t2LDRi8: |
458 | case ARM::t2LDRHi8: |
459 | case ARM::t2LDRBi8: |
460 | case ARM::t2LDRSHi8: |
461 | case ARM::t2LDRSBi8: |
462 | case ARM::t2STRi8: |
463 | case ARM::t2STRBi8: |
464 | case ARM::t2STRHi8: |
465 | case ARM::t2PLDi8: |
466 | case ARM::t2PLDWi8: |
467 | case ARM::t2PLIi8: |
468 | return opcode; |
469 | |
470 | default: |
471 | llvm_unreachable("unknown thumb2 opcode." ); |
472 | } |
473 | } |
474 | |
475 | static unsigned |
476 | positiveOffsetOpcode(unsigned opcode) |
477 | { |
478 | switch (opcode) { |
479 | case ARM::t2LDRi8: return ARM::t2LDRi12; |
480 | case ARM::t2LDRHi8: return ARM::t2LDRHi12; |
481 | case ARM::t2LDRBi8: return ARM::t2LDRBi12; |
482 | case ARM::t2LDRSHi8: return ARM::t2LDRSHi12; |
483 | case ARM::t2LDRSBi8: return ARM::t2LDRSBi12; |
484 | case ARM::t2STRi8: return ARM::t2STRi12; |
485 | case ARM::t2STRBi8: return ARM::t2STRBi12; |
486 | case ARM::t2STRHi8: return ARM::t2STRHi12; |
487 | case ARM::t2PLDi8: return ARM::t2PLDi12; |
488 | case ARM::t2PLDWi8: return ARM::t2PLDWi12; |
489 | case ARM::t2PLIi8: return ARM::t2PLIi12; |
490 | |
491 | case ARM::t2LDRi12: |
492 | case ARM::t2LDRHi12: |
493 | case ARM::t2LDRBi12: |
494 | case ARM::t2LDRSHi12: |
495 | case ARM::t2LDRSBi12: |
496 | case ARM::t2STRi12: |
497 | case ARM::t2STRBi12: |
498 | case ARM::t2STRHi12: |
499 | case ARM::t2PLDi12: |
500 | case ARM::t2PLDWi12: |
501 | case ARM::t2PLIi12: |
502 | return opcode; |
503 | |
504 | default: |
505 | llvm_unreachable("unknown thumb2 opcode." ); |
506 | } |
507 | } |
508 | |
509 | static unsigned |
510 | immediateOffsetOpcode(unsigned opcode) |
511 | { |
512 | switch (opcode) { |
513 | case ARM::t2LDRs: return ARM::t2LDRi12; |
514 | case ARM::t2LDRHs: return ARM::t2LDRHi12; |
515 | case ARM::t2LDRBs: return ARM::t2LDRBi12; |
516 | case ARM::t2LDRSHs: return ARM::t2LDRSHi12; |
517 | case ARM::t2LDRSBs: return ARM::t2LDRSBi12; |
518 | case ARM::t2STRs: return ARM::t2STRi12; |
519 | case ARM::t2STRBs: return ARM::t2STRBi12; |
520 | case ARM::t2STRHs: return ARM::t2STRHi12; |
521 | case ARM::t2PLDs: return ARM::t2PLDi12; |
522 | case ARM::t2PLDWs: return ARM::t2PLDWi12; |
523 | case ARM::t2PLIs: return ARM::t2PLIi12; |
524 | |
525 | case ARM::t2LDRi12: |
526 | case ARM::t2LDRHi12: |
527 | case ARM::t2LDRBi12: |
528 | case ARM::t2LDRSHi12: |
529 | case ARM::t2LDRSBi12: |
530 | case ARM::t2STRi12: |
531 | case ARM::t2STRBi12: |
532 | case ARM::t2STRHi12: |
533 | case ARM::t2PLDi12: |
534 | case ARM::t2PLDWi12: |
535 | case ARM::t2PLIi12: |
536 | case ARM::t2LDRi8: |
537 | case ARM::t2LDRHi8: |
538 | case ARM::t2LDRBi8: |
539 | case ARM::t2LDRSHi8: |
540 | case ARM::t2LDRSBi8: |
541 | case ARM::t2STRi8: |
542 | case ARM::t2STRBi8: |
543 | case ARM::t2STRHi8: |
544 | case ARM::t2PLDi8: |
545 | case ARM::t2PLDWi8: |
546 | case ARM::t2PLIi8: |
547 | return opcode; |
548 | |
549 | default: |
550 | llvm_unreachable("unknown thumb2 opcode." ); |
551 | } |
552 | } |
553 | |
/// Rewrite the frame-index operand of MI (at FrameRegIdx) to use FrameReg plus
/// as much of Offset as the instruction's addressing mode can encode. On
/// return, Offset holds the unencoded remainder; returns true when the offset
/// was folded completely.
bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                               Register FrameReg, int &Offset,
                               const ARMBaseInstrInfo &TII,
                               const TargetRegisterInfo *TRI) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();
  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
  bool isSub = false;

  MachineFunction &MF = *MI.getParent()->getParent();
  // Register class the frame-register operand must satisfy; checked before
  // folding FrameReg in below.
  const TargetRegisterClass *RegClass =
      TII.getRegClass(MCID: Desc, OpNum: FrameRegIdx, TRI, MF);

  // Memory operands in inline assembly always use AddrModeT2_i12.
  if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
    AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?

  const bool IsSP = Opcode == ARM::t2ADDspImm12 || Opcode == ARM::t2ADDspImm;
  if (IsSP || Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
    // The instruction is itself an add-immediate: merge its immediate into
    // the offset being folded.
    Offset += MI.getOperand(i: FrameRegIdx+1).getImm();

    Register PredReg;
    if (Offset == 0 && getInstrPredicate(MI, PredReg) == ARMCC::AL &&
        !MI.definesRegister(Reg: ARM::CPSR, /*TRI=*/nullptr)) {
      // Turn it into a move.
      MI.setDesc(TII.get(Opcode: ARM::tMOVr));
      MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
      // Remove offset and remaining explicit predicate operands.
      do MI.removeOperand(OpNo: FrameRegIdx+1);
      while (MI.getNumOperands() > FrameRegIdx+1);
      MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
      MIB.add(MOs: predOps(Pred: ARMCC::AL));
      return true;
    }

    // The imm12 variants carry no cc_out operand; the so_imm variants do.
    bool HasCCOut = (Opcode != ARM::t2ADDspImm12 && Opcode != ARM::t2ADDri12);

    // Fold the sign of the offset into the opcode choice (add vs sub).
    if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
      MI.setDesc(IsSP ? TII.get(Opcode: ARM::t2SUBspImm) : TII.get(Opcode: ARM::t2SUBri));
    } else {
      MI.setDesc(IsSP ? TII.get(Opcode: ARM::t2ADDspImm) : TII.get(Opcode: ARM::t2ADDri));
    }

    // Common case: small offset, fits into instruction.
    if (ARM_AM::getT2SOImmVal(Arg: Offset) != -1) {
      MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
      MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: Offset);
      // Add cc_out operand if the original instruction did not have one.
      if (!HasCCOut)
        MI.addOperand(Op: MachineOperand::CreateReg(Reg: 0, isDef: false));
      Offset = 0;
      return true;
    }
    // Another common case: imm12.
    if (Offset < 4096 &&
        (!HasCCOut || MI.getOperand(i: MI.getNumOperands()-1).getReg() == 0)) {
      unsigned NewOpc = isSub ? IsSP ? ARM::t2SUBspImm12 : ARM::t2SUBri12
                              : IsSP ? ARM::t2ADDspImm12 : ARM::t2ADDri12;
      MI.setDesc(TII.get(Opcode: NewOpc));
      MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
      MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: Offset);
      // Remove the cc_out operand.
      if (HasCCOut)
        MI.removeOperand(OpNo: MI.getNumOperands()-1);
      Offset = 0;
      return true;
    }

    // Otherwise, extract 8 adjacent bits from the immediate into this
    // t2ADDri/t2SUBri.
    unsigned RotAmt = llvm::countl_zero<unsigned>(Val: Offset);
    unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(V: 0xff000000U, R: RotAmt);

    // We will handle these bits from offset, clear them.
    Offset &= ~ThisImmVal;

    assert(ARM_AM::getT2SOImmVal(ThisImmVal) != -1 &&
           "Bit extraction didn't work?" );
    MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: ThisImmVal);
    // Add cc_out operand if the original instruction did not have one.
    if (!HasCCOut)
      MI.addOperand(Op: MachineOperand::CreateReg(Reg: 0, isDef: false));
  } else {
    // Memory-referencing instruction.
    // AddrMode4 and AddrMode6 cannot handle any offset.
    if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
      return false;

    // AddrModeT2_so cannot handle any offset. If there is no offset
    // register then we change to an immediate version.
    unsigned NewOpc = Opcode;
    if (AddrMode == ARMII::AddrModeT2_so) {
      Register OffsetReg = MI.getOperand(i: FrameRegIdx + 1).getReg();
      if (OffsetReg != 0) {
        MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
        return Offset == 0;
      }

      MI.removeOperand(OpNo: FrameRegIdx+1);
      MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: 0);
      NewOpc = immediateOffsetOpcode(opcode: Opcode);
      AddrMode = ARMII::AddrModeT2_i12;
    }

    // NumBits/Scale describe the immediate field of the (possibly new)
    // addressing mode: an unsigned NumBits-bit value in units of Scale bytes.
    unsigned NumBits = 0;
    unsigned Scale = 1;
    if (AddrMode == ARMII::AddrModeT2_i8neg ||
        AddrMode == ARMII::AddrModeT2_i12) {
      // i8 supports only negative, and i12 supports only positive, so
      // based on Offset sign convert Opcode to the appropriate
      // instruction
      Offset += MI.getOperand(i: FrameRegIdx+1).getImm();
      if (Offset < 0) {
        NewOpc = negativeOffsetOpcode(opcode: Opcode);
        NumBits = 8;
        isSub = true;
        Offset = -Offset;
      } else {
        NewOpc = positiveOffsetOpcode(opcode: Opcode);
        NumBits = 12;
      }
    } else if (AddrMode == ARMII::AddrMode5) {
      // VFP address mode.
      const MachineOperand &OffOp = MI.getOperand(i: FrameRegIdx+1);
      int InstrOffs = ARM_AM::getAM5Offset(AM5Opc: OffOp.getImm());
      if (ARM_AM::getAM5Op(AM5Opc: OffOp.getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 4;
      Offset += InstrOffs * 4;
      assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!" );
      if (Offset < 0) {
        Offset = -Offset;
        isSub = true;
      }
    } else if (AddrMode == ARMII::AddrMode5FP16) {
      // VFP address mode.
      const MachineOperand &OffOp = MI.getOperand(i: FrameRegIdx+1);
      int InstrOffs = ARM_AM::getAM5FP16Offset(AM5Opc: OffOp.getImm());
      if (ARM_AM::getAM5FP16Op(AM5Opc: OffOp.getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 2;
      Offset += InstrOffs * 2;
      assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!" );
      if (Offset < 0) {
        Offset = -Offset;
        isSub = true;
      }
    } else if (AddrMode == ARMII::AddrModeT2_i7s4 ||
               AddrMode == ARMII::AddrModeT2_i7s2 ||
               AddrMode == ARMII::AddrModeT2_i7) {
      Offset += MI.getOperand(i: FrameRegIdx + 1).getImm();
      unsigned OffsetMask;
      switch (AddrMode) {
      case ARMII::AddrModeT2_i7s4: NumBits = 9; OffsetMask = 0x3; break;
      case ARMII::AddrModeT2_i7s2: NumBits = 8; OffsetMask = 0x1; break;
      default: NumBits = 7; OffsetMask = 0x0; break;
      }
      // MCInst operand expects already scaled value.
      Scale = 1;
      assert((Offset & OffsetMask) == 0 && "Can't encode this offset!" );
      (void)OffsetMask; // squash unused-variable warning at -NDEBUG
    } else if (AddrMode == ARMII::AddrModeT2_i8s4) {
      Offset += MI.getOperand(i: FrameRegIdx + 1).getImm();
      NumBits = 8 + 2;
      // MCInst operand expects already scaled value.
      Scale = 1;
      assert((Offset & 3) == 0 && "Can't encode this offset!" );
    } else if (AddrMode == ARMII::AddrModeT2_ldrex) {
      Offset += MI.getOperand(i: FrameRegIdx + 1).getImm() * 4;
      NumBits = 8; // 8 bits scaled by 4
      Scale = 4;
      assert((Offset & 3) == 0 && "Can't encode this offset!" );
    } else {
      llvm_unreachable("Unsupported addressing mode!" );
    }

    if (NewOpc != Opcode)
      MI.setDesc(TII.get(Opcode: NewOpc));

    MachineOperand &ImmOp = MI.getOperand(i: FrameRegIdx+1);

    // Attempt to fold address computation
    // Common case: small offset, fits into instruction. We need to make sure
    // the register class is correct too, for instructions like the MVE
    // VLDRH.32, which only accepts low tGPR registers.
    int ImmedOffset = Offset / Scale;
    unsigned Mask = (1 << NumBits) - 1;
    if ((unsigned)Offset <= Mask * Scale &&
        (FrameReg.isVirtual() || RegClass->contains(Reg: FrameReg))) {
      if (FrameReg.isVirtual()) {
        // Make sure the register class for the virtual register is correct
        MachineRegisterInfo *MRI = &MF.getRegInfo();
        if (!MRI->constrainRegClass(Reg: FrameReg, RC: RegClass))
          llvm_unreachable("Unable to constrain virtual register class." );
      }

      // Replace the FrameIndex with fp/sp
      MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
      if (isSub) {
        if (AddrMode == ARMII::AddrMode5 || AddrMode == ARMII::AddrMode5FP16)
          // FIXME: Not consistent.
          ImmedOffset |= 1 << NumBits;
        else
          ImmedOffset = -ImmedOffset;
      }
      ImmOp.ChangeToImmediate(ImmVal: ImmedOffset);
      Offset = 0;
      return true;
    }

    // Otherwise, offset doesn't fit. Pull in what we can to simplify
    ImmedOffset = ImmedOffset & Mask;
    if (isSub) {
      if (AddrMode == ARMII::AddrMode5 || AddrMode == ARMII::AddrMode5FP16)
        // FIXME: Not consistent.
        ImmedOffset |= 1 << NumBits;
      else {
        ImmedOffset = -ImmedOffset;
        if (ImmedOffset == 0)
          // Change the opcode back if the encoded offset is zero.
          MI.setDesc(TII.get(Opcode: positiveOffsetOpcode(opcode: NewOpc)));
      }
    }
    ImmOp.ChangeToImmediate(ImmVal: ImmedOffset);
    Offset &= ~(Mask*Scale);
  }

  // Report the unencoded remainder (with its sign restored) and whether the
  // fold was complete.
  Offset = (isSub) ? -Offset : Offset;
  return Offset == 0 && (FrameReg.isVirtual() || RegClass->contains(Reg: FrameReg));
}
787 | |
788 | ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr &MI, |
789 | Register &PredReg) { |
790 | unsigned Opc = MI.getOpcode(); |
791 | if (Opc == ARM::tBcc || Opc == ARM::t2Bcc) |
792 | return ARMCC::AL; |
793 | return getInstrPredicate(MI, PredReg); |
794 | } |
795 | |
796 | int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) { |
797 | const MCInstrDesc &MCID = MI.getDesc(); |
798 | |
799 | for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) |
800 | if (ARM::isVpred(op: MCID.operands()[i].OperandType)) |
801 | return i; |
802 | |
803 | return -1; |
804 | } |
805 | |
806 | ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI, |
807 | Register &PredReg) { |
808 | int PIdx = findFirstVPTPredOperandIdx(MI); |
809 | if (PIdx == -1) { |
810 | PredReg = 0; |
811 | return ARMVCC::None; |
812 | } |
813 | |
814 | PredReg = MI.getOperand(i: PIdx+1).getReg(); |
815 | return (ARMVCC::VPTCodes)MI.getOperand(i: PIdx).getImm(); |
816 | } |
817 | |
/// Recompute the block-mask operand (operand 0) of a VPT/VPST instruction
/// from the actual predication of the instructions that follow it, e.g. after
/// the block's contents have been changed.
void llvm::recomputeVPTBlockMask(MachineInstr &Instr) {
  assert(isVPTOpcode(Instr.getOpcode()) && "Not a VPST or VPT Instruction!" );

  MachineOperand &MaskOp = Instr.getOperand(i: 0);
  assert(MaskOp.isImm() && "Operand 0 is not the block mask of the VPT/VPST?!" );

  MachineBasicBlock::iterator Iter = ++Instr.getIterator(),
                              End = Instr.getParent()->end();

  // Debug instructions are transparent to the block; skip them.
  while (Iter != End && Iter->isDebugInstr())
    ++Iter;

  // Verify that the instruction after the VPT/VPST is predicated (it should
  // be), and skip it.
  assert(Iter != End && "Expected some instructions in any VPT block" );
  assert(
      getVPTInstrPredicate(*Iter) == ARMVCC::Then &&
      "VPT/VPST should be followed by an instruction with a 'then' predicate!" );
  ++Iter;

  // Iterate over the predicated instructions, updating the BlockMask as we go.
  ARM::PredBlockMask BlockMask = ARM::PredBlockMask::T;
  while (Iter != End) {
    if (Iter->isDebugInstr()) {
      ++Iter;
      continue;
    }
    ARMVCC::VPTCodes Pred = getVPTInstrPredicate(MI: *Iter);
    // The first unpredicated instruction marks the end of the block.
    if (Pred == ARMVCC::None)
      break;
    BlockMask = expandPredBlockMask(BlockMask, Kind: Pred);
    ++Iter;
  }

  // Rewrite the BlockMask.
  MaskOp.setImm((int64_t)(BlockMask));
}
855 | |