1 | //===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the Thumb-2 implementation of the TargetInstrInfo class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "Thumb2InstrInfo.h" |
14 | #include "ARMMachineFunctionInfo.h" |
15 | #include "ARMSubtarget.h" |
16 | #include "MCTargetDesc/ARMAddressingModes.h" |
17 | #include "llvm/CodeGen/MachineBasicBlock.h" |
18 | #include "llvm/CodeGen/MachineFrameInfo.h" |
19 | #include "llvm/CodeGen/MachineFunction.h" |
20 | #include "llvm/CodeGen/MachineInstr.h" |
21 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
22 | #include "llvm/CodeGen/MachineMemOperand.h" |
23 | #include "llvm/CodeGen/MachineOperand.h" |
24 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
25 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
26 | #include "llvm/IR/DebugLoc.h" |
27 | #include "llvm/IR/Module.h" |
28 | #include "llvm/MC/MCInst.h" |
29 | #include "llvm/MC/MCInstBuilder.h" |
30 | #include "llvm/MC/MCInstrDesc.h" |
31 | #include "llvm/Support/CommandLine.h" |
32 | #include "llvm/Support/ErrorHandling.h" |
33 | #include "llvm/Support/MathExtras.h" |
34 | #include "llvm/Target/TargetMachine.h" |
35 | #include <cassert> |
36 | |
37 | using namespace llvm; |
38 | |
// Hidden debugging flag: revert to the legacy Thumb2 if-conversion
// heuristics. Defaults to off; not read in this file — presumably consumed
// by the ARM if-conversion cost model elsewhere (TODO confirm).
static cl::opt<bool>
OldT2IfCvt("old-thumb2-ifcvt" , cl::Hidden,
           cl::desc("Use old-style Thumb2 if-conversion heuristics" ),
           cl::init(Val: false));
43 | |
// Hidden flag read by Thumb2InstrInfo::optimizeSelect in this file: when
// set, keep the predicated MOVCC form instead of rewriting it to a v8.1-M
// CSEL. Defaults to off.
static cl::opt<bool>
PreferNoCSEL("prefer-no-csel" , cl::Hidden,
             cl::desc("Prefer predicated Move to CSEL" ),
             cl::init(Val: false));
48 | |
// All shared ARM/Thumb state lives in ARMBaseInstrInfo; this class only
// overrides the Thumb-2 specific hooks.
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
    : ARMBaseInstrInfo(STI) {}
51 | |
52 | /// Return the noop instruction to use for a noop. |
53 | MCInst Thumb2InstrInfo::getNop() const { |
54 | return MCInstBuilder(ARM::tHINT).addImm(Val: 0).addImm(Val: ARMCC::AL).addReg(Reg: 0); |
55 | } |
56 | |
// Return the un-indexed equivalent of a pre/post-indexed opcode, or 0 when
// there is none.
unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
  // FIXME
  // Not implemented for Thumb-2; 0 signals "no equivalent" to callers.
  return 0;
}
61 | |
// Replace the tail of MBB (starting at Tail) with an unconditional branch to
// NewDest, fixing up any IT block that covered the removed instructions.
void
Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
                                         MachineBasicBlock *NewDest) const {
  MachineBasicBlock *MBB = Tail->getParent();
  ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
  // Fast path: no IT blocks in the function, or the tail is itself a branch
  // — nothing Thumb-2 specific to fix up.
  if (!AFI->hasITBlocks() || Tail->isBranch()) {
    TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
    return;
  }

  // If the first instruction of Tail is predicated, we may have to update
  // the IT instruction.
  Register PredReg;
  ARMCC::CondCodes CC = getInstrPredicate(MI: *Tail, PredReg);
  MachineBasicBlock::iterator MBBI = Tail;
  if (CC != ARMCC::AL)
    // Expecting at least the t2IT instruction before it.
    --MBBI;

  // Actually replace the tail.
  TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);

  // Fix up IT.
  if (CC != ARMCC::AL) {
    MachineBasicBlock::iterator E = MBB->begin();
    unsigned Count = 4; // At most 4 instructions in an IT block.
    // Walk backwards (skipping debug instructions) looking for the t2IT that
    // opened the block the removed tail belonged to.
    while (Count && MBBI != E) {
      if (MBBI->isDebugInstr()) {
        --MBBI;
        continue;
      }
      if (MBBI->getOpcode() == ARM::t2IT) {
        unsigned Mask = MBBI->getOperand(i: 1).getImm();
        if (Count == 4)
          // The removed tail was the only instruction covered by this IT, so
          // the IT instruction itself is now dead.
          MBBI->eraseFromParent();
        else {
          // Shorten the IT mask so it no longer covers the removed
          // instruction(s): clear the low bits and re-set the stop bit.
          unsigned MaskOn = 1 << Count;
          unsigned MaskOff = ~(MaskOn - 1);
          MBBI->getOperand(i: 1).setImm((Mask & MaskOff) | MaskOn);
        }
        return;
      }
      --MBBI;
      --Count;
    }

    // Ctrl flow can reach here if branch folding is run before IT block
    // formation pass.
  }
}
112 | |
113 | bool |
114 | Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, |
115 | MachineBasicBlock::iterator MBBI) const { |
116 | while (MBBI->isDebugInstr()) { |
117 | ++MBBI; |
118 | if (MBBI == MBB.end()) |
119 | return false; |
120 | } |
121 | |
122 | Register PredReg; |
123 | return getITInstrPredicate(MI: *MBBI, PredReg) == ARMCC::AL; |
124 | } |
125 | |
MachineInstr *
Thumb2InstrInfo::optimizeSelect(MachineInstr &MI,
                                SmallPtrSetImpl<MachineInstr *> &SeenMIs,
                                bool PreferFalse) const {
  // Try to use the base optimizeSelect, which uses canFoldIntoMOVCC to fold the
  // MOVCC into another instruction. If that fails on 8.1-M fall back to using a
  // CSEL.
  MachineInstr *RV = ARMBaseInstrInfo::optimizeSelect(MI, SeenMIs, PreferFalse);
  if (!RV && getSubtarget().hasV8_1MMainlineOps() && !PreferNoCSEL) {
    Register DestReg = MI.getOperand(i: 0).getReg();

    // Only rewrite virtual destinations; bail out for physical registers.
    if (!DestReg.isVirtual())
      return nullptr;

    // Build a t2CSEL from the MOVCC operands. Note operands 2 and 1 are
    // swapped relative to the MOVCC operand order.
    MachineInstrBuilder NewMI = BuildMI(BB&: *MI.getParent(), I&: MI, MIMD: MI.getDebugLoc(),
                                        MCID: get(Opcode: ARM::t2CSEL), DestReg)
                                    .add(MO: MI.getOperand(i: 2))
                                    .add(MO: MI.getOperand(i: 1))
                                    .add(MO: MI.getOperand(i: 3));
    // Record the replacement so the caller's bookkeeping stays accurate.
    SeenMIs.insert(Ptr: NewMI);
    return NewMI;
  }
  return RV;
}
150 | |
151 | void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, |
152 | MachineBasicBlock::iterator I, |
153 | const DebugLoc &DL, MCRegister DestReg, |
154 | MCRegister SrcReg, bool KillSrc) const { |
155 | // Handle SPR, DPR, and QPR copies. |
156 | if (!ARM::GPRRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) |
157 | return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc); |
158 | |
159 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::tMOVr), DestReg) |
160 | .addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc)) |
161 | .add(MOs: predOps(Pred: ARMCC::AL)); |
162 | } |
163 | |
// Spill SrcReg to stack slot FI: t2STRi12 for single GPRs, t2STRDi8 for GPR
// pairs, and the shared ARM implementation for everything else.
void Thumb2InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I,
                                          Register SrcReg, bool isKill, int FI,
                                          const TargetRegisterClass *RC,
                                          const TargetRegisterInfo *TRI,
                                          Register VReg) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();

  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Memory operand describing the stack store (size/alignment of the slot).
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo: MachinePointerInfo::getFixedStack(MF, FI), F: MachineMemOperand::MOStore,
      Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));

  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
    // Single GPR: frame index with a zero immediate offset; frame lowering
    // resolves the index later.
    BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::t2STRi12))
        .addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill))
        .addFrameIndex(Idx: FI)
        .addImm(Val: 0)
        .addMemOperand(MMO)
        .add(MOs: predOps(Pred: ARMCC::AL));
    return;
  }

  if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
    // Thumb2 STRD expects its dest-registers to be in rGPR. Not a problem for
    // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
    // otherwise).
    if (SrcReg.isVirtual()) {
      MachineRegisterInfo *MRI = &MF.getRegInfo();
      MRI->constrainRegClass(Reg: SrcReg, RC: &ARM::GPRPairnospRegClass);
    }

    // Store the two halves of the pair with one STRD.
    MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::t2STRDi8));
    AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::gsub_0, State: getKillRegState(B: isKill), TRI);
    AddDReg(MIB, Reg: SrcReg, SubIdx: ARM::gsub_1, State: 0, TRI);
    MIB.addFrameIndex(Idx: FI).addImm(Val: 0).addMemOperand(MMO).add(MOs: predOps(Pred: ARMCC::AL));
    return;
  }

  // FP/vector and all remaining classes: defer to the base implementation.
  ARMBaseInstrInfo::storeRegToStackSlot(MBB, MBBI: I, SrcReg, isKill, FrameIndex: FI, RC, TRI,
                                        VReg: Register());
}
208 | |
// Reload DestReg from stack slot FI: t2LDRi12 for single GPRs, t2LDRDi8 for
// GPR pairs, and the shared ARM implementation for everything else.
void Thumb2InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator I,
                                           Register DestReg, int FI,
                                           const TargetRegisterClass *RC,
                                           const TargetRegisterInfo *TRI,
                                           Register VReg) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Memory operand describing the stack load (size/alignment of the slot).
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo: MachinePointerInfo::getFixedStack(MF, FI), F: MachineMemOperand::MOLoad,
      Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();

  if (ARM::GPRRegClass.hasSubClassEq(RC)) {
    BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::t2LDRi12), DestReg)
        .addFrameIndex(Idx: FI)
        .addImm(Val: 0)
        .addMemOperand(MMO)
        .add(MOs: predOps(Pred: ARMCC::AL));
    return;
  }

  if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
    // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for
    // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
    // otherwise).
    if (DestReg.isVirtual()) {
      MachineRegisterInfo *MRI = &MF.getRegInfo();
      MRI->constrainRegClass(Reg: DestReg, RC: &ARM::GPRPairnospRegClass);
    }

    // Each half is defined without reading the pair first (DefineNoRead).
    MachineInstrBuilder MIB = BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ARM::t2LDRDi8));
    AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_0, State: RegState::DefineNoRead, TRI);
    AddDReg(MIB, Reg: DestReg, SubIdx: ARM::gsub_1, State: RegState::DefineNoRead, TRI);
    MIB.addFrameIndex(Idx: FI).addImm(Val: 0).addMemOperand(MMO).add(MOs: predOps(Pred: ARMCC::AL));

    // For a physical pair, also mark the full pair register as defined.
    if (DestReg.isPhysical())
      MIB.addReg(RegNo: DestReg, flags: RegState::ImplicitDefine);
    return;
  }

  // FP/vector and all remaining classes: defer to the base implementation.
  ARMBaseInstrInfo::loadRegFromStackSlot(MBB, MBBI: I, DestReg, FrameIndex: FI, RC, TRI,
                                         VReg: Register());
}
254 | |
// Expand the LOAD_STACK_GUARD pseudo, selecting the address-materialization
// opcode from the guard model and code model.
void Thumb2InstrInfo::expandLoadStackGuard(
    MachineBasicBlock::iterator MI) const {
  MachineFunction &MF = *MI->getParent()->getParent();
  Module &M = *MF.getFunction().getParent();

  // TLS-based guard: an MRC coprocessor read (presumably of the thread
  // pointer register — confirm against expandLoadStackGuardBase) followed by
  // an immediate-offset load.
  if (M.getStackProtectorGuard() == "tls" ) {
    expandLoadStackGuardBase(MI, LoadImmOpc: ARM::t2MRC, LoadOpc: ARM::t2LDRi12);
    return;
  }

  // Global-variable guard: GOT-indirect load for non-dso-local ELF globals,
  // PC-relative address for PIC, plain 32-bit immediate otherwise.
  const auto *GV = cast<GlobalValue>(Val: (*MI->memoperands_begin())->getValue());
  if (MF.getSubtarget<ARMSubtarget>().isTargetELF() && !GV->isDSOLocal())
    expandLoadStackGuardBase(MI, LoadImmOpc: ARM::t2LDRLIT_ga_pcrel, LoadOpc: ARM::t2LDRi12);
  else if (MF.getTarget().isPositionIndependent())
    expandLoadStackGuardBase(MI, LoadImmOpc: ARM::t2MOV_ga_pcrel, LoadOpc: ARM::t2LDRi12);
  else
    expandLoadStackGuardBase(MI, LoadImmOpc: ARM::t2MOVi32imm, LoadOpc: ARM::t2LDRi12);
}
273 | |
274 | MachineInstr *Thumb2InstrInfo::commuteInstructionImpl(MachineInstr &MI, |
275 | bool NewMI, |
276 | unsigned OpIdx1, |
277 | unsigned OpIdx2) const { |
278 | switch (MI.getOpcode()) { |
279 | case ARM::MVE_VMAXNMAf16: |
280 | case ARM::MVE_VMAXNMAf32: |
281 | case ARM::MVE_VMINNMAf16: |
282 | case ARM::MVE_VMINNMAf32: |
283 | // Don't allow predicated instructions to be commuted. |
284 | if (getVPTInstrPredicate(MI) != ARMVCC::None) |
285 | return nullptr; |
286 | } |
287 | return ARMBaseInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); |
288 | } |
289 | |
290 | bool Thumb2InstrInfo::isSchedulingBoundary(const MachineInstr &MI, |
291 | const MachineBasicBlock *MBB, |
292 | const MachineFunction &MF) const { |
293 | // BTI clearing instructions shall not take part in scheduling regions as |
294 | // they must stay in their intended place. Although PAC isn't BTI clearing, |
295 | // it can be transformed into PACBTI after the pre-RA Machine Scheduling |
296 | // has taken place, so its movement must also be restricted. |
297 | switch (MI.getOpcode()) { |
298 | case ARM::t2BTI: |
299 | case ARM::t2PAC: |
300 | case ARM::t2PACBTI: |
301 | case ARM::t2SG: |
302 | return true; |
303 | default: |
304 | break; |
305 | } |
306 | return ARMBaseInstrInfo::isSchedulingBoundary(MI, MBB, MF); |
307 | } |
308 | |
// Emit the shortest Thumb-2 sequence computing DestReg = BaseReg + NumBytes
// (NumBytes may be negative), respecting SP-specific encoding restrictions.
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator &MBBI,
                                  const DebugLoc &dl, Register DestReg,
                                  Register BaseReg, int NumBytes,
                                  ARMCC::CondCodes Pred, Register PredReg,
                                  const ARMBaseInstrInfo &TII,
                                  unsigned MIFlags) {
  // Zero offset into a different register is just a move.
  if (NumBytes == 0 && DestReg != BaseReg) {
    BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg)
      .addReg(RegNo: BaseReg, flags: RegState::Kill)
      .addImm(Val: (unsigned)Pred).addReg(RegNo: PredReg).setMIFlags(MIFlags);
    return;
  }

  // Work with the magnitude; isSub selects ADD vs SUB below.
  bool isSub = NumBytes < 0;
  if (isSub) NumBytes = -NumBytes;

  // If profitable, use a movw or movt to materialize the offset.
  // FIXME: Use the scavenger to grab a scratch register.
  if (DestReg != ARM::SP && DestReg != BaseReg &&
      NumBytes >= 4096 &&
      ARM_AM::getT2SOImmVal(Arg: NumBytes) == -1) {
    bool Fits = false;
    if (NumBytes < 65536) {
      // Use a movw to materialize the 16-bit constant.
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVi16), DestReg)
        .addImm(Val: NumBytes)
        .addImm(Val: (unsigned)Pred).addReg(RegNo: PredReg).setMIFlags(MIFlags);
      Fits = true;
    } else if ((NumBytes & 0xffff) == 0) {
      // Use a movt to materialize the 32-bit constant.
      // NOTE(review): movt only writes the top half while reading DestReg's
      // previous (undefined here) low half — confirm callers/encoding make
      // this safe.
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2MOVTi16), DestReg)
        .addReg(RegNo: DestReg)
        .addImm(Val: NumBytes >> 16)
        .addImm(Val: (unsigned)Pred).addReg(RegNo: PredReg).setMIFlags(MIFlags);
      Fits = true;
    }

    // DestReg now holds the magnitude; combine it with BaseReg.
    if (Fits) {
      if (isSub) {
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2SUBrr), DestReg)
            .addReg(RegNo: BaseReg)
            .addReg(RegNo: DestReg, flags: RegState::Kill)
            .add(MOs: predOps(Pred, PredReg))
            .add(MO: condCodeOp())
            .setMIFlags(MIFlags);
      } else {
        // Here we know that DestReg is not SP but we do not
        // know anything about BaseReg. t2ADDrr is an invalid
        // instruction is SP is used as the second argument, but
        // is fine if SP is the first argument. To be sure we
        // do not generate invalid encoding, put BaseReg first.
        BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::t2ADDrr), DestReg)
            .addReg(RegNo: BaseReg)
            .addReg(RegNo: DestReg, flags: RegState::Kill)
            .add(MOs: predOps(Pred, PredReg))
            .add(MO: condCodeOp())
            .setMIFlags(MIFlags);
      }
      return;
    }
  }

  // Fall back: peel off encodable chunks of the offset until none remains.
  while (NumBytes) {
    unsigned ThisVal = NumBytes;
    unsigned Opc = 0;
    if (DestReg == ARM::SP && BaseReg != ARM::SP) {
      // mov sp, rn. Note t2MOVr cannot be used.
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: ARM::tMOVr), DestReg)
        .addReg(RegNo: BaseReg)
        .setMIFlags(MIFlags)
        .add(MOs: predOps(Pred: ARMCC::AL));
      BaseReg = ARM::SP;
      continue;
    }

    assert((DestReg != ARM::SP || BaseReg == ARM::SP) &&
           "Writing to SP, from other register." );

    // Try to use T1, as it smaller
    if ((DestReg == ARM::SP) && (ThisVal < ((1 << 7) - 1) * 4)) {
      assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?" );
      Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
      BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: Opc), DestReg)
        .addReg(RegNo: BaseReg)
        .addImm(Val: ThisVal / 4)
        .setMIFlags(MIFlags)
        .add(MOs: predOps(Pred: ARMCC::AL));
      break;
    }
    // Select SP-specific or generic opcodes for the remaining forms.
    bool HasCCOut = true;
    int ImmIsT2SO = ARM_AM::getT2SOImmVal(Arg: ThisVal);
    bool ToSP = DestReg == ARM::SP;
    unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
    unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
    unsigned t2SUBi12 = ToSP ? ARM::t2SUBspImm12 : ARM::t2SUBri12;
    unsigned t2ADDi12 = ToSP ? ARM::t2ADDspImm12 : ARM::t2ADDri12;
    Opc = isSub ? t2SUB : t2ADD;
    // Prefer T2: sub rd, rn, so_imm | sub sp, sp, so_imm
    if (ImmIsT2SO != -1) {
      NumBytes = 0;
    } else if (ThisVal < 4096) {
      // Prefer T3 if can make it in a single go: subw rd, rn, imm12 | subw sp,
      // sp, imm12
      Opc = isSub ? t2SUBi12 : t2ADDi12;
      HasCCOut = false;
      NumBytes = 0;
    } else {
      // Use one T2 instruction to reduce NumBytes
      // FIXME: Move this to ARMAddressingModes.h?
      // Extract the 8 most significant adjacent bits into ThisVal; the rest
      // stays in NumBytes for subsequent iterations.
      unsigned RotAmt = llvm::countl_zero(Val: ThisVal);
      ThisVal = ThisVal & llvm::rotr<uint32_t>(V: 0xff000000U, R: RotAmt);
      NumBytes &= ~ThisVal;
      assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
             "Bit extraction didn't work?" );
    }

    // Build the new ADD / SUB.
    MachineInstrBuilder MIB = BuildMI(BB&: MBB, I: MBBI, MIMD: dl, MCID: TII.get(Opcode: Opc), DestReg)
                                  .addReg(RegNo: BaseReg, flags: RegState::Kill)
                                  .addImm(Val: ThisVal)
                                  .add(MOs: predOps(Pred: ARMCC::AL))
                                  .setMIFlags(MIFlags);
    if (HasCCOut)
      MIB.add(MO: condCodeOp());

    // Further chunks (if any) accumulate on top of what we just produced.
    BaseReg = DestReg;
  }
}
438 | |
439 | static unsigned |
440 | negativeOffsetOpcode(unsigned opcode) |
441 | { |
442 | switch (opcode) { |
443 | case ARM::t2LDRi12: return ARM::t2LDRi8; |
444 | case ARM::t2LDRHi12: return ARM::t2LDRHi8; |
445 | case ARM::t2LDRBi12: return ARM::t2LDRBi8; |
446 | case ARM::t2LDRSHi12: return ARM::t2LDRSHi8; |
447 | case ARM::t2LDRSBi12: return ARM::t2LDRSBi8; |
448 | case ARM::t2STRi12: return ARM::t2STRi8; |
449 | case ARM::t2STRBi12: return ARM::t2STRBi8; |
450 | case ARM::t2STRHi12: return ARM::t2STRHi8; |
451 | case ARM::t2PLDi12: return ARM::t2PLDi8; |
452 | case ARM::t2PLDWi12: return ARM::t2PLDWi8; |
453 | case ARM::t2PLIi12: return ARM::t2PLIi8; |
454 | |
455 | case ARM::t2LDRi8: |
456 | case ARM::t2LDRHi8: |
457 | case ARM::t2LDRBi8: |
458 | case ARM::t2LDRSHi8: |
459 | case ARM::t2LDRSBi8: |
460 | case ARM::t2STRi8: |
461 | case ARM::t2STRBi8: |
462 | case ARM::t2STRHi8: |
463 | case ARM::t2PLDi8: |
464 | case ARM::t2PLDWi8: |
465 | case ARM::t2PLIi8: |
466 | return opcode; |
467 | |
468 | default: |
469 | llvm_unreachable("unknown thumb2 opcode." ); |
470 | } |
471 | } |
472 | |
473 | static unsigned |
474 | positiveOffsetOpcode(unsigned opcode) |
475 | { |
476 | switch (opcode) { |
477 | case ARM::t2LDRi8: return ARM::t2LDRi12; |
478 | case ARM::t2LDRHi8: return ARM::t2LDRHi12; |
479 | case ARM::t2LDRBi8: return ARM::t2LDRBi12; |
480 | case ARM::t2LDRSHi8: return ARM::t2LDRSHi12; |
481 | case ARM::t2LDRSBi8: return ARM::t2LDRSBi12; |
482 | case ARM::t2STRi8: return ARM::t2STRi12; |
483 | case ARM::t2STRBi8: return ARM::t2STRBi12; |
484 | case ARM::t2STRHi8: return ARM::t2STRHi12; |
485 | case ARM::t2PLDi8: return ARM::t2PLDi12; |
486 | case ARM::t2PLDWi8: return ARM::t2PLDWi12; |
487 | case ARM::t2PLIi8: return ARM::t2PLIi12; |
488 | |
489 | case ARM::t2LDRi12: |
490 | case ARM::t2LDRHi12: |
491 | case ARM::t2LDRBi12: |
492 | case ARM::t2LDRSHi12: |
493 | case ARM::t2LDRSBi12: |
494 | case ARM::t2STRi12: |
495 | case ARM::t2STRBi12: |
496 | case ARM::t2STRHi12: |
497 | case ARM::t2PLDi12: |
498 | case ARM::t2PLDWi12: |
499 | case ARM::t2PLIi12: |
500 | return opcode; |
501 | |
502 | default: |
503 | llvm_unreachable("unknown thumb2 opcode." ); |
504 | } |
505 | } |
506 | |
507 | static unsigned |
508 | immediateOffsetOpcode(unsigned opcode) |
509 | { |
510 | switch (opcode) { |
511 | case ARM::t2LDRs: return ARM::t2LDRi12; |
512 | case ARM::t2LDRHs: return ARM::t2LDRHi12; |
513 | case ARM::t2LDRBs: return ARM::t2LDRBi12; |
514 | case ARM::t2LDRSHs: return ARM::t2LDRSHi12; |
515 | case ARM::t2LDRSBs: return ARM::t2LDRSBi12; |
516 | case ARM::t2STRs: return ARM::t2STRi12; |
517 | case ARM::t2STRBs: return ARM::t2STRBi12; |
518 | case ARM::t2STRHs: return ARM::t2STRHi12; |
519 | case ARM::t2PLDs: return ARM::t2PLDi12; |
520 | case ARM::t2PLDWs: return ARM::t2PLDWi12; |
521 | case ARM::t2PLIs: return ARM::t2PLIi12; |
522 | |
523 | case ARM::t2LDRi12: |
524 | case ARM::t2LDRHi12: |
525 | case ARM::t2LDRBi12: |
526 | case ARM::t2LDRSHi12: |
527 | case ARM::t2LDRSBi12: |
528 | case ARM::t2STRi12: |
529 | case ARM::t2STRBi12: |
530 | case ARM::t2STRHi12: |
531 | case ARM::t2PLDi12: |
532 | case ARM::t2PLDWi12: |
533 | case ARM::t2PLIi12: |
534 | case ARM::t2LDRi8: |
535 | case ARM::t2LDRHi8: |
536 | case ARM::t2LDRBi8: |
537 | case ARM::t2LDRSHi8: |
538 | case ARM::t2LDRSBi8: |
539 | case ARM::t2STRi8: |
540 | case ARM::t2STRBi8: |
541 | case ARM::t2STRHi8: |
542 | case ARM::t2PLDi8: |
543 | case ARM::t2PLDWi8: |
544 | case ARM::t2PLIi8: |
545 | return opcode; |
546 | |
547 | default: |
548 | llvm_unreachable("unknown thumb2 opcode." ); |
549 | } |
550 | } |
551 | |
// Rewrite MI's frame-index operand (at FrameRegIdx) to use FrameReg, folding
// as much of Offset into the instruction's immediate field as its addressing
// mode allows. On return, Offset holds the remainder that could not be
// folded; returns true iff the instruction was fully resolved.
bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                               Register FrameReg, int &Offset,
                               const ARMBaseInstrInfo &TII,
                               const TargetRegisterInfo *TRI) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();
  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
  bool isSub = false;

  MachineFunction &MF = *MI.getParent()->getParent();
  // Register class the base-register operand must satisfy; checked before
  // substituting FrameReg below.
  const TargetRegisterClass *RegClass =
      TII.getRegClass(MCID: Desc, OpNum: FrameRegIdx, TRI, MF);

  // Memory operands in inline assembly always use AddrModeT2_i12.
  if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
    AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2?

  const bool IsSP = Opcode == ARM::t2ADDspImm12 || Opcode == ARM::t2ADDspImm;
  if (IsSP || Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) {
    // ADD-immediate path: fold the existing immediate into Offset and
    // re-select the best add/sub encoding.
    Offset += MI.getOperand(i: FrameRegIdx+1).getImm();

    Register PredReg;
    if (Offset == 0 && getInstrPredicate(MI, PredReg) == ARMCC::AL &&
        !MI.definesRegister(Reg: ARM::CPSR, /*TRI=*/nullptr)) {
      // Turn it into a move.
      MI.setDesc(TII.get(Opcode: ARM::tMOVr));
      MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
      // Remove offset and remaining explicit predicate operands.
      do MI.removeOperand(OpNo: FrameRegIdx+1);
      while (MI.getNumOperands() > FrameRegIdx+1);
      MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
      MIB.add(MOs: predOps(Pred: ARMCC::AL));
      return true;
    }

    // The ...i12 forms carry no cc_out operand.
    bool HasCCOut = (Opcode != ARM::t2ADDspImm12 && Opcode != ARM::t2ADDri12);

    if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
      MI.setDesc(IsSP ? TII.get(Opcode: ARM::t2SUBspImm) : TII.get(Opcode: ARM::t2SUBri));
    } else {
      MI.setDesc(IsSP ? TII.get(Opcode: ARM::t2ADDspImm) : TII.get(Opcode: ARM::t2ADDri));
    }

    // Common case: small offset, fits into instruction.
    if (ARM_AM::getT2SOImmVal(Arg: Offset) != -1) {
      MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
      MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: Offset);
      // Add cc_out operand if the original instruction did not have one.
      if (!HasCCOut)
        MI.addOperand(Op: MachineOperand::CreateReg(Reg: 0, isDef: false));
      Offset = 0;
      return true;
    }
    // Another common case: imm12. Only legal when the instruction does not
    // write flags (no cc_out, or cc_out holds no register).
    if (Offset < 4096 &&
        (!HasCCOut || MI.getOperand(i: MI.getNumOperands()-1).getReg() == 0)) {
      unsigned NewOpc = isSub ? IsSP ? ARM::t2SUBspImm12 : ARM::t2SUBri12
                              : IsSP ? ARM::t2ADDspImm12 : ARM::t2ADDri12;
      MI.setDesc(TII.get(Opcode: NewOpc));
      MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
      MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: Offset);
      // Remove the cc_out operand.
      if (HasCCOut)
        MI.removeOperand(OpNo: MI.getNumOperands()-1);
      Offset = 0;
      return true;
    }

    // Otherwise, extract 8 adjacent bits from the immediate into this
    // t2ADDri/t2SUBri.
    unsigned RotAmt = llvm::countl_zero<unsigned>(Val: Offset);
    unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(V: 0xff000000U, R: RotAmt);

    // We will handle these bits from offset, clear them.
    Offset &= ~ThisImmVal;

    assert(ARM_AM::getT2SOImmVal(ThisImmVal) != -1 &&
           "Bit extraction didn't work?" );
    MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: ThisImmVal);
    // Add cc_out operand if the original instruction did not have one.
    if (!HasCCOut)
      MI.addOperand(Op: MachineOperand::CreateReg(Reg: 0, isDef: false));
  } else {
    // Load/store path: fold Offset into the addressing-mode immediate.
    // AddrMode4 and AddrMode6 cannot handle any offset.
    if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
      return false;

    // AddrModeT2_so cannot handle any offset. If there is no offset
    // register then we change to an immediate version.
    unsigned NewOpc = Opcode;
    if (AddrMode == ARMII::AddrModeT2_so) {
      Register OffsetReg = MI.getOperand(i: FrameRegIdx + 1).getReg();
      if (OffsetReg != 0) {
        MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
        return Offset == 0;
      }

      MI.removeOperand(OpNo: FrameRegIdx+1);
      MI.getOperand(i: FrameRegIdx+1).ChangeToImmediate(ImmVal: 0);
      NewOpc = immediateOffsetOpcode(opcode: Opcode);
      AddrMode = ARMII::AddrModeT2_i12;
    }

    // Determine the immediate field width (NumBits) and byte scale for the
    // addressing mode, accumulating any offset already encoded in MI.
    unsigned NumBits = 0;
    unsigned Scale = 1;
    if (AddrMode == ARMII::AddrModeT2_i8neg ||
        AddrMode == ARMII::AddrModeT2_i12) {
      // i8 supports only negative, and i12 supports only positive, so
      // based on Offset sign convert Opcode to the appropriate
      // instruction
      Offset += MI.getOperand(i: FrameRegIdx+1).getImm();
      if (Offset < 0) {
        NewOpc = negativeOffsetOpcode(opcode: Opcode);
        NumBits = 8;
        isSub = true;
        Offset = -Offset;
      } else {
        NewOpc = positiveOffsetOpcode(opcode: Opcode);
        NumBits = 12;
      }
    } else if (AddrMode == ARMII::AddrMode5) {
      // VFP address mode.
      const MachineOperand &OffOp = MI.getOperand(i: FrameRegIdx+1);
      int InstrOffs = ARM_AM::getAM5Offset(AM5Opc: OffOp.getImm());
      if (ARM_AM::getAM5Op(AM5Opc: OffOp.getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 4;
      Offset += InstrOffs * 4;
      assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!" );
      if (Offset < 0) {
        Offset = -Offset;
        isSub = true;
      }
    } else if (AddrMode == ARMII::AddrMode5FP16) {
      // VFP address mode.
      const MachineOperand &OffOp = MI.getOperand(i: FrameRegIdx+1);
      int InstrOffs = ARM_AM::getAM5FP16Offset(AM5Opc: OffOp.getImm());
      if (ARM_AM::getAM5FP16Op(AM5Opc: OffOp.getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 2;
      Offset += InstrOffs * 2;
      assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!" );
      if (Offset < 0) {
        Offset = -Offset;
        isSub = true;
      }
    } else if (AddrMode == ARMII::AddrModeT2_i7s4 ||
               AddrMode == ARMII::AddrModeT2_i7s2 ||
               AddrMode == ARMII::AddrModeT2_i7) {
      Offset += MI.getOperand(i: FrameRegIdx + 1).getImm();
      unsigned OffsetMask;
      switch (AddrMode) {
      case ARMII::AddrModeT2_i7s4: NumBits = 9; OffsetMask = 0x3; break;
      case ARMII::AddrModeT2_i7s2: NumBits = 8; OffsetMask = 0x1; break;
      default: NumBits = 7; OffsetMask = 0x0; break;
      }
      // MCInst operand expects already scaled value.
      Scale = 1;
      assert((Offset & OffsetMask) == 0 && "Can't encode this offset!" );
      (void)OffsetMask; // squash unused-variable warning at -NDEBUG
    } else if (AddrMode == ARMII::AddrModeT2_i8s4) {
      Offset += MI.getOperand(i: FrameRegIdx + 1).getImm();
      NumBits = 8 + 2;
      // MCInst operand expects already scaled value.
      Scale = 1;
      assert((Offset & 3) == 0 && "Can't encode this offset!" );
    } else if (AddrMode == ARMII::AddrModeT2_ldrex) {
      Offset += MI.getOperand(i: FrameRegIdx + 1).getImm() * 4;
      NumBits = 8; // 8 bits scaled by 4
      Scale = 4;
      assert((Offset & 3) == 0 && "Can't encode this offset!" );
    } else {
      llvm_unreachable("Unsupported addressing mode!" );
    }

    if (NewOpc != Opcode)
      MI.setDesc(TII.get(Opcode: NewOpc));

    MachineOperand &ImmOp = MI.getOperand(i: FrameRegIdx+1);

    // Attempt to fold address computation
    // Common case: small offset, fits into instruction. We need to make sure
    // the register class is correct too, for instructions like the MVE
    // VLDRH.32, which only accepts low tGPR registers.
    int ImmedOffset = Offset / Scale;
    unsigned Mask = (1 << NumBits) - 1;
    if ((unsigned)Offset <= Mask * Scale &&
        (FrameReg.isVirtual() || RegClass->contains(Reg: FrameReg))) {
      if (FrameReg.isVirtual()) {
        // Make sure the register class for the virtual register is correct
        MachineRegisterInfo *MRI = &MF.getRegInfo();
        if (!MRI->constrainRegClass(Reg: FrameReg, RC: RegClass))
          llvm_unreachable("Unable to constrain virtual register class." );
      }

      // Replace the FrameIndex with fp/sp
      MI.getOperand(i: FrameRegIdx).ChangeToRegister(Reg: FrameReg, isDef: false);
      if (isSub) {
        if (AddrMode == ARMII::AddrMode5 || AddrMode == ARMII::AddrMode5FP16)
          // FIXME: Not consistent.
          // AM5/AM5FP16 encode the sign as a separate bit above the offset
          // field rather than as a negative immediate.
          ImmedOffset |= 1 << NumBits;
        else
          ImmedOffset = -ImmedOffset;
      }
      ImmOp.ChangeToImmediate(ImmVal: ImmedOffset);
      Offset = 0;
      return true;
    }

    // Otherwise, offset doesn't fit. Pull in what we can to simplify
    ImmedOffset = ImmedOffset & Mask;
    if (isSub) {
      if (AddrMode == ARMII::AddrMode5 || AddrMode == ARMII::AddrMode5FP16)
        // FIXME: Not consistent.
        ImmedOffset |= 1 << NumBits;
      else {
        ImmedOffset = -ImmedOffset;
        if (ImmedOffset == 0)
          // Change the opcode back if the encoded offset is zero.
          MI.setDesc(TII.get(Opcode: positiveOffsetOpcode(opcode: NewOpc)));
      }
    }
    ImmOp.ChangeToImmediate(ImmVal: ImmedOffset);
    Offset &= ~(Mask*Scale);
  }

  // Report any remaining (signed) offset back to the caller; resolution is
  // complete only if nothing is left and FrameReg fits the operand's class.
  Offset = (isSub) ? -Offset : Offset;
  return Offset == 0 && (FrameReg.isVirtual() || RegClass->contains(Reg: FrameReg));
}
785 | |
786 | ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr &MI, |
787 | Register &PredReg) { |
788 | unsigned Opc = MI.getOpcode(); |
789 | if (Opc == ARM::tBcc || Opc == ARM::t2Bcc) |
790 | return ARMCC::AL; |
791 | return getInstrPredicate(MI, PredReg); |
792 | } |
793 | |
794 | int llvm::findFirstVPTPredOperandIdx(const MachineInstr &MI) { |
795 | const MCInstrDesc &MCID = MI.getDesc(); |
796 | |
797 | for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) |
798 | if (ARM::isVpred(op: MCID.operands()[i].OperandType)) |
799 | return i; |
800 | |
801 | return -1; |
802 | } |
803 | |
804 | ARMVCC::VPTCodes llvm::getVPTInstrPredicate(const MachineInstr &MI, |
805 | Register &PredReg) { |
806 | int PIdx = findFirstVPTPredOperandIdx(MI); |
807 | if (PIdx == -1) { |
808 | PredReg = 0; |
809 | return ARMVCC::None; |
810 | } |
811 | |
812 | PredReg = MI.getOperand(i: PIdx+1).getReg(); |
813 | return (ARMVCC::VPTCodes)MI.getOperand(i: PIdx).getImm(); |
814 | } |
815 | |
// Recompute the block-mask operand (operand 0) of a VPT/VPST instruction by
// scanning the predicated instructions that follow it.
void llvm::recomputeVPTBlockMask(MachineInstr &Instr) {
  assert(isVPTOpcode(Instr.getOpcode()) && "Not a VPST or VPT Instruction!" );

  MachineOperand &MaskOp = Instr.getOperand(i: 0);
  assert(MaskOp.isImm() && "Operand 0 is not the block mask of the VPT/VPST?!" );

  MachineBasicBlock::iterator Iter = ++Instr.getIterator(),
                              End = Instr.getParent()->end();

  // Debug instructions are transparent to VPT blocks; step over them.
  while (Iter != End && Iter->isDebugInstr())
    ++Iter;

  // Verify that the instruction after the VPT/VPST is predicated (it should
  // be), and skip it.
  assert(Iter != End && "Expected some instructions in any VPT block" );
  assert(
      getVPTInstrPredicate(*Iter) == ARMVCC::Then &&
      "VPT/VPST should be followed by an instruction with a 'then' predicate!" );
  ++Iter;

  // Iterate over the predicated instructions, updating the BlockMask as we go.
  // The block ends at the first instruction with no VPT predicate.
  ARM::PredBlockMask BlockMask = ARM::PredBlockMask::T;
  while (Iter != End) {
    if (Iter->isDebugInstr()) {
      ++Iter;
      continue;
    }
    ARMVCC::VPTCodes Pred = getVPTInstrPredicate(MI: *Iter);
    if (Pred == ARMVCC::None)
      break;
    BlockMask = expandPredBlockMask(BlockMask, Kind: Pred);
    ++Iter;
  }

  // Rewrite the BlockMask.
  MaskOp.setImm((int64_t)(BlockMask));
}
853 | |