//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "thumb2-reduce-size"
#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"

STATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs,   "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts,    "Number of 32-bit load / store reduced to 16-bit ones");

static cl::opt<int> ReduceLimit("t2-reduce-limit",
                                cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
                                     cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
                                    cl::init(-1), cl::Hidden);

namespace {
/// ReduceTable - A static table with information on mapping wide Thumb2
/// opcodes onto their narrow Thumb1 equivalents.
struct ReduceEntry {
  uint16_t WideOpc;       // Wide opcode
  uint16_t NarrowOpc1;    // Narrow opcode to transform to
  uint16_t NarrowOpc2;    // Narrow opcode when it's two-address
  uint8_t  Imm1Limit;     // Limit of immediate field (bits)
  uint8_t  Imm2Limit;     // Limit of immediate field when it's two-address
  unsigned LowRegs1 : 1;  // Only possible if low-registers are used
  unsigned LowRegs2 : 1;  // Only possible if low-registers are used (2addr)
  unsigned PredCC1  : 2;  // 0 - If predicated, cc is on and vice versa.
                          // 1 - No cc field.
                          // 2 - Always set CPSR.
  unsigned PredCC2  : 2;
  unsigned PartFlag : 1;  // 16-bit instruction does partial flag update
  unsigned Special  : 1;  // Needs to be dealt with specially
  unsigned AvoidMovs: 1;  // Avoid movs with shifter operand (for Swift)
};

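// For example, the t2ADDri entry below says a 32-bit "add rd, rn, #imm" can
// become tADDi3 (3-bit immediate) or, in the two-address case where rd == rn,
// tADDi8 (8-bit immediate); both forms require low registers, and the entry
// is marked Special so the SP-relative form can be handled separately.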
static const ReduceEntry ReduceTable[] = {
  // Wide,          Narrow1,      Narrow2,      imm1,imm2, lo1, lo2, P/C,PF,S,AM
  { ARM::t2ADCrr,   0,            ARM::tADC,      0,   0,   0,   1,  0,0, 0,0,0 },
  { ARM::t2ADDri,   ARM::tADDi3,  ARM::tADDi8,    3,   8,   1,   1,  0,0, 0,1,0 },
  { ARM::t2ADDrr,   ARM::tADDrr,  ARM::tADDhirr,  0,   0,   1,   0,  0,1, 0,0,0 },
  { ARM::t2ADDSri,  ARM::tADDi3,  ARM::tADDi8,    3,   8,   1,   1,  2,2, 0,1,0 },
  { ARM::t2ADDSrr,  ARM::tADDrr,  0,              0,   0,   1,   0,  2,0, 0,1,0 },
  { ARM::t2ANDrr,   0,            ARM::tAND,      0,   0,   0,   1,  0,0, 1,0,0 },
  { ARM::t2ASRri,   ARM::tASRri,  0,              5,   0,   1,   0,  0,0, 1,0,1 },
  { ARM::t2ASRrr,   0,            ARM::tASRrr,    0,   0,   0,   1,  0,0, 1,0,1 },
  { ARM::t2BICrr,   0,            ARM::tBIC,      0,   0,   0,   1,  0,0, 1,0,0 },
  // FIXME: Disable CMN, as CCodes are backwards from compare expectations.
  //{ ARM::t2CMNrr, ARM::tCMN,    0,              0,   0,   1,   0,  2,0, 0,0,0 },
  { ARM::t2CMNzrr,  ARM::tCMNz,   0,              0,   0,   1,   0,  2,0, 0,0,0 },
  { ARM::t2CMPri,   ARM::tCMPi8,  0,              8,   0,   1,   0,  2,0, 0,0,0 },
  { ARM::t2CMPrr,   ARM::tCMPhir, 0,              0,   0,   0,   0,  2,0, 0,1,0 },
  { ARM::t2EORrr,   0,            ARM::tEOR,      0,   0,   0,   1,  0,0, 1,0,0 },
  // FIXME: adr.n immediate offset must be multiple of 4.
  //{ ARM::t2LEApcrelJT, ARM::tLEApcrelJT, 0,     0,   0,   1,   0,  1,0, 0,0,0 },
  { ARM::t2LSLri,   ARM::tLSLri,  0,              5,   0,   1,   0,  0,0, 1,0,1 },
  { ARM::t2LSLrr,   0,            ARM::tLSLrr,    0,   0,   0,   1,  0,0, 1,0,1 },
  { ARM::t2LSRri,   ARM::tLSRri,  0,              5,   0,   1,   0,  0,0, 1,0,1 },
  { ARM::t2LSRrr,   0,            ARM::tLSRrr,    0,   0,   0,   1,  0,0, 1,0,1 },
  { ARM::t2MOVi,    ARM::tMOVi8,  0,              8,   0,   1,   0,  0,0, 1,0,0 },
  { ARM::t2MOVi16,  ARM::tMOVi8,  0,              8,   0,   1,   0,  0,0, 1,1,0 },
  // FIXME: Do we need the 16-bit 'S' variant?
  { ARM::t2MOVr,    ARM::tMOVr,   0,              0,   0,   0,   0,  1,0, 0,0,0 },
  { ARM::t2MUL,     0,            ARM::tMUL,      0,   0,   0,   1,  0,0, 1,0,0 },
  { ARM::t2MVNr,    ARM::tMVN,    0,              0,   0,   1,   0,  0,0, 0,0,0 },
  { ARM::t2ORRrr,   0,            ARM::tORR,      0,   0,   0,   1,  0,0, 1,0,0 },
  { ARM::t2REV,     ARM::tREV,    0,              0,   0,   1,   0,  1,0, 0,0,0 },
  { ARM::t2REV16,   ARM::tREV16,  0,              0,   0,   1,   0,  1,0, 0,0,0 },
  { ARM::t2REVSH,   ARM::tREVSH,  0,              0,   0,   1,   0,  1,0, 0,0,0 },
  { ARM::t2RORrr,   0,            ARM::tROR,      0,   0,   0,   1,  0,0, 1,0,0 },
  { ARM::t2RSBri,   ARM::tRSB,    0,              0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2RSBSri,  ARM::tRSB,    0,              0,   0,   1,   0,  2,0, 0,1,0 },
  { ARM::t2SBCrr,   0,            ARM::tSBC,      0,   0,   0,   1,  0,0, 0,0,0 },
  { ARM::t2SUBri,   ARM::tSUBi3,  ARM::tSUBi8,    3,   8,   1,   1,  0,0, 0,0,0 },
  { ARM::t2SUBrr,   ARM::tSUBrr,  0,              0,   0,   1,   0,  0,0, 0,0,0 },
  { ARM::t2SUBSri,  ARM::tSUBi3,  ARM::tSUBi8,    3,   8,   1,   1,  2,2, 0,0,0 },
  { ARM::t2SUBSrr,  ARM::tSUBrr,  0,              0,   0,   1,   0,  2,0, 0,0,0 },
  { ARM::t2SXTB,    ARM::tSXTB,   0,              0,   0,   1,   0,  1,0, 0,1,0 },
  { ARM::t2SXTH,    ARM::tSXTH,   0,              0,   0,   1,   0,  1,0, 0,1,0 },
  { ARM::t2TEQrr,   ARM::tEOR,    0,              0,   0,   1,   0,  2,0, 0,1,0 },
  { ARM::t2TSTrr,   ARM::tTST,    0,              0,   0,   1,   0,  2,0, 0,0,0 },
  { ARM::t2UXTB,    ARM::tUXTB,   0,              0,   0,   1,   0,  1,0, 0,1,0 },
  { ARM::t2UXTH,    ARM::tUXTH,   0,              0,   0,   1,   0,  1,0, 0,1,0 },

  // FIXME: Clean this up after splitting each Thumb load / store opcode
  // into multiple ones.
  { ARM::t2LDRi12,  ARM::tLDRi,   ARM::tLDRspi,   5,   8,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRs,    ARM::tLDRr,   0,              0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRBi12, ARM::tLDRBi,  0,              5,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRBs,   ARM::tLDRBr,  0,              0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRHi12, ARM::tLDRHi,  0,              5,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRHs,   ARM::tLDRHr,  0,              0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRSBs,  ARM::tLDRSB,  0,              0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDRSHs,  ARM::tLDRSH,  0,              0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2LDR_POST,ARM::tLDMIA_UPD, 0,           0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2STRi12,  ARM::tSTRi,   ARM::tSTRspi,   5,   8,   1,   0,  0,0, 0,1,0 },
  { ARM::t2STRs,    ARM::tSTRr,   0,              0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2STRBi12, ARM::tSTRBi,  0,              5,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2STRBs,   ARM::tSTRBr,  0,              0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2STRHi12, ARM::tSTRHi,  0,              5,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2STRHs,   ARM::tSTRHr,  0,              0,   0,   1,   0,  0,0, 0,1,0 },
  { ARM::t2STR_POST,ARM::tSTMIA_UPD, 0,           0,   0,   1,   0,  0,0, 0,1,0 },

  { ARM::t2LDMIA,   ARM::tLDMIA,  0,              0,   0,   1,   1,  1,1, 0,1,0 },
  { ARM::t2LDMIA_RET, 0,          ARM::tPOP_RET,  0,   0,   1,   1,  1,1, 0,1,0 },
  { ARM::t2LDMIA_UPD, ARM::tLDMIA_UPD, ARM::tPOP, 0,   0,   1,   1,  1,1, 0,1,0 },
  // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
  // tSTMIA_UPD is a change in semantics which can only be used if the base
  // register is killed. This difference is correctly handled elsewhere.
  { ARM::t2STMIA,   ARM::tSTMIA_UPD, 0,           0,   0,   1,   1,  1,1, 0,1,0 },
  { ARM::t2STMIA_UPD, ARM::tSTMIA_UPD, 0,         0,   0,   1,   1,  1,1, 0,1,0 },
  { ARM::t2STMDB_UPD, 0,          ARM::tPUSH,     0,   0,   1,   1,  1,1, 0,1,0 }
};

class Thumb2SizeReduce : public MachineFunctionPass {
public:
  static char ID;

  const Thumb2InstrInfo *TII;
  const ARMSubtarget *STI;

  Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr);

  bool runOnMachineFunction(MachineFunction &MF) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override {
    return THUMB2_SIZE_REDUCE_NAME;
  }

private:
  /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
  DenseMap<unsigned, unsigned> ReduceOpcodeMap;

  bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);

  bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                       bool is2Addr, ARMCC::CondCodes Pred,
                       bool LiveCPSR, bool &HasCC, bool &CCDead);

  bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                       const ReduceEntry &Entry);

  bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                     const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);

  /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
  /// instruction.
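  /// For example (per the t2SUBri entry in ReduceTable), "sub.w r0, r0, #10"
  /// can become the two-address "subs r0, #10" (tSUBi8) when the destination
  /// matches the first source register.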
  bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                     const ReduceEntry &Entry, bool LiveCPSR,
                     bool IsSelfLoop);

  /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
  /// non-two-address instruction.
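  /// For example (per the t2CMPri entry), "cmp.w r0, #255" can become
  /// "cmp r0, #255" (tCMPi8), since the narrow form still encodes an 8-bit
  /// immediate against a low register.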
  bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                      const ReduceEntry &Entry, bool LiveCPSR,
                      bool IsSelfLoop);

  /// ReduceMI - Attempt to reduce MI, return true on success.
  bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR,
                bool IsSelfLoop, bool SkipPrologueEpilogue);

  /// ReduceMBB - Reduce width of instructions in the specified basic block.
  bool ReduceMBB(MachineBasicBlock &MBB, bool SkipPrologueEpilogue);

  bool OptimizeSize;
  bool MinimizeSize;

  // Last instruction to define CPSR in the current block.
  MachineInstr *CPSRDef;
  // Was CPSR last defined by a high latency instruction?
  // When CPSRDef is null, this refers to CPSR defs in predecessors.
  bool HighLatencyCPSR;

  struct MBBInfo {
    // The flags leaving this block have high latency.
    bool HighLatencyCPSR = false;
    // Has this block been visited yet?
    bool Visited = false;

    MBBInfo() = default;
  };

  SmallVector<MBBInfo, 8> BlockInfo;

  std::function<bool(const Function &)> PredicateFtor;
};

char Thumb2SizeReduce::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(Thumb2SizeReduce, DEBUG_TYPE, THUMB2_SIZE_REDUCE_NAME, false,
                false)

Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
    : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
  OptimizeSize = MinimizeSize = false;
  for (unsigned i = 0, e = std::size(ReduceTable); i != e; ++i) {
    unsigned FromOpc = ReduceTable[i].WideOpc;
    if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
      llvm_unreachable("Duplicated entries?");
  }
}

static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
  return is_contained(MCID.implicit_defs(), ARM::CPSR);
}

// Check for a likely high-latency flag def.
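// FMSTAT transfers the VFP status flags to CPSR, and flag-setting multiplies
// take several cycles on many cores, so flags produced by either are treated
// as high latency.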
static bool isHighLatencyCPSR(MachineInstr *Def) {
  switch (Def->getOpcode()) {
  case ARM::FMSTAT:
  case ARM::tMUL:
    return true;
  }
  return false;
}

/// canAddPseudoFlagDep - On A9 (and other out-of-order) implementations,
/// the 's' 16-bit instructions only partially update CPSR. Abort the
/// transformation to avoid adding a false dependency on the last CPSR-setting
/// instruction, which hurts the out-of-order execution engine's ability to do
/// register renaming.
/// This function checks whether there is a read-after-write dependency between
/// the last instruction that defines CPSR and the current instruction. If
/// there is, then there is no harm done since the instruction cannot be
/// retired before the CPSR-setting instruction anyway.
/// Note, we are not doing full dependency analysis here for the sake of
/// compile time. We're not looking for cases like:
///   r0 = muls ...
///   r1 = add.w r0, ...
///   ...
///      = mul.w r1
/// In this case it would have been ok to narrow the mul.w to muls since there
/// is an indirect RAW dependency between the muls and the mul.w.
bool
Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
  // Disable the check for -Oz (aka OptimizeForSizeHarder).
  if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
    return false;

  if (!CPSRDef)
    // If this BB loops back to itself, conservatively avoid narrowing the
    // first instruction that does partial flag update.
    return HighLatencyCPSR || FirstInSelfLoop;

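  // Collect the registers written by the last CPSR-defining instruction; if
  // Use reads any of them there is a real RAW dependency, so narrowing adds
  // no new false dependency.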
  SmallSet<unsigned, 2> Defs;
  for (const MachineOperand &MO : CPSRDef->operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    Defs.insert(Reg);
  }

  for (const MachineOperand &MO : Use->operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    Register Reg = MO.getReg();
    if (Defs.count(Reg))
      return false;
  }

  // If the current CPSR has high latency, try to avoid the false dependency.
  if (HighLatencyCPSR)
    return true;

  // tMOVi8 usually doesn't start long dependency chains, and there are a lot
  // of them, so always shrink them when CPSR doesn't have high latency.
  if (Use->getOpcode() == ARM::t2MOVi ||
      Use->getOpcode() == ARM::t2MOVi16)
    return false;

  // No read-after-write dependency. The narrowing will add false dependency.
  return true;
}

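/// VerifyPredAndCC - Check that MI's predicate and optional CPSR def are
/// compatible with the narrow opcode, per the entry's PredCC1/PredCC2 value,
/// updating HasCC/CCDead to describe the CPSR def of the narrow instruction.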
bool
Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                                  bool is2Addr, ARMCC::CondCodes Pred,
                                  bool LiveCPSR, bool &HasCC, bool &CCDead) {
  if ((is2Addr  && Entry.PredCC2 == 0) ||
      (!is2Addr && Entry.PredCC1 == 0)) {
    if (Pred == ARMCC::AL) {
      // Not predicated, must set CPSR.
      if (!HasCC) {
        // Original instruction was not setting CPSR, but CPSR is not
        // currently live anyway. It's ok to set it. The CPSR def is
        // dead though.
        if (!LiveCPSR) {
          HasCC = true;
          CCDead = true;
          return true;
        }
        return false;
      }
    } else {
      // Predicated, must not set CPSR.
      if (HasCC)
        return false;
    }
  } else if ((is2Addr  && Entry.PredCC2 == 2) ||
             (!is2Addr && Entry.PredCC1 == 2)) {
    // Old opcode has an optional def of CPSR.
    if (HasCC)
      return true;
    // If the old opcode does not implicitly define CPSR, then it's not ok,
    // since the new opcode's CPSR def is not meant to be thrown away,
    // e.g. CMP.
    if (!HasImplicitCPSRDef(MI->getDesc()))
      return false;
    HasCC = true;
  } else {
    // 16-bit instruction does not set CPSR.
    if (HasCC)
      return false;
  }

  return true;
}

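/// VerifyLowRegs - Check that all register operands are low registers
/// (r0-r7), allowing PC, LR and SP only where the narrow opcode can still
/// encode them.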
static bool VerifyLowRegs(MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
  bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
  bool isLROk = (Opc == ARM::t2STMDB_UPD);
  bool isSPOk = isPCOk || isLROk;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    if (isPCOk && Reg == ARM::PC)
      continue;
    if (isLROk && Reg == ARM::LR)
      continue;
    if (Reg == ARM::SP) {
      if (isSPOk)
        continue;
      if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
        // Special case for these ldr / str with sp as base register.
        continue;
    }
    if (!isARMLowRegister(Reg))
      return false;
  }
  return true;
}

bool
Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry) {
  if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
    return false;

  unsigned Scale = 1;
  bool HasImmOffset = false;
  bool HasShift = false;
  bool HasOffReg = true;
  bool isLdStMul = false;
  unsigned Opc = Entry.NarrowOpc1;
  unsigned OpNum = 3; // First 'rest' of operands.
  uint8_t ImmLimit = Entry.Imm1Limit;

  switch (Entry.WideOpc) {
  default:
    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
  case ARM::t2LDRi12:
  case ARM::t2STRi12:
    if (MI->getOperand(1).getReg() == ARM::SP) {
      Opc = Entry.NarrowOpc2;
      ImmLimit = Entry.Imm2Limit;
    }

    Scale = 4;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRBi12:
  case ARM::t2STRBi12:
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRHi12:
  case ARM::t2STRHi12:
    Scale = 2;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRs:
  case ARM::t2LDRBs:
  case ARM::t2LDRHs:
  case ARM::t2LDRSBs:
  case ARM::t2LDRSHs:
  case ARM::t2STRs:
  case ARM::t2STRBs:
  case ARM::t2STRHs:
    HasShift = true;
    OpNum = 4;
    break;
  case ARM::t2LDR_POST:
  case ARM::t2STR_POST: {
    if (!MinimizeSize)
      return false;

    if (!MI->hasOneMemOperand() ||
        (*MI->memoperands_begin())->getAlign() < Align(4))
      return false;

    // We're creating a completely different type of load/store - LDM from LDR.
    // For this reason we can't reuse the logic at the end of this function; we
    // have to implement the MI building here.
    bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
    Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
    Register Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
    unsigned Offset = MI->getOperand(3).getImm();
    unsigned PredImm = MI->getOperand(4).getImm();
    Register PredReg = MI->getOperand(5).getReg();
    assert(isARMLowRegister(Rt));
    assert(isARMLowRegister(Rn));

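    // A single-register tLDMIA_UPD / tSTMIA_UPD advances the base register by
    // exactly 4 bytes, so only a post-increment of 4 can be expressed.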
    if (Offset != 4)
      return false;

    // Add the 16-bit load / store instruction.
    DebugLoc dl = MI->getDebugLoc();
    auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
                   .addReg(Rn, RegState::Define)
                   .addReg(Rn)
                   .addImm(PredImm)
                   .addReg(PredReg)
                   .addReg(Rt, IsStore ? 0 : RegState::Define);

    // Transfer memoperands.
    MIB.setMemRefs(MI->memoperands());

    // Transfer MI flags.
    MIB.setMIFlags(MI->getFlags());

    // Kill the old instruction.
    MI->eraseFromBundle();
    ++NumLdSts;
    return true;
  }
  case ARM::t2LDMIA: {
    Register BaseReg = MI->getOperand(0).getReg();
    assert(isARMLowRegister(BaseReg));

    // For the non-writeback version (this one), the base register must be
    // one of the registers being loaded.
    bool isOK = false;
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3)) {
      if (MO.getReg() == BaseReg) {
        isOK = true;
        break;
      }
    }

    if (!isOK)
      return false;

    OpNum = 0;
    isLdStMul = true;
    break;
  }
  case ARM::t2STMIA: {
    // t2STMIA is reduced to tSTMIA_UPD which has writeback. We can only do
    // this if the base register is killed, as then it doesn't matter what its
    // value is after the instruction.
    if (!MI->getOperand(0).isKill())
      return false;

    // If the base register is in the register list and isn't the lowest
    // numbered register (i.e. it's in operand 4 onwards) then with writeback
    // the stored value is unknown, so we can't convert to tSTMIA_UPD.
    Register BaseReg = MI->getOperand(0).getReg();
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 4))
      if (MO.getReg() == BaseReg)
        return false;

    break;
  }
  case ARM::t2LDMIA_RET: {
    Register BaseReg = MI->getOperand(1).getReg();
    if (BaseReg != ARM::SP)
      return false;
    Opc = Entry.NarrowOpc2; // tPOP_RET
    OpNum = 2;
    isLdStMul = true;
    break;
  }
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    OpNum = 0;

    Register BaseReg = MI->getOperand(1).getReg();
    if (BaseReg == ARM::SP &&
        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
         Entry.WideOpc == ARM::t2STMDB_UPD)) {
      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
      OpNum = 2;
    } else if (!isARMLowRegister(BaseReg) ||
               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
                Entry.WideOpc != ARM::t2STMIA_UPD)) {
      return false;
    }

    isLdStMul = true;
    break;
  }
  }

  unsigned OffsetReg = 0;
  bool OffsetKill = false;
  bool OffsetInternal = false;
  if (HasShift) {
    OffsetReg      = MI->getOperand(2).getReg();
    OffsetKill     = MI->getOperand(2).isKill();
    OffsetInternal = MI->getOperand(2).isInternalRead();

    if (MI->getOperand(3).getImm())
      // Thumb1 addressing mode doesn't support shift.
      return false;
  }

  unsigned OffsetImm = 0;
  if (HasImmOffset) {
    OffsetImm = MI->getOperand(2).getImm();
    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;

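    // For example, tLDRi encodes a 5-bit immediate scaled by 4, so the byte
    // offset must be a multiple of 4 in the range [0, 124].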
    if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
      // Make sure the immediate field fits.
      return false;
  }

  // Add the 16-bit load / store instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));

  // tSTMIA_UPD takes a defining register operand. We've already checked that
  // the register is killed, so mark it as dead here.
  if (Entry.WideOpc == ARM::t2STMIA)
    MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);

  if (!isLdStMul) {
    MIB.add(MI->getOperand(0));
    MIB.add(MI->getOperand(1));

    if (HasImmOffset)
      MIB.addImm(OffsetImm / Scale);

    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");

    if (HasOffReg)
      MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
                                getInternalReadRegState(OffsetInternal));
  }

  // Transfer the rest of operands.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), OpNum))
    MIB.add(MO);

  // Transfer memoperands.
  MIB.setMemRefs(MI->memoperands());

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumLdSts;
  return true;
}

bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, bool IsSelfLoop) {
  unsigned Opc = MI->getOpcode();
  if (Opc == ARM::t2ADDri) {
    // If the source register is SP, try to reduce to tADDrSPi, otherwise
    // it's a normal reduce.
    if (MI->getOperand(1).getReg() != ARM::SP) {
      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
        return true;
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    }
    // Try to reduce to tADDrSPi.
    unsigned Imm = MI->getOperand(2).getImm();
    // The immediate must be in range, the destination register must be a low
    // reg, the predicate must be "always", and the condition flags must not
    // be set.
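    // (tADDrSPi encodes an 8-bit immediate scaled by 4, i.e. 0-1020.)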
    if (Imm & 3 || Imm > 1020)
      return false;
    if (!isARMLowRegister(MI->getOperand(0).getReg()))
      return false;
    if (MI->getOperand(3).getImm() != ARMCC::AL)
      return false;
    const MCInstrDesc &MCID = MI->getDesc();
    if (MCID.hasOptionalDef() &&
        MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
      return false;

    MachineInstrBuilder MIB =
        BuildMI(MBB, MI, MI->getDebugLoc(),
                TII->get(ARM::tADDrSPi))
            .add(MI->getOperand(0))
            .add(MI->getOperand(1))
            .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
            .add(predOps(ARMCC::AL));

    // Transfer MI flags.
    MIB.setMIFlags(MI->getFlags());

    LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                      << " to 16-bit: " << *MIB);

    MBB.erase_instr(MI);
    ++NumNarrows;
    return true;
  }

  if (Entry.LowRegs1 && !VerifyLowRegs(MI))
    return false;

  if (MI->mayLoadOrStore())
    return ReduceLoadStore(MBB, MI, Entry);

  switch (Opc) {
  default: break;
  case ARM::t2ADDSri:
  case ARM::t2ADDSrr: {
    Register PredReg;
    if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
      switch (Opc) {
      default: break;
      case ARM::t2ADDSri:
        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
          return true;
        [[fallthrough]];
      case ARM::t2ADDSrr:
        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
      }
    }
    break;
  }
  case ARM::t2RSBri:
  case ARM::t2RSBSri:
  case ARM::t2SXTB:
  case ARM::t2SXTH:
  case ARM::t2UXTB:
  case ARM::t2UXTH:
    if (MI->getOperand(2).getImm() == 0)
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    break;
  case ARM::t2MOVi16:
    // Can convert only 'pure' immediate operands, not immediates obtained as
    // globals' addresses.
    if (MI->getOperand(1).isImm())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    break;
  case ARM::t2CMPrr: {
    // Try to reduce to the lo-reg only version first. Why there are two
    // versions of the instruction is a mystery.
    // It would be nice to just have two entries in the main table that
    // are prioritized, but the table assumes a unique entry for each
    // source insn opcode. So for now, we hack a local entry record to use.
    static const ReduceEntry NarrowEntry =
      { ARM::t2CMPrr, ARM::tCMPr, 0, 0, 0, 1, 1, 2,0, 0,1,0 };
    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
      return true;
    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
  }
  case ARM::t2TEQrr: {
    Register PredReg;
    // Can only convert to eors if we're not in an IT block.
    if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
      break;
    // TODO: if Operand 0 is not killed but Operand 1 is, then we could write
    // to Op1 instead.
    if (MI->getOperand(0).isKill())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
  }
  }
  return false;
}

bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
    return false;

  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;

  Register Reg0 = MI->getOperand(0).getReg();
  Register Reg1 = MI->getOperand(1).getReg();
  // t2MUL is "special". The tied source operand is second, not first.
  if (MI->getOpcode() == ARM::t2MUL) {
    Register Reg2 = MI->getOperand(2).getReg();
    // Early exit if the regs aren't all low regs.
    if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
        || !isARMLowRegister(Reg2))
      return false;
    if (Reg0 != Reg2) {
      // If the other operand also isn't the same as the destination, we
      // can't reduce.
      if (Reg1 != Reg0)
        return false;
      // Try to commute the operands to make it a 2-address instruction.
      MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
      if (!CommutedMI)
        return false;
    }
  } else if (Reg0 != Reg1) {
    // Try to commute the operands to make it a 2-address instruction.
    unsigned CommOpIdx1 = 1;
    unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
    if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
        MI->getOperand(CommOpIdx2).getReg() != Reg0)
      return false;
    MachineInstr *CommutedMI =
        TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
    if (!CommutedMI)
      return false;
  }
  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
    return false;
  if (Entry.Imm2Limit) {
    unsigned Imm = MI->getOperand(2).getImm();
    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
    if (Imm > Limit)
      return false;
  } else {
    Register Reg2 = MI->getOperand(2).getReg();
    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
      return false;
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on the partial flag update performed by
  // 16-bit instructions that have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.add(MI->getOperand(0));
  if (NewMCID.hasOptionalDef())
    MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.operands()[i].isOptionalDef())
      continue;
    if (SkipPred && MCID.operands()[i].isPredicate())
      continue;
    MIB.add(MI->getOperand(i));
  }

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++Num2Addrs;
  return true;
}

bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
                                 bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
    return false;

  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;

  unsigned Limit = ~0U;
  if (Entry.Imm1Limit)
    Limit = (1 << Entry.Imm1Limit) - 1;

  const MCInstrDesc &MCID = MI->getDesc();
  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
    if (MCID.operands()[i].isPredicate())
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg()) {
      Register Reg = MO.getReg();
      if (!Reg || Reg == ARM::CPSR)
        continue;
      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
        return false;
    } else if (MO.isImm() && !MCID.operands()[i].isPredicate()) {
      if (((unsigned)MO.getImm()) > Limit)
        return false;
    }
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on the partial flag update performed by
  // 16-bit instructions that have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);

  // TEQ is special in that it doesn't define a register but we're converting
  // it into an EOR which does. So add the first operand as a def and then
  // again as a use.
  if (MCID.getOpcode() == ARM::t2TEQrr) {
    MIB.add(MI->getOperand(0));
    MIB->getOperand(0).setIsKill(false);
    MIB->getOperand(0).setIsDef(true);
    MIB->getOperand(0).setIsDead(true);

    if (NewMCID.hasOptionalDef())
      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
    MIB.add(MI->getOperand(0));
  } else {
    MIB.add(MI->getOperand(0));
    if (NewMCID.hasOptionalDef())
      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
  }

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.operands()[i].isOptionalDef())
      continue;
    if ((MCID.getOpcode() == ARM::t2RSBSri ||
         MCID.getOpcode() == ARM::t2RSBri ||
         MCID.getOpcode() == ARM::t2SXTB ||
         MCID.getOpcode() == ARM::t2SXTH ||
         MCID.getOpcode() == ARM::t2UXTB ||
         MCID.getOpcode() == ARM::t2UXTH) && i == 2)
      // Skip the zero immediate operand, it's now implicit.
      continue;
    bool isPred = (i < NumOps && MCID.operands()[i].isPredicate());
    if (SkipPred && isPred)
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
      // Skip implicit def of CPSR. Either it's modeled as an optional
      // def now or it's already an implicit def on the new instruction.
      continue;
    MIB.add(MO);
  }
  if (!MCID.isPredicable() && NewMCID.isPredicable())
    MIB.add(predOps(ARMCC::AL));

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumNarrows;
  return true;
}

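/// UpdateCPSRDef - Track CPSR liveness across a def: returns true if CPSR is
/// live after MI, and sets DefCPSR if MI defines CPSR at all (even a dead
/// def).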
static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
  bool HasDef = false;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;

    DefCPSR = true;
    if (!MO.isDead())
      HasDef = true;
  }

  return HasDef || LiveCPSR;
}

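/// UpdateCPSRUse - Track CPSR liveness across a use: a kill of CPSR ends its
/// live range.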
static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
    if (MO.isKill()) {
      LiveCPSR = false;
      break;
    }
  }

  return LiveCPSR;
}

bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
                                bool LiveCPSR, bool IsSelfLoop,
                                bool SkipPrologueEpilogue) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
  if (OPI == ReduceOpcodeMap.end())
    return false;
  if (SkipPrologueEpilogue && (MI->getFlag(MachineInstr::FrameSetup) ||
                               MI->getFlag(MachineInstr::FrameDestroy)))
    return false;
  const ReduceEntry &Entry = ReduceTable[OPI->second];

  // Don't attempt normal reductions on "special" cases for now.
  if (Entry.Special)
    return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);

  // Try to transform to a 16-bit two-address instruction.
  if (Entry.NarrowOpc2 &&
      ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;

  // Try to transform to a 16-bit non-two-address instruction.
  if (Entry.NarrowOpc1 &&
      ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;

  return false;
}

bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB,
                                 bool SkipPrologueEpilogue) {
  bool Modified = false;

  // Yes, CPSR could be live-in.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *BundleMI = nullptr;

  CPSRDef = nullptr;
  HighLatencyCPSR = false;

  // Check predecessors for the latest CPSRDef.
  for (auto *Pred : MBB.predecessors()) {
    const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
    if (!PInfo.Visited) {
      // Since blocks are visited in RPO, this must be a back-edge.
      continue;
    }
    if (PInfo.HighLatencyCPSR) {
      HighLatencyCPSR = true;
      break;
    }
  }

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = std::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }
    if (MI->isDebugInstr())
      continue;

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    // Does NextMII belong to the same bundle as MI?
    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();

    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) {
      Modified = true;
      MachineBasicBlock::instr_iterator I = std::prev(NextMII);
      MI = &*I;
      // Removing and reinserting the first instruction in a bundle will break
      // up the bundle. Fix the bundling if it was broken.
      if (NextInSameBundle && !NextMII->isBundledWithPred())
        NextMII->bundleWithPred();
    }

    if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
      // FIXME: Since the post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR, /*TRI=*/nullptr))
        LiveCPSR = false;
      MachineOperand *MO =
          BundleMI->findRegisterDefOperand(ARM::CPSR, /*TRI=*/nullptr);
      if (MO && !MO->isDead())
        LiveCPSR = true;
      MO = BundleMI->findRegisterUseOperand(ARM::CPSR, /*TRI=*/nullptr);
      if (MO && !MO->isKill())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = nullptr;
      HighLatencyCPSR = false;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
      IsSelfLoop = false;
    }
  }

  MBBInfo &Info = BlockInfo[MBB.getNumber()];
  Info.HighLatencyCPSR = HighLatencyCPSR;
  Info.Visited = true;
  return Modified;
}

bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
  if (PredicateFtor && !PredicateFtor(MF.getFunction()))
    return false;

  STI = &MF.getSubtarget<ARMSubtarget>();
  if (STI->isThumb1Only() || STI->prefers32BitThumb())
    return false;

  TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());

  // Optimizing / minimizing size? Minimizing size implies optimizing for size.
  OptimizeSize = MF.getFunction().hasOptSize();
  MinimizeSize = STI->hasMinSize();

  BlockInfo.clear();
  BlockInfo.resize(MF.getNumBlockIDs());

  // Visit blocks in reverse post-order so CPSRDef is known for all
  // predecessors.
  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
  bool Modified = false;
  bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
                     MF.getFunction().needsUnwindTableEntry();
  for (MachineBasicBlock *MBB : RPOT)
    Modified |= ReduceMBB(*MBB, /*SkipPrologueEpilogue=*/NeedsWinCFI);
  return Modified;
}

/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
/// reduction pass.
FunctionPass *llvm::createThumb2SizeReductionPass(
    std::function<bool(const Function &)> Ftor) {
  return new Thumb2SizeReduce(std::move(Ftor));
}