//===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass expands ADDItls{ld,gd}LADDR[32] machine instructions into
// separate ADDItls{ld,gd}L[32] and GETtls[ld]ADDR[32] instructions, both of
// which define GPR3.  A copy is added from GPR3 to the target virtual
// register of the original instruction.  The GETtls[ld]ADDR[32] is really
// a call instruction, so its target register is constrained to be GPR3.
// This is not true of ADDItls{ld,gd}L[32], but there is a legacy linker
// optimization bug that requires the target register of the addi of
// a local- or general-dynamic TLS access sequence to be GPR3.
//
// This is done in a late pass so that TLS variable accesses can be
// fully commoned by MachineCSE.
//
//===----------------------------------------------------------------------===//
22 | |
#include "PPC.h"
#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <set>
32 | |
33 | using namespace llvm; |
34 | |
35 | #define DEBUG_TYPE "ppc-tls-dynamic-call" |
36 | |
37 | namespace { |
38 | struct PPCTLSDynamicCall : public MachineFunctionPass { |
39 | static char ID; |
40 | PPCTLSDynamicCall() : MachineFunctionPass(ID) {} |
41 | |
42 | const PPCInstrInfo *TII; |
43 | |
44 | protected: |
45 | bool processBlock(MachineBasicBlock &MBB) { |
46 | bool Changed = false; |
47 | bool NeedFence = true; |
48 | const PPCSubtarget &Subtarget = |
49 | MBB.getParent()->getSubtarget<PPCSubtarget>(); |
50 | bool Is64Bit = Subtarget.isPPC64(); |
51 | bool IsAIX = Subtarget.isAIXABI(); |
52 | bool IsLargeModel = |
53 | Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large; |
54 | bool IsPCREL = false; |
55 | MachineFunction *MF = MBB.getParent(); |
56 | MachineRegisterInfo &RegInfo = MF->getRegInfo(); |
57 | |
58 | for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); |
59 | I != IE;) { |
60 | MachineInstr &MI = *I; |
61 | IsPCREL = isPCREL(MI); |
62 | // There are a number of slight differences in code generation |
63 | // when we call .__get_tpointer (32-bit AIX TLS). |
64 | bool IsTLSTPRelMI = MI.getOpcode() == PPC::GETtlsTpointer32AIX; |
65 | bool IsTLSLDAIXMI = (MI.getOpcode() == PPC::TLSLDAIX8 || |
66 | MI.getOpcode() == PPC::TLSLDAIX); |
67 | |
68 | if (MI.getOpcode() != PPC::ADDItlsgdLADDR && |
69 | MI.getOpcode() != PPC::ADDItlsldLADDR && |
70 | MI.getOpcode() != PPC::ADDItlsgdLADDR32 && |
71 | MI.getOpcode() != PPC::ADDItlsldLADDR32 && |
72 | MI.getOpcode() != PPC::TLSGDAIX && |
73 | MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL && |
74 | !IsTLSLDAIXMI) { |
75 | // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP |
76 | // as scheduling fences, we skip creating fences if we already |
77 | // have existing ADJCALLSTACKDOWN/UP to avoid nesting, |
78 | // which causes verification error with -verify-machineinstrs. |
79 | if (MI.getOpcode() == PPC::ADJCALLSTACKDOWN) |
80 | NeedFence = false; |
81 | else if (MI.getOpcode() == PPC::ADJCALLSTACKUP) |
82 | NeedFence = true; |
83 | |
84 | ++I; |
85 | continue; |
86 | } |
87 | |
88 | LLVM_DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << MI); |
89 | |
90 | Register OutReg = MI.getOperand(i: 0).getReg(); |
91 | Register InReg = PPC::NoRegister; |
92 | Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3; |
93 | Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4; |
94 | if (!IsPCREL && !IsTLSTPRelMI) |
95 | InReg = MI.getOperand(i: 1).getReg(); |
96 | DebugLoc DL = MI.getDebugLoc(); |
97 | |
98 | unsigned Opc1, Opc2; |
99 | switch (MI.getOpcode()) { |
100 | default: |
101 | llvm_unreachable("Opcode inconsistency error" ); |
102 | case PPC::ADDItlsgdLADDR: |
103 | Opc1 = PPC::ADDItlsgdL; |
104 | Opc2 = PPC::GETtlsADDR; |
105 | break; |
106 | case PPC::ADDItlsldLADDR: |
107 | Opc1 = PPC::ADDItlsldL; |
108 | Opc2 = PPC::GETtlsldADDR; |
109 | break; |
110 | case PPC::ADDItlsgdLADDR32: |
111 | Opc1 = PPC::ADDItlsgdL32; |
112 | Opc2 = PPC::GETtlsADDR32; |
113 | break; |
114 | case PPC::ADDItlsldLADDR32: |
115 | Opc1 = PPC::ADDItlsldL32; |
116 | Opc2 = PPC::GETtlsldADDR32; |
117 | break; |
118 | case PPC::TLSLDAIX: |
119 | // TLSLDAIX is expanded to one copy and GET_TLS_MOD, so we only set |
120 | // Opc2 here. |
121 | Opc2 = PPC::GETtlsMOD32AIX; |
122 | break; |
123 | case PPC::TLSLDAIX8: |
124 | // TLSLDAIX8 is expanded to one copy and GET_TLS_MOD, so we only set |
125 | // Opc2 here. |
126 | Opc2 = PPC::GETtlsMOD64AIX; |
127 | break; |
128 | case PPC::TLSGDAIX8: |
129 | // TLSGDAIX8 is expanded to two copies and GET_TLS_ADDR, so we only |
130 | // set Opc2 here. |
131 | Opc2 = PPC::GETtlsADDR64AIX; |
132 | break; |
133 | case PPC::TLSGDAIX: |
134 | // TLSGDAIX is expanded to two copies and GET_TLS_ADDR, so we only |
135 | // set Opc2 here. |
136 | Opc2 = PPC::GETtlsADDR32AIX; |
137 | break; |
138 | case PPC::GETtlsTpointer32AIX: |
139 | // GETtlsTpointer32AIX is expanded to a call to GET_TPOINTER on AIX |
140 | // 32-bit mode within PPCAsmPrinter. This instruction does not need |
141 | // to change, so Opc2 is set to the same instruction opcode. |
142 | Opc2 = PPC::GETtlsTpointer32AIX; |
143 | break; |
144 | case PPC::PADDI8pc: |
145 | assert(IsPCREL && "Expecting General/Local Dynamic PCRel" ); |
146 | Opc1 = PPC::PADDI8pc; |
147 | Opc2 = MI.getOperand(i: 2).getTargetFlags() == |
148 | PPCII::MO_GOT_TLSGD_PCREL_FLAG |
149 | ? PPC::GETtlsADDRPCREL |
150 | : PPC::GETtlsldADDRPCREL; |
151 | } |
152 | |
153 | // We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr |
154 | // as scheduling fence to avoid it is scheduled before |
155 | // mflr in the prologue and the address in LR is clobbered (PR25839). |
156 | // We don't really need to save data to the stack - the clobbered |
157 | // registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr) |
158 | // gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR). |
159 | if (NeedFence) { |
160 | MBB.getParent()->getFrameInfo().setAdjustsStack(true); |
161 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: PPC::ADJCALLSTACKDOWN)).addImm(Val: 0) |
162 | .addImm(Val: 0); |
163 | } |
164 | |
165 | if (IsAIX) { |
166 | if (IsTLSLDAIXMI) { |
167 | // The relative order between the node that loads the variable |
168 | // offset from the TOC, and the .__tls_get_mod node is being tuned |
169 | // here. It is better to put the variable offset TOC load after the |
170 | // call, since this node can use clobbers r4/r5. |
171 | // Search for the pattern of the two nodes that load from the TOC |
172 | // (either for the variable offset or for the module handle), and |
173 | // then move the variable offset TOC load right before the node that |
174 | // uses the OutReg of the .__tls_get_mod node. |
175 | unsigned LDTocOp = |
176 | Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc) |
177 | : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc); |
178 | if (!RegInfo.use_empty(RegNo: OutReg)) { |
179 | std::set<MachineInstr *> Uses; |
180 | // Collect all instructions that use the OutReg. |
181 | for (MachineOperand &MO : RegInfo.use_operands(Reg: OutReg)) |
182 | Uses.insert(x: MO.getParent()); |
183 | // Find the first user (e.g.: lwax/stfdx) of the OutReg within the |
184 | // current BB. |
185 | MachineBasicBlock::iterator UseIter = MBB.begin(); |
186 | for (MachineBasicBlock::iterator IE = MBB.end(); UseIter != IE; |
187 | ++UseIter) |
188 | if (Uses.count(x: &*UseIter)) |
189 | break; |
190 | |
191 | // Additional handling is required when UserIter (the first user |
192 | // of OutReg) is pointing to a valid node that loads from the TOC. |
193 | // Check the pattern and do the movement if the pattern matches. |
194 | if (UseIter != MBB.end()) { |
195 | // Collect all associated nodes that load from the TOC. Use |
196 | // hasOneDef() to guard against unexpected scenarios. |
197 | std::set<MachineInstr *> LoadFromTocs; |
198 | for (MachineOperand &MO : UseIter->operands()) |
199 | if (MO.isReg() && MO.isUse()) { |
200 | Register MOReg = MO.getReg(); |
201 | if (RegInfo.hasOneDef(RegNo: MOReg)) { |
202 | MachineInstr *Temp = |
203 | RegInfo.getOneDef(Reg: MOReg)->getParent(); |
204 | // For the current TLSLDAIX node, get the corresponding |
205 | // node that loads from the TOC for the InReg. Otherwise, |
206 | // Temp probably pointed to the variable offset TOC load |
207 | // we would like to move. |
208 | if (Temp == &MI && RegInfo.hasOneDef(RegNo: InReg)) |
209 | Temp = RegInfo.getOneDef(Reg: InReg)->getParent(); |
210 | if (Temp->getOpcode() == LDTocOp) |
211 | LoadFromTocs.insert(x: Temp); |
212 | } else { |
213 | // FIXME: analyze this scenario if there is one. |
214 | LoadFromTocs.clear(); |
215 | break; |
216 | } |
217 | } |
218 | |
219 | // Check the two nodes that loaded from the TOC: one should be |
220 | // "_$TLSML", and the other will be moved before the node that |
221 | // uses the OutReg of the .__tls_get_mod node. |
222 | if (LoadFromTocs.size() == 2) { |
223 | MachineBasicBlock::iterator TLSMLIter = MBB.end(); |
224 | MachineBasicBlock::iterator OffsetIter = MBB.end(); |
225 | // Make sure the two nodes that loaded from the TOC are within |
226 | // the current BB, and that one of them is from the "_$TLSML" |
227 | // pseudo symbol, while the other is from the variable. |
228 | for (MachineBasicBlock::iterator I = MBB.begin(), |
229 | IE = MBB.end(); |
230 | I != IE; ++I) |
231 | if (LoadFromTocs.count(x: &*I)) { |
232 | MachineOperand MO = I->getOperand(i: 1); |
233 | if (MO.isGlobal() && MO.getGlobal()->hasName() && |
234 | MO.getGlobal()->getName() == "_$TLSML" ) |
235 | TLSMLIter = I; |
236 | else |
237 | OffsetIter = I; |
238 | } |
239 | // Perform the movement when the desired scenario has been |
240 | // identified, which should be when both of the iterators are |
241 | // valid. |
242 | if (TLSMLIter != MBB.end() && OffsetIter != MBB.end()) |
243 | OffsetIter->moveBefore(MovePos: &*UseIter); |
244 | } |
245 | } |
246 | } |
247 | // The module-handle is copied into r3. The copy is followed by |
248 | // GETtlsMOD32AIX/GETtlsMOD64AIX. |
249 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: GPR3) |
250 | .addReg(RegNo: InReg); |
251 | // The call to .__tls_get_mod. |
252 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc2), DestReg: GPR3).addReg(RegNo: GPR3); |
253 | } else if (!IsTLSTPRelMI) { |
254 | // The variable offset and region handle (for TLSGD) are copied in |
255 | // r4 and r3. The copies are followed by |
256 | // GETtlsADDR32AIX/GETtlsADDR64AIX. |
257 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: GPR4) |
258 | .addReg(RegNo: MI.getOperand(i: 1).getReg()); |
259 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: GPR3) |
260 | .addReg(RegNo: MI.getOperand(i: 2).getReg()); |
261 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc2), DestReg: GPR3).addReg(RegNo: GPR3).addReg(RegNo: GPR4); |
262 | } else |
263 | // The opcode of GETtlsTpointer32AIX does not change, because later |
264 | // this instruction will be expanded into a call to .__get_tpointer, |
265 | // which will return the thread pointer into r3. |
266 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc2), DestReg: GPR3); |
267 | } else { |
268 | MachineInstr *Addi; |
269 | if (IsPCREL) { |
270 | Addi = BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc1), DestReg: GPR3).addImm(Val: 0); |
271 | } else { |
272 | // Expand into two ops built prior to the existing instruction. |
273 | assert(InReg != PPC::NoRegister && "Operand must be a register" ); |
274 | Addi = BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc1), DestReg: GPR3).addReg(RegNo: InReg); |
275 | } |
276 | |
277 | Addi->addOperand(Op: MI.getOperand(i: 2)); |
278 | |
279 | MachineInstr *Call = |
280 | (BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc2), DestReg: GPR3).addReg(RegNo: GPR3)); |
281 | if (IsPCREL) |
282 | Call->addOperand(Op: MI.getOperand(i: 2)); |
283 | else |
284 | Call->addOperand(Op: MI.getOperand(i: 3)); |
285 | } |
286 | if (NeedFence) |
287 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: PPC::ADJCALLSTACKUP)).addImm(Val: 0).addImm(Val: 0); |
288 | |
289 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: OutReg) |
290 | .addReg(RegNo: GPR3); |
291 | |
292 | // Move past the original instruction and remove it. |
293 | ++I; |
294 | MI.removeFromParent(); |
295 | |
296 | Changed = true; |
297 | } |
298 | |
299 | return Changed; |
300 | } |
301 | |
302 | public: |
303 | bool isPCREL(const MachineInstr &MI) { |
304 | return (MI.getOpcode() == PPC::PADDI8pc) && |
305 | (MI.getOperand(i: 2).getTargetFlags() == |
306 | PPCII::MO_GOT_TLSGD_PCREL_FLAG || |
307 | MI.getOperand(i: 2).getTargetFlags() == |
308 | PPCII::MO_GOT_TLSLD_PCREL_FLAG); |
309 | } |
310 | |
311 | bool runOnMachineFunction(MachineFunction &MF) override { |
312 | TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo(); |
313 | |
314 | bool Changed = false; |
315 | |
316 | for (MachineBasicBlock &B : llvm::make_early_inc_range(Range&: MF)) |
317 | if (processBlock(MBB&: B)) |
318 | Changed = true; |
319 | |
320 | return Changed; |
321 | } |
322 | |
323 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
324 | AU.addRequired<LiveIntervalsWrapperPass>(); |
325 | AU.addRequired<SlotIndexesWrapperPass>(); |
326 | MachineFunctionPass::getAnalysisUsage(AU); |
327 | } |
328 | }; |
329 | } |
330 | |
331 | INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE, |
332 | "PowerPC TLS Dynamic Call Fixup" , false, false) |
333 | INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) |
334 | INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) |
335 | INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE, |
336 | "PowerPC TLS Dynamic Call Fixup" , false, false) |
337 | |
338 | char PPCTLSDynamicCall::ID = 0; |
339 | FunctionPass* |
340 | llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); } |
341 | |