//===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass expands ADDItls{ld,gd}LADDR[32] machine instructions into
// separate ADDItls{ld,gd}L[32] and GETtls[ld]ADDR[32] instructions, both of
// which define GPR3.  A copy is added from GPR3 to the target virtual
// register of the original instruction.  The GETtls[ld]ADDR[32] is really
// a call instruction, so its target register is constrained to be GPR3.
// This is not true of ADDItls{ld,gd}L[32], but there is a legacy linker
// optimization bug that requires the target register of the addi of
// a local- or general-dynamic TLS access sequence to be GPR3.
//
// This is done in a late pass so that TLS variable accesses can be
// fully commoned by MachineCSE.
//
//===----------------------------------------------------------------------===//
22 | |
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <set>
34 | |
35 | using namespace llvm; |
36 | |
37 | #define DEBUG_TYPE "ppc-tls-dynamic-call" |
38 | |
39 | namespace { |
40 | struct PPCTLSDynamicCall : public MachineFunctionPass { |
41 | static char ID; |
42 | PPCTLSDynamicCall() : MachineFunctionPass(ID) { |
43 | initializePPCTLSDynamicCallPass(*PassRegistry::getPassRegistry()); |
44 | } |
45 | |
46 | const PPCInstrInfo *TII; |
47 | |
48 | protected: |
49 | bool processBlock(MachineBasicBlock &MBB) { |
50 | bool Changed = false; |
51 | bool NeedFence = true; |
52 | const PPCSubtarget &Subtarget = |
53 | MBB.getParent()->getSubtarget<PPCSubtarget>(); |
54 | bool Is64Bit = Subtarget.isPPC64(); |
55 | bool IsAIX = Subtarget.isAIXABI(); |
56 | bool IsLargeModel = |
57 | Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large; |
58 | bool IsPCREL = false; |
59 | MachineFunction *MF = MBB.getParent(); |
60 | MachineRegisterInfo &RegInfo = MF->getRegInfo(); |
61 | |
62 | for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); |
63 | I != IE;) { |
64 | MachineInstr &MI = *I; |
65 | IsPCREL = isPCREL(MI); |
66 | // There are a number of slight differences in code generation |
67 | // when we call .__get_tpointer (32-bit AIX TLS). |
68 | bool IsTLSTPRelMI = MI.getOpcode() == PPC::GETtlsTpointer32AIX; |
69 | bool IsTLSLDAIXMI = (MI.getOpcode() == PPC::TLSLDAIX8 || |
70 | MI.getOpcode() == PPC::TLSLDAIX); |
71 | |
72 | if (MI.getOpcode() != PPC::ADDItlsgdLADDR && |
73 | MI.getOpcode() != PPC::ADDItlsldLADDR && |
74 | MI.getOpcode() != PPC::ADDItlsgdLADDR32 && |
75 | MI.getOpcode() != PPC::ADDItlsldLADDR32 && |
76 | MI.getOpcode() != PPC::TLSGDAIX && |
77 | MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL && |
78 | !IsTLSLDAIXMI) { |
79 | // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP |
80 | // as scheduling fences, we skip creating fences if we already |
81 | // have existing ADJCALLSTACKDOWN/UP to avoid nesting, |
82 | // which causes verification error with -verify-machineinstrs. |
83 | if (MI.getOpcode() == PPC::ADJCALLSTACKDOWN) |
84 | NeedFence = false; |
85 | else if (MI.getOpcode() == PPC::ADJCALLSTACKUP) |
86 | NeedFence = true; |
87 | |
88 | ++I; |
89 | continue; |
90 | } |
91 | |
92 | LLVM_DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << MI); |
93 | |
94 | Register OutReg = MI.getOperand(i: 0).getReg(); |
95 | Register InReg = PPC::NoRegister; |
96 | Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3; |
97 | Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4; |
98 | if (!IsPCREL && !IsTLSTPRelMI) |
99 | InReg = MI.getOperand(i: 1).getReg(); |
100 | DebugLoc DL = MI.getDebugLoc(); |
101 | |
102 | unsigned Opc1, Opc2; |
103 | switch (MI.getOpcode()) { |
104 | default: |
105 | llvm_unreachable("Opcode inconsistency error" ); |
106 | case PPC::ADDItlsgdLADDR: |
107 | Opc1 = PPC::ADDItlsgdL; |
108 | Opc2 = PPC::GETtlsADDR; |
109 | break; |
110 | case PPC::ADDItlsldLADDR: |
111 | Opc1 = PPC::ADDItlsldL; |
112 | Opc2 = PPC::GETtlsldADDR; |
113 | break; |
114 | case PPC::ADDItlsgdLADDR32: |
115 | Opc1 = PPC::ADDItlsgdL32; |
116 | Opc2 = PPC::GETtlsADDR32; |
117 | break; |
118 | case PPC::ADDItlsldLADDR32: |
119 | Opc1 = PPC::ADDItlsldL32; |
120 | Opc2 = PPC::GETtlsldADDR32; |
121 | break; |
122 | case PPC::TLSLDAIX: |
123 | // TLSLDAIX is expanded to one copy and GET_TLS_MOD, so we only set |
124 | // Opc2 here. |
125 | Opc2 = PPC::GETtlsMOD32AIX; |
126 | break; |
127 | case PPC::TLSLDAIX8: |
128 | // TLSLDAIX8 is expanded to one copy and GET_TLS_MOD, so we only set |
129 | // Opc2 here. |
130 | Opc2 = PPC::GETtlsMOD64AIX; |
131 | break; |
132 | case PPC::TLSGDAIX8: |
133 | // TLSGDAIX8 is expanded to two copies and GET_TLS_ADDR, so we only |
134 | // set Opc2 here. |
135 | Opc2 = PPC::GETtlsADDR64AIX; |
136 | break; |
137 | case PPC::TLSGDAIX: |
138 | // TLSGDAIX is expanded to two copies and GET_TLS_ADDR, so we only |
139 | // set Opc2 here. |
140 | Opc2 = PPC::GETtlsADDR32AIX; |
141 | break; |
142 | case PPC::GETtlsTpointer32AIX: |
143 | // GETtlsTpointer32AIX is expanded to a call to GET_TPOINTER on AIX |
144 | // 32-bit mode within PPCAsmPrinter. This instruction does not need |
145 | // to change, so Opc2 is set to the same instruction opcode. |
146 | Opc2 = PPC::GETtlsTpointer32AIX; |
147 | break; |
148 | case PPC::PADDI8pc: |
149 | assert(IsPCREL && "Expecting General/Local Dynamic PCRel" ); |
150 | Opc1 = PPC::PADDI8pc; |
151 | Opc2 = MI.getOperand(i: 2).getTargetFlags() == |
152 | PPCII::MO_GOT_TLSGD_PCREL_FLAG |
153 | ? PPC::GETtlsADDRPCREL |
154 | : PPC::GETtlsldADDRPCREL; |
155 | } |
156 | |
157 | // We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr |
158 | // as scheduling fence to avoid it is scheduled before |
159 | // mflr in the prologue and the address in LR is clobbered (PR25839). |
160 | // We don't really need to save data to the stack - the clobbered |
161 | // registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr) |
162 | // gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR). |
163 | if (NeedFence) { |
164 | MBB.getParent()->getFrameInfo().setAdjustsStack(true); |
165 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: PPC::ADJCALLSTACKDOWN)).addImm(Val: 0) |
166 | .addImm(Val: 0); |
167 | } |
168 | |
169 | if (IsAIX) { |
170 | if (IsTLSLDAIXMI) { |
171 | // The relative order between the node that loads the variable |
172 | // offset from the TOC, and the .__tls_get_mod node is being tuned |
173 | // here. It is better to put the variable offset TOC load after the |
174 | // call, since this node can use clobbers r4/r5. |
175 | // Search for the pattern of the two nodes that load from the TOC |
176 | // (either for the variable offset or for the module handle), and |
177 | // then move the variable offset TOC load right before the node that |
178 | // uses the OutReg of the .__tls_get_mod node. |
179 | unsigned LDTocOp = |
180 | Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc) |
181 | : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc); |
182 | if (!RegInfo.use_empty(RegNo: OutReg)) { |
183 | std::set<MachineInstr *> Uses; |
184 | // Collect all instructions that use the OutReg. |
185 | for (MachineOperand &MO : RegInfo.use_operands(Reg: OutReg)) |
186 | Uses.insert(x: MO.getParent()); |
187 | // Find the first user (e.g.: lwax/stfdx) of the OutReg within the |
188 | // current BB. |
189 | MachineBasicBlock::iterator UseIter = MBB.begin(); |
190 | for (MachineBasicBlock::iterator IE = MBB.end(); UseIter != IE; |
191 | ++UseIter) |
192 | if (Uses.count(x: &*UseIter)) |
193 | break; |
194 | |
195 | // Additional handling is required when UserIter (the first user |
196 | // of OutReg) is pointing to a valid node that loads from the TOC. |
197 | // Check the pattern and do the movement if the pattern matches. |
198 | if (UseIter != MBB.end()) { |
199 | // Collect all associated nodes that load from the TOC. Use |
200 | // hasOneDef() to guard against unexpected scenarios. |
201 | std::set<MachineInstr *> LoadFromTocs; |
202 | for (MachineOperand &MO : UseIter->operands()) |
203 | if (MO.isReg() && MO.isUse()) { |
204 | Register MOReg = MO.getReg(); |
205 | if (RegInfo.hasOneDef(RegNo: MOReg)) { |
206 | MachineInstr *Temp = |
207 | RegInfo.getOneDef(Reg: MOReg)->getParent(); |
208 | // For the current TLSLDAIX node, get the corresponding |
209 | // node that loads from the TOC for the InReg. Otherwise, |
210 | // Temp probably pointed to the variable offset TOC load |
211 | // we would like to move. |
212 | if (Temp == &MI && RegInfo.hasOneDef(RegNo: InReg)) |
213 | Temp = RegInfo.getOneDef(Reg: InReg)->getParent(); |
214 | if (Temp->getOpcode() == LDTocOp) |
215 | LoadFromTocs.insert(x: Temp); |
216 | } else { |
217 | // FIXME: analyze this scenario if there is one. |
218 | LoadFromTocs.clear(); |
219 | break; |
220 | } |
221 | } |
222 | |
223 | // Check the two nodes that loaded from the TOC: one should be |
224 | // "_$TLSML", and the other will be moved before the node that |
225 | // uses the OutReg of the .__tls_get_mod node. |
226 | if (LoadFromTocs.size() == 2) { |
227 | MachineBasicBlock::iterator TLSMLIter = MBB.end(); |
228 | MachineBasicBlock::iterator OffsetIter = MBB.end(); |
229 | // Make sure the two nodes that loaded from the TOC are within |
230 | // the current BB, and that one of them is from the "_$TLSML" |
231 | // pseudo symbol, while the other is from the variable. |
232 | for (MachineBasicBlock::iterator I = MBB.begin(), |
233 | IE = MBB.end(); |
234 | I != IE; ++I) |
235 | if (LoadFromTocs.count(x: &*I)) { |
236 | MachineOperand MO = I->getOperand(i: 1); |
237 | if (MO.isGlobal() && MO.getGlobal()->hasName() && |
238 | MO.getGlobal()->getName() == "_$TLSML" ) |
239 | TLSMLIter = I; |
240 | else |
241 | OffsetIter = I; |
242 | } |
243 | // Perform the movement when the desired scenario has been |
244 | // identified, which should be when both of the iterators are |
245 | // valid. |
246 | if (TLSMLIter != MBB.end() && OffsetIter != MBB.end()) |
247 | OffsetIter->moveBefore(MovePos: &*UseIter); |
248 | } |
249 | } |
250 | } |
251 | // The module-handle is copied into r3. The copy is followed by |
252 | // GETtlsMOD32AIX/GETtlsMOD64AIX. |
253 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: GPR3) |
254 | .addReg(RegNo: InReg); |
255 | // The call to .__tls_get_mod. |
256 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc2), DestReg: GPR3).addReg(RegNo: GPR3); |
257 | } else if (!IsTLSTPRelMI) { |
258 | // The variable offset and region handle (for TLSGD) are copied in |
259 | // r4 and r3. The copies are followed by |
260 | // GETtlsADDR32AIX/GETtlsADDR64AIX. |
261 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: GPR4) |
262 | .addReg(RegNo: MI.getOperand(i: 1).getReg()); |
263 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: GPR3) |
264 | .addReg(RegNo: MI.getOperand(i: 2).getReg()); |
265 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc2), DestReg: GPR3).addReg(RegNo: GPR3).addReg(RegNo: GPR4); |
266 | } else |
267 | // The opcode of GETtlsTpointer32AIX does not change, because later |
268 | // this instruction will be expanded into a call to .__get_tpointer, |
269 | // which will return the thread pointer into r3. |
270 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc2), DestReg: GPR3); |
271 | } else { |
272 | MachineInstr *Addi; |
273 | if (IsPCREL) { |
274 | Addi = BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc1), DestReg: GPR3).addImm(Val: 0); |
275 | } else { |
276 | // Expand into two ops built prior to the existing instruction. |
277 | assert(InReg != PPC::NoRegister && "Operand must be a register" ); |
278 | Addi = BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc1), DestReg: GPR3).addReg(RegNo: InReg); |
279 | } |
280 | |
281 | Addi->addOperand(Op: MI.getOperand(i: 2)); |
282 | |
283 | MachineInstr *Call = |
284 | (BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc2), DestReg: GPR3).addReg(RegNo: GPR3)); |
285 | if (IsPCREL) |
286 | Call->addOperand(Op: MI.getOperand(i: 2)); |
287 | else |
288 | Call->addOperand(Op: MI.getOperand(i: 3)); |
289 | } |
290 | if (NeedFence) |
291 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: PPC::ADJCALLSTACKUP)).addImm(Val: 0).addImm(Val: 0); |
292 | |
293 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: OutReg) |
294 | .addReg(RegNo: GPR3); |
295 | |
296 | // Move past the original instruction and remove it. |
297 | ++I; |
298 | MI.removeFromParent(); |
299 | |
300 | Changed = true; |
301 | } |
302 | |
303 | return Changed; |
304 | } |
305 | |
306 | public: |
307 | bool isPCREL(const MachineInstr &MI) { |
308 | return (MI.getOpcode() == PPC::PADDI8pc) && |
309 | (MI.getOperand(i: 2).getTargetFlags() == |
310 | PPCII::MO_GOT_TLSGD_PCREL_FLAG || |
311 | MI.getOperand(i: 2).getTargetFlags() == |
312 | PPCII::MO_GOT_TLSLD_PCREL_FLAG); |
313 | } |
314 | |
315 | bool runOnMachineFunction(MachineFunction &MF) override { |
316 | TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo(); |
317 | |
318 | bool Changed = false; |
319 | |
320 | for (MachineBasicBlock &B : llvm::make_early_inc_range(Range&: MF)) |
321 | if (processBlock(MBB&: B)) |
322 | Changed = true; |
323 | |
324 | return Changed; |
325 | } |
326 | |
327 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
328 | AU.addRequired<LiveIntervalsWrapperPass>(); |
329 | AU.addRequired<SlotIndexesWrapperPass>(); |
330 | MachineFunctionPass::getAnalysisUsage(AU); |
331 | } |
332 | }; |
333 | } |
334 | |
335 | INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE, |
336 | "PowerPC TLS Dynamic Call Fixup" , false, false) |
337 | INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) |
338 | INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) |
339 | INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE, |
340 | "PowerPC TLS Dynamic Call Fixup" , false, false) |
341 | |
342 | char PPCTLSDynamicCall::ID = 0; |
343 | FunctionPass* |
344 | llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); } |
345 | |