//===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass expands ADDItls{ld,gd}LADDR[32] machine instructions into
// separate ADDItls[gd]L[32] and GETtlsADDR[32] instructions, both of
// which define GPR3.  A copy is added from GPR3 to the target virtual
// register of the original instruction.  The GETtlsADDR[32] is really
// a call instruction, so its target register is constrained to be GPR3.
// This is not true of ADDItls[gd]L[32], but there is a legacy linker
// optimization bug that requires the target register of the addi of
// a local- or general-dynamic TLS access sequence to be GPR3.
//
// This is done in a late pass so that TLS variable accesses can be
// fully commoned by MachineCSE.
//
//===----------------------------------------------------------------------===//
| 22 | |
#include "PPC.h"
#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <set>
| 32 | |
| 33 | using namespace llvm; |
| 34 | |
| 35 | #define DEBUG_TYPE "ppc-tls-dynamic-call" |
| 36 | |
| 37 | namespace { |
| 38 | struct PPCTLSDynamicCall : public MachineFunctionPass { |
| 39 | static char ID; |
| 40 | PPCTLSDynamicCall() : MachineFunctionPass(ID) {} |
| 41 | |
| 42 | const PPCInstrInfo *TII; |
| 43 | |
| 44 | protected: |
| 45 | bool processBlock(MachineBasicBlock &MBB) { |
| 46 | bool Changed = false; |
| 47 | bool NeedFence = true; |
| 48 | const PPCSubtarget &Subtarget = |
| 49 | MBB.getParent()->getSubtarget<PPCSubtarget>(); |
| 50 | bool Is64Bit = Subtarget.isPPC64(); |
| 51 | bool IsAIX = Subtarget.isAIXABI(); |
| 52 | bool IsLargeModel = |
| 53 | Subtarget.getTargetMachine().getCodeModel() == CodeModel::Large; |
| 54 | bool IsPCREL = false; |
| 55 | MachineFunction *MF = MBB.getParent(); |
| 56 | MachineRegisterInfo &RegInfo = MF->getRegInfo(); |
| 57 | |
| 58 | for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); |
| 59 | I != IE;) { |
| 60 | MachineInstr &MI = *I; |
| 61 | IsPCREL = isPCREL(MI); |
| 62 | // There are a number of slight differences in code generation |
| 63 | // when we call .__get_tpointer (32-bit AIX TLS). |
| 64 | bool IsTLSTPRelMI = MI.getOpcode() == PPC::GETtlsTpointer32AIX; |
| 65 | bool IsTLSLDAIXMI = (MI.getOpcode() == PPC::TLSLDAIX8 || |
| 66 | MI.getOpcode() == PPC::TLSLDAIX); |
| 67 | |
| 68 | if (MI.getOpcode() != PPC::ADDItlsgdLADDR && |
| 69 | MI.getOpcode() != PPC::ADDItlsldLADDR && |
| 70 | MI.getOpcode() != PPC::ADDItlsgdLADDR32 && |
| 71 | MI.getOpcode() != PPC::ADDItlsldLADDR32 && |
| 72 | MI.getOpcode() != PPC::TLSGDAIX && |
| 73 | MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL && |
| 74 | !IsTLSLDAIXMI) { |
| 75 | // Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP |
| 76 | // as scheduling fences, we skip creating fences if we already |
| 77 | // have existing ADJCALLSTACKDOWN/UP to avoid nesting, |
| 78 | // which causes verification error with -verify-machineinstrs. |
| 79 | if (MI.getOpcode() == PPC::ADJCALLSTACKDOWN) |
| 80 | NeedFence = false; |
| 81 | else if (MI.getOpcode() == PPC::ADJCALLSTACKUP) |
| 82 | NeedFence = true; |
| 83 | |
| 84 | ++I; |
| 85 | continue; |
| 86 | } |
| 87 | |
| 88 | LLVM_DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << MI); |
| 89 | |
| 90 | Register OutReg = MI.getOperand(i: 0).getReg(); |
| 91 | Register InReg = PPC::NoRegister; |
| 92 | Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3; |
| 93 | Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4; |
| 94 | if (!IsPCREL && !IsTLSTPRelMI) |
| 95 | InReg = MI.getOperand(i: 1).getReg(); |
| 96 | DebugLoc DL = MI.getDebugLoc(); |
| 97 | |
| 98 | unsigned Opc1, Opc2; |
| 99 | switch (MI.getOpcode()) { |
| 100 | default: |
| 101 | llvm_unreachable("Opcode inconsistency error" ); |
| 102 | case PPC::ADDItlsgdLADDR: |
| 103 | Opc1 = PPC::ADDItlsgdL; |
| 104 | Opc2 = PPC::GETtlsADDR; |
| 105 | break; |
| 106 | case PPC::ADDItlsldLADDR: |
| 107 | Opc1 = PPC::ADDItlsldL; |
| 108 | Opc2 = PPC::GETtlsldADDR; |
| 109 | break; |
| 110 | case PPC::ADDItlsgdLADDR32: |
| 111 | Opc1 = PPC::ADDItlsgdL32; |
| 112 | Opc2 = PPC::GETtlsADDR32; |
| 113 | break; |
| 114 | case PPC::ADDItlsldLADDR32: |
| 115 | Opc1 = PPC::ADDItlsldL32; |
| 116 | Opc2 = PPC::GETtlsldADDR32; |
| 117 | break; |
| 118 | case PPC::TLSLDAIX: |
| 119 | // TLSLDAIX is expanded to one copy and GET_TLS_MOD, so we only set |
| 120 | // Opc2 here. |
| 121 | Opc2 = PPC::GETtlsMOD32AIX; |
| 122 | break; |
| 123 | case PPC::TLSLDAIX8: |
| 124 | // TLSLDAIX8 is expanded to one copy and GET_TLS_MOD, so we only set |
| 125 | // Opc2 here. |
| 126 | Opc2 = PPC::GETtlsMOD64AIX; |
| 127 | break; |
| 128 | case PPC::TLSGDAIX8: |
| 129 | // TLSGDAIX8 is expanded to two copies and GET_TLS_ADDR, so we only |
| 130 | // set Opc2 here. |
| 131 | Opc2 = PPC::GETtlsADDR64AIX; |
| 132 | break; |
| 133 | case PPC::TLSGDAIX: |
| 134 | // TLSGDAIX is expanded to two copies and GET_TLS_ADDR, so we only |
| 135 | // set Opc2 here. |
| 136 | Opc2 = PPC::GETtlsADDR32AIX; |
| 137 | break; |
| 138 | case PPC::GETtlsTpointer32AIX: |
| 139 | // GETtlsTpointer32AIX is expanded to a call to GET_TPOINTER on AIX |
| 140 | // 32-bit mode within PPCAsmPrinter. This instruction does not need |
| 141 | // to change, so Opc2 is set to the same instruction opcode. |
| 142 | Opc2 = PPC::GETtlsTpointer32AIX; |
| 143 | break; |
| 144 | case PPC::PADDI8pc: |
| 145 | assert(IsPCREL && "Expecting General/Local Dynamic PCRel" ); |
| 146 | Opc1 = PPC::PADDI8pc; |
| 147 | Opc2 = MI.getOperand(i: 2).getTargetFlags() == |
| 148 | PPCII::MO_GOT_TLSGD_PCREL_FLAG |
| 149 | ? PPC::GETtlsADDRPCREL |
| 150 | : PPC::GETtlsldADDRPCREL; |
| 151 | } |
| 152 | |
| 153 | // We create ADJCALLSTACKUP and ADJCALLSTACKDOWN around _tls_get_addr |
| 154 | // as scheduling fence to avoid it is scheduled before |
| 155 | // mflr in the prologue and the address in LR is clobbered (PR25839). |
| 156 | // We don't really need to save data to the stack - the clobbered |
| 157 | // registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr) |
| 158 | // gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR). |
| 159 | if (NeedFence) { |
| 160 | MBB.getParent()->getFrameInfo().setAdjustsStack(true); |
| 161 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: PPC::ADJCALLSTACKDOWN)).addImm(Val: 0) |
| 162 | .addImm(Val: 0); |
| 163 | } |
| 164 | |
| 165 | if (IsAIX) { |
| 166 | if (IsTLSLDAIXMI) { |
| 167 | // The relative order between the node that loads the variable |
| 168 | // offset from the TOC, and the .__tls_get_mod node is being tuned |
| 169 | // here. It is better to put the variable offset TOC load after the |
| 170 | // call, since this node can use clobbers r4/r5. |
| 171 | // Search for the pattern of the two nodes that load from the TOC |
| 172 | // (either for the variable offset or for the module handle), and |
| 173 | // then move the variable offset TOC load right before the node that |
| 174 | // uses the OutReg of the .__tls_get_mod node. |
| 175 | unsigned LDTocOp = |
| 176 | Is64Bit ? (IsLargeModel ? PPC::LDtocL : PPC::LDtoc) |
| 177 | : (IsLargeModel ? PPC::LWZtocL : PPC::LWZtoc); |
| 178 | if (!RegInfo.use_empty(RegNo: OutReg)) { |
| 179 | std::set<MachineInstr *> Uses; |
| 180 | // Collect all instructions that use the OutReg. |
| 181 | for (MachineOperand &MO : RegInfo.use_operands(Reg: OutReg)) |
| 182 | Uses.insert(x: MO.getParent()); |
| 183 | // Find the first user (e.g.: lwax/stfdx) of the OutReg within the |
| 184 | // current BB. |
| 185 | MachineBasicBlock::iterator UseIter = MBB.begin(); |
| 186 | for (MachineBasicBlock::iterator IE = MBB.end(); UseIter != IE; |
| 187 | ++UseIter) |
| 188 | if (Uses.count(x: &*UseIter)) |
| 189 | break; |
| 190 | |
| 191 | // Additional handling is required when UserIter (the first user |
| 192 | // of OutReg) is pointing to a valid node that loads from the TOC. |
| 193 | // Check the pattern and do the movement if the pattern matches. |
| 194 | if (UseIter != MBB.end()) { |
| 195 | // Collect all associated nodes that load from the TOC. Use |
| 196 | // hasOneDef() to guard against unexpected scenarios. |
| 197 | std::set<MachineInstr *> LoadFromTocs; |
| 198 | for (MachineOperand &MO : UseIter->operands()) |
| 199 | if (MO.isReg() && MO.isUse()) { |
| 200 | Register MOReg = MO.getReg(); |
| 201 | if (RegInfo.hasOneDef(RegNo: MOReg)) { |
| 202 | MachineInstr *Temp = |
| 203 | RegInfo.getOneDef(Reg: MOReg)->getParent(); |
| 204 | // For the current TLSLDAIX node, get the corresponding |
| 205 | // node that loads from the TOC for the InReg. Otherwise, |
| 206 | // Temp probably pointed to the variable offset TOC load |
| 207 | // we would like to move. |
| 208 | if (Temp == &MI && RegInfo.hasOneDef(RegNo: InReg)) |
| 209 | Temp = RegInfo.getOneDef(Reg: InReg)->getParent(); |
| 210 | if (Temp->getOpcode() == LDTocOp) |
| 211 | LoadFromTocs.insert(x: Temp); |
| 212 | } else { |
| 213 | // FIXME: analyze this scenario if there is one. |
| 214 | LoadFromTocs.clear(); |
| 215 | break; |
| 216 | } |
| 217 | } |
| 218 | |
| 219 | // Check the two nodes that loaded from the TOC: one should be |
| 220 | // "_$TLSML", and the other will be moved before the node that |
| 221 | // uses the OutReg of the .__tls_get_mod node. |
| 222 | if (LoadFromTocs.size() == 2) { |
| 223 | MachineBasicBlock::iterator TLSMLIter = MBB.end(); |
| 224 | MachineBasicBlock::iterator OffsetIter = MBB.end(); |
| 225 | // Make sure the two nodes that loaded from the TOC are within |
| 226 | // the current BB, and that one of them is from the "_$TLSML" |
| 227 | // pseudo symbol, while the other is from the variable. |
| 228 | for (MachineBasicBlock::iterator I = MBB.begin(), |
| 229 | IE = MBB.end(); |
| 230 | I != IE; ++I) |
| 231 | if (LoadFromTocs.count(x: &*I)) { |
| 232 | MachineOperand MO = I->getOperand(i: 1); |
| 233 | if (MO.isGlobal() && MO.getGlobal()->hasName() && |
| 234 | MO.getGlobal()->getName() == "_$TLSML" ) |
| 235 | TLSMLIter = I; |
| 236 | else |
| 237 | OffsetIter = I; |
| 238 | } |
| 239 | // Perform the movement when the desired scenario has been |
| 240 | // identified, which should be when both of the iterators are |
| 241 | // valid. |
| 242 | if (TLSMLIter != MBB.end() && OffsetIter != MBB.end()) |
| 243 | OffsetIter->moveBefore(MovePos: &*UseIter); |
| 244 | } |
| 245 | } |
| 246 | } |
| 247 | // The module-handle is copied into r3. The copy is followed by |
| 248 | // GETtlsMOD32AIX/GETtlsMOD64AIX. |
| 249 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: GPR3) |
| 250 | .addReg(RegNo: InReg); |
| 251 | // The call to .__tls_get_mod. |
| 252 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc2), DestReg: GPR3).addReg(RegNo: GPR3); |
| 253 | } else if (!IsTLSTPRelMI) { |
| 254 | // The variable offset and region handle (for TLSGD) are copied in |
| 255 | // r4 and r3. The copies are followed by |
| 256 | // GETtlsADDR32AIX/GETtlsADDR64AIX. |
| 257 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: GPR4) |
| 258 | .addReg(RegNo: MI.getOperand(i: 1).getReg()); |
| 259 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: GPR3) |
| 260 | .addReg(RegNo: MI.getOperand(i: 2).getReg()); |
| 261 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc2), DestReg: GPR3).addReg(RegNo: GPR3).addReg(RegNo: GPR4); |
| 262 | } else |
| 263 | // The opcode of GETtlsTpointer32AIX does not change, because later |
| 264 | // this instruction will be expanded into a call to .__get_tpointer, |
| 265 | // which will return the thread pointer into r3. |
| 266 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc2), DestReg: GPR3); |
| 267 | } else { |
| 268 | MachineInstr *Addi; |
| 269 | if (IsPCREL) { |
| 270 | Addi = BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc1), DestReg: GPR3).addImm(Val: 0); |
| 271 | } else { |
| 272 | // Expand into two ops built prior to the existing instruction. |
| 273 | assert(InReg != PPC::NoRegister && "Operand must be a register" ); |
| 274 | Addi = BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc1), DestReg: GPR3).addReg(RegNo: InReg); |
| 275 | } |
| 276 | |
| 277 | Addi->addOperand(Op: MI.getOperand(i: 2)); |
| 278 | |
| 279 | MachineInstr *Call = |
| 280 | (BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: Opc2), DestReg: GPR3).addReg(RegNo: GPR3)); |
| 281 | if (IsPCREL) |
| 282 | Call->addOperand(Op: MI.getOperand(i: 2)); |
| 283 | else |
| 284 | Call->addOperand(Op: MI.getOperand(i: 3)); |
| 285 | } |
| 286 | if (NeedFence) |
| 287 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: PPC::ADJCALLSTACKUP)).addImm(Val: 0).addImm(Val: 0); |
| 288 | |
| 289 | BuildMI(BB&: MBB, I, MIMD: DL, MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: OutReg) |
| 290 | .addReg(RegNo: GPR3); |
| 291 | |
| 292 | // Move past the original instruction and remove it. |
| 293 | ++I; |
| 294 | MI.removeFromParent(); |
| 295 | |
| 296 | Changed = true; |
| 297 | } |
| 298 | |
| 299 | return Changed; |
| 300 | } |
| 301 | |
| 302 | public: |
| 303 | bool isPCREL(const MachineInstr &MI) { |
| 304 | return (MI.getOpcode() == PPC::PADDI8pc) && |
| 305 | (MI.getOperand(i: 2).getTargetFlags() == |
| 306 | PPCII::MO_GOT_TLSGD_PCREL_FLAG || |
| 307 | MI.getOperand(i: 2).getTargetFlags() == |
| 308 | PPCII::MO_GOT_TLSLD_PCREL_FLAG); |
| 309 | } |
| 310 | |
| 311 | bool runOnMachineFunction(MachineFunction &MF) override { |
| 312 | TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo(); |
| 313 | |
| 314 | bool Changed = false; |
| 315 | |
| 316 | for (MachineBasicBlock &B : llvm::make_early_inc_range(Range&: MF)) |
| 317 | if (processBlock(MBB&: B)) |
| 318 | Changed = true; |
| 319 | |
| 320 | return Changed; |
| 321 | } |
| 322 | |
| 323 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 324 | AU.addRequired<LiveIntervalsWrapperPass>(); |
| 325 | AU.addRequired<SlotIndexesWrapperPass>(); |
| 326 | MachineFunctionPass::getAnalysisUsage(AU); |
| 327 | } |
| 328 | }; |
| 329 | } |
| 330 | |
| 331 | INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE, |
| 332 | "PowerPC TLS Dynamic Call Fixup" , false, false) |
| 333 | INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) |
| 334 | INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) |
| 335 | INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE, |
| 336 | "PowerPC TLS Dynamic Call Fixup" , false, false) |
| 337 | |
| 338 | char PPCTLSDynamicCall::ID = 0; |
| 339 | FunctionPass* |
| 340 | llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); } |
| 341 | |