//==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86 --=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Pass that injects an MI thunk that is used to lower indirect calls in a way
/// that prevents speculation on some x86 processors and can be used to mitigate
/// security vulnerabilities due to targeted speculative execution and side
/// channels such as CVE-2017-5715.
///
/// Currently supported thunks include:
/// - Retpoline -- A RET-implemented trampoline that lowers indirect calls
/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization
///   before making an indirect call/jump
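///
/// For example, with retpoline enabled on x86-64, call lowering materializes
/// an indirect call target into %r11 and emits a direct call to the thunk,
/// turning (roughly):
///
///     callq *%r11
///
/// into:
///
///     callq __llvm_retpoline_r11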
///
/// Note that the reason that this is implemented as a MachineFunctionPass and
/// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline
/// serialize all transformations, which can consume lots of memory.
///
/// TODO(chandlerc): All of this code could use better comments and
/// documentation.
///
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/IndirectThunks.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "x86-retpoline-thunks"

static const char RetpolineNamePrefix[] = "__llvm_retpoline_";
static const char R11RetpolineName[] = "__llvm_retpoline_r11";
static const char EAXRetpolineName[] = "__llvm_retpoline_eax";
static const char ECXRetpolineName[] = "__llvm_retpoline_ecx";
static const char EDXRetpolineName[] = "__llvm_retpoline_edx";
static const char EDIRetpolineName[] = "__llvm_retpoline_edi";

static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_";
static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11";
namespace {
struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> {
  const char *getThunkPrefix() { return RetpolineNamePrefix; }
  bool mayUseThunk(const MachineFunction &MF) {
    const auto &STI = MF.getSubtarget<X86Subtarget>();
    return (STI.useRetpolineIndirectCalls() ||
            STI.useRetpolineIndirectBranches()) &&
           !STI.useRetpolineExternalThunk();
  }
  bool insertThunks(MachineModuleInfo &MMI, MachineFunction &MF,
                    bool ExistingThunks);
  void populateThunk(MachineFunction &MF);
};

struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
  const char *getThunkPrefix() { return LVIThunkNamePrefix; }
  bool mayUseThunk(const MachineFunction &MF) {
    return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity();
  }
  bool insertThunks(MachineModuleInfo &MMI, MachineFunction &MF,
                    bool ExistingThunks) {
    if (ExistingThunks)
      return false;
    createThunkFunction(MMI, R11LVIThunkName);
    return true;
  }
  void populateThunk(MachineFunction &MF) {
    assert(MF.size() == 1);
    MachineBasicBlock *Entry = &MF.front();
    Entry->clear();

    // This code mitigates LVI by replacing each indirect call/jump with a
    // direct call/jump to a thunk that looks like:
    // ```
    // lfence
    // jmpq *%r11
    // ```
    // This ensures that if the value in register %r11 was loaded from memory,
    // then the value in %r11 is (architecturally) correct prior to the jump.
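    //
    // For example (roughly; the call lowering first materializes the target
    // pointer into %r11), a protected call site becomes:
    //
    //   callq *%r11   ->   callq __llvm_lvi_thunk_r11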
    const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
    BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE));
    BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11);
    MF.front().addLiveIn(X86::R11);
  }
};

class X86IndirectThunks
    : public ThunkInserterPass<RetpolineThunkInserter, LVIThunkInserter> {
public:
  static char ID;

  X86IndirectThunks() : ThunkInserterPass(ID) {}

  StringRef getPassName() const override { return "X86 Indirect Thunks"; }
};

} // end anonymous namespace

bool RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI,
                                          MachineFunction &MF,
                                          bool ExistingThunks) {
  if (ExistingThunks)
    return false;
  if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64)
    createThunkFunction(MMI, R11RetpolineName);
  else
    for (StringRef Name : {EAXRetpolineName, ECXRetpolineName, EDXRetpolineName,
                           EDIRetpolineName})
      createThunkFunction(MMI, Name);
  return true;
}

void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
  bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64;
  Register ThunkReg;
  if (Is64Bit) {
    assert(MF.getName() == "__llvm_retpoline_r11" &&
           "Should only have an r11 thunk on 64-bit targets");

    // __llvm_retpoline_r11:
    //   callq .Lr11_call_target
    // .Lr11_capture_spec:
    //   pause
    //   lfence
    //   jmp .Lr11_capture_spec
    // .align 16
    // .Lr11_call_target:
    //   movq %r11, (%rsp)
    //   retq
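    //
    // The callq pushes the address of .Lr11_capture_spec as the return
    // address, and the movq then overwrites that stack slot with the real
    // target from %r11. The retq thus (architecturally) transfers to the
    // target, while the return predictor speculates into the capture loop,
    // which never escapes.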
    ThunkReg = X86::R11;
  } else {
    // For 32-bit targets we need to emit a collection of thunks for various
    // possible scratch registers as well as a fallback that uses EDI, which is
    // normally callee saved.
    // __llvm_retpoline_eax:
    //   calll .Leax_call_target
    // .Leax_capture_spec:
    //   pause
    //   lfence
    //   jmp .Leax_capture_spec
    // .align 16
    // .Leax_call_target:
    //   movl %eax, (%esp)  # Clobber return addr
    //   retl
    //
    // __llvm_retpoline_ecx:
    //   ...  # Same setup
    //   movl %ecx, (%esp)
    //   retl
    //
    // __llvm_retpoline_edx:
    //   ...  # Same setup
    //   movl %edx, (%esp)
    //   retl
    //
    // __llvm_retpoline_edi:
    //   ...  # Same setup
    //   movl %edi, (%esp)
    //   retl
    if (MF.getName() == EAXRetpolineName)
      ThunkReg = X86::EAX;
    else if (MF.getName() == ECXRetpolineName)
      ThunkReg = X86::ECX;
    else if (MF.getName() == EDXRetpolineName)
      ThunkReg = X86::EDX;
    else if (MF.getName() == EDIRetpolineName)
      ThunkReg = X86::EDI;
    else
      llvm_unreachable("Invalid thunk name on x86-32!");
  }

  const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
  assert(MF.size() == 1);
  MachineBasicBlock *Entry = &MF.front();
  Entry->clear();

  MachineBasicBlock *CaptureSpec =
      MF.CreateMachineBasicBlock(Entry->getBasicBlock());
  MachineBasicBlock *CallTarget =
      MF.CreateMachineBasicBlock(Entry->getBasicBlock());
  MCSymbol *TargetSym = MF.getContext().createTempSymbol();
  MF.push_back(CaptureSpec);
  MF.push_back(CallTarget);

  const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
  const unsigned RetOpc = Is64Bit ? X86::RET64 : X86::RET32;

  Entry->addLiveIn(ThunkReg);
  BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);

  // The MIR verifier thinks that the CALL in the entry block will fall through
  // to CaptureSpec, so mark it as the successor. Technically, CallTarget is
  // the successor, but the MIR verifier doesn't know how to cope with that.
  Entry->addSuccessor(CaptureSpec);

  // In the capture loop for speculation, we want to stop the processor from
  // speculating as fast as possible. On Intel processors, the PAUSE
  // instruction will block speculation without consuming any execution
  // resources. On AMD processors, the PAUSE instruction is (essentially) a
  // nop, so we also use an LFENCE instruction which they have advised will
  // stop speculation as well with minimal resource utilization. We still end
  // the capture with a jump to form an infinite loop to fully guarantee that,
  // no matter the implementation of the x86 ISA, speculation down this code
  // path can never escape.
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
  CaptureSpec->setMachineBlockAddressTaken();
  CaptureSpec->addSuccessor(CaptureSpec);

  CallTarget->addLiveIn(ThunkReg);
  CallTarget->setMachineBlockAddressTaken();
  CallTarget->setAlignment(Align(16));

  // Insert the return address clobber: store ThunkReg over the return address
  // pushed by the entry block's call so the terminating RET jumps to it.
  const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
  const Register SPReg = Is64Bit ? X86::RSP : X86::ESP;
  addRegOffset(BuildMI(CallTarget, DebugLoc(), TII->get(MovOpc)), SPReg, false,
               0)
      .addReg(ThunkReg);

  CallTarget->back().setPreInstrSymbol(MF, TargetSym);
  BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}

FunctionPass *llvm::createX86IndirectThunksPass() {
  return new X86IndirectThunks();
}

char X86IndirectThunks::ID = 0;