//===- AArch64SpeculationHardening.cpp - Harden Against Misspeculation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass to insert code to mitigate against side channel
// vulnerabilities that may happen under control flow misspeculation.
//
// The pass implements tracking of control flow misspeculation into a "taint"
// register. That taint register can then be used to mask off registers with
// sensitive data when executing under misspeculation, a.k.a. "transient
// execution".
// This pass is aimed at mitigating against SpectreV1-style vulnerabilities.
//
// It also implements speculative load hardening, i.e. using the taint register
// to automatically mask off loaded data.
//
// As a possible follow-on improvement, an intrinsics-based approach, as
// explained at https://lwn.net/Articles/759423/, could also be implemented on
// top of the current design.
//
// For AArch64, the following implementation choices are made to track control
// flow misspeculation in a taint register. Some of these choices differ from
// those made in the similar pass implemented in
// X86SpeculativeLoadHardening.cpp, as the instruction set characteristics
// result in different trade-offs:
// - The speculation hardening is done after register allocation. With a
//   relative abundance of registers, one register is reserved (X16) to be
//   the taint register. X16 is expected not to clash with other register
//   reservation mechanisms with very high probability because:
//   . The AArch64 ABI doesn't guarantee X16 to be retained across any call.
//   . The only way for a programmer to request that X16 be used is through
//     inline assembly. In the rare case a function explicitly demands to
//     use X16/W16, this pass falls back to hardening against speculation
//     by inserting a DSB SYS/ISB barrier pair, which will prevent control
//     flow speculation.
// - It is easy to insert mask operations at this late stage, as mask
//   operations are available that don't set flags.
// - The taint register contains all-ones when no misspeculation is detected,
//   and contains all-zeros when misspeculation is detected. Therefore, when
//   masking, an AND instruction (which only changes the register to be masked
//   and has no other side effects) can easily be inserted anywhere it's
//   needed.
// - The tracking of misspeculation is done by using a data-flow conditional
//   select instruction (CSEL) to evaluate the flags that were also used to
//   make conditional branch direction decisions. Speculation of the CSEL
//   instruction can be limited with a CSDB instruction - so the combination of
//   CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL
//   aren't speculated. When a conditional branch direction gets misspeculated,
//   the semantics of the inserted CSEL instruction are such that the taint
//   register will contain all zero bits.
//   One key requirement for this to work is that the conditional branch is
//   followed by an execution of the CSEL instruction, where the CSEL
//   instruction needs to use the same flags status as the conditional branch.
//   This means that the conditional branches must not be implemented as one
//   of the AArch64 conditional branches that do not use the flags as input
//   (CB(N)Z and TB(N)Z). This is implemented by ensuring in the instruction
//   selectors to not produce these instructions when speculation hardening
//   is enabled. This pass will assert if it does encounter such an
//   instruction. (See the illustrative sequence below.)
// - On function call boundaries, the misspeculation state is transferred from
//   the taint register X16 to be encoded in the SP register as value 0, and
//   recovered from SP on the other side (also sketched below).
//
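// As an illustrative sketch (not verbatim output of this pass), a conditional
// branch such as
//   CMP  x0, x1
//   B.LT .Ltaken
// gets the following tracking code inserted on its outgoing edges:
//   .Ltaken:                    ; taken edge
//     CSEL x16, x16, xzr, lt    ; x16 := 0 if this edge was misspeculated
//   .Lfallthrough:              ; not-taken edge, with the inverted condition
//     CSEL x16, x16, xzr, ge
// and the state transfer through SP around a call looks like:
//   MOV   xtmp, sp
//   AND   xtmp, xtmp, x16       ; SP becomes 0 when misspeculating
//   MOV   sp, xtmp
//   BL    callee
//   CMP   sp, #0                ; after the call: recover the taint from SP
//   CSETM x16, ne               ; x16 := all-ones if SP != 0, all-zeros if 0
//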
// For the aspect of automatically hardening loads using the taint register
// (a.k.a. speculative load hardening, see
// https://llvm.org/docs/SpeculativeLoadHardening.html), the following
// implementation choices are made for AArch64:
// - Many of the optimizations described at
//   https://llvm.org/docs/SpeculativeLoadHardening.html to harden fewer
//   loads haven't been implemented yet - but for some of them there are
//   FIXMEs in the code.
// - Loads that load into general purpose (X or W) registers get hardened by
//   masking the loaded data. For loads into other registers, the address
//   loaded from gets hardened instead. Hardening the loaded data is expected
//   to be more efficient; but masking data in registers other than X or W is
//   not easy and may result in being slower than just hardening the X
//   address register loaded from. (See the sketch below.)
// - On AArch64, CSDB instructions are inserted between the masking of the
//   register and its first use, to ensure there's no non-control-flow
//   speculation that might undermine the hardening mechanism.
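//
// As an illustrative sketch (not verbatim output of this pass), a load into a
// GPR gets its loaded data masked, while a load into e.g. a floating-point
// register gets its address masked instead:
//   LDR  x1, [x0]
//   AND  x1, x1, x16            ; mask the loaded data with the taint register
//   CSDB                        ; no data value speculation past this point
// versus:
//   AND  x0, x0, x16            ; mask the address register instead...
//   CSDB
//   LDR  q0, [x0]               ; ...for a load into a non-GPR register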
//
// Future extensions/improvements could be:
// - Implement this functionality using full speculation barriers, akin to the
//   x86-slh-lfence option. This may be more useful for the intrinsics-based
//   approach than for the SLH approach to masking.
//   Note that this pass already inserts the full speculation barriers if the
//   function for some niche reason makes use of X16/W16.
// - No indirect branch misprediction gets protected/instrumented yet; but this
//   could be done for some indirect branches, such as switch jump tables.
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>

using namespace llvm;

#define DEBUG_TYPE "aarch64-speculation-hardening"

#define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass"

static cl::opt<bool> HardenLoads("aarch64-slh-loads", cl::Hidden,
                                 cl::desc("Sanitize loads from memory."),
                                 cl::init(true));

namespace {

class AArch64SpeculationHardening : public MachineFunctionPass {
public:
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;

  static char ID;

  AArch64SpeculationHardening() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override {
    return AARCH64_SPECULATION_HARDENING_NAME;
  }

private:
  unsigned MisspeculatingTaintReg;
  unsigned MisspeculatingTaintReg32Bit;
  bool UseControlFlowSpeculationBarrier;
  BitVector RegsNeedingCSDBBeforeUse;
  BitVector RegsAlreadyMasked;

  bool functionUsesHardeningRegister(MachineFunction &MF) const;
  bool instrumentControlFlow(MachineBasicBlock &MBB,
                             bool &UsesFullSpeculationBarrier);
  bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                               MachineBasicBlock *&FBB,
                               AArch64CC::CondCode &CondCode) const;
  void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
                          AArch64CC::CondCode &CondCode, DebugLoc DL) const;
  void insertSPToRegTaintPropagation(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI) const;
  void insertRegToSPTaintPropagation(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned TmpReg) const;
  void insertFullSpeculationBarrier(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    DebugLoc DL) const;

  bool slhLoads(MachineBasicBlock &MBB);
  bool makeGPRSpeculationSafe(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI,
                              MachineInstr &MI, unsigned Reg);
  bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB,
                                        bool UsesFullSpeculationBarrier);
  bool expandSpeculationSafeValue(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  bool UsesFullSpeculationBarrier);
  bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  DebugLoc DL);
};

} // end anonymous namespace

char AArch64SpeculationHardening::ID = 0;

INITIALIZE_PASS(AArch64SpeculationHardening, "aarch64-speculation-hardening",
                AARCH64_SPECULATION_HARDENING_NAME, false, false)

bool AArch64SpeculationHardening::endsWithCondControlFlow(
    MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
    AArch64CC::CondCode &CondCode) const {
  SmallVector<MachineOperand, 1> analyzeBranchCondCode;
  if (TII->analyzeBranch(MBB, TBB, FBB, analyzeBranchCondCode, false))
    return false;

  // Ignore if the BB ends in an unconditional branch/fall-through.
  if (analyzeBranchCondCode.empty())
    return false;

  // If the BB ends with a single conditional branch, FBB will be set to
  // nullptr (see API docs for TII->analyzeBranch). For the rest of the
  // analysis we want the FBB block to be set always.
  assert(TBB != nullptr);
  if (FBB == nullptr)
    FBB = MBB.getFallThrough();

  // If both the true and the false condition jump to the same basic block,
  // there is no need for any protection - whether the branch is speculated
  // correctly or not, we end up executing the architecturally correct code.
  if (TBB == FBB)
    return false;

  assert(MBB.succ_size() == 2);
  // Translate analyzeBranchCondCode to CondCode.
  assert(analyzeBranchCondCode.size() == 1 && "unknown Cond array format");
  CondCode = AArch64CC::CondCode(analyzeBranchCondCode[0].getImm());
  return true;
}

void AArch64SpeculationHardening::insertFullSpeculationBarrier(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    DebugLoc DL) const {
  // A full control flow speculation barrier consists of (DSB SYS + ISB).
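  // (The 0xf immediate encodes the full system (SY) barrier variant.)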
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::DSB)).addImm(0xf);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ISB)).addImm(0xf);
}

void AArch64SpeculationHardening::insertTrackingCode(
    MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
    DebugLoc DL) const {
  if (UseControlFlowSpeculationBarrier) {
    insertFullSpeculationBarrier(SplitEdgeBB, SplitEdgeBB.begin(), DL);
  } else {
    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
        .addDef(MisspeculatingTaintReg)
        .addUse(MisspeculatingTaintReg)
        .addUse(AArch64::XZR)
        .addImm(CondCode);
    SplitEdgeBB.addLiveIn(AArch64::NZCV);
  }
}

bool AArch64SpeculationHardening::instrumentControlFlow(
    MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) {
  LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);

  bool Modified = false;
  MachineBasicBlock *TBB = nullptr;
  MachineBasicBlock *FBB = nullptr;
  AArch64CC::CondCode CondCode;

  if (!endsWithCondControlFlow(MBB, TBB, FBB, CondCode)) {
    LLVM_DEBUG(dbgs() << "... doesn't end with CondControlFlow\n");
  } else {
    // Now insert:
    // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, cond" on the True edge and
    // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, Invertcond" on the False
    // edge.
    AArch64CC::CondCode InvCondCode = AArch64CC::getInvertedCondCode(CondCode);

    MachineBasicBlock *SplitEdgeTBB = MBB.SplitCriticalEdge(TBB, *this);
    MachineBasicBlock *SplitEdgeFBB = MBB.SplitCriticalEdge(FBB, *this);

    assert(SplitEdgeTBB != nullptr);
    assert(SplitEdgeFBB != nullptr);

    DebugLoc DL;
    if (MBB.instr_end() != MBB.instr_begin())
      DL = (--MBB.instr_end())->getDebugLoc();

    insertTrackingCode(*SplitEdgeTBB, CondCode, DL);
    insertTrackingCode(*SplitEdgeFBB, InvCondCode, DL);

    LLVM_DEBUG(dbgs() << "SplitEdgeTBB: " << *SplitEdgeTBB << "\n");
    LLVM_DEBUG(dbgs() << "SplitEdgeFBB: " << *SplitEdgeFBB << "\n");
    Modified = true;
  }

  // Perform correct code generation around function calls and before returns.
  // The below variables record the return/terminator instructions and the call
  // instructions respectively; including which register is available as a
  // temporary register just before the recorded instructions.
  SmallVector<std::pair<MachineInstr *, unsigned>, 4> ReturnInstructions;
  SmallVector<std::pair<MachineInstr *, unsigned>, 4> CallInstructions;
  // If a temporary register is not available for at least one of the
  // instructions for which we need to transfer taint to the stack pointer, we
  // need to insert a full speculation barrier.
  // TmpRegisterNotAvailableEverywhere tracks that condition.
  bool TmpRegisterNotAvailableEverywhere = false;

  RegScavenger RS;
  RS.enterBasicBlockEnd(MBB);

  for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin();) {
    MachineInstr &MI = *--I;
    if (!MI.isReturn() && !MI.isCall())
      continue;

    // The RegScavenger represents registers available *after* the MI
    // instruction pointed to by RS.getCurrentPosition().
    // We need to have a register that is available *before* the MI is
    // executed.
    if (I == MBB.begin())
      RS.enterBasicBlock(MBB);
    else
      RS.backward(I);
    // FIXME: The below just finds *an* unused register. Maybe code could be
    // optimized more if this looks for the register that isn't used for the
    // longest time around this place, to enable more scheduling freedom. Not
    // sure if that would actually result in a big performance difference
    // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic
    // already to do this - but it's unclear if that could easily be used here.
    Register TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
    LLVM_DEBUG(dbgs() << "RS finds "
                      << ((TmpReg == 0) ? "no register " : "register ");
               if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " ";
               dbgs() << "to be available at MI " << MI);
    if (TmpReg == 0)
      TmpRegisterNotAvailableEverywhere = true;
    if (MI.isReturn())
      ReturnInstructions.push_back({&MI, TmpReg});
    else if (MI.isCall())
      CallInstructions.push_back({&MI, TmpReg});
  }

  if (TmpRegisterNotAvailableEverywhere) {
    // When a temporary register is not available everywhere in this basic
    // block where a propagate-taint-to-sp operation is needed, just
    // emit a full speculation barrier at the start of this basic block, which
    // renders the taint/speculation tracking in this basic block unnecessary.
    insertFullSpeculationBarrier(MBB, MBB.begin(),
                                 (MBB.begin())->getDebugLoc());
    UsesFullSpeculationBarrier = true;
    Modified = true;
  } else {
    for (auto MI_Reg : ReturnInstructions) {
      assert(MI_Reg.second != 0);
      LLVM_DEBUG(
          dbgs()
          << " About to insert Reg to SP taint propagation with temp register "
          << printReg(MI_Reg.second, TRI)
          << " on instruction: " << *MI_Reg.first);
      insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
      Modified = true;
    }

    for (auto MI_Reg : CallInstructions) {
      assert(MI_Reg.second != 0);
      LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint "
                           "propagation with temp register "
                        << printReg(MI_Reg.second, TRI)
                        << " around instruction: " << *MI_Reg.first);
      // Just after the call:
      insertSPToRegTaintPropagation(
          MBB, std::next((MachineBasicBlock::iterator)MI_Reg.first));
      // Just before the call:
      insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
      Modified = true;
    }
  }
  return Modified;
}

void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  // If full control flow speculation barriers are used, emit a control flow
  // barrier to block potential misspeculation in flight coming into this
  // function.
  if (UseControlFlowSpeculationBarrier) {
    insertFullSpeculationBarrier(MBB, MBBI, DebugLoc());
    return;
  }

  // CMP SP, #0 === SUBS xzr, SP, #0
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
      .addDef(AArch64::XZR)
      .addUse(AArch64::SP)
      .addImm(0)
      .addImm(0); // no shift
  // CSETM x16, NE === CSINV x16, xzr, xzr, EQ
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
      .addDef(MisspeculatingTaintReg)
      .addUse(AArch64::XZR)
      .addUse(AArch64::XZR)
      .addImm(AArch64CC::EQ);
}

void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    unsigned TmpReg) const {
  // If full control flow speculation barriers are used, there will not be
  // misspeculation when returning from this function, and therefore, also
  // no need to encode potential misspeculation into the stack pointer.
  if (UseControlFlowSpeculationBarrier)
    return;

  // mov Xtmp, SP === ADD Xtmp, SP, #0
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
      .addDef(TmpReg)
      .addUse(AArch64::SP)
      .addImm(0)
      .addImm(0); // no shift
  // and Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, LSL #0
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
      .addDef(TmpReg, RegState::Renamable)
      .addUse(TmpReg, RegState::Kill | RegState::Renamable)
      .addUse(MisspeculatingTaintReg, RegState::Kill)
      .addImm(0);
  // mov SP, Xtmp === ADD SP, Xtmp, #0
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
      .addDef(AArch64::SP)
      .addUse(TmpReg, RegState::Kill)
      .addImm(0)
      .addImm(0); // no shift
}

bool AArch64SpeculationHardening::functionUsesHardeningRegister(
    MachineFunction &MF) const {
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // Treat function calls specially, as the hardening register does not
      // need to remain live across function calls.
      if (MI.isCall())
        continue;
      if (MI.readsRegister(MisspeculatingTaintReg, TRI) ||
          MI.modifiesRegister(MisspeculatingTaintReg, TRI))
        return true;
    }
  }
  return false;
}

// Make GPR register Reg speculation-safe by putting it through the
// SpeculationSafeValue pseudo instruction, if we can't prove that
// the value in the register has already been hardened.
bool AArch64SpeculationHardening::makeGPRSpeculationSafe(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineInstr &MI,
    unsigned Reg) {
  assert(AArch64::GPR32allRegClass.contains(Reg) ||
         AArch64::GPR64allRegClass.contains(Reg));

  // Loads cannot directly load a value into the SP (nor WSP).
  // Therefore, if Reg is SP or WSP, it is because the instruction loads from
  // the stack through the stack pointer.
  //
  // Since the stack pointer is never dynamically controllable, don't harden
  // it.
  if (Reg == AArch64::SP || Reg == AArch64::WSP)
    return false;

  // Do not harden the register again if already hardened before.
  if (RegsAlreadyMasked[Reg])
    return false;

  const bool Is64Bit = AArch64::GPR64allRegClass.contains(Reg);
  LLVM_DEBUG(dbgs() << "About to harden register: " << Reg << "\n");
  BuildMI(MBB, MBBI, MI.getDebugLoc(),
          TII->get(Is64Bit ? AArch64::SpeculationSafeValueX
                           : AArch64::SpeculationSafeValueW))
      .addDef(Reg)
      .addUse(Reg);
  RegsAlreadyMasked.set(Reg);
  return true;
}

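// As an illustrative sketch of the resulting MIR (not verbatim output of this
// pass), slhLoads below turns
//   renamable $x1 = LDRXui renamable $x0, 0
// into
//   renamable $x1 = LDRXui renamable $x0, 0
//   $x1 = SpeculationSafeValueX $x1
// where the SpeculationSafeValueX pseudo is later expanded into
// "AND x1, x1, x16" plus a CSDB before the first use of $x1.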
bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
  bool Modified = false;

  LLVM_DEBUG(dbgs() << "slhLoads running on MBB: " << MBB);

  RegsAlreadyMasked.reset();

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  MachineBasicBlock::iterator NextMBBI;
  for (; MBBI != E; MBBI = NextMBBI) {
    MachineInstr &MI = *MBBI;
    NextMBBI = std::next(MBBI);
    // Only harden loaded values or addresses used in loads.
    if (!MI.mayLoad())
      continue;

    LLVM_DEBUG(dbgs() << "About to harden: " << MI);

    // For general purpose register loads, harden the registers loaded into.
    // For other loads, harden the address loaded from.
    // Masking the loaded value is expected to result in less performance
    // overhead, as the load can still execute speculatively, in comparison to
    // when the address loaded from gets masked. However, masking is only
    // easy to do efficiently on GPR registers, so for loads into non-GPR
    // registers (e.g. floating point loads), mask the address loaded from.
    bool AllDefsAreGPR = llvm::all_of(MI.defs(), [&](MachineOperand &Op) {
      return Op.isReg() && (AArch64::GPR32allRegClass.contains(Op.getReg()) ||
                            AArch64::GPR64allRegClass.contains(Op.getReg()));
    });
    // FIXME: it might be a worthwhile optimization to not mask loaded
    // values if all the registers involved in address calculation are already
    // hardened, leaving this load unable to execute on a misspeculated path.
    bool HardenLoadedData = AllDefsAreGPR;
    bool HardenAddressLoadedFrom = !HardenLoadedData;

    // First remove registers from RegsAlreadyMasked if their value is
    // updated by this instruction - it makes them contain a new value that is
    // not guaranteed to already have been masked.
    for (MachineOperand Op : MI.defs())
      for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
        RegsAlreadyMasked.reset(*AI);

    // FIXME: loads from the stack with an immediate offset from the stack
    // pointer probably shouldn't be hardened, which could result in a
    // significant optimization. See section "Don't check loads from
    // compile-time constant stack offsets" in
    // https://llvm.org/docs/SpeculativeLoadHardening.html

    if (HardenLoadedData)
      for (auto Def : MI.defs()) {
        if (Def.isDead())
          // Do not mask a register that is not used further.
          continue;
        // FIXME: For pre/post-increment addressing modes, the base register
        // used in address calculation is also defined by this instruction.
        // It might be a worthwhile optimization to not harden that
        // base register increment/decrement when the increment/decrement is
        // an immediate.
        Modified |= makeGPRSpeculationSafe(MBB, NextMBBI, MI, Def.getReg());
      }

    if (HardenAddressLoadedFrom)
      for (auto Use : MI.uses()) {
        if (!Use.isReg())
          continue;
        Register Reg = Use.getReg();
        // Some loads of floating point data have implicit defs/uses on a
        // super register of that floating point data. Some examples:
        // $s0 = LDRSui $sp, 22, implicit-def $q0
        // $q0 = LD1i64 $q0, 1, renamable $x0
        // We need to filter out these uses for non-GPR registers, which occur
        // because the load partially fills a non-GPR register with the loaded
        // data. Just skipping all non-GPR registers is safe (for now) as all
        // AArch64 load instructions only use GPR registers to perform the
        // address calculation. FIXME: However that might change once we can
        // produce SVE gather instructions.
        if (!(AArch64::GPR32allRegClass.contains(Reg) ||
              AArch64::GPR64allRegClass.contains(Reg)))
          continue;
        Modified |= makeGPRSpeculationSafe(MBB, MBBI, MI, Reg);
      }
  }
  return Modified;
}

/// \brief If MBBI references a pseudo instruction that should be expanded
/// here, do the expansion and return true. Otherwise return false.
bool AArch64SpeculationHardening::expandSpeculationSafeValue(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    bool UsesFullSpeculationBarrier) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  bool Is64Bit = true;

  switch (Opcode) {
  default:
    break;
  case AArch64::SpeculationSafeValueW:
    Is64Bit = false;
    [[fallthrough]];
  case AArch64::SpeculationSafeValueX:
    // Just remove the SpeculationSafe pseudos if control flow
    // misspeculation isn't happening, because barriers are already being
    // inserted to guarantee that.
    if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) {
      Register DstReg = MI.getOperand(0).getReg();
      Register SrcReg = MI.getOperand(1).getReg();
      // Mark this register and all its aliasing registers as needing to be
      // value speculation hardened before its next use, by using a CSDB
      // barrier instruction.
      for (MachineOperand Op : MI.defs())
        for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
          RegsNeedingCSDBBeforeUse.set(*AI);

      // Mask off with taint state.
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              Is64Bit ? TII->get(AArch64::ANDXrs) : TII->get(AArch64::ANDWrs))
          .addDef(DstReg)
          .addUse(SrcReg, RegState::Kill)
          .addUse(Is64Bit ? MisspeculatingTaintReg
                          : MisspeculatingTaintReg32Bit)
          .addImm(0);
    }
    MI.eraseFromParent();
    return true;
  }
  return false;
}

bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             DebugLoc DL) {
  assert(!UseControlFlowSpeculationBarrier && "No need to insert CSDBs when "
                                              "control flow misspeculation "
                                              "is already blocked");
  // Insert a data value speculation barrier (CSDB); CSDB is encoded as
  // HINT #20 (0x14).
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::HINT)).addImm(0x14);
  RegsNeedingCSDBBeforeUse.reset();
  return true;
}

bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
    MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) {
  bool Modified = false;

  RegsNeedingCSDBBeforeUse.reset();

  // The following loop iterates over all instructions in the basic block,
  // and performs 2 operations:
  // 1. Insert a CSDB at this location if needed.
  // 2. Expand the SpeculationSafeValue pseudo if the current instruction is
  //    one.
  //
  // The insertion of the CSDB is done as late as possible (i.e. just before
  // the use of a masked register), in the hope that this will reduce the
  // total number of CSDBs in a block when there are multiple masked registers
  // in the block.
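  // As an illustrative sketch (not verbatim output of this pass), two pseudos
  // whose results are first used later in the block share a single CSDB:
  //   $x1 = SpeculationSafeValueX $x1   ; becomes: AND x1, x1, x16
  //   $x2 = SpeculationSafeValueX $x2   ; becomes: AND x2, x2, x16
  //   ...                               ; one CSDB is emitted here, just
  //   STR $x1, ...                      ; before the first use of any
  //                                     ; masked register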
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  DebugLoc DL;
  while (MBBI != E) {
    MachineInstr &MI = *MBBI;
    DL = MI.getDebugLoc();
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);

    // First check if a CSDB needs to be inserted due to earlier registers
    // that were masked and that are used by the next instruction.
    // Also emit the barrier on any potential control flow changes.
    bool NeedToEmitBarrier = false;
    if (RegsNeedingCSDBBeforeUse.any() && (MI.isCall() || MI.isTerminator()))
      NeedToEmitBarrier = true;
    if (!NeedToEmitBarrier)
      for (MachineOperand Op : MI.uses())
        if (Op.isReg() && RegsNeedingCSDBBeforeUse[Op.getReg()]) {
          NeedToEmitBarrier = true;
          break;
        }

    if (NeedToEmitBarrier && !UsesFullSpeculationBarrier)
      Modified |= insertCSDB(MBB, MBBI, DL);

    Modified |=
        expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier);

    MBBI = NMBBI;
  }

  if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier)
    Modified |= insertCSDB(MBB, MBBI, DL);

  return Modified;
}

bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
  if (!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
    return false;

  MisspeculatingTaintReg = AArch64::X16;
  MisspeculatingTaintReg32Bit = AArch64::W16;
  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  RegsNeedingCSDBBeforeUse.resize(TRI->getNumRegs());
  RegsAlreadyMasked.resize(TRI->getNumRegs());
  UseControlFlowSpeculationBarrier = functionUsesHardeningRegister(MF);

  bool Modified = false;

  // Step 1: Enable automatic insertion of SpeculationSafeValue.
  if (HardenLoads) {
    LLVM_DEBUG(
        dbgs() << "***** AArch64SpeculationHardening - automatic insertion of "
                  "SpeculationSafeValue intrinsics *****\n");
    for (auto &MBB : MF)
      Modified |= slhLoads(MBB);
  }

  // Step 2: Add instrumentation code to function entry and exits.
  LLVM_DEBUG(
      dbgs()
      << "***** AArch64SpeculationHardening - track control flow *****\n");

  SmallVector<MachineBasicBlock *, 2> EntryBlocks;
  EntryBlocks.push_back(&MF.front());
  for (const LandingPadInfo &LPI : MF.getLandingPads())
    EntryBlocks.push_back(LPI.LandingPadBlock);
  for (auto *Entry : EntryBlocks)
    insertSPToRegTaintPropagation(
        *Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));

  // Step 3: Add instrumentation code to every basic block.
  for (auto &MBB : MF) {
    bool UsesFullSpeculationBarrier = false;
    Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier);
    Modified |=
        lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier);
  }

  return Modified;
}

/// \brief Returns an instance of the AArch64 speculation hardening pass.
FunctionPass *llvm::createAArch64SpeculationHardeningPass() {
  return new AArch64SpeculationHardening();
}