//===- AArch64SpeculationHardening.cpp - Harden Against Misspeculation ---===//
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
// This file contains a pass that inserts code to mitigate side channel
// vulnerabilities that may arise under control flow miss-speculation.
11 | // |
12 | // The pass implements tracking of control flow miss-speculation into a "taint" |
13 | // register. That taint register can then be used to mask off registers with |
14 | // sensitive data when executing under miss-speculation, a.k.a. "transient |
15 | // execution". |
// This pass is aimed at mitigating SpectreV1-style vulnerabilities.
17 | // |
18 | // It also implements speculative load hardening, i.e. using the taint register |
19 | // to automatically mask off loaded data. |
20 | // |
// As a possible follow-on improvement, an intrinsics-based approach, as
// explained at https://lwn.net/Articles/759423/, could be implemented on top
// of the current design.
24 | // |
// For AArch64, the following implementation choices are made to track
// control flow miss-speculation in a taint register; some of these choices
// differ from those made in the similar pass in
// X86SpeculativeLoadHardening.cpp, as the instruction set characteristics
// result in different trade-offs:
30 | // - The speculation hardening is done after register allocation. With a |
31 | // relative abundance of registers, one register is reserved (X16) to be |
32 | // the taint register. X16 is expected to not clash with other register |
33 | // reservation mechanisms with very high probability because: |
34 | // . The AArch64 ABI doesn't guarantee X16 to be retained across any call. |
35 | // . The only way to request X16 to be used as a programmer is through |
36 | // inline assembly. In the rare case a function explicitly demands to |
37 | // use X16/W16, this pass falls back to hardening against speculation |
38 | // by inserting a DSB SYS/ISB barrier pair which will prevent control |
39 | // flow speculation. |
40 | // - It is easy to insert mask operations at this late stage as we have |
41 | // mask operations available that don't set flags. |
// - The taint register contains all-ones when no miss-speculation is detected,
//   and all-zeros when miss-speculation is detected. Therefore, when masking,
//   an AND instruction (which only changes the register to be masked, with no
//   other side effects) can easily be inserted anywhere that's needed (see
//   the example sequences after this list).
46 | // - The tracking of miss-speculation is done by using a data-flow conditional |
47 | // select instruction (CSEL) to evaluate the flags that were also used to |
48 | // make conditional branch direction decisions. Speculation of the CSEL |
49 | // instruction can be limited with a CSDB instruction - so the combination of |
50 | // CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL |
//   aren't speculated. When the conditional branch direction gets
//   miss-speculated, the semantics of the inserted CSEL instruction are such
//   that the taint register will contain all zero bits.
//   One key requirement for this to work is that the conditional branch is
//   followed by an execution of the CSEL instruction, where the CSEL
//   instruction needs to use the same flags status as the conditional branch.
//   This means the conditional branches must not be implemented as one of
//   the AArch64 conditional branches that do not use the flags as input
//   (CB(N)Z and TB(N)Z). This is enforced by making the instruction selectors
//   not produce these instructions when speculation hardening is enabled;
//   this pass asserts if it does encounter such an instruction.
// - On function call boundaries, the miss-speculation state is transferred
//   from the taint register X16 to be encoded in the SP register as value 0;
//   see the example sequences below.
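//
// As an illustration of these choices, the inserted sequences look roughly
// as follows (a sketch, not the exact code the pass emits). For a
// conditional branch "b.eq label", the true edge receives
//     csel x16, x16, xzr, eq    // x16 becomes 0 under miss-speculation
// and the false edge receives
//     csel x16, x16, xzr, ne
// Masking a register with the taint state is then a single flags-preserving
// instruction:
//     and  x1, x1, x16
// Around calls and before returns, the taint state travels through SP:
//     mov  xN, sp ; and xN, xN, x16 ; mov sp, xN  // encode taint into SP
//     cmp  sp, #0 ; csetm x16, ne                 // recover taint at entry
//                                                 // and just after calls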
64 | // |
// For the aspect of automatically hardening loads using the taint register
// (a.k.a. speculative load hardening, see
// https://llvm.org/docs/SpeculativeLoadHardening.html), the following
// implementation choices are made for AArch64:
69 | // - Many of the optimizations described at |
70 | // https://llvm.org/docs/SpeculativeLoadHardening.html to harden fewer |
71 | // loads haven't been implemented yet - but for some of them there are |
72 | // FIXMEs in the code. |
// - Loads that load into general purpose (X or W) registers get hardened by
//   masking the loaded data. For loads into other registers, the address
//   loaded from gets hardened instead. Hardening the loaded data is expected
//   to be more efficient; but masking data in registers other than X or W is
//   not easy and may end up slower than just hardening the X address register
//   loaded from.
79 | // - On AArch64, CSDB instructions are inserted between the masking of the |
80 | // register and its first use, to ensure there's no non-control-flow |
81 | // speculation that might undermine the hardening mechanism. |
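//
// For example (an illustrative sketch), a hardened load of data into a GPR
// looks like:
//     ldr  x1, [x2]
//     and  x1, x1, x16   // mask the loaded value with the taint register
//     csdb               // keep value speculation from bypassing the AND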
82 | // |
83 | // Future extensions/improvements could be: |
84 | // - Implement this functionality using full speculation barriers, akin to the |
85 | // x86-slh-lfence option. This may be more useful for the intrinsics-based |
86 | // approach than for the SLH approach to masking. |
87 | // Note that this pass already inserts the full speculation barriers if the |
88 | // function for some niche reason makes use of X16/W16. |
// - Indirect branch misprediction is not yet protected/instrumented; this
//   could be done for some indirect branches, such as switch jump tables.
91 | //===----------------------------------------------------------------------===// |
92 | |
93 | #include "AArch64InstrInfo.h" |
94 | #include "AArch64Subtarget.h" |
95 | #include "Utils/AArch64BaseInfo.h" |
96 | #include "llvm/ADT/BitVector.h" |
97 | #include "llvm/ADT/SmallVector.h" |
98 | #include "llvm/CodeGen/MachineBasicBlock.h" |
99 | #include "llvm/CodeGen/MachineFunction.h" |
100 | #include "llvm/CodeGen/MachineFunctionPass.h" |
101 | #include "llvm/CodeGen/MachineInstr.h" |
102 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
103 | #include "llvm/CodeGen/MachineOperand.h" |
104 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
105 | #include "llvm/CodeGen/RegisterScavenging.h" |
106 | #include "llvm/IR/DebugLoc.h" |
107 | #include "llvm/Pass.h" |
108 | #include "llvm/Support/CodeGen.h" |
109 | #include "llvm/Support/Debug.h" |
110 | #include "llvm/Target/TargetMachine.h" |
111 | #include <cassert> |
112 | |
113 | using namespace llvm; |
114 | |
115 | #define DEBUG_TYPE "aarch64-speculation-hardening" |
116 | |
117 | #define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass" |
118 | |
static cl::opt<bool> HardenLoads("aarch64-slh-loads", cl::Hidden,
                                 cl::desc("Sanitize loads from memory."),
                                 cl::init(true));
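
// Note: This pass only transforms functions that carry the
// SpeculativeLoadHardening attribute (see runOnMachineFunction below), e.g.
// functions compiled with clang's -mspeculative-load-hardening flag.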
122 | |
123 | namespace { |
124 | |
125 | class AArch64SpeculationHardening : public MachineFunctionPass { |
126 | public: |
127 | const TargetInstrInfo *TII; |
128 | const TargetRegisterInfo *TRI; |
129 | |
130 | static char ID; |
131 | |
132 | AArch64SpeculationHardening() : MachineFunctionPass(ID) { |
133 | initializeAArch64SpeculationHardeningPass(*PassRegistry::getPassRegistry()); |
134 | } |
135 | |
136 | bool runOnMachineFunction(MachineFunction &Fn) override; |
137 | |
138 | StringRef getPassName() const override { |
139 | return AARCH64_SPECULATION_HARDENING_NAME; |
140 | } |
141 | |
142 | private: |
143 | unsigned MisspeculatingTaintReg; |
144 | unsigned MisspeculatingTaintReg32Bit; |
145 | bool UseControlFlowSpeculationBarrier; |
146 | BitVector RegsNeedingCSDBBeforeUse; |
147 | BitVector RegsAlreadyMasked; |
148 | |
149 | bool functionUsesHardeningRegister(MachineFunction &MF) const; |
150 | bool instrumentControlFlow(MachineBasicBlock &MBB, |
151 | bool &UsesFullSpeculationBarrier); |
152 | bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, |
153 | MachineBasicBlock *&FBB, |
154 | AArch64CC::CondCode &CondCode) const; |
155 | void insertTrackingCode(MachineBasicBlock &SplitEdgeBB, |
156 | AArch64CC::CondCode &CondCode, DebugLoc DL) const; |
157 | void insertSPToRegTaintPropagation(MachineBasicBlock &MBB, |
158 | MachineBasicBlock::iterator MBBI) const; |
159 | void insertRegToSPTaintPropagation(MachineBasicBlock &MBB, |
160 | MachineBasicBlock::iterator MBBI, |
161 | unsigned TmpReg) const; |
162 | void insertFullSpeculationBarrier(MachineBasicBlock &MBB, |
163 | MachineBasicBlock::iterator MBBI, |
164 | DebugLoc DL) const; |
165 | |
166 | bool slhLoads(MachineBasicBlock &MBB); |
167 | bool makeGPRSpeculationSafe(MachineBasicBlock &MBB, |
168 | MachineBasicBlock::iterator MBBI, |
169 | MachineInstr &MI, unsigned Reg); |
170 | bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB, |
171 | bool UsesFullSpeculationBarrier); |
172 | bool expandSpeculationSafeValue(MachineBasicBlock &MBB, |
173 | MachineBasicBlock::iterator MBBI, |
174 | bool UsesFullSpeculationBarrier); |
175 | bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
176 | DebugLoc DL); |
177 | }; |
178 | |
179 | } // end anonymous namespace |
180 | |
181 | char AArch64SpeculationHardening::ID = 0; |
182 | |
INITIALIZE_PASS(AArch64SpeculationHardening, "aarch64-speculation-hardening",
                AARCH64_SPECULATION_HARDENING_NAME, false, false)
185 | |
186 | bool AArch64SpeculationHardening::endsWithCondControlFlow( |
187 | MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, |
188 | AArch64CC::CondCode &CondCode) const { |
189 | SmallVector<MachineOperand, 1> analyzeBranchCondCode; |
  if (TII->analyzeBranch(MBB, TBB, FBB, analyzeBranchCondCode, false))
191 | return false; |
192 | |
193 | // Ignore if the BB ends in an unconditional branch/fall-through. |
194 | if (analyzeBranchCondCode.empty()) |
195 | return false; |
196 | |
197 | // If the BB ends with a single conditional branch, FBB will be set to |
198 | // nullptr (see API docs for TII->analyzeBranch). For the rest of the |
199 | // analysis we want the FBB block to be set always. |
200 | assert(TBB != nullptr); |
201 | if (FBB == nullptr) |
202 | FBB = MBB.getFallThrough(); |
203 | |
204 | // If both the true and the false condition jump to the same basic block, |
  // there is no need for any protection - whether the branch is speculated
206 | // correctly or not, we end up executing the architecturally correct code. |
207 | if (TBB == FBB) |
208 | return false; |
209 | |
210 | assert(MBB.succ_size() == 2); |
  // Translate analyzeBranchCondCode to CondCode.
  assert(analyzeBranchCondCode.size() == 1 && "unknown Cond array format");
213 | CondCode = AArch64CC::CondCode(analyzeBranchCondCode[0].getImm()); |
214 | return true; |
215 | } |
216 | |
217 | void AArch64SpeculationHardening::insertFullSpeculationBarrier( |
218 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
219 | DebugLoc DL) const { |
220 | // A full control flow speculation barrier consists of (DSB SYS + ISB) |
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::DSB)).addImm(0xf);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ISB)).addImm(0xf);
223 | } |
224 | |
225 | void AArch64SpeculationHardening::insertTrackingCode( |
226 | MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode, |
227 | DebugLoc DL) const { |
228 | if (UseControlFlowSpeculationBarrier) { |
    insertFullSpeculationBarrier(SplitEdgeBB, SplitEdgeBB.begin(), DL);
  } else {
    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
        .addDef(MisspeculatingTaintReg)
        .addUse(MisspeculatingTaintReg)
        .addUse(AArch64::XZR)
        .addImm(CondCode);
    SplitEdgeBB.addLiveIn(AArch64::NZCV);
237 | } |
238 | } |
239 | |
240 | bool AArch64SpeculationHardening::instrumentControlFlow( |
241 | MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) { |
242 | LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB); |
243 | |
244 | bool Modified = false; |
245 | MachineBasicBlock *TBB = nullptr; |
246 | MachineBasicBlock *FBB = nullptr; |
247 | AArch64CC::CondCode CondCode; |
248 | |
249 | if (!endsWithCondControlFlow(MBB, TBB, FBB, CondCode)) { |
250 | LLVM_DEBUG(dbgs() << "... doesn't end with CondControlFlow\n" ); |
251 | } else { |
252 | // Now insert: |
253 | // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, cond" on the True edge and |
254 | // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, Invertcond" on the False |
255 | // edge. |
    AArch64CC::CondCode InvCondCode = AArch64CC::getInvertedCondCode(CondCode);
257 | |
    MachineBasicBlock *SplitEdgeTBB = MBB.SplitCriticalEdge(TBB, *this);
    MachineBasicBlock *SplitEdgeFBB = MBB.SplitCriticalEdge(FBB, *this);
260 | |
261 | assert(SplitEdgeTBB != nullptr); |
262 | assert(SplitEdgeFBB != nullptr); |
263 | |
264 | DebugLoc DL; |
265 | if (MBB.instr_end() != MBB.instr_begin()) |
266 | DL = (--MBB.instr_end())->getDebugLoc(); |
267 | |
    insertTrackingCode(*SplitEdgeTBB, CondCode, DL);
    insertTrackingCode(*SplitEdgeFBB, InvCondCode, DL);
270 | |
271 | LLVM_DEBUG(dbgs() << "SplitEdgeTBB: " << *SplitEdgeTBB << "\n" ); |
272 | LLVM_DEBUG(dbgs() << "SplitEdgeFBB: " << *SplitEdgeFBB << "\n" ); |
273 | Modified = true; |
274 | } |
275 | |
  // Perform correct code generation around function calls and before returns.
  // The variables below record the return/terminator instructions and the
  // call instructions respectively, including which register is available as
  // a temporary register just before the recorded instructions.
280 | SmallVector<std::pair<MachineInstr *, unsigned>, 4> ReturnInstructions; |
281 | SmallVector<std::pair<MachineInstr *, unsigned>, 4> CallInstructions; |
  // If a temporary register is not available for at least one of the
  // instructions for which we need to transfer taint to the stack pointer, we
  // need to insert a full speculation barrier.
  // TmpRegisterNotAvailableEverywhere tracks that condition.
286 | bool TmpRegisterNotAvailableEverywhere = false; |
287 | |
288 | RegScavenger RS; |
289 | RS.enterBasicBlockEnd(MBB); |
290 | |
  for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin();) {
292 | MachineInstr &MI = *--I; |
293 | if (!MI.isReturn() && !MI.isCall()) |
294 | continue; |
295 | |
296 | // The RegScavenger represents registers available *after* the MI |
297 | // instruction pointed to by RS.getCurrentPosition(). |
298 | // We need to have a register that is available *before* the MI is executed. |
299 | if (I == MBB.begin()) |
300 | RS.enterBasicBlock(MBB); |
301 | else |
302 | RS.backward(I); |
    // FIXME: The below just finds *an* unused register. Maybe code could be
304 | // optimized more if this looks for the register that isn't used for the |
305 | // longest time around this place, to enable more scheduling freedom. Not |
306 | // sure if that would actually result in a big performance difference |
307 | // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic |
308 | // already to do this - but it's unclear if that could easily be used here. |
    Register TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
310 | LLVM_DEBUG(dbgs() << "RS finds " |
311 | << ((TmpReg == 0) ? "no register " : "register " ); |
312 | if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " " ; |
313 | dbgs() << "to be available at MI " << MI); |
314 | if (TmpReg == 0) |
315 | TmpRegisterNotAvailableEverywhere = true; |
    if (MI.isReturn())
      ReturnInstructions.push_back({&MI, TmpReg});
    else if (MI.isCall())
      CallInstructions.push_back({&MI, TmpReg});
320 | } |
321 | |
  if (TmpRegisterNotAvailableEverywhere) {
    // When a temporary register is not available everywhere in this basic
    // block where a propagate-taint-to-sp operation is needed, just emit a
    // full speculation barrier at the start of this basic block, which
    // renders the taint/speculation tracking in this basic block unnecessary.
    insertFullSpeculationBarrier(MBB, MBB.begin(),
                                 (MBB.begin())->getDebugLoc());
329 | UsesFullSpeculationBarrier = true; |
330 | Modified = true; |
331 | } else { |
332 | for (auto MI_Reg : ReturnInstructions) { |
333 | assert(MI_Reg.second != 0); |
334 | LLVM_DEBUG( |
335 | dbgs() |
336 | << " About to insert Reg to SP taint propagation with temp register " |
337 | << printReg(MI_Reg.second, TRI) |
338 | << " on instruction: " << *MI_Reg.first); |
      insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
340 | Modified = true; |
341 | } |
342 | |
343 | for (auto MI_Reg : CallInstructions) { |
344 | assert(MI_Reg.second != 0); |
345 | LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint " |
346 | "propagation with temp register " |
347 | << printReg(MI_Reg.second, TRI) |
348 | << " around instruction: " << *MI_Reg.first); |
349 | // Just after the call: |
      insertSPToRegTaintPropagation(
          MBB, std::next((MachineBasicBlock::iterator)MI_Reg.first));
      // Just before the call:
      insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
354 | Modified = true; |
355 | } |
356 | } |
357 | return Modified; |
358 | } |
359 | |
360 | void AArch64SpeculationHardening::insertSPToRegTaintPropagation( |
361 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { |
  // If full control flow speculation barriers are used, emit a control flow
  // barrier to block potential miss-speculation in flight coming into this
  // function.
365 | if (UseControlFlowSpeculationBarrier) { |
    insertFullSpeculationBarrier(MBB, MBBI, DebugLoc());
367 | return; |
368 | } |
369 | |
370 | // CMP SP, #0 === SUBS xzr, SP, #0 |
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
      .addDef(AArch64::XZR)
      .addUse(AArch64::SP)
      .addImm(0)
      .addImm(0); // no shift
  // CSETM x16, NE === CSINV x16, xzr, xzr, EQ
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
      .addDef(MisspeculatingTaintReg)
      .addUse(AArch64::XZR)
      .addUse(AArch64::XZR)
      .addImm(AArch64CC::EQ);
382 | } |
383 | |
384 | void AArch64SpeculationHardening::insertRegToSPTaintPropagation( |
385 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
386 | unsigned TmpReg) const { |
  // If full control flow speculation barriers are used, there will not be
  // miss-speculation when returning from this function, and therefore also
  // no need to encode potential miss-speculation into the stack pointer.
390 | if (UseControlFlowSpeculationBarrier) |
391 | return; |
392 | |
393 | // mov Xtmp, SP === ADD Xtmp, SP, #0 |
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
      .addDef(TmpReg)
      .addUse(AArch64::SP)
      .addImm(0)
      .addImm(0); // no shift
  // and Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
      .addDef(TmpReg, RegState::Renamable)
      .addUse(TmpReg, RegState::Kill | RegState::Renamable)
      .addUse(MisspeculatingTaintReg, RegState::Kill)
      .addImm(0);
  // mov SP, Xtmp === ADD SP, Xtmp, #0
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
      .addDef(AArch64::SP)
      .addUse(TmpReg, RegState::Kill)
      .addImm(0)
      .addImm(0); // no shift
411 | } |
412 | |
413 | bool AArch64SpeculationHardening::functionUsesHardeningRegister( |
414 | MachineFunction &MF) const { |
415 | for (MachineBasicBlock &MBB : MF) { |
416 | for (MachineInstr &MI : MBB) { |
      // Treat function calls specially, as the hardening register does not
      // need to remain live across function calls.
419 | if (MI.isCall()) |
420 | continue; |
      if (MI.readsRegister(MisspeculatingTaintReg, TRI) ||
          MI.modifiesRegister(MisspeculatingTaintReg, TRI))
423 | return true; |
424 | } |
425 | } |
426 | return false; |
427 | } |
428 | |
429 | // Make GPR register Reg speculation-safe by putting it through the |
430 | // SpeculationSafeValue pseudo instruction, if we can't prove that |
431 | // the value in the register has already been hardened. |
432 | bool AArch64SpeculationHardening::makeGPRSpeculationSafe( |
433 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineInstr &MI, |
434 | unsigned Reg) { |
435 | assert(AArch64::GPR32allRegClass.contains(Reg) || |
436 | AArch64::GPR64allRegClass.contains(Reg)); |
437 | |
438 | // Loads cannot directly load a value into the SP (nor WSP). |
439 | // Therefore, if Reg is SP or WSP, it is because the instruction loads from |
440 | // the stack through the stack pointer. |
441 | // |
442 | // Since the stack pointer is never dynamically controllable, don't harden it. |
443 | if (Reg == AArch64::SP || Reg == AArch64::WSP) |
444 | return false; |
445 | |
446 | // Do not harden the register again if already hardened before. |
447 | if (RegsAlreadyMasked[Reg]) |
448 | return false; |
449 | |
450 | const bool Is64Bit = AArch64::GPR64allRegClass.contains(Reg); |
451 | LLVM_DEBUG(dbgs() << "About to harden register : " << Reg << "\n" ); |
  BuildMI(MBB, MBBI, MI.getDebugLoc(),
          TII->get(Is64Bit ? AArch64::SpeculationSafeValueX
                           : AArch64::SpeculationSafeValueW))
      .addDef(Reg)
      .addUse(Reg);
457 | RegsAlreadyMasked.set(Reg); |
458 | return true; |
459 | } |
460 | |
461 | bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) { |
462 | bool Modified = false; |
463 | |
464 | LLVM_DEBUG(dbgs() << "slhLoads running on MBB: " << MBB); |
465 | |
466 | RegsAlreadyMasked.reset(); |
467 | |
468 | MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
469 | MachineBasicBlock::iterator NextMBBI; |
470 | for (; MBBI != E; MBBI = NextMBBI) { |
471 | MachineInstr &MI = *MBBI; |
    NextMBBI = std::next(MBBI);
473 | // Only harden loaded values or addresses used in loads. |
474 | if (!MI.mayLoad()) |
475 | continue; |
476 | |
477 | LLVM_DEBUG(dbgs() << "About to harden: " << MI); |
478 | |
479 | // For general purpose register loads, harden the registers loaded into. |
480 | // For other loads, harden the address loaded from. |
481 | // Masking the loaded value is expected to result in less performance |
482 | // overhead, as the load can still execute speculatively in comparison to |
483 | // when the address loaded from gets masked. However, masking is only |
484 | // easy to do efficiently on GPR registers, so for loads into non-GPR |
485 | // registers (e.g. floating point loads), mask the address loaded from. |
    bool AllDefsAreGPR = llvm::all_of(MI.defs(), [&](MachineOperand &Op) {
      return Op.isReg() && (AArch64::GPR32allRegClass.contains(Op.getReg()) ||
                            AArch64::GPR64allRegClass.contains(Op.getReg()));
    });
    // FIXME: it might be a worthwhile optimization to not mask loaded
    // values if all the registers involved in address calculation are already
    // hardened, leading to this load not being able to execute on a
    // miss-speculated path.
494 | bool HardenLoadedData = AllDefsAreGPR; |
495 | bool HardenAddressLoadedFrom = !HardenLoadedData; |
496 | |
    // First remove registers from RegsAlreadyMasked if their value is
    // updated by this instruction - it makes them contain a new value that is
    // not guaranteed to already have been masked.
500 | for (MachineOperand Op : MI.defs()) |
501 | for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI) |
        RegsAlreadyMasked.reset(*AI);
503 | |
504 | // FIXME: loads from the stack with an immediate offset from the stack |
505 | // pointer probably shouldn't be hardened, which could result in a |
506 | // significant optimization. See section "Don’t check loads from |
507 | // compile-time constant stack offsets", in |
508 | // https://llvm.org/docs/SpeculativeLoadHardening.html |
509 | |
510 | if (HardenLoadedData) |
511 | for (auto Def : MI.defs()) { |
512 | if (Def.isDead()) |
513 | // Do not mask a register that is not used further. |
514 | continue; |
515 | // FIXME: For pre/post-increment addressing modes, the base register |
516 | // used in address calculation is also defined by this instruction. |
517 | // It might be a worthwhile optimization to not harden that |
518 | // base register increment/decrement when the increment/decrement is |
519 | // an immediate. |
        Modified |= makeGPRSpeculationSafe(MBB, NextMBBI, MI, Def.getReg());
521 | } |
522 | |
523 | if (HardenAddressLoadedFrom) |
524 | for (auto Use : MI.uses()) { |
525 | if (!Use.isReg()) |
526 | continue; |
527 | Register Reg = Use.getReg(); |
528 | // Some loads of floating point data have implicit defs/uses on a |
529 | // super register of that floating point data. Some examples: |
530 | // $s0 = LDRSui $sp, 22, implicit-def $q0 |
531 | // $q0 = LD1i64 $q0, 1, renamable $x0 |
        // We need to filter out these uses for non-GPR registers, which occur
        // because the load partially fills a non-GPR register with the loaded
        // data. Just skipping all non-GPR registers is safe (for now) as all
        // AArch64 load instructions only use GPR registers to perform the
        // address calculation. FIXME: However that might change once we can
        // produce SVE gather instructions.
538 | if (!(AArch64::GPR32allRegClass.contains(Reg) || |
539 | AArch64::GPR64allRegClass.contains(Reg))) |
540 | continue; |
541 | Modified |= makeGPRSpeculationSafe(MBB, MBBI, MI, Reg); |
542 | } |
543 | } |
544 | return Modified; |
545 | } |
546 | |
547 | /// \brief If MBBI references a pseudo instruction that should be expanded |
548 | /// here, do the expansion and return true. Otherwise return false. |
549 | bool AArch64SpeculationHardening::expandSpeculationSafeValue( |
550 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
551 | bool UsesFullSpeculationBarrier) { |
552 | MachineInstr &MI = *MBBI; |
553 | unsigned Opcode = MI.getOpcode(); |
554 | bool Is64Bit = true; |
555 | |
556 | switch (Opcode) { |
557 | default: |
558 | break; |
559 | case AArch64::SpeculationSafeValueW: |
560 | Is64Bit = false; |
561 | [[fallthrough]]; |
562 | case AArch64::SpeculationSafeValueX: |
    // Just remove the SpeculationSafe pseudos if control flow
    // miss-speculation isn't happening, because we're already inserting
    // barriers to guarantee that.
566 | if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) { |
      Register DstReg = MI.getOperand(0).getReg();
      Register SrcReg = MI.getOperand(1).getReg();
569 | // Mark this register and all its aliasing registers as needing to be |
570 | // value speculation hardened before its next use, by using a CSDB |
571 | // barrier instruction. |
572 | for (MachineOperand Op : MI.defs()) |
573 | for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI) |
574 | RegsNeedingCSDBBeforeUse.set(*AI); |
575 | |
576 | // Mask off with taint state. |
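      // E.g., with 64-bit DstReg/SrcReg X1 this emits, roughly:
      //   $x1 = ANDXrs killed $x1, $x16, 0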
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              Is64Bit ? TII->get(AArch64::ANDXrs) : TII->get(AArch64::ANDWrs))
          .addDef(DstReg)
          .addUse(SrcReg, RegState::Kill)
          .addUse(Is64Bit ? MisspeculatingTaintReg
                          : MisspeculatingTaintReg32Bit)
          .addImm(0);
584 | } |
585 | MI.eraseFromParent(); |
586 | return true; |
587 | } |
588 | return false; |
589 | } |
590 | |
591 | bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB, |
592 | MachineBasicBlock::iterator MBBI, |
593 | DebugLoc DL) { |
  assert(!UseControlFlowSpeculationBarrier && "No need to insert CSDBs when "
                                              "control flow miss-speculation "
                                              "is already blocked");
  // Insert a data value speculation barrier (CSDB, encoded as HINT #0x14).
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::HINT)).addImm(0x14);
599 | RegsNeedingCSDBBeforeUse.reset(); |
600 | return true; |
601 | } |
602 | |
603 | bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos( |
604 | MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) { |
605 | bool Modified = false; |
606 | |
607 | RegsNeedingCSDBBeforeUse.reset(); |
608 | |
609 | // The following loop iterates over all instructions in the basic block, |
610 | // and performs 2 operations: |
611 | // 1. Insert a CSDB at this location if needed. |
612 | // 2. Expand the SpeculationSafeValuePseudo if the current instruction is |
613 | // one. |
614 | // |
  // The insertion of the CSDB is done as late as possible (i.e. just before
  // the use of a masked register), in the hope that this will reduce the
  // total number of CSDBs in a block when there are multiple masked registers
  // in the block.
619 | MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
620 | DebugLoc DL; |
621 | while (MBBI != E) { |
622 | MachineInstr &MI = *MBBI; |
623 | DL = MI.getDebugLoc(); |
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
625 | |
626 | // First check if a CSDB needs to be inserted due to earlier registers |
627 | // that were masked and that are used by the next instruction. |
628 | // Also emit the barrier on any potential control flow changes. |
629 | bool NeedToEmitBarrier = false; |
630 | if (RegsNeedingCSDBBeforeUse.any() && (MI.isCall() || MI.isTerminator())) |
631 | NeedToEmitBarrier = true; |
632 | if (!NeedToEmitBarrier) |
633 | for (MachineOperand Op : MI.uses()) |
634 | if (Op.isReg() && RegsNeedingCSDBBeforeUse[Op.getReg()]) { |
635 | NeedToEmitBarrier = true; |
636 | break; |
637 | } |
638 | |
639 | if (NeedToEmitBarrier && !UsesFullSpeculationBarrier) |
640 | Modified |= insertCSDB(MBB, MBBI, DL); |
641 | |
642 | Modified |= |
643 | expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier); |
644 | |
645 | MBBI = NMBBI; |
646 | } |
647 | |
648 | if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier) |
649 | Modified |= insertCSDB(MBB, MBBI, DL); |
650 | |
651 | return Modified; |
652 | } |
653 | |
654 | bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) { |
  if (!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
656 | return false; |
657 | |
658 | MisspeculatingTaintReg = AArch64::X16; |
659 | MisspeculatingTaintReg32Bit = AArch64::W16; |
660 | TII = MF.getSubtarget().getInstrInfo(); |
661 | TRI = MF.getSubtarget().getRegisterInfo(); |
  RegsNeedingCSDBBeforeUse.resize(TRI->getNumRegs());
  RegsAlreadyMasked.resize(TRI->getNumRegs());
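  // If the function explicitly uses X16/W16, the taint register cannot be
  // reserved for tracking; fall back to full speculation barriers instead
  // (see the file header comment).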
664 | UseControlFlowSpeculationBarrier = functionUsesHardeningRegister(MF); |
665 | |
666 | bool Modified = false; |
667 | |
668 | // Step 1: Enable automatic insertion of SpeculationSafeValue. |
669 | if (HardenLoads) { |
    LLVM_DEBUG(
        dbgs() << "***** AArch64SpeculationHardening - automatic insertion of "
                  "SpeculationSafeValue intrinsics *****\n");
673 | for (auto &MBB : MF) |
674 | Modified |= slhLoads(MBB); |
675 | } |
676 | |
  // Step 2: Add instrumentation code to function entry and exits.
  LLVM_DEBUG(
      dbgs()
      << "***** AArch64SpeculationHardening - track control flow *****\n");
681 | |
682 | SmallVector<MachineBasicBlock *, 2> EntryBlocks; |
  EntryBlocks.push_back(&MF.front());
  for (const LandingPadInfo &LPI : MF.getLandingPads())
    EntryBlocks.push_back(LPI.LandingPadBlock);
  for (auto *Entry : EntryBlocks)
    insertSPToRegTaintPropagation(
        *Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
689 | |
  // Step 3: Add instrumentation code to every basic block.
691 | for (auto &MBB : MF) { |
692 | bool UsesFullSpeculationBarrier = false; |
693 | Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier); |
694 | Modified |= |
695 | lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier); |
696 | } |
697 | |
698 | return Modified; |
699 | } |
700 | |
/// \brief Returns an instance of the AArch64 speculation hardening pass.
702 | FunctionPass *llvm::createAArch64SpeculationHardeningPass() { |
703 | return new AArch64SpeculationHardening(); |
704 | } |
705 | |