//===- AArch64SpeculationHardening.cpp - Harden Against Misspeculation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass to insert code to mitigate against side channel
10// vulnerabilities that may happen under control flow miss-speculation.
11//
12// The pass implements tracking of control flow miss-speculation into a "taint"
13// register. That taint register can then be used to mask off registers with
14// sensitive data when executing under miss-speculation, a.k.a. "transient
15// execution".
16// This pass is aimed at mitigating against SpectreV1-style vulnerabilities.
17//
18// It also implements speculative load hardening, i.e. using the taint register
19// to automatically mask off loaded data.
20//
21// As a possible follow-on improvement, also an intrinsics-based approach as
22// explained at https://lwn.net/Articles/759423/ could be implemented on top of
23// the current design.
24//
25// For AArch64, the following implementation choices are made to implement the
26// tracking of control flow miss-speculation into a taint register:
27// Some of these are different than the implementation choices made in
28// the similar pass implemented in X86SpeculativeLoadHardening.cpp, as
29// the instruction set characteristics result in different trade-offs.
30// - The speculation hardening is done after register allocation. With a
31// relative abundance of registers, one register is reserved (X16) to be
32// the taint register. X16 is expected to not clash with other register
33// reservation mechanisms with very high probability because:
34// . The AArch64 ABI doesn't guarantee X16 to be retained across any call.
35// . The only way to request X16 to be used as a programmer is through
36// inline assembly. In the rare case a function explicitly demands to
37// use X16/W16, this pass falls back to hardening against speculation
38// by inserting a DSB SYS/ISB barrier pair which will prevent control
39// flow speculation.
40// - It is easy to insert mask operations at this late stage as we have
41// mask operations available that don't set flags.
42// - The taint variable contains all-ones when no miss-speculation is detected,
43// and contains all-zeros when miss-speculation is detected. Therefore, when
44// masking, an AND instruction (which only changes the register to be masked,
45// no other side effects) can easily be inserted anywhere that's needed.
46// - The tracking of miss-speculation is done by using a data-flow conditional
47// select instruction (CSEL) to evaluate the flags that were also used to
48// make conditional branch direction decisions. Speculation of the CSEL
49// instruction can be limited with a CSDB instruction - so the combination of
50// CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL
51// aren't speculated. When conditional branch direction gets miss-speculated,
52// the semantics of the inserted CSEL instruction is such that the taint
53// register will contain all zero bits.
54// One key requirement for this to work is that the conditional branch is
55// followed by an execution of the CSEL instruction, where the CSEL
56// instruction needs to use the same flags status as the conditional branch.
57// This means that the conditional branches must not be implemented as one
58// of the AArch64 conditional branches that do not use the flags as input
59// (CB(N)Z and TB(N)Z). This is implemented by ensuring in the instruction
60// selectors to not produce these instructions when speculation hardening
61// is enabled. This pass will assert if it does encounter such an instruction.
62// - On function call boundaries, the miss-speculation state is transferred from
63// the taint register X16 to be encoded in the SP register as value 0.
64//
65// For the aspect of automatically hardening loads, using the taint register,
66// (a.k.a. speculative load hardening, see
67// https://llvm.org/docs/SpeculativeLoadHardening.html), the following
68// implementation choices are made for AArch64:
69// - Many of the optimizations described at
70// https://llvm.org/docs/SpeculativeLoadHardening.html to harden fewer
71// loads haven't been implemented yet - but for some of them there are
72// FIXMEs in the code.
73// - loads that load into general purpose (X or W) registers get hardened by
74// masking the loaded data. For loads that load into other registers, the
75// address loaded from gets hardened. It is expected that hardening the
76// loaded data may be more efficient; but masking data in registers other
77// than X or W is not easy and may result in being slower than just
78// hardening the X address register loaded from.
79// - On AArch64, CSDB instructions are inserted between the masking of the
80// register and its first use, to ensure there's no non-control-flow
81// speculation that might undermine the hardening mechanism.
82//
83// Future extensions/improvements could be:
84// - Implement this functionality using full speculation barriers, akin to the
85// x86-slh-lfence option. This may be more useful for the intrinsics-based
86// approach than for the SLH approach to masking.
87// Note that this pass already inserts the full speculation barriers if the
88// function for some niche reason makes use of X16/W16.
89// - no indirect branch misprediction gets protected/instrumented; but this
90// could be done for some indirect branches, such as switch jump tables.
91//===----------------------------------------------------------------------===//
92
93#include "AArch64Subtarget.h"
94#include "Utils/AArch64BaseInfo.h"
95#include "llvm/ADT/BitVector.h"
96#include "llvm/ADT/SmallVector.h"
97#include "llvm/CodeGen/MachineBasicBlock.h"
98#include "llvm/CodeGen/MachineFunction.h"
99#include "llvm/CodeGen/MachineFunctionPass.h"
100#include "llvm/CodeGen/MachineInstr.h"
101#include "llvm/CodeGen/MachineInstrBuilder.h"
102#include "llvm/CodeGen/MachineOperand.h"
103#include "llvm/CodeGen/RegisterScavenging.h"
104#include "llvm/IR/DebugLoc.h"
105#include "llvm/Pass.h"
106#include "llvm/Support/Debug.h"
107#include "llvm/Target/TargetMachine.h"
108#include <cassert>
109
110using namespace llvm;
111
112#define DEBUG_TYPE "aarch64-speculation-hardening"
113
114#define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass"
115
116static cl::opt<bool> HardenLoads("aarch64-slh-loads", cl::Hidden,
117 cl::desc("Sanitize loads from memory."),
118 cl::init(Val: true));
119
120namespace {
121
122class AArch64SpeculationHardening : public MachineFunctionPass {
123public:
124 const TargetInstrInfo *TII;
125 const TargetRegisterInfo *TRI;
126
127 static char ID;
128
129 AArch64SpeculationHardening() : MachineFunctionPass(ID) {}
130
131 bool runOnMachineFunction(MachineFunction &Fn) override;
132
133 StringRef getPassName() const override {
134 return AARCH64_SPECULATION_HARDENING_NAME;
135 }
136
137private:
138 unsigned MisspeculatingTaintReg;
139 unsigned MisspeculatingTaintReg32Bit;
140 bool UseControlFlowSpeculationBarrier;
141 BitVector RegsNeedingCSDBBeforeUse;
142 BitVector RegsAlreadyMasked;
143
144 bool functionUsesHardeningRegister(MachineFunction &MF) const;
145 bool instrumentControlFlow(MachineBasicBlock &MBB,
146 bool &UsesFullSpeculationBarrier);
147 bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
148 MachineBasicBlock *&FBB,
149 AArch64CC::CondCode &CondCode) const;
150 void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
151 AArch64CC::CondCode &CondCode, DebugLoc DL) const;
152 void insertSPToRegTaintPropagation(MachineBasicBlock &MBB,
153 MachineBasicBlock::iterator MBBI) const;
154 void insertRegToSPTaintPropagation(MachineBasicBlock &MBB,
155 MachineBasicBlock::iterator MBBI,
156 unsigned TmpReg) const;
157 void insertFullSpeculationBarrier(MachineBasicBlock &MBB,
158 MachineBasicBlock::iterator MBBI,
159 DebugLoc DL) const;
160
161 bool slhLoads(MachineBasicBlock &MBB);
162 bool makeGPRSpeculationSafe(MachineBasicBlock &MBB,
163 MachineBasicBlock::iterator MBBI,
164 MachineInstr &MI, unsigned Reg);
165 bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB,
166 bool UsesFullSpeculationBarrier);
167 bool expandSpeculationSafeValue(MachineBasicBlock &MBB,
168 MachineBasicBlock::iterator MBBI,
169 bool UsesFullSpeculationBarrier);
170 bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
171 DebugLoc DL);
172};
173
174} // end anonymous namespace
175
176char AArch64SpeculationHardening::ID = 0;
177
178INITIALIZE_PASS(AArch64SpeculationHardening, "aarch64-speculation-hardening",
179 AARCH64_SPECULATION_HARDENING_NAME, false, false)
180
181bool AArch64SpeculationHardening::endsWithCondControlFlow(
182 MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
183 AArch64CC::CondCode &CondCode) const {
184 SmallVector<MachineOperand, 1> analyzeBranchCondCode;
185 if (TII->analyzeBranch(MBB, TBB, FBB, Cond&: analyzeBranchCondCode, AllowModify: false))
186 return false;
187
188 // Ignore if the BB ends in an unconditional branch/fall-through.
189 if (analyzeBranchCondCode.empty())
190 return false;
191
192 // If the BB ends with a single conditional branch, FBB will be set to
193 // nullptr (see API docs for TII->analyzeBranch). For the rest of the
194 // analysis we want the FBB block to be set always.
195 assert(TBB != nullptr);
196 if (FBB == nullptr)
197 FBB = MBB.getFallThrough();
198
199 // If both the true and the false condition jump to the same basic block,
200 // there isn't need for any protection - whether the branch is speculated
201 // correctly or not, we end up executing the architecturally correct code.
202 if (TBB == FBB)
203 return false;
204
205 assert(MBB.succ_size() == 2);
206 // translate analyzeBranchCondCode to CondCode.
207 assert(analyzeBranchCondCode.size() == 1 && "unknown Cond array format");
208 CondCode = AArch64CC::CondCode(analyzeBranchCondCode[0].getImm());
209 return true;
210}
211
212void AArch64SpeculationHardening::insertFullSpeculationBarrier(
213 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
214 DebugLoc DL) const {
215 // A full control flow speculation barrier consists of (DSB SYS + ISB)
216 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::DSB)).addImm(Val: 0xf);
217 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::ISB)).addImm(Val: 0xf);
218}
219
220void AArch64SpeculationHardening::insertTrackingCode(
221 MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
222 DebugLoc DL) const {
223 if (UseControlFlowSpeculationBarrier) {
224 insertFullSpeculationBarrier(MBB&: SplitEdgeBB, MBBI: SplitEdgeBB.begin(), DL);
225 } else {
226 BuildMI(BB&: SplitEdgeBB, I: SplitEdgeBB.begin(), MIMD: DL, MCID: TII->get(Opcode: AArch64::CSELXr))
227 .addDef(RegNo: MisspeculatingTaintReg)
228 .addUse(RegNo: MisspeculatingTaintReg)
229 .addUse(RegNo: AArch64::XZR)
230 .addImm(Val: CondCode);
231 SplitEdgeBB.addLiveIn(PhysReg: AArch64::NZCV);
232 }
233}
234
235bool AArch64SpeculationHardening::instrumentControlFlow(
236 MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) {
237 LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);
238
239 bool Modified = false;
240 MachineBasicBlock *TBB = nullptr;
241 MachineBasicBlock *FBB = nullptr;
242 AArch64CC::CondCode CondCode;
243
244 if (!endsWithCondControlFlow(MBB, TBB, FBB, CondCode)) {
245 LLVM_DEBUG(dbgs() << "... doesn't end with CondControlFlow\n");
246 } else {
247 // Now insert:
248 // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, cond" on the True edge and
249 // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, Invertcond" on the False
250 // edge.
251 AArch64CC::CondCode InvCondCode = AArch64CC::getInvertedCondCode(Code: CondCode);
252
253 MachineBasicBlock *SplitEdgeTBB = MBB.SplitCriticalEdge(Succ: TBB, P&: *this);
254 MachineBasicBlock *SplitEdgeFBB = MBB.SplitCriticalEdge(Succ: FBB, P&: *this);
255
256 assert(SplitEdgeTBB != nullptr);
257 assert(SplitEdgeFBB != nullptr);
258
259 DebugLoc DL;
260 if (MBB.instr_end() != MBB.instr_begin())
261 DL = (--MBB.instr_end())->getDebugLoc();
262
263 insertTrackingCode(SplitEdgeBB&: *SplitEdgeTBB, CondCode, DL);
264 insertTrackingCode(SplitEdgeBB&: *SplitEdgeFBB, CondCode&: InvCondCode, DL);
265
266 LLVM_DEBUG(dbgs() << "SplitEdgeTBB: " << *SplitEdgeTBB << "\n");
267 LLVM_DEBUG(dbgs() << "SplitEdgeFBB: " << *SplitEdgeFBB << "\n");
268 Modified = true;
269 }
270
271 // Perform correct code generation around function calls and before returns.
272 // The below variables record the return/terminator instructions and the call
273 // instructions respectively; including which register is available as a
274 // temporary register just before the recorded instructions.
275 SmallVector<std::pair<MachineInstr *, unsigned>, 4> ReturnInstructions;
276 SmallVector<std::pair<MachineInstr *, unsigned>, 4> CallInstructions;
277 // if a temporary register is not available for at least one of the
278 // instructions for which we need to transfer taint to the stack pointer, we
279 // need to insert a full speculation barrier.
280 // TmpRegisterNotAvailableEverywhere tracks that condition.
281 bool TmpRegisterNotAvailableEverywhere = false;
282
283 RegScavenger RS;
284 RS.enterBasicBlockEnd(MBB);
285
286 for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin(); ) {
287 MachineInstr &MI = *--I;
288 if (!MI.isReturn() && !MI.isCall())
289 continue;
290
291 // The RegScavenger represents registers available *after* the MI
292 // instruction pointed to by RS.getCurrentPosition().
293 // We need to have a register that is available *before* the MI is executed.
294 if (I == MBB.begin())
295 RS.enterBasicBlock(MBB);
296 else
297 RS.backward(I);
298 // FIXME: The below just finds *a* unused register. Maybe code could be
299 // optimized more if this looks for the register that isn't used for the
300 // longest time around this place, to enable more scheduling freedom. Not
301 // sure if that would actually result in a big performance difference
302 // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic
303 // already to do this - but it's unclear if that could easily be used here.
304 Register TmpReg = RS.FindUnusedReg(RC: &AArch64::GPR64commonRegClass);
305 LLVM_DEBUG(dbgs() << "RS finds "
306 << ((TmpReg == 0) ? "no register " : "register ");
307 if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " ";
308 dbgs() << "to be available at MI " << MI);
309 if (TmpReg == 0)
310 TmpRegisterNotAvailableEverywhere = true;
311 if (MI.isReturn())
312 ReturnInstructions.push_back(Elt: {&MI, TmpReg});
313 else if (MI.isCall())
314 CallInstructions.push_back(Elt: {&MI, TmpReg});
315 }
316
317 if (TmpRegisterNotAvailableEverywhere) {
318 // When a temporary register is not available everywhere in this basic
319 // basic block where a propagate-taint-to-sp operation is needed, just
320 // emit a full speculation barrier at the start of this basic block, which
321 // renders the taint/speculation tracking in this basic block unnecessary.
322 insertFullSpeculationBarrier(MBB, MBBI: MBB.begin(),
323 DL: (MBB.begin())->getDebugLoc());
324 UsesFullSpeculationBarrier = true;
325 Modified = true;
326 } else {
327 for (auto MI_Reg : ReturnInstructions) {
328 assert(MI_Reg.second != 0);
329 LLVM_DEBUG(
330 dbgs()
331 << " About to insert Reg to SP taint propagation with temp register "
332 << printReg(MI_Reg.second, TRI)
333 << " on instruction: " << *MI_Reg.first);
334 insertRegToSPTaintPropagation(MBB, MBBI: MI_Reg.first, TmpReg: MI_Reg.second);
335 Modified = true;
336 }
337
338 for (auto MI_Reg : CallInstructions) {
339 assert(MI_Reg.second != 0);
340 LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint "
341 "propagation with temp register "
342 << printReg(MI_Reg.second, TRI)
343 << " around instruction: " << *MI_Reg.first);
344 // Just after the call:
345 insertSPToRegTaintPropagation(
346 MBB, MBBI: std::next(x: (MachineBasicBlock::iterator)MI_Reg.first));
347 // Just before the call:
348 insertRegToSPTaintPropagation(MBB, MBBI: MI_Reg.first, TmpReg: MI_Reg.second);
349 Modified = true;
350 }
351 }
352 return Modified;
353}
354
355void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
356 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
357 // If full control flow speculation barriers are used, emit a control flow
358 // barrier to block potential miss-speculation in flight coming in to this
359 // function.
360 if (UseControlFlowSpeculationBarrier) {
361 insertFullSpeculationBarrier(MBB, MBBI, DL: DebugLoc());
362 return;
363 }
364
365 // CMP SP, #0 === SUBS xzr, SP, #0
366 BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc(), MCID: TII->get(Opcode: AArch64::SUBSXri))
367 .addDef(RegNo: AArch64::XZR)
368 .addUse(RegNo: AArch64::SP)
369 .addImm(Val: 0)
370 .addImm(Val: 0); // no shift
371 // CSETM x16, NE === CSINV x16, xzr, xzr, EQ
372 BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc(), MCID: TII->get(Opcode: AArch64::CSINVXr))
373 .addDef(RegNo: MisspeculatingTaintReg)
374 .addUse(RegNo: AArch64::XZR)
375 .addUse(RegNo: AArch64::XZR)
376 .addImm(Val: AArch64CC::EQ);
377}
378
379void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
380 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
381 unsigned TmpReg) const {
382 // If full control flow speculation barriers are used, there will not be
383 // miss-speculation when returning from this function, and therefore, also
384 // no need to encode potential miss-speculation into the stack pointer.
385 if (UseControlFlowSpeculationBarrier)
386 return;
387
388 // mov Xtmp, SP === ADD Xtmp, SP, #0
389 BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc(), MCID: TII->get(Opcode: AArch64::ADDXri))
390 .addDef(RegNo: TmpReg)
391 .addUse(RegNo: AArch64::SP)
392 .addImm(Val: 0)
393 .addImm(Val: 0); // no shift
394 // and Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0
395 BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc(), MCID: TII->get(Opcode: AArch64::ANDXrs))
396 .addDef(RegNo: TmpReg, Flags: RegState::Renamable)
397 .addUse(RegNo: TmpReg, Flags: RegState::Kill | RegState::Renamable)
398 .addUse(RegNo: MisspeculatingTaintReg, Flags: RegState::Kill)
399 .addImm(Val: 0);
400 // mov SP, Xtmp === ADD SP, Xtmp, #0
401 BuildMI(BB&: MBB, I: MBBI, MIMD: DebugLoc(), MCID: TII->get(Opcode: AArch64::ADDXri))
402 .addDef(RegNo: AArch64::SP)
403 .addUse(RegNo: TmpReg, Flags: RegState::Kill)
404 .addImm(Val: 0)
405 .addImm(Val: 0); // no shift
406}
407
408bool AArch64SpeculationHardening::functionUsesHardeningRegister(
409 MachineFunction &MF) const {
410 for (MachineBasicBlock &MBB : MF) {
411 for (MachineInstr &MI : MBB) {
412 // treat function calls specially, as the hardening register does not
413 // need to remain live across function calls.
414 if (MI.isCall())
415 continue;
416 if (MI.readsRegister(Reg: MisspeculatingTaintReg, TRI) ||
417 MI.modifiesRegister(Reg: MisspeculatingTaintReg, TRI))
418 return true;
419 }
420 }
421 return false;
422}
423
424// Make GPR register Reg speculation-safe by putting it through the
425// SpeculationSafeValue pseudo instruction, if we can't prove that
426// the value in the register has already been hardened.
427bool AArch64SpeculationHardening::makeGPRSpeculationSafe(
428 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineInstr &MI,
429 unsigned Reg) {
430 assert(AArch64::GPR32allRegClass.contains(Reg) ||
431 AArch64::GPR64allRegClass.contains(Reg));
432
433 // Loads cannot directly load a value into the SP (nor WSP).
434 // Therefore, if Reg is SP or WSP, it is because the instruction loads from
435 // the stack through the stack pointer.
436 //
437 // Since the stack pointer is never dynamically controllable, don't harden it.
438 if (Reg == AArch64::SP || Reg == AArch64::WSP)
439 return false;
440
441 // Do not harden the register again if already hardened before.
442 if (RegsAlreadyMasked[Reg])
443 return false;
444
445 const bool Is64Bit = AArch64::GPR64allRegClass.contains(Reg);
446 LLVM_DEBUG(dbgs() << "About to harden register : " << Reg << "\n");
447 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
448 MCID: TII->get(Opcode: Is64Bit ? AArch64::SpeculationSafeValueX
449 : AArch64::SpeculationSafeValueW))
450 .addDef(RegNo: Reg)
451 .addUse(RegNo: Reg);
452 RegsAlreadyMasked.set(Reg);
453 return true;
454}
455
456bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
457 bool Modified = false;
458
459 LLVM_DEBUG(dbgs() << "slhLoads running on MBB: " << MBB);
460
461 RegsAlreadyMasked.reset();
462
463 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
464 MachineBasicBlock::iterator NextMBBI;
465 for (; MBBI != E; MBBI = NextMBBI) {
466 MachineInstr &MI = *MBBI;
467 NextMBBI = std::next(x: MBBI);
468 // Only harden loaded values or addresses used in loads.
469 if (!MI.mayLoad())
470 continue;
471
472 LLVM_DEBUG(dbgs() << "About to harden: " << MI);
473
474 // For general purpose register loads, harden the registers loaded into.
475 // For other loads, harden the address loaded from.
476 // Masking the loaded value is expected to result in less performance
477 // overhead, as the load can still execute speculatively in comparison to
478 // when the address loaded from gets masked. However, masking is only
479 // easy to do efficiently on GPR registers, so for loads into non-GPR
480 // registers (e.g. floating point loads), mask the address loaded from.
481 bool AllDefsAreGPR = llvm::all_of(Range: MI.defs(), P: [&](MachineOperand &Op) {
482 return Op.isReg() && (AArch64::GPR32allRegClass.contains(Reg: Op.getReg()) ||
483 AArch64::GPR64allRegClass.contains(Reg: Op.getReg()));
484 });
485 // FIXME: it might be a worthwhile optimization to not mask loaded
486 // values if all the registers involved in address calculation are already
487 // hardened, leading to this load not able to execute on a miss-speculated
488 // path.
489 bool HardenLoadedData = AllDefsAreGPR;
490 bool HardenAddressLoadedFrom = !HardenLoadedData;
491
492 // First remove registers from AlreadyMaskedRegisters if their value is
493 // updated by this instruction - it makes them contain a new value that is
494 // not guaranteed to already have been masked.
495 for (MachineOperand Op : MI.defs())
496 for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
497 RegsAlreadyMasked.reset(Idx: *AI);
498
499 // FIXME: loads from the stack with an immediate offset from the stack
500 // pointer probably shouldn't be hardened, which could result in a
501 // significant optimization. See section "Don’t check loads from
502 // compile-time constant stack offsets", in
503 // https://llvm.org/docs/SpeculativeLoadHardening.html
504
505 if (HardenLoadedData)
506 for (auto Def : MI.defs()) {
507 if (Def.isDead())
508 // Do not mask a register that is not used further.
509 continue;
510 // FIXME: For pre/post-increment addressing modes, the base register
511 // used in address calculation is also defined by this instruction.
512 // It might be a worthwhile optimization to not harden that
513 // base register increment/decrement when the increment/decrement is
514 // an immediate.
515 Modified |= makeGPRSpeculationSafe(MBB, MBBI: NextMBBI, MI, Reg: Def.getReg());
516 }
517
518 if (HardenAddressLoadedFrom)
519 for (auto Use : MI.uses()) {
520 if (!Use.isReg())
521 continue;
522 Register Reg = Use.getReg();
523 // Some loads of floating point data have implicit defs/uses on a
524 // super register of that floating point data. Some examples:
525 // $s0 = LDRSui $sp, 22, implicit-def $q0
526 // $q0 = LD1i64 $q0, 1, renamable $x0
527 // We need to filter out these uses for non-GPR register which occur
528 // because the load partially fills a non-GPR register with the loaded
529 // data. Just skipping all non-GPR registers is safe (for now) as all
530 // AArch64 load instructions only use GPR registers to perform the
531 // address calculation. FIXME: However that might change once we can
532 // produce SVE gather instructions.
533 if (!(AArch64::GPR32allRegClass.contains(Reg) ||
534 AArch64::GPR64allRegClass.contains(Reg)))
535 continue;
536 Modified |= makeGPRSpeculationSafe(MBB, MBBI, MI, Reg);
537 }
538 }
539 return Modified;
540}
541
542/// \brief If MBBI references a pseudo instruction that should be expanded
543/// here, do the expansion and return true. Otherwise return false.
544bool AArch64SpeculationHardening::expandSpeculationSafeValue(
545 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
546 bool UsesFullSpeculationBarrier) {
547 MachineInstr &MI = *MBBI;
548 unsigned Opcode = MI.getOpcode();
549 bool Is64Bit = true;
550
551 switch (Opcode) {
552 default:
553 break;
554 case AArch64::SpeculationSafeValueW:
555 Is64Bit = false;
556 [[fallthrough]];
557 case AArch64::SpeculationSafeValueX:
558 // Just remove the SpeculationSafe pseudo's if control flow
559 // miss-speculation isn't happening because we're already inserting barriers
560 // to guarantee that.
561 if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) {
562 Register DstReg = MI.getOperand(i: 0).getReg();
563 Register SrcReg = MI.getOperand(i: 1).getReg();
564 // Mark this register and all its aliasing registers as needing to be
565 // value speculation hardened before its next use, by using a CSDB
566 // barrier instruction.
567 for (MachineOperand Op : MI.defs())
568 for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
569 RegsNeedingCSDBBeforeUse.set(*AI);
570
571 // Mask off with taint state.
572 BuildMI(BB&: MBB, I: MBBI, MIMD: MI.getDebugLoc(),
573 MCID: Is64Bit ? TII->get(Opcode: AArch64::ANDXrs) : TII->get(Opcode: AArch64::ANDWrs))
574 .addDef(RegNo: DstReg)
575 .addUse(RegNo: SrcReg, Flags: RegState::Kill)
576 .addUse(RegNo: Is64Bit ? MisspeculatingTaintReg
577 : MisspeculatingTaintReg32Bit)
578 .addImm(Val: 0);
579 }
580 MI.eraseFromParent();
581 return true;
582 }
583 return false;
584}
585
586bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB,
587 MachineBasicBlock::iterator MBBI,
588 DebugLoc DL) {
589 assert(!UseControlFlowSpeculationBarrier && "No need to insert CSDBs when "
590 "control flow miss-speculation "
591 "is already blocked");
592 // insert data value speculation barrier (CSDB)
593 BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: TII->get(Opcode: AArch64::HINT)).addImm(Val: 0x14);
594 RegsNeedingCSDBBeforeUse.reset();
595 return true;
596}
597
598bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
599 MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) {
600 bool Modified = false;
601
602 RegsNeedingCSDBBeforeUse.reset();
603
604 // The following loop iterates over all instructions in the basic block,
605 // and performs 2 operations:
606 // 1. Insert a CSDB at this location if needed.
607 // 2. Expand the SpeculationSafeValuePseudo if the current instruction is
608 // one.
609 //
610 // The insertion of the CSDB is done as late as possible (i.e. just before
611 // the use of a masked register), in the hope that that will reduce the
612 // total number of CSDBs in a block when there are multiple masked registers
613 // in the block.
614 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
615 DebugLoc DL;
616 while (MBBI != E) {
617 MachineInstr &MI = *MBBI;
618 DL = MI.getDebugLoc();
619 MachineBasicBlock::iterator NMBBI = std::next(x: MBBI);
620
621 // First check if a CSDB needs to be inserted due to earlier registers
622 // that were masked and that are used by the next instruction.
623 // Also emit the barrier on any potential control flow changes.
624 bool NeedToEmitBarrier = false;
625 if (RegsNeedingCSDBBeforeUse.any() && (MI.isCall() || MI.isTerminator()))
626 NeedToEmitBarrier = true;
627 if (!NeedToEmitBarrier)
628 for (MachineOperand Op : MI.uses())
629 if (Op.isReg() && RegsNeedingCSDBBeforeUse[Op.getReg()]) {
630 NeedToEmitBarrier = true;
631 break;
632 }
633
634 if (NeedToEmitBarrier && !UsesFullSpeculationBarrier)
635 Modified |= insertCSDB(MBB, MBBI, DL);
636
637 Modified |=
638 expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier);
639
640 MBBI = NMBBI;
641 }
642
643 if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier)
644 Modified |= insertCSDB(MBB, MBBI, DL);
645
646 return Modified;
647}
648
649bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
650 if (!MF.getFunction().hasFnAttribute(Kind: Attribute::SpeculativeLoadHardening))
651 return false;
652
653 MisspeculatingTaintReg = AArch64::X16;
654 MisspeculatingTaintReg32Bit = AArch64::W16;
655 TII = MF.getSubtarget().getInstrInfo();
656 TRI = MF.getSubtarget().getRegisterInfo();
657 RegsNeedingCSDBBeforeUse.resize(N: TRI->getNumRegs());
658 RegsAlreadyMasked.resize(N: TRI->getNumRegs());
659 UseControlFlowSpeculationBarrier = functionUsesHardeningRegister(MF);
660
661 bool Modified = false;
662
663 // Step 1: Enable automatic insertion of SpeculationSafeValue.
664 if (HardenLoads) {
665 LLVM_DEBUG(
666 dbgs() << "***** AArch64SpeculationHardening - automatic insertion of "
667 "SpeculationSafeValue intrinsics *****\n");
668 for (auto &MBB : MF)
669 Modified |= slhLoads(MBB);
670 }
671
672 // 2. Add instrumentation code to function entry and exits.
673 LLVM_DEBUG(
674 dbgs()
675 << "***** AArch64SpeculationHardening - track control flow *****\n");
676
677 SmallVector<MachineBasicBlock *, 2> EntryBlocks;
678 EntryBlocks.push_back(Elt: &MF.front());
679 for (const LandingPadInfo &LPI : MF.getLandingPads())
680 EntryBlocks.push_back(Elt: LPI.LandingPadBlock);
681 for (auto *Entry : EntryBlocks)
682 insertSPToRegTaintPropagation(
683 MBB&: *Entry, MBBI: Entry->SkipPHIsLabelsAndDebug(I: Entry->begin()));
684
685 // 3. Add instrumentation code to every basic block.
686 for (auto &MBB : MF) {
687 bool UsesFullSpeculationBarrier = false;
688 Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier);
689 Modified |=
690 lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier);
691 }
692
693 return Modified;
694}
695
696/// \brief Returns an instance of the pseudo instruction expansion pass.
697FunctionPass *llvm::createAArch64SpeculationHardeningPass() {
698 return new AArch64SpeculationHardening();
699}
700