1//===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass works around a Cortex Core Fused AES erratum:
9// - Cortex-A57 Erratum 1742098
10// - Cortex-A72 Erratum 1655431
11//
12// The erratum may be triggered if an input vector register to AESE or AESD was
13// last written by an instruction that only updated 32 bits of it. This can
14// occur for either of the input registers.
15//
16// The workaround chosen is to update the input register using `r = VORRq r, r`,
17// as this updates all 128 bits of the register unconditionally, but does not
18// change the values observed in `r`, making the input safe.
19//
20// This pass has to be conservative in a few cases:
21// - an input vector register to the AES instruction is defined outside the
22// current function, where we have to assume the register was updated in an
23// unsafe way; and
24// - an input vector register to the AES instruction is updated along multiple
25// different control-flow paths, where we have to ensure all the register
26// updating instructions are safe.
27//
// Both of these cases may apply to an input vector register. In either case, we
// need to ensure that, when the pass is finished, there exists a safe
// instruction between every unsafe register updating instruction and the AES
// instruction.
32//
33//===----------------------------------------------------------------------===//
34
35#include "ARM.h"
36#include "ARMBaseInstrInfo.h"
37#include "ARMBaseRegisterInfo.h"
38#include "ARMSubtarget.h"
39#include "Utils/ARMBaseInfo.h"
40#include "llvm/ADT/STLExtras.h"
41#include "llvm/ADT/SmallPtrSet.h"
42#include "llvm/ADT/SmallVector.h"
43#include "llvm/ADT/StringRef.h"
44#include "llvm/CodeGen/MachineBasicBlock.h"
45#include "llvm/CodeGen/MachineFunction.h"
46#include "llvm/CodeGen/MachineFunctionPass.h"
47#include "llvm/CodeGen/MachineInstr.h"
48#include "llvm/CodeGen/MachineInstrBuilder.h"
49#include "llvm/CodeGen/MachineInstrBundleIterator.h"
50#include "llvm/CodeGen/MachineOperand.h"
51#include "llvm/CodeGen/ReachingDefAnalysis.h"
52#include "llvm/CodeGen/Register.h"
53#include "llvm/CodeGen/TargetRegisterInfo.h"
54#include "llvm/IR/DebugLoc.h"
55#include "llvm/Pass.h"
56#include "llvm/Support/Debug.h"
57#include "llvm/Support/raw_ostream.h"
58#include <assert.h>
59#include <stdint.h>
60
61using namespace llvm;
62
63#define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098"
64
65//===----------------------------------------------------------------------===//
66
67namespace {
68class ARMFixCortexA57AES1742098 : public MachineFunctionPass {
69public:
70 static char ID;
71 explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) {}
72
73 bool runOnMachineFunction(MachineFunction &F) override;
74
75 MachineFunctionProperties getRequiredProperties() const override {
76 return MachineFunctionProperties().setNoVRegs();
77 }
78
79 StringRef getPassName() const override {
80 return "ARM fix for Cortex-A57 AES Erratum 1742098";
81 }
82
83 void getAnalysisUsage(AnalysisUsage &AU) const override {
84 AU.addRequired<ReachingDefAnalysis>();
85 AU.setPreservesCFG();
86 MachineFunctionPass::getAnalysisUsage(AU);
87 }
88
89private:
90 // This is the information needed to insert the fixup in the right place.
91 struct AESFixupLocation {
92 MachineBasicBlock *Block;
93 // The fixup instruction will be inserted *before* InsertionPt.
94 MachineInstr *InsertionPt;
95 MachineOperand *MOp;
96 };
97
98 void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA,
99 const ARMBaseRegisterInfo *TRI,
100 SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const;
101
102 void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
103 const ARMBaseRegisterInfo *TRI) const;
104
105 static bool isFirstAESPairInstr(MachineInstr &MI);
106 static bool isSafeAESInput(MachineInstr &MI);
107};
108char ARMFixCortexA57AES1742098::ID = 0;
109
110} // end anonymous namespace
111
112INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE,
113 "ARM fix for Cortex-A57 AES Erratum 1742098", false,
114 false)
115INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis);
116INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE,
117 "ARM fix for Cortex-A57 AES Erratum 1742098", false, false)
118
119//===----------------------------------------------------------------------===//
120
121bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) {
122 unsigned Opc = MI.getOpcode();
123 return Opc == ARM::AESD || Opc == ARM::AESE;
124}
125
126bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) {
127 auto CondCodeIsAL = [](MachineInstr &MI) -> bool {
128 int CCIdx = MI.findFirstPredOperandIdx();
129 if (CCIdx == -1)
130 return false;
131 return MI.getOperand(i: CCIdx).getImm() == (int64_t)ARMCC::AL;
132 };
133
134 switch (MI.getOpcode()) {
135 // Unknown: Assume not safe.
136 default:
137 return false;
138 // 128-bit wide AES instructions
139 case ARM::AESD:
140 case ARM::AESE:
141 case ARM::AESMC:
142 case ARM::AESIMC:
143 // No CondCode.
144 return true;
145 // 128-bit and 64-bit wide bitwise ops (when condition = al)
146 case ARM::VANDd:
147 case ARM::VANDq:
148 case ARM::VORRd:
149 case ARM::VORRq:
150 case ARM::VEORd:
151 case ARM::VEORq:
152 case ARM::VMVNd:
153 case ARM::VMVNq:
154 // VMOV of 64-bit value between D registers (when condition = al)
155 case ARM::VMOVD:
156 // VMOV of 64 bit value from GPRs (when condition = al)
157 case ARM::VMOVDRR:
158 // VMOV of immediate into D or Q registers (when condition = al)
159 case ARM::VMOVv2i64:
160 case ARM::VMOVv1i64:
161 case ARM::VMOVv2f32:
162 case ARM::VMOVv4f32:
163 case ARM::VMOVv2i32:
164 case ARM::VMOVv4i32:
165 case ARM::VMOVv4i16:
166 case ARM::VMOVv8i16:
167 case ARM::VMOVv8i8:
168 case ARM::VMOVv16i8:
169 // Loads (when condition = al)
170 // VLD Dn, [Rn, #imm]
171 case ARM::VLDRD:
172 // VLDM
173 case ARM::VLDMDDB_UPD:
174 case ARM::VLDMDIA_UPD:
175 case ARM::VLDMDIA:
176 // VLDn to all lanes.
177 case ARM::VLD1d64:
178 case ARM::VLD1q64:
179 case ARM::VLD1d32:
180 case ARM::VLD1q32:
181 case ARM::VLD2b32:
182 case ARM::VLD2d32:
183 case ARM::VLD2q32:
184 case ARM::VLD1d16:
185 case ARM::VLD1q16:
186 case ARM::VLD2d16:
187 case ARM::VLD2q16:
188 case ARM::VLD1d8:
189 case ARM::VLD1q8:
190 case ARM::VLD2b8:
191 case ARM::VLD2d8:
192 case ARM::VLD2q8:
193 case ARM::VLD3d32:
194 case ARM::VLD3q32:
195 case ARM::VLD3d16:
196 case ARM::VLD3q16:
197 case ARM::VLD3d8:
198 case ARM::VLD3q8:
199 case ARM::VLD4d32:
200 case ARM::VLD4q32:
201 case ARM::VLD4d16:
202 case ARM::VLD4q16:
203 case ARM::VLD4d8:
204 case ARM::VLD4q8:
205 // VLD1 (single element to one lane)
206 case ARM::VLD1LNd32:
207 case ARM::VLD1LNd32_UPD:
208 case ARM::VLD1LNd8:
209 case ARM::VLD1LNd8_UPD:
210 case ARM::VLD1LNd16:
211 case ARM::VLD1LNd16_UPD:
212 // VLD1 (single element to all lanes)
213 case ARM::VLD1DUPd32:
214 case ARM::VLD1DUPd32wb_fixed:
215 case ARM::VLD1DUPd32wb_register:
216 case ARM::VLD1DUPd16:
217 case ARM::VLD1DUPd16wb_fixed:
218 case ARM::VLD1DUPd16wb_register:
219 case ARM::VLD1DUPd8:
220 case ARM::VLD1DUPd8wb_fixed:
221 case ARM::VLD1DUPd8wb_register:
222 case ARM::VLD1DUPq32:
223 case ARM::VLD1DUPq32wb_fixed:
224 case ARM::VLD1DUPq32wb_register:
225 case ARM::VLD1DUPq16:
226 case ARM::VLD1DUPq16wb_fixed:
227 case ARM::VLD1DUPq16wb_register:
228 case ARM::VLD1DUPq8:
229 case ARM::VLD1DUPq8wb_fixed:
230 case ARM::VLD1DUPq8wb_register:
231 // VMOV
232 case ARM::VSETLNi32:
233 case ARM::VSETLNi16:
234 case ARM::VSETLNi8:
235 return CondCodeIsAL(MI);
236 };
237
238 return false;
239}
240
241bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) {
242 LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n");
243 auto &STI = F.getSubtarget<ARMSubtarget>();
244
245 // Fix not requested or AES instructions not present: skip pass.
246 if (!STI.hasAES() || !STI.fixCortexA57AES1742098())
247 return false;
248
249 const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo();
250 const ARMBaseInstrInfo *TII = STI.getInstrInfo();
251
252 auto &RDA = getAnalysis<ReachingDefAnalysis>();
253
254 // Analyze whole function to find instructions which need fixing up...
255 SmallVector<AESFixupLocation> FixupLocsForFn{};
256 analyzeMF(MF&: F, RDA, TRI, FixupLocsForFn);
257
258 // ... and fix the instructions up all at the same time.
259 bool Changed = false;
260 LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n");
261 for (AESFixupLocation &FixupLoc : FixupLocsForFn) {
262 insertAESFixup(FixupLoc, TII, TRI);
263 Changed |= true;
264 }
265
266 return Changed;
267}
268
269void ARMFixCortexA57AES1742098::analyzeMF(
270 MachineFunction &MF, ReachingDefAnalysis &RDA,
271 const ARMBaseRegisterInfo *TRI,
272 SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const {
273 unsigned MaxAllowedFixups = 0;
274
275 for (MachineBasicBlock &MBB : MF) {
276 for (MachineInstr &MI : MBB) {
277 if (!isFirstAESPairInstr(MI))
278 continue;
279
280 // Found an instruction to check the operands of.
281 LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI);
282 assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 &&
283 "Unknown AES Instruction Format. Expected 1 def, 2 uses.");
284
285 // A maximum of two fixups should be inserted for each AES pair (one per
286 // register use).
287 MaxAllowedFixups += 2;
288
289 // Inspect all operands, choosing whether to insert a fixup.
290 for (MachineOperand &MOp : MI.uses()) {
291 SmallPtrSet<MachineInstr *, 1> AllDefs{};
292 RDA.getGlobalReachingDefs(MI: &MI, Reg: MOp.getReg(), Defs&: AllDefs);
293
294 // Planned Fixup: This should be added to FixupLocsForFn at most once.
295 AESFixupLocation NewLoc{.Block: &MBB, .InsertionPt: &MI, .MOp: &MOp};
296
297 // In small functions with loops, this operand may be both a live-in and
298 // have definitions within the function itself. These will need a fixup.
299 bool IsLiveIn = MF.front().isLiveIn(Reg: MOp.getReg());
300
301 // If the register doesn't have defining instructions, and is not a
302 // live-in, then something is wrong and the fixup must always be
303 // inserted to be safe.
304 if (!IsLiveIn && AllDefs.size() == 0) {
305 LLVM_DEBUG(dbgs()
306 << "Fixup Planned: No Defining Instrs found, not live-in: "
307 << printReg(MOp.getReg(), TRI) << "\n");
308 FixupLocsForFn.emplace_back(Args&: NewLoc);
309 continue;
310 }
311
312 auto IsUnsafe = [](MachineInstr *MI) -> bool {
313 return !isSafeAESInput(MI&: *MI);
314 };
315 size_t UnsafeCount = llvm::count_if(Range&: AllDefs, P: IsUnsafe);
316
317 // If there are no unsafe definitions...
318 if (UnsafeCount == 0) {
319 // ... and the register is not live-in ...
320 if (!IsLiveIn) {
321 // ... then skip the fixup.
322 LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
323 << printReg(MOp.getReg(), TRI) << "\n");
324 continue;
325 }
326
327 // Otherwise, the only unsafe "definition" is a live-in, so insert the
328 // fixup at the start of the function.
329 LLVM_DEBUG(dbgs()
330 << "Fixup Planned: Live-In (with safe defining instrs): "
331 << printReg(MOp.getReg(), TRI) << "\n");
332 NewLoc.Block = &MF.front();
333 NewLoc.InsertionPt = &*NewLoc.Block->begin();
334 LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
335 << *NewLoc.InsertionPt);
336 FixupLocsForFn.emplace_back(Args&: NewLoc);
337 continue;
338 }
339
340 // If a fixup is needed in more than one place, then the best place to
341 // insert it is adjacent to the use rather than introducing a fixup
342 // adjacent to each def.
343 //
344 // FIXME: It might be better to hoist this to the start of the BB, if
345 // possible.
346 if (IsLiveIn || UnsafeCount > 1) {
347 LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
348 "(including live-ins): "
349 << printReg(MOp.getReg(), TRI) << "\n");
350 FixupLocsForFn.emplace_back(Args&: NewLoc);
351 continue;
352 }
353
354 assert(UnsafeCount == 1 && !IsLiveIn &&
355 "At this point, there should be one unsafe defining instrs "
356 "and the defined register should not be a live-in.");
357 SmallPtrSetIterator<MachineInstr *> It =
358 llvm::find_if(Range&: AllDefs, P: IsUnsafe);
359 assert(It != AllDefs.end() &&
360 "UnsafeCount == 1 but No Unsafe MachineInstr found.");
361 MachineInstr *DefMI = *It;
362
363 LLVM_DEBUG(
364 dbgs() << "Fixup Planned: Found single unsafe defining instrs for "
365 << printReg(MOp.getReg(), TRI) << ": " << *DefMI);
366
367 // There is one unsafe defining instruction, which needs a fixup. It is
368 // generally good to hoist the fixup to be adjacent to the defining
369 // instruction rather than the using instruction, as the using
370 // instruction may be inside a loop when the defining instruction is
371 // not.
372 MachineBasicBlock::iterator DefIt = DefMI;
373 ++DefIt;
374 if (DefIt != DefMI->getParent()->end()) {
375 LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
376 << "And immediately before " << *DefIt);
377 NewLoc.Block = DefIt->getParent();
378 NewLoc.InsertionPt = &*DefIt;
379 }
380
381 FixupLocsForFn.emplace_back(Args&: NewLoc);
382 }
383 }
384 }
385
386 assert(FixupLocsForFn.size() <= MaxAllowedFixups &&
387 "Inserted too many fixups for this function.");
388 (void)MaxAllowedFixups;
389}
390
391void ARMFixCortexA57AES1742098::insertAESFixup(
392 AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
393 const ARMBaseRegisterInfo *TRI) const {
394 MachineOperand *OperandToFixup = FixupLoc.MOp;
395
396 assert(OperandToFixup->isReg() && "OperandToFixup must be a register");
397 Register RegToFixup = OperandToFixup->getReg();
398
399 LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI)
400 << " before: " << *FixupLoc.InsertionPt);
401
402 // Insert the new `VORRq qN, qN, qN`. There are a few details here:
403 //
404 // The uses are marked as killed, even if the original use of OperandToFixup
405 // is not killed, as the new instruction is clobbering the register. This is
406 // safe even if there are other uses of `qN`, as the VORRq value-wise a no-op
407 // (it is inserted for microarchitectural reasons).
408 //
409 // The def and the uses are still marked as Renamable if the original register
410 // was, to avoid having to rummage through all the other uses and defs and
411 // unset their renamable bits.
412 unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0;
413 BuildMI(BB&: *FixupLoc.Block, I: FixupLoc.InsertionPt, MIMD: DebugLoc(),
414 MCID: TII->get(Opcode: ARM::VORRq))
415 .addReg(RegNo: RegToFixup, flags: RegState::Define | Renamable)
416 .addReg(RegNo: RegToFixup, flags: RegState::Kill | Renamable)
417 .addReg(RegNo: RegToFixup, flags: RegState::Kill | Renamable)
418 .addImm(Val: (uint64_t)ARMCC::AL)
419 .addReg(RegNo: ARM::NoRegister);
420}
421
422// Factory function used by AArch64TargetMachine to add the pass to
423// the passmanager.
424FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() {
425 return new ARMFixCortexA57AES1742098();
426}
427