1//===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This pass works around a Cortex Core Fused AES erratum:
9// - Cortex-A57 Erratum 1742098
10// - Cortex-A72 Erratum 1655431
11//
12// The erratum may be triggered if an input vector register to AESE or AESD was
13// last written by an instruction that only updated 32 bits of it. This can
14// occur for either of the input registers.
15//
16// The workaround chosen is to update the input register using `r = VORRq r, r`,
17// as this updates all 128 bits of the register unconditionally, but does not
18// change the values observed in `r`, making the input safe.
19//
20// This pass has to be conservative in a few cases:
21// - an input vector register to the AES instruction is defined outside the
22// current function, where we have to assume the register was updated in an
23// unsafe way; and
24// - an input vector register to the AES instruction is updated along multiple
25// different control-flow paths, where we have to ensure all the register
26// updating instructions are safe.
27//
// Both of these cases may apply to an input vector register. In either case, we
29// need to ensure that, when the pass is finished, there exists a safe
30// instruction between every unsafe register updating instruction and the AES
31// instruction.
32//
33//===----------------------------------------------------------------------===//
34
35#include "ARM.h"
36#include "ARMBaseInstrInfo.h"
37#include "ARMBaseRegisterInfo.h"
38#include "ARMSubtarget.h"
39#include "Utils/ARMBaseInfo.h"
40#include "llvm/ADT/STLExtras.h"
41#include "llvm/ADT/SmallPtrSet.h"
42#include "llvm/ADT/SmallVector.h"
43#include "llvm/ADT/StringRef.h"
44#include "llvm/CodeGen/MachineBasicBlock.h"
45#include "llvm/CodeGen/MachineFunction.h"
46#include "llvm/CodeGen/MachineFunctionPass.h"
47#include "llvm/CodeGen/MachineInstr.h"
48#include "llvm/CodeGen/MachineInstrBuilder.h"
49#include "llvm/CodeGen/MachineInstrBundleIterator.h"
50#include "llvm/CodeGen/MachineOperand.h"
51#include "llvm/CodeGen/ReachingDefAnalysis.h"
52#include "llvm/CodeGen/Register.h"
53#include "llvm/CodeGen/TargetRegisterInfo.h"
54#include "llvm/IR/DebugLoc.h"
55#include "llvm/InitializePasses.h"
56#include "llvm/MC/MCInstrDesc.h"
57#include "llvm/Pass.h"
58#include "llvm/PassRegistry.h"
59#include "llvm/Support/Debug.h"
60#include "llvm/Support/raw_ostream.h"
61#include <assert.h>
62#include <stdint.h>
63
64using namespace llvm;
65
66#define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098"
67
68//===----------------------------------------------------------------------===//
69
70namespace {
71class ARMFixCortexA57AES1742098 : public MachineFunctionPass {
72public:
73 static char ID;
74 explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) {
75 initializeARMFixCortexA57AES1742098Pass(*PassRegistry::getPassRegistry());
76 }
77
78 bool runOnMachineFunction(MachineFunction &F) override;
79
80 MachineFunctionProperties getRequiredProperties() const override {
81 return MachineFunctionProperties().set(
82 MachineFunctionProperties::Property::NoVRegs);
83 }
84
85 StringRef getPassName() const override {
86 return "ARM fix for Cortex-A57 AES Erratum 1742098";
87 }
88
89 void getAnalysisUsage(AnalysisUsage &AU) const override {
90 AU.addRequired<ReachingDefAnalysis>();
91 AU.setPreservesCFG();
92 MachineFunctionPass::getAnalysisUsage(AU);
93 }
94
95private:
96 // This is the information needed to insert the fixup in the right place.
97 struct AESFixupLocation {
98 MachineBasicBlock *Block;
99 // The fixup instruction will be inserted *before* InsertionPt.
100 MachineInstr *InsertionPt;
101 MachineOperand *MOp;
102 };
103
104 void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA,
105 const ARMBaseRegisterInfo *TRI,
106 SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const;
107
108 void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
109 const ARMBaseRegisterInfo *TRI) const;
110
111 static bool isFirstAESPairInstr(MachineInstr &MI);
112 static bool isSafeAESInput(MachineInstr &MI);
113};
114char ARMFixCortexA57AES1742098::ID = 0;
115
116} // end anonymous namespace
117
118INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE,
119 "ARM fix for Cortex-A57 AES Erratum 1742098", false,
120 false)
121INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis);
122INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE,
123 "ARM fix for Cortex-A57 AES Erratum 1742098", false, false)
124
125//===----------------------------------------------------------------------===//
126
127bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) {
128 unsigned Opc = MI.getOpcode();
129 return Opc == ARM::AESD || Opc == ARM::AESE;
130}
131
132bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) {
133 auto CondCodeIsAL = [](MachineInstr &MI) -> bool {
134 int CCIdx = MI.findFirstPredOperandIdx();
135 if (CCIdx == -1)
136 return false;
137 return MI.getOperand(i: CCIdx).getImm() == (int64_t)ARMCC::AL;
138 };
139
140 switch (MI.getOpcode()) {
141 // Unknown: Assume not safe.
142 default:
143 return false;
144 // 128-bit wide AES instructions
145 case ARM::AESD:
146 case ARM::AESE:
147 case ARM::AESMC:
148 case ARM::AESIMC:
149 // No CondCode.
150 return true;
151 // 128-bit and 64-bit wide bitwise ops (when condition = al)
152 case ARM::VANDd:
153 case ARM::VANDq:
154 case ARM::VORRd:
155 case ARM::VORRq:
156 case ARM::VEORd:
157 case ARM::VEORq:
158 case ARM::VMVNd:
159 case ARM::VMVNq:
160 // VMOV of 64-bit value between D registers (when condition = al)
161 case ARM::VMOVD:
162 // VMOV of 64 bit value from GPRs (when condition = al)
163 case ARM::VMOVDRR:
164 // VMOV of immediate into D or Q registers (when condition = al)
165 case ARM::VMOVv2i64:
166 case ARM::VMOVv1i64:
167 case ARM::VMOVv2f32:
168 case ARM::VMOVv4f32:
169 case ARM::VMOVv2i32:
170 case ARM::VMOVv4i32:
171 case ARM::VMOVv4i16:
172 case ARM::VMOVv8i16:
173 case ARM::VMOVv8i8:
174 case ARM::VMOVv16i8:
175 // Loads (when condition = al)
176 // VLD Dn, [Rn, #imm]
177 case ARM::VLDRD:
178 // VLDM
179 case ARM::VLDMDDB_UPD:
180 case ARM::VLDMDIA_UPD:
181 case ARM::VLDMDIA:
182 // VLDn to all lanes.
183 case ARM::VLD1d64:
184 case ARM::VLD1q64:
185 case ARM::VLD1d32:
186 case ARM::VLD1q32:
187 case ARM::VLD2b32:
188 case ARM::VLD2d32:
189 case ARM::VLD2q32:
190 case ARM::VLD1d16:
191 case ARM::VLD1q16:
192 case ARM::VLD2d16:
193 case ARM::VLD2q16:
194 case ARM::VLD1d8:
195 case ARM::VLD1q8:
196 case ARM::VLD2b8:
197 case ARM::VLD2d8:
198 case ARM::VLD2q8:
199 case ARM::VLD3d32:
200 case ARM::VLD3q32:
201 case ARM::VLD3d16:
202 case ARM::VLD3q16:
203 case ARM::VLD3d8:
204 case ARM::VLD3q8:
205 case ARM::VLD4d32:
206 case ARM::VLD4q32:
207 case ARM::VLD4d16:
208 case ARM::VLD4q16:
209 case ARM::VLD4d8:
210 case ARM::VLD4q8:
211 // VLD1 (single element to one lane)
212 case ARM::VLD1LNd32:
213 case ARM::VLD1LNd32_UPD:
214 case ARM::VLD1LNd8:
215 case ARM::VLD1LNd8_UPD:
216 case ARM::VLD1LNd16:
217 case ARM::VLD1LNd16_UPD:
218 // VLD1 (single element to all lanes)
219 case ARM::VLD1DUPd32:
220 case ARM::VLD1DUPd32wb_fixed:
221 case ARM::VLD1DUPd32wb_register:
222 case ARM::VLD1DUPd16:
223 case ARM::VLD1DUPd16wb_fixed:
224 case ARM::VLD1DUPd16wb_register:
225 case ARM::VLD1DUPd8:
226 case ARM::VLD1DUPd8wb_fixed:
227 case ARM::VLD1DUPd8wb_register:
228 case ARM::VLD1DUPq32:
229 case ARM::VLD1DUPq32wb_fixed:
230 case ARM::VLD1DUPq32wb_register:
231 case ARM::VLD1DUPq16:
232 case ARM::VLD1DUPq16wb_fixed:
233 case ARM::VLD1DUPq16wb_register:
234 case ARM::VLD1DUPq8:
235 case ARM::VLD1DUPq8wb_fixed:
236 case ARM::VLD1DUPq8wb_register:
237 // VMOV
238 case ARM::VSETLNi32:
239 case ARM::VSETLNi16:
240 case ARM::VSETLNi8:
241 return CondCodeIsAL(MI);
242 };
243
244 return false;
245}
246
247bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) {
248 LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n");
249 auto &STI = F.getSubtarget<ARMSubtarget>();
250
251 // Fix not requested or AES instructions not present: skip pass.
252 if (!STI.hasAES() || !STI.fixCortexA57AES1742098())
253 return false;
254
255 const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo();
256 const ARMBaseInstrInfo *TII = STI.getInstrInfo();
257
258 auto &RDA = getAnalysis<ReachingDefAnalysis>();
259
260 // Analyze whole function to find instructions which need fixing up...
261 SmallVector<AESFixupLocation> FixupLocsForFn{};
262 analyzeMF(MF&: F, RDA, TRI, FixupLocsForFn);
263
264 // ... and fix the instructions up all at the same time.
265 bool Changed = false;
266 LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n");
267 for (AESFixupLocation &FixupLoc : FixupLocsForFn) {
268 insertAESFixup(FixupLoc, TII, TRI);
269 Changed |= true;
270 }
271
272 return Changed;
273}
274
275void ARMFixCortexA57AES1742098::analyzeMF(
276 MachineFunction &MF, ReachingDefAnalysis &RDA,
277 const ARMBaseRegisterInfo *TRI,
278 SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const {
279 unsigned MaxAllowedFixups = 0;
280
281 for (MachineBasicBlock &MBB : MF) {
282 for (MachineInstr &MI : MBB) {
283 if (!isFirstAESPairInstr(MI))
284 continue;
285
286 // Found an instruction to check the operands of.
287 LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI);
288 assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 &&
289 "Unknown AES Instruction Format. Expected 1 def, 2 uses.");
290
291 // A maximum of two fixups should be inserted for each AES pair (one per
292 // register use).
293 MaxAllowedFixups += 2;
294
295 // Inspect all operands, choosing whether to insert a fixup.
296 for (MachineOperand &MOp : MI.uses()) {
297 SmallPtrSet<MachineInstr *, 1> AllDefs{};
298 RDA.getGlobalReachingDefs(MI: &MI, PhysReg: MOp.getReg(), Defs&: AllDefs);
299
300 // Planned Fixup: This should be added to FixupLocsForFn at most once.
301 AESFixupLocation NewLoc{.Block: &MBB, .InsertionPt: &MI, .MOp: &MOp};
302
303 // In small functions with loops, this operand may be both a live-in and
304 // have definitions within the function itself. These will need a fixup.
305 bool IsLiveIn = MF.front().isLiveIn(Reg: MOp.getReg());
306
307 // If the register doesn't have defining instructions, and is not a
308 // live-in, then something is wrong and the fixup must always be
309 // inserted to be safe.
310 if (!IsLiveIn && AllDefs.size() == 0) {
311 LLVM_DEBUG(dbgs()
312 << "Fixup Planned: No Defining Instrs found, not live-in: "
313 << printReg(MOp.getReg(), TRI) << "\n");
314 FixupLocsForFn.emplace_back(Args&: NewLoc);
315 continue;
316 }
317
318 auto IsUnsafe = [](MachineInstr *MI) -> bool {
319 return !isSafeAESInput(MI&: *MI);
320 };
321 size_t UnsafeCount = llvm::count_if(Range&: AllDefs, P: IsUnsafe);
322
323 // If there are no unsafe definitions...
324 if (UnsafeCount == 0) {
325 // ... and the register is not live-in ...
326 if (!IsLiveIn) {
327 // ... then skip the fixup.
328 LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
329 << printReg(MOp.getReg(), TRI) << "\n");
330 continue;
331 }
332
333 // Otherwise, the only unsafe "definition" is a live-in, so insert the
334 // fixup at the start of the function.
335 LLVM_DEBUG(dbgs()
336 << "Fixup Planned: Live-In (with safe defining instrs): "
337 << printReg(MOp.getReg(), TRI) << "\n");
338 NewLoc.Block = &MF.front();
339 NewLoc.InsertionPt = &*NewLoc.Block->begin();
340 LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
341 << *NewLoc.InsertionPt);
342 FixupLocsForFn.emplace_back(Args&: NewLoc);
343 continue;
344 }
345
346 // If a fixup is needed in more than one place, then the best place to
347 // insert it is adjacent to the use rather than introducing a fixup
348 // adjacent to each def.
349 //
350 // FIXME: It might be better to hoist this to the start of the BB, if
351 // possible.
352 if (IsLiveIn || UnsafeCount > 1) {
353 LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
354 "(including live-ins): "
355 << printReg(MOp.getReg(), TRI) << "\n");
356 FixupLocsForFn.emplace_back(Args&: NewLoc);
357 continue;
358 }
359
360 assert(UnsafeCount == 1 && !IsLiveIn &&
361 "At this point, there should be one unsafe defining instrs "
362 "and the defined register should not be a live-in.");
363 SmallPtrSetIterator<MachineInstr *> It =
364 llvm::find_if(Range&: AllDefs, P: IsUnsafe);
365 assert(It != AllDefs.end() &&
366 "UnsafeCount == 1 but No Unsafe MachineInstr found.");
367 MachineInstr *DefMI = *It;
368
369 LLVM_DEBUG(
370 dbgs() << "Fixup Planned: Found single unsafe defining instrs for "
371 << printReg(MOp.getReg(), TRI) << ": " << *DefMI);
372
373 // There is one unsafe defining instruction, which needs a fixup. It is
374 // generally good to hoist the fixup to be adjacent to the defining
375 // instruction rather than the using instruction, as the using
376 // instruction may be inside a loop when the defining instruction is
377 // not.
378 MachineBasicBlock::iterator DefIt = DefMI;
379 ++DefIt;
380 if (DefIt != DefMI->getParent()->end()) {
381 LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
382 << "And immediately before " << *DefIt);
383 NewLoc.Block = DefIt->getParent();
384 NewLoc.InsertionPt = &*DefIt;
385 }
386
387 FixupLocsForFn.emplace_back(Args&: NewLoc);
388 }
389 }
390 }
391
392 assert(FixupLocsForFn.size() <= MaxAllowedFixups &&
393 "Inserted too many fixups for this function.");
394 (void)MaxAllowedFixups;
395}
396
397void ARMFixCortexA57AES1742098::insertAESFixup(
398 AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
399 const ARMBaseRegisterInfo *TRI) const {
400 MachineOperand *OperandToFixup = FixupLoc.MOp;
401
402 assert(OperandToFixup->isReg() && "OperandToFixup must be a register");
403 Register RegToFixup = OperandToFixup->getReg();
404
405 LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI)
406 << " before: " << *FixupLoc.InsertionPt);
407
408 // Insert the new `VORRq qN, qN, qN`. There are a few details here:
409 //
410 // The uses are marked as killed, even if the original use of OperandToFixup
411 // is not killed, as the new instruction is clobbering the register. This is
412 // safe even if there are other uses of `qN`, as the VORRq value-wise a no-op
413 // (it is inserted for microarchitectural reasons).
414 //
415 // The def and the uses are still marked as Renamable if the original register
416 // was, to avoid having to rummage through all the other uses and defs and
417 // unset their renamable bits.
418 unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0;
419 BuildMI(BB&: *FixupLoc.Block, I: FixupLoc.InsertionPt, MIMD: DebugLoc(),
420 MCID: TII->get(Opcode: ARM::VORRq))
421 .addReg(RegNo: RegToFixup, flags: RegState::Define | Renamable)
422 .addReg(RegNo: RegToFixup, flags: RegState::Kill | Renamable)
423 .addReg(RegNo: RegToFixup, flags: RegState::Kill | Renamable)
424 .addImm(Val: (uint64_t)ARMCC::AL)
425 .addReg(RegNo: ARM::NoRegister);
426}
427
428// Factory function used by AArch64TargetMachine to add the pass to
429// the passmanager.
430FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() {
431 return new ARMFixCortexA57AES1742098();
432}
433