ARMFixCortexA57AES1742098Pass.cpp source code [llvm_projects/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp]

1	//===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	// This pass works around a Cortex Core Fused AES erratum:
9	// - Cortex-A57 Erratum 1742098
10	// - Cortex-A72 Erratum 1655431
11	//
12	// The erratum may be triggered if an input vector register to AESE or AESD was
13	// last written by an instruction that only updated 32 bits of it. This can
14	// occur for either of the input registers.
15	//
16	// The workaround chosen is to update the input register using `r = VORRq r, r`,
17	// as this updates all 128 bits of the register unconditionally, but does not
18	// change the values observed in `r`, making the input safe.
19	//
20	// This pass has to be conservative in a few cases:
21	// - an input vector register to the AES instruction is defined outside the
22	// current function, where we have to assume the register was updated in an
23	// unsafe way; and
24	// - an input vector register to the AES instruction is updated along multiple
25	// different control-flow paths, where we have to ensure all the register
26	// updating instructions are safe.
27	//
28	// Both of these cases may apply to an input vector register. In either case, we
29	// need to ensure that, when the pass is finished, there exists a safe
30	// instruction between every unsafe register updating instruction and the AES
31	// instruction.
32	//
33	//===----------------------------------------------------------------------===//
34
35	#include "ARM.h"
36	#include "ARMBaseInstrInfo.h"
37	#include "ARMBaseRegisterInfo.h"
38	#include "ARMSubtarget.h"
39	#include "Utils/ARMBaseInfo.h"
40	#include "llvm/ADT/STLExtras.h"
41	#include "llvm/ADT/SmallPtrSet.h"
42	#include "llvm/ADT/SmallVector.h"
43	#include "llvm/ADT/StringRef.h"
44	#include "llvm/CodeGen/MachineBasicBlock.h"
45	#include "llvm/CodeGen/MachineFunction.h"
46	#include "llvm/CodeGen/MachineFunctionPass.h"
47	#include "llvm/CodeGen/MachineInstr.h"
48	#include "llvm/CodeGen/MachineInstrBuilder.h"
49	#include "llvm/CodeGen/MachineInstrBundleIterator.h"
50	#include "llvm/CodeGen/MachineOperand.h"
51	#include "llvm/CodeGen/ReachingDefAnalysis.h"
52	#include "llvm/CodeGen/Register.h"
53	#include "llvm/CodeGen/TargetRegisterInfo.h"
54	#include "llvm/IR/DebugLoc.h"
55	#include "llvm/Pass.h"
56	#include "llvm/Support/Debug.h"
57	#include "llvm/Support/raw_ostream.h"
58	#include <assert.h>
59	#include <stdint.h>
60
61	using namespace llvm;
62
63	#define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098"
64
65	//===----------------------------------------------------------------------===//
66
67	namespace {
68	class ARMFixCortexA57AES1742098 : public MachineFunctionPass {
69	public:
70	static char ID;
71	explicit ARMFixCortexA57AES1742098() : MachineFunctionPass (ID) {}
72
73	bool runOnMachineFunction(MachineFunction &F) override;
74
75	MachineFunctionProperties getRequiredProperties() const override {
76	return MachineFunctionProperties ().setNoVRegs();
77	}
78
79	StringRef getPassName() const override {
80	return "ARM fix for Cortex-A57 AES Erratum 1742098";
81	}
82
83	void getAnalysisUsage(AnalysisUsage &AU) const override {
84	AU.addRequired<ReachingDefAnalysis>();
85	AU.setPreservesCFG();
86	MachineFunctionPass::getAnalysisUsage(AU);
87	}
88
89	private:
90	// This is the information needed to insert the fixup in the right place.
91	struct AESFixupLocation {
92	MachineBasicBlock *Block;
93	// The fixup instruction will be inserted before* InsertionPt.*
94	MachineInstr *InsertionPt;
95	MachineOperand *MOp;
96	};
97
98	void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA,
99	const ARMBaseRegisterInfo *TRI,
100	SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const;
101
102	void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
103	const ARMBaseRegisterInfo TRI) const*;
104
105	static bool isFirstAESPairInstr(MachineInstr &MI);
106	static bool isSafeAESInput(MachineInstr &MI);
107	};
108	char ARMFixCortexA57AES1742098::ID = `0`;
109
110	} // end anonymous namespace
111
112	INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE,
113	"ARM fix for Cortex-A57 AES Erratum 1742098", false,
114	false)
115	INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis);
116	INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE,
117	"ARM fix for Cortex-A57 AES Erratum 1742098", false, false)
118
119	//===----------------------------------------------------------------------===//
120
121	bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) {
122	unsigned Opc = MI.getOpcode();
123	return Opc == ARM::AESD \|\| Opc == ARM::AESE;
124	}
125
126	bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) {
127	auto CondCodeIsAL = [](MachineInstr &MI) -> bool {
128	int CCIdx = MI.findFirstPredOperandIdx();
129	if (CCIdx == -`1`)
130	return false;
131	return MI.getOperand(i: CCIdx).getImm() == (int64_t)ARMCC::AL;
132	};
133
134	switch (MI.getOpcode()) {
135	// Unknown: Assume not safe.
136	default:
137	return false;
138	// 128-bit wide AES instructions
139	case ARM::AESD:
140	case ARM::AESE:
141	case ARM::AESMC:
142	case ARM::AESIMC:
143	// No CondCode.
144	return true;
145	// 128-bit and 64-bit wide bitwise ops (when condition = al)
146	case ARM::VANDd:
147	case ARM::VANDq:
148	case ARM::VORRd:
149	case ARM::VORRq:
150	case ARM::VEORd:
151	case ARM::VEORq:
152	case ARM::VMVNd:
153	case ARM::VMVNq:
154	// VMOV of 64-bit value between D registers (when condition = al)
155	case ARM::VMOVD:
156	// VMOV of 64 bit value from GPRs (when condition = al)
157	case ARM::VMOVDRR:
158	// VMOV of immediate into D or Q registers (when condition = al)
159	case ARM::VMOVv2i64:
160	case ARM::VMOVv1i64:
161	case ARM::VMOVv2f32:
162	case ARM::VMOVv4f32:
163	case ARM::VMOVv2i32:
164	case ARM::VMOVv4i32:
165	case ARM::VMOVv4i16:
166	case ARM::VMOVv8i16:
167	case ARM::VMOVv8i8:
168	case ARM::VMOVv16i8:
169	// Loads (when condition = al)
170	// VLD Dn, [Rn, #imm]
171	case ARM::VLDRD:
172	// VLDM
173	case ARM::VLDMDDB_UPD:
174	case ARM::VLDMDIA_UPD:
175	case ARM::VLDMDIA:
176	// VLDn to all lanes.
177	case ARM::VLD1d64:
178	case ARM::VLD1q64:
179	case ARM::VLD1d32:
180	case ARM::VLD1q32:
181	case ARM::VLD2b32:
182	case ARM::VLD2d32:
183	case ARM::VLD2q32:
184	case ARM::VLD1d16:
185	case ARM::VLD1q16:
186	case ARM::VLD2d16:
187	case ARM::VLD2q16:
188	case ARM::VLD1d8:
189	case ARM::VLD1q8:
190	case ARM::VLD2b8:
191	case ARM::VLD2d8:
192	case ARM::VLD2q8:
193	case ARM::VLD3d32:
194	case ARM::VLD3q32:
195	case ARM::VLD3d16:
196	case ARM::VLD3q16:
197	case ARM::VLD3d8:
198	case ARM::VLD3q8:
199	case ARM::VLD4d32:
200	case ARM::VLD4q32:
201	case ARM::VLD4d16:
202	case ARM::VLD4q16:
203	case ARM::VLD4d8:
204	case ARM::VLD4q8:
205	// VLD1 (single element to one lane)
206	case ARM::VLD1LNd32:
207	case ARM::VLD1LNd32_UPD:
208	case ARM::VLD1LNd8:
209	case ARM::VLD1LNd8_UPD:
210	case ARM::VLD1LNd16:
211	case ARM::VLD1LNd16_UPD:
212	// VLD1 (single element to all lanes)
213	case ARM::VLD1DUPd32:
214	case ARM::VLD1DUPd32wb_fixed:
215	case ARM::VLD1DUPd32wb_register:
216	case ARM::VLD1DUPd16:
217	case ARM::VLD1DUPd16wb_fixed:
218	case ARM::VLD1DUPd16wb_register:
219	case ARM::VLD1DUPd8:
220	case ARM::VLD1DUPd8wb_fixed:
221	case ARM::VLD1DUPd8wb_register:
222	case ARM::VLD1DUPq32:
223	case ARM::VLD1DUPq32wb_fixed:
224	case ARM::VLD1DUPq32wb_register:
225	case ARM::VLD1DUPq16:
226	case ARM::VLD1DUPq16wb_fixed:
227	case ARM::VLD1DUPq16wb_register:
228	case ARM::VLD1DUPq8:
229	case ARM::VLD1DUPq8wb_fixed:
230	case ARM::VLD1DUPq8wb_register:
231	// VMOV
232	case ARM::VSETLNi32:
233	case ARM::VSETLNi16:
234	case ARM::VSETLNi8:
235	return CondCodeIsAL (MI);
236	};
237
238	return false;
239	}
240
241	bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) {
242	LLVM_DEBUG(dbgs() << "*** ARMFixCortexA57AES1742098 ***\n");
243	auto &STI = F.getSubtarget<ARMSubtarget>();
244
245	// Fix not requested or AES instructions not present: skip pass.
246	if (!STI.hasAES() \|\| !STI.fixCortexA57AES1742098())
247	return false;
248
249	const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo();
250	const ARMBaseInstrInfo *TII = STI.getInstrInfo();
251
252	auto &RDA = getAnalysis<ReachingDefAnalysis>();
253
254	// Analyze whole function to find instructions which need fixing up...
255	SmallVector<AESFixupLocation> FixupLocsForFn{};
256	analyzeMF(MF&: F, RDA, TRI, FixupLocsForFn);
257
258	// ... and fix the instructions up all at the same time.
259	bool Changed = false;
260	LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n");
261	for (AESFixupLocation &FixupLoc : FixupLocsForFn) {
262	insertAESFixup(FixupLoc, TII, TRI);
263	Changed \|= true;
264	}
265
266	return Changed;
267	}
268
269	void ARMFixCortexA57AES1742098::analyzeMF(
270	MachineFunction &MF, ReachingDefAnalysis &RDA,
271	const ARMBaseRegisterInfo *TRI,
272	SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const {
273	unsigned MaxAllowedFixups = `0`;
274
275	for (MachineBasicBlock &MBB : MF) {
276	for (MachineInstr &MI : MBB) {
277	if (!isFirstAESPairInstr(MI))
278	continue;
279
280	// Found an instruction to check the operands of.
281	LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI);
282	assert(MI.getNumExplicitOperands() == `3` && MI.getNumExplicitDefs() == `1` &&
283	"Unknown AES Instruction Format. Expected 1 def, 2 uses.");
284
285	// A maximum of two fixups should be inserted for each AES pair (one per
286	// register use).
287	MaxAllowedFixups += `2`;
288
289	// Inspect all operands, choosing whether to insert a fixup.
290	for (MachineOperand &MOp : MI.uses()) {
291	SmallPtrSet<MachineInstr *, `1`> AllDefs{};
292	RDA.getGlobalReachingDefs(MI: &MI, Reg: MOp.getReg(), Defs&: AllDefs);
293
294	// Planned Fixup: This should be added to FixupLocsForFn at most once.
295	AESFixupLocation NewLoc{.Block: &MBB, .InsertionPt: &MI, .MOp: &MOp};
296
297	// In small functions with loops, this operand may be both a live-in and
298	// have definitions within the function itself. These will need a fixup.
299	bool IsLiveIn = MF.front().isLiveIn(Reg: MOp.getReg());
300
301	// If the register doesn't have defining instructions, and is not a
302	// live-in, then something is wrong and the fixup must always be
303	// inserted to be safe.
304	if (!IsLiveIn && AllDefs.size() == `0`) {
305	LLVM_DEBUG(dbgs()
306	<< "Fixup Planned: No Defining Instrs found, not live-in: "
307	<< printReg(MOp.getReg(), TRI) << "\n");
308	FixupLocsForFn.emplace_back(Args&: NewLoc);
309	continue;
310	}
311
312	auto IsUnsafe = [](MachineInstr MI) -> bool* {
313	return !isSafeAESInput(MI&: *MI);
314	};
315	size_t UnsafeCount = llvm::count_if(Range&: AllDefs, P: IsUnsafe);
316
317	// If there are no unsafe definitions...
318	if (UnsafeCount == `0`) {
319	// ... and the register is not live-in ...
320	if (!IsLiveIn) {
321	// ... then skip the fixup.
322	LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
323	<< printReg(MOp.getReg(), TRI) << "\n");
324	continue;
325	}
326
327	// Otherwise, the only unsafe "definition" is a live-in, so insert the
328	// fixup at the start of the function.
329	LLVM_DEBUG(dbgs()
330	<< "Fixup Planned: Live-In (with safe defining instrs): "
331	<< printReg(MOp.getReg(), TRI) << "\n");
332	NewLoc.Block = &MF.front();
333	NewLoc.InsertionPt = &*NewLoc.Block->begin();
334	LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
335	<< *NewLoc.InsertionPt);
336	FixupLocsForFn.emplace_back(Args&: NewLoc);
337	continue;
338	}
339
340	// If a fixup is needed in more than one place, then the best place to
341	// insert it is adjacent to the use rather than introducing a fixup
342	// adjacent to each def.
343	//
344	// FIXME: It might be better to hoist this to the start of the BB, if
345	// possible.
346	if (IsLiveIn \|\| UnsafeCount > `1`) {
347	LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
348	"(including live-ins): "
349	<< printReg(MOp.getReg(), TRI) << "\n");
350	FixupLocsForFn.emplace_back(Args&: NewLoc);
351	continue;
352	}
353
354	assert(UnsafeCount == `1` && !IsLiveIn &&
355	"At this point, there should be one unsafe defining instrs "
356	"and the defined register should not be a live-in.");
357	SmallPtrSetIterator<MachineInstr *> It =
358	llvm::find_if(Range&: AllDefs, P: IsUnsafe);
359	assert(It != AllDefs.end() &&
360	"UnsafeCount == 1 but No Unsafe MachineInstr found.");
361	MachineInstr DefMI = It;
362
363	LLVM_DEBUG(
364	dbgs() << "Fixup Planned: Found single unsafe defining instrs for "
365	<< printReg(MOp.getReg(), TRI) << ": " << *DefMI);
366
367	// There is one unsafe defining instruction, which needs a fixup. It is
368	// generally good to hoist the fixup to be adjacent to the defining
369	// instruction rather than the using instruction, as the using
370	// instruction may be inside a loop when the defining instruction is
371	// not.
372	MachineBasicBlock::iterator DefIt = DefMI;
373	++DefIt;
374	if (DefIt != DefMI->getParent()->end()) {
375	LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
376	<< "And immediately before " << *DefIt);
377	NewLoc.Block = DefIt ->getParent();
378	NewLoc.InsertionPt = &*DefIt;
379	}
380
381	FixupLocsForFn.emplace_back(Args&: NewLoc);
382	}
383	}
384	}
385
386	assert(FixupLocsForFn.size() <= MaxAllowedFixups &&
387	"Inserted too many fixups for this function.");
388	(void)MaxAllowedFixups;
389	}
390
391	void ARMFixCortexA57AES1742098::insertAESFixup(
392	AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
393	const ARMBaseRegisterInfo TRI) const* {
394	MachineOperand *OperandToFixup = FixupLoc.MOp;
395
396	assert(OperandToFixup->isReg() && "OperandToFixup must be a register");
397	Register RegToFixup = OperandToFixup->getReg();
398
399	LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI)
400	<< " before: " << *FixupLoc.InsertionPt);
401
402	// Insert the new `VORRq qN, qN, qN`. There are a few details here:
403	//
404	// The uses are marked as killed, even if the original use of OperandToFixup
405	// is not killed, as the new instruction is clobbering the register. This is
406	// safe even if there are other uses of `qN`, as the VORRq value-wise a no-op
407	// (it is inserted for microarchitectural reasons).
408	//
409	// The def and the uses are still marked as Renamable if the original register
410	// was, to avoid having to rummage through all the other uses and defs and
411	// unset their renamable bits.
412	unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : `0`;
413	BuildMI(BB&: *FixupLoc.Block, I: FixupLoc.InsertionPt, MIMD: DebugLoc (),
414	MCID: TII->get(Opcode: ARM::VORRq))
415	.addReg(RegNo: RegToFixup, flags: RegState::Define \| Renamable)
416	.addReg(RegNo: RegToFixup, flags: RegState::Kill \| Renamable)
417	.addReg(RegNo: RegToFixup, flags: RegState::Kill \| Renamable)
418	.addImm(Val: (uint64_t)ARMCC::AL)
419	.addReg(RegNo: ARM::NoRegister);
420	}
421
422	// Factory function used by AArch64TargetMachine to add the pass to
423	// the passmanager.
424	FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() {
425	return new ARMFixCortexA57AES1742098 ();
426	}
427

Browse the source code of llvm_projects/llvm/lib/Target/ARM/ARMFixCortexA57AES1742098Pass.cpp