//===- FileAnalysis.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_CFI_VERIFY_FILE_ANALYSIS_H
10#define LLVM_CFI_VERIFY_FILE_ANALYSIS_H
11
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"

#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <system_error>
#include <vector>
42
43namespace llvm {
44namespace cfi_verify {
45
// Forward declaration only; the definition of GraphResult is not part of this
// header. Within this header it is used solely by const reference.
struct GraphResult;

// Declared here and defined in another translation unit.
// NOTE(review): the name suggests this makes the tool ignore DWARF debug
// information -- confirm against the definition, which is not visible here.
extern bool IgnoreDWARFFlag;
49
// Result of checking whether an indirect control flow instruction is protected
// by CFI. The enumerators carry no explicit values, so their order determines
// the underlying values; keep new entries at the end.
enum class CFIProtectionStatus {
  // This instruction is protected by CFI.
  PROTECTED,
  // The instruction is not an indirect control flow instruction, and thus
  // shouldn't be protected.
  FAIL_NOT_INDIRECT_CF,
  // There is a path to the instruction that was unexpected.
  FAIL_ORPHANS,
  // There is a path to the instruction from a conditional branch that does not
  // properly check the destination for this vcall/icall.
  FAIL_BAD_CONDITIONAL_BRANCH,
  // One of the operands of the indirect CF instruction is modified between the
  // CFI-check and execution.
  FAIL_REGISTER_CLOBBERED,
  // The instruction referenced does not exist. This normally indicates an
  // error in the program, where you try to validate a graph that was created
  // in a different FileAnalysis object.
  FAIL_INVALID_INSTRUCTION,
};
69
70StringRef stringCFIProtectionStatus(CFIProtectionStatus Status);
71
72// Disassembler and analysis tool for machine code files. Keeps track of non-
73// sequential control flows, including indirect control flow instructions.
74class FileAnalysis {
75public:
76 // A metadata struct for an instruction.
77 struct Instr {
78 uint64_t VMAddress; // Virtual memory address of this instruction.
79 MCInst Instruction; // Instruction.
80 uint64_t InstructionSize; // Size of this instruction.
81 bool Valid; // Is this a valid instruction? If false, Instr::Instruction is
82 // undefined.
83 };
84
85 // Construct a FileAnalysis from a file path.
86 static Expected<FileAnalysis> Create(StringRef Filename);
87
88 // Construct and take ownership of the supplied object. Do not use this
89 // constructor, prefer to use FileAnalysis::Create instead.
90 FileAnalysis(object::OwningBinary<object::Binary> Binary);
91 FileAnalysis() = delete;
92 FileAnalysis(const FileAnalysis &) = delete;
93 FileAnalysis(FileAnalysis &&Other) = default;
94
95 // Returns the instruction at the provided address. Returns nullptr if there
96 // is no instruction at the provided address.
97 const Instr *getInstruction(uint64_t Address) const;
98
99 // Returns the instruction at the provided adress, dying if the instruction is
100 // not found.
101 const Instr &getInstructionOrDie(uint64_t Address) const;
102
103 // Returns a pointer to the previous/next instruction in sequence,
104 // respectively. Returns nullptr if the next/prev instruction doesn't exist,
105 // or if the provided instruction doesn't exist.
106 const Instr *getPrevInstructionSequential(const Instr &InstrMeta) const;
107 const Instr *getNextInstructionSequential(const Instr &InstrMeta) const;
108
109 // Returns whether this instruction is used by CFI to trap the program.
110 bool isCFITrap(const Instr &InstrMeta) const;
111
112 // Returns whether this instruction is a call to a function that will trap on
113 // CFI violations (i.e., it serves as a trap in this instance).
114 bool willTrapOnCFIViolation(const Instr &InstrMeta) const;
115
116 // Returns whether this function can fall through to the next instruction.
117 // Undefined (and bad) instructions cannot fall through, and instruction that
118 // modify the control flow can only fall through if they are conditional
119 // branches or calls.
120 bool canFallThrough(const Instr &InstrMeta) const;
121
122 // Returns the definitive next instruction. This is different from the next
123 // instruction sequentially as it will follow unconditional branches (assuming
124 // they can be resolved at compile time, i.e. not indirect). This method
125 // returns nullptr if the provided instruction does not transfer control flow
126 // to exactly one instruction that is known deterministically at compile time.
127 // Also returns nullptr if the deterministic target does not exist in this
128 // file.
129 const Instr *getDefiniteNextInstruction(const Instr &InstrMeta) const;
130
131 // Get a list of deterministic control flows that lead to the provided
132 // instruction. This list includes all static control flow cross-references as
133 // well as the previous instruction if it can fall through.
134 std::set<const Instr *>
135 getDirectControlFlowXRefs(const Instr &InstrMeta) const;
136
137 // Returns whether this instruction uses a register operand.
138 bool usesRegisterOperand(const Instr &InstrMeta) const;
139
140 // Returns the list of indirect instructions.
141 const std::set<object::SectionedAddress> &getIndirectInstructions() const;
142
143 const MCRegisterInfo *getRegisterInfo() const;
144 const MCInstrInfo *getMCInstrInfo() const;
145 const MCInstrAnalysis *getMCInstrAnalysis() const;
146
147 // Returns the inlining information for the provided address.
148 Expected<DIInliningInfo>
149 symbolizeInlinedCode(object::SectionedAddress Address);
150
151 // Returns whether the provided Graph represents a protected indirect control
152 // flow instruction in this file.
153 CFIProtectionStatus validateCFIProtection(const GraphResult &Graph) const;
154
155 // Returns the first place the operand register is clobbered between the CFI-
156 // check and the indirect CF instruction execution. We do this by walking
157 // backwards from the indirect CF and ensuring there is at most one load
158 // involving the operand register (which is the indirect CF itself on x86).
159 // If the register is not modified, returns the address of the indirect CF
160 // instruction. The result is undefined if the provided graph does not fall
161 // under either the FAIL_REGISTER_CLOBBERED or PROTECTED status (see
162 // CFIProtectionStatus).
163 uint64_t indirectCFOperandClobber(const GraphResult& Graph) const;
164
165 // Prints an instruction to the provided stream using this object's pretty-
166 // printers.
167 void printInstruction(const Instr &InstrMeta, raw_ostream &OS) const;
168
169protected:
170 // Construct a blank object with the provided triple and features. Used in
171 // testing, where a sub class will dependency inject protected methods to
172 // allow analysis of raw binary, without requiring a fully valid ELF file.
173 FileAnalysis(const Triple &ObjectTriple, const SubtargetFeatures &Features);
174
175 // Add an instruction to this object.
176 void addInstruction(const Instr &Instruction);
177
178 // Disassemble and parse the provided bytes into this object. Instruction
179 // address calculation is done relative to the provided SectionAddress.
180 void parseSectionContents(ArrayRef<uint8_t> SectionBytes,
181 object::SectionedAddress Address);
182
183 // Constructs and initialises members required for disassembly.
184 Error initialiseDisassemblyMembers();
185
186 // Parses code sections from the internal object file. Saves them into the
187 // internal members. Should only be called once by Create().
188 Error parseCodeSections();
189
190 // Parses the symbol table to look for the addresses of functions that will
191 // trap on CFI violations.
192 Error parseSymbolTable();
193
194private:
195 // Members that describe the input file.
196 object::OwningBinary<object::Binary> Binary;
197 const object::ObjectFile *Object = nullptr;
198 Triple ObjectTriple;
199 std::string ArchName;
200 std::string MCPU;
201 const Target *ObjectTarget = nullptr;
202 SubtargetFeatures Features;
203
204 // Members required for disassembly.
205 std::unique_ptr<const MCRegisterInfo> RegisterInfo;
206 std::unique_ptr<const MCAsmInfo> AsmInfo;
207 std::unique_ptr<MCSubtargetInfo> SubtargetInfo;
208 std::unique_ptr<const MCInstrInfo> MII;
209 std::unique_ptr<MCContext> Context;
210 std::unique_ptr<const MCDisassembler> Disassembler;
211 std::unique_ptr<const MCInstrAnalysis> MIA;
212 std::unique_ptr<MCInstPrinter> Printer;
213
214 // Symbolizer used for debug information parsing.
215 std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
216
217 // A mapping between the virtual memory address to the instruction metadata
218 // struct. TODO(hctim): Reimplement this as a sorted vector to avoid per-
219 // insertion allocation.
220 std::map<uint64_t, Instr> Instructions;
221
222 // Contains a mapping between a specific address, and a list of instructions
223 // that use this address as a branch target (including call instructions).
224 DenseMap<uint64_t, std::vector<uint64_t>> StaticBranchTargetings;
225
226 // A list of addresses of indirect control flow instructions.
227 std::set<object::SectionedAddress> IndirectInstructions;
228
229 // The addresses of functions that will trap on CFI violations.
230 SmallSet<uint64_t, 4> TrapOnFailFunctionAddresses;
231};
232
233class UnsupportedDisassembly : public ErrorInfo<UnsupportedDisassembly> {
234public:
235 static char ID;
236 std::string Text;
237
238 UnsupportedDisassembly(StringRef Text);
239
240 void log(raw_ostream &OS) const override;
241 std::error_code convertToErrorCode() const override;
242};
243
244} // namespace cfi_verify
245} // namespace llvm
246
247#endif // LLVM_CFI_VERIFY_FILE_ANALYSIS_H
248