//===- FileAnalysis.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8 | |
9 | #ifndef LLVM_CFI_VERIFY_FILE_ANALYSIS_H |
10 | #define LLVM_CFI_VERIFY_FILE_ANALYSIS_H |
11 | |
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"

#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
42 | |
43 | namespace llvm { |
44 | namespace cfi_verify { |
45 | |
// Forward declaration only: this header refers to GraphResult solely through
// const references, so the complete type is not required here.
struct GraphResult;

// Declared here and defined in another translation unit.
// NOTE(review): the name suggests this turns off DWARF-based checks -- confirm
// against the definition and wherever the flag is set.
extern bool IgnoreDWARFFlag;
49 | |
// Outcome of checking whether an indirect control flow instruction is
// protected by CFI. PROTECTED is the only success value; each FAIL_* value
// names the reason the check did not succeed. Enumerators carry their implicit
// values (0, 1, 2, ...), so their order must not change.
enum class CFIProtectionStatus {
  // This instruction is protected by CFI.
  PROTECTED,
  // The instruction is not an indirect control flow instruction, and thus
  // shouldn't be protected.
  FAIL_NOT_INDIRECT_CF,
  // There is a path to the instruction that was unexpected.
  FAIL_ORPHANS,
  // There is a path to the instruction from a conditional branch that does not
  // properly check the destination for this vcall/icall.
  FAIL_BAD_CONDITIONAL_BRANCH,
  // One of the operands of the indirect CF instruction is modified between the
  // CFI-check and execution.
  FAIL_REGISTER_CLOBBERED,
  // The instruction referenced does not exist. This normally indicates an
  // error in the program, where you try and validate a graph that was created
  // in a different FileAnalysis object.
  FAIL_INVALID_INSTRUCTION,
};
69 | |
70 | StringRef stringCFIProtectionStatus(CFIProtectionStatus Status); |
71 | |
72 | // Disassembler and analysis tool for machine code files. Keeps track of non- |
73 | // sequential control flows, including indirect control flow instructions. |
74 | class FileAnalysis { |
75 | public: |
76 | // A metadata struct for an instruction. |
77 | struct Instr { |
78 | uint64_t VMAddress; // Virtual memory address of this instruction. |
79 | MCInst Instruction; // Instruction. |
80 | uint64_t InstructionSize; // Size of this instruction. |
81 | bool Valid; // Is this a valid instruction? If false, Instr::Instruction is |
82 | // undefined. |
83 | }; |
84 | |
85 | // Construct a FileAnalysis from a file path. |
86 | static Expected<FileAnalysis> Create(StringRef Filename); |
87 | |
88 | // Construct and take ownership of the supplied object. Do not use this |
89 | // constructor, prefer to use FileAnalysis::Create instead. |
90 | FileAnalysis(object::OwningBinary<object::Binary> Binary); |
91 | FileAnalysis() = delete; |
92 | FileAnalysis(const FileAnalysis &) = delete; |
93 | FileAnalysis(FileAnalysis &&Other) = default; |
94 | |
95 | // Returns the instruction at the provided address. Returns nullptr if there |
96 | // is no instruction at the provided address. |
97 | const Instr *getInstruction(uint64_t Address) const; |
98 | |
99 | // Returns the instruction at the provided adress, dying if the instruction is |
100 | // not found. |
101 | const Instr &getInstructionOrDie(uint64_t Address) const; |
102 | |
103 | // Returns a pointer to the previous/next instruction in sequence, |
104 | // respectively. Returns nullptr if the next/prev instruction doesn't exist, |
105 | // or if the provided instruction doesn't exist. |
106 | const Instr *getPrevInstructionSequential(const Instr &InstrMeta) const; |
107 | const Instr *getNextInstructionSequential(const Instr &InstrMeta) const; |
108 | |
109 | // Returns whether this instruction is used by CFI to trap the program. |
110 | bool isCFITrap(const Instr &InstrMeta) const; |
111 | |
112 | // Returns whether this instruction is a call to a function that will trap on |
113 | // CFI violations (i.e., it serves as a trap in this instance). |
114 | bool willTrapOnCFIViolation(const Instr &InstrMeta) const; |
115 | |
116 | // Returns whether this function can fall through to the next instruction. |
117 | // Undefined (and bad) instructions cannot fall through, and instruction that |
118 | // modify the control flow can only fall through if they are conditional |
119 | // branches or calls. |
120 | bool canFallThrough(const Instr &InstrMeta) const; |
121 | |
122 | // Returns the definitive next instruction. This is different from the next |
123 | // instruction sequentially as it will follow unconditional branches (assuming |
124 | // they can be resolved at compile time, i.e. not indirect). This method |
125 | // returns nullptr if the provided instruction does not transfer control flow |
126 | // to exactly one instruction that is known deterministically at compile time. |
127 | // Also returns nullptr if the deterministic target does not exist in this |
128 | // file. |
129 | const Instr *getDefiniteNextInstruction(const Instr &InstrMeta) const; |
130 | |
131 | // Get a list of deterministic control flows that lead to the provided |
132 | // instruction. This list includes all static control flow cross-references as |
133 | // well as the previous instruction if it can fall through. |
134 | std::set<const Instr *> |
135 | getDirectControlFlowXRefs(const Instr &InstrMeta) const; |
136 | |
137 | // Returns whether this instruction uses a register operand. |
138 | bool usesRegisterOperand(const Instr &InstrMeta) const; |
139 | |
140 | // Returns the list of indirect instructions. |
141 | const std::set<object::SectionedAddress> &getIndirectInstructions() const; |
142 | |
143 | const MCRegisterInfo *getRegisterInfo() const; |
144 | const MCInstrInfo *getMCInstrInfo() const; |
145 | const MCInstrAnalysis *getMCInstrAnalysis() const; |
146 | |
147 | // Returns the inlining information for the provided address. |
148 | Expected<DIInliningInfo> |
149 | symbolizeInlinedCode(object::SectionedAddress Address); |
150 | |
151 | // Returns whether the provided Graph represents a protected indirect control |
152 | // flow instruction in this file. |
153 | CFIProtectionStatus validateCFIProtection(const GraphResult &Graph) const; |
154 | |
155 | // Returns the first place the operand register is clobbered between the CFI- |
156 | // check and the indirect CF instruction execution. We do this by walking |
157 | // backwards from the indirect CF and ensuring there is at most one load |
158 | // involving the operand register (which is the indirect CF itself on x86). |
159 | // If the register is not modified, returns the address of the indirect CF |
160 | // instruction. The result is undefined if the provided graph does not fall |
161 | // under either the FAIL_REGISTER_CLOBBERED or PROTECTED status (see |
162 | // CFIProtectionStatus). |
163 | uint64_t indirectCFOperandClobber(const GraphResult& Graph) const; |
164 | |
165 | // Prints an instruction to the provided stream using this object's pretty- |
166 | // printers. |
167 | void printInstruction(const Instr &InstrMeta, raw_ostream &OS) const; |
168 | |
169 | protected: |
170 | // Construct a blank object with the provided triple and features. Used in |
171 | // testing, where a sub class will dependency inject protected methods to |
172 | // allow analysis of raw binary, without requiring a fully valid ELF file. |
173 | FileAnalysis(const Triple &ObjectTriple, const SubtargetFeatures &Features); |
174 | |
175 | // Add an instruction to this object. |
176 | void addInstruction(const Instr &Instruction); |
177 | |
178 | // Disassemble and parse the provided bytes into this object. Instruction |
179 | // address calculation is done relative to the provided SectionAddress. |
180 | void parseSectionContents(ArrayRef<uint8_t> SectionBytes, |
181 | object::SectionedAddress Address); |
182 | |
183 | // Constructs and initialises members required for disassembly. |
184 | Error initialiseDisassemblyMembers(); |
185 | |
186 | // Parses code sections from the internal object file. Saves them into the |
187 | // internal members. Should only be called once by Create(). |
188 | Error parseCodeSections(); |
189 | |
190 | // Parses the symbol table to look for the addresses of functions that will |
191 | // trap on CFI violations. |
192 | Error parseSymbolTable(); |
193 | |
194 | private: |
195 | // Members that describe the input file. |
196 | object::OwningBinary<object::Binary> Binary; |
197 | const object::ObjectFile *Object = nullptr; |
198 | Triple ObjectTriple; |
199 | std::string ArchName; |
200 | std::string MCPU; |
201 | const Target *ObjectTarget = nullptr; |
202 | SubtargetFeatures Features; |
203 | |
204 | // Members required for disassembly. |
205 | std::unique_ptr<const MCRegisterInfo> RegisterInfo; |
206 | std::unique_ptr<const MCAsmInfo> AsmInfo; |
207 | std::unique_ptr<MCSubtargetInfo> SubtargetInfo; |
208 | std::unique_ptr<const MCInstrInfo> MII; |
209 | std::unique_ptr<MCContext> Context; |
210 | std::unique_ptr<const MCDisassembler> Disassembler; |
211 | std::unique_ptr<const MCInstrAnalysis> MIA; |
212 | std::unique_ptr<MCInstPrinter> Printer; |
213 | |
214 | // Symbolizer used for debug information parsing. |
215 | std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer; |
216 | |
217 | // A mapping between the virtual memory address to the instruction metadata |
218 | // struct. TODO(hctim): Reimplement this as a sorted vector to avoid per- |
219 | // insertion allocation. |
220 | std::map<uint64_t, Instr> Instructions; |
221 | |
222 | // Contains a mapping between a specific address, and a list of instructions |
223 | // that use this address as a branch target (including call instructions). |
224 | DenseMap<uint64_t, std::vector<uint64_t>> StaticBranchTargetings; |
225 | |
226 | // A list of addresses of indirect control flow instructions. |
227 | std::set<object::SectionedAddress> IndirectInstructions; |
228 | |
229 | // The addresses of functions that will trap on CFI violations. |
230 | SmallSet<uint64_t, 4> TrapOnFailFunctionAddresses; |
231 | }; |
232 | |
233 | class UnsupportedDisassembly : public ErrorInfo<UnsupportedDisassembly> { |
234 | public: |
235 | static char ID; |
236 | std::string Text; |
237 | |
238 | UnsupportedDisassembly(StringRef Text); |
239 | |
240 | void log(raw_ostream &OS) const override; |
241 | std::error_code convertToErrorCode() const override; |
242 | }; |
243 | |
244 | } // namespace cfi_verify |
245 | } // namespace llvm |
246 | |
247 | #endif // LLVM_CFI_VERIFY_FILE_ANALYSIS_H |
248 | |