1 | //===- GraphBuilder.cpp -----------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "GraphBuilder.h" |
10 | |
11 | #include "llvm/BinaryFormat/ELF.h" |
12 | #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" |
13 | #include "llvm/MC/MCAsmInfo.h" |
14 | #include "llvm/MC/MCContext.h" |
15 | #include "llvm/MC/MCDisassembler/MCDisassembler.h" |
16 | #include "llvm/MC/MCInst.h" |
17 | #include "llvm/MC/MCInstPrinter.h" |
18 | #include "llvm/MC/MCInstrAnalysis.h" |
19 | #include "llvm/MC/MCInstrDesc.h" |
20 | #include "llvm/MC/MCInstrInfo.h" |
21 | #include "llvm/MC/MCObjectFileInfo.h" |
22 | #include "llvm/MC/MCRegisterInfo.h" |
23 | #include "llvm/MC/MCSubtargetInfo.h" |
24 | #include "llvm/MC/TargetRegistry.h" |
25 | #include "llvm/Object/Binary.h" |
26 | #include "llvm/Object/COFF.h" |
27 | #include "llvm/Object/ELFObjectFile.h" |
28 | #include "llvm/Object/ObjectFile.h" |
29 | #include "llvm/Support/Casting.h" |
30 | #include "llvm/Support/CommandLine.h" |
31 | #include "llvm/Support/Error.h" |
32 | #include "llvm/Support/MemoryBuffer.h" |
33 | #include "llvm/Support/TargetSelect.h" |
34 | #include "llvm/Support/raw_ostream.h" |
35 | |
36 | using Instr = llvm::cfi_verify::FileAnalysis::Instr; |
37 | |
38 | namespace llvm { |
39 | namespace cfi_verify { |
40 | |
41 | uint64_t SearchLengthForUndef; |
42 | uint64_t SearchLengthForConditionalBranch; |
43 | |
44 | static cl::opt<uint64_t, true> SearchLengthForUndefArg( |
45 | "search-length-undef" , |
46 | cl::desc("Specify the maximum amount of instructions " |
47 | "to inspect when searching for an undefined " |
48 | "instruction from a conditional branch." ), |
49 | cl::location(L&: SearchLengthForUndef), cl::init(Val: 2)); |
50 | |
51 | static cl::opt<uint64_t, true> SearchLengthForConditionalBranchArg( |
52 | "search-length-cb" , |
53 | cl::desc("Specify the maximum amount of instructions " |
54 | "to inspect when searching for a conditional " |
55 | "branch from an indirect control flow." ), |
56 | cl::location(L&: SearchLengthForConditionalBranch), cl::init(Val: 20)); |
57 | |
58 | std::vector<uint64_t> GraphResult::flattenAddress(uint64_t Address) const { |
59 | std::vector<uint64_t> Addresses; |
60 | |
61 | auto It = IntermediateNodes.find(Val: Address); |
62 | Addresses.push_back(x: Address); |
63 | |
64 | while (It != IntermediateNodes.end()) { |
65 | Addresses.push_back(x: It->second); |
66 | It = IntermediateNodes.find(Val: It->second); |
67 | } |
68 | return Addresses; |
69 | } |
70 | |
71 | void printPairToDOT(const FileAnalysis &Analysis, raw_ostream &OS, |
72 | uint64_t From, uint64_t To) { |
73 | OS << " \"" << format_hex(N: From, Width: 2) << ": " ; |
74 | Analysis.printInstruction(InstrMeta: Analysis.getInstructionOrDie(Address: From), OS); |
75 | OS << "\" -> \"" << format_hex(N: To, Width: 2) << ": " ; |
76 | Analysis.printInstruction(InstrMeta: Analysis.getInstructionOrDie(Address: To), OS); |
77 | OS << "\"\n" ; |
78 | } |
79 | |
80 | void GraphResult::printToDOT(const FileAnalysis &Analysis, |
81 | raw_ostream &OS) const { |
82 | std::map<uint64_t, uint64_t> SortedIntermediateNodes( |
83 | IntermediateNodes.begin(), IntermediateNodes.end()); |
84 | OS << "digraph graph_" << format_hex(N: BaseAddress, Width: 2) << " {\n" ; |
85 | for (const auto &KV : SortedIntermediateNodes) |
86 | printPairToDOT(Analysis, OS, From: KV.first, To: KV.second); |
87 | |
88 | for (auto &BranchNode : ConditionalBranchNodes) { |
89 | for (auto &V : {BranchNode.Target, BranchNode.Fallthrough}) |
90 | printPairToDOT(Analysis, OS, From: BranchNode.Address, To: V); |
91 | } |
92 | OS << "}\n" ; |
93 | } |
94 | |
95 | GraphResult GraphBuilder::buildFlowGraph(const FileAnalysis &Analysis, |
96 | object::SectionedAddress Address) { |
97 | GraphResult Result; |
98 | Result.BaseAddress = Address.Address; |
99 | DenseSet<uint64_t> OpenedNodes; |
100 | |
101 | const auto &IndirectInstructions = Analysis.getIndirectInstructions(); |
102 | |
103 | // check that IndirectInstructions contains specified Address |
104 | if (IndirectInstructions.find(x: Address) == IndirectInstructions.end()) { |
105 | return Result; |
106 | } |
107 | |
108 | buildFlowGraphImpl(Analysis, OpenedNodes, Result, Address: Address.Address, Depth: 0); |
109 | return Result; |
110 | } |
111 | |
112 | void GraphBuilder::buildFlowsToUndefined(const FileAnalysis &Analysis, |
113 | GraphResult &Result, |
114 | ConditionalBranchNode &BranchNode, |
115 | const Instr &BranchInstrMeta) { |
116 | assert(SearchLengthForUndef > 0 && |
117 | "Search length for undefined flow must be greater than zero." ); |
118 | |
119 | // Start setting up the next node in the block. |
120 | uint64_t NextAddress = 0; |
121 | const Instr *NextMetaPtr; |
122 | |
123 | // Find out the next instruction in the block and add it to the new |
124 | // node. |
125 | if (BranchNode.Target && !BranchNode.Fallthrough) { |
126 | // We know the target of the branch, find the fallthrough. |
127 | NextMetaPtr = Analysis.getNextInstructionSequential(InstrMeta: BranchInstrMeta); |
128 | if (!NextMetaPtr) { |
129 | errs() << "Failed to get next instruction from " |
130 | << format_hex(N: BranchNode.Address, Width: 2) << ".\n" ; |
131 | return; |
132 | } |
133 | |
134 | NextAddress = NextMetaPtr->VMAddress; |
135 | BranchNode.Fallthrough = |
136 | NextMetaPtr->VMAddress; // Add the new node to the branch head. |
137 | } else if (BranchNode.Fallthrough && !BranchNode.Target) { |
138 | // We already know the fallthrough, evaluate the target. |
139 | uint64_t Target; |
140 | if (!Analysis.getMCInstrAnalysis()->evaluateBranch( |
141 | Inst: BranchInstrMeta.Instruction, Addr: BranchInstrMeta.VMAddress, |
142 | Size: BranchInstrMeta.InstructionSize, Target)) { |
143 | errs() << "Failed to get branch target for conditional branch at address " |
144 | << format_hex(N: BranchInstrMeta.VMAddress, Width: 2) << ".\n" ; |
145 | return; |
146 | } |
147 | |
148 | // Resolve the meta pointer for the target of this branch. |
149 | NextMetaPtr = Analysis.getInstruction(Address: Target); |
150 | if (!NextMetaPtr) { |
151 | errs() << "Failed to find instruction at address " |
152 | << format_hex(N: Target, Width: 2) << ".\n" ; |
153 | return; |
154 | } |
155 | |
156 | NextAddress = Target; |
157 | BranchNode.Target = |
158 | NextMetaPtr->VMAddress; // Add the new node to the branch head. |
159 | } else { |
160 | errs() << "ControlBranchNode supplied to buildFlowsToUndefined should " |
161 | "provide Target xor Fallthrough.\n" ; |
162 | return; |
163 | } |
164 | |
165 | uint64_t CurrentAddress = NextAddress; |
166 | const Instr *CurrentMetaPtr = NextMetaPtr; |
167 | |
168 | // Now the branch head has been set properly, complete the rest of the block. |
169 | for (uint64_t i = 1; i < SearchLengthForUndef; ++i) { |
170 | // Check to see whether the block should die. |
171 | if (Analysis.isCFITrap(InstrMeta: *CurrentMetaPtr)) { |
172 | BranchNode.CFIProtection = true; |
173 | return; |
174 | } |
175 | |
176 | // Find the metadata of the next instruction. |
177 | NextMetaPtr = Analysis.getDefiniteNextInstruction(InstrMeta: *CurrentMetaPtr); |
178 | if (!NextMetaPtr) |
179 | return; |
180 | |
181 | // Setup the next node. |
182 | NextAddress = NextMetaPtr->VMAddress; |
183 | |
184 | // Add this as an intermediate. |
185 | Result.IntermediateNodes[CurrentAddress] = NextAddress; |
186 | |
187 | // Move the 'current' pointers to the new tail of the block. |
188 | CurrentMetaPtr = NextMetaPtr; |
189 | CurrentAddress = NextAddress; |
190 | } |
191 | |
192 | // Final check of the last thing we added to the block. |
193 | if (Analysis.isCFITrap(InstrMeta: *CurrentMetaPtr)) |
194 | BranchNode.CFIProtection = true; |
195 | } |
196 | |
197 | void GraphBuilder::buildFlowGraphImpl(const FileAnalysis &Analysis, |
198 | DenseSet<uint64_t> &OpenedNodes, |
199 | GraphResult &Result, uint64_t Address, |
200 | uint64_t Depth) { |
201 | // If we've exceeded the flow length, terminate. |
202 | if (Depth >= SearchLengthForConditionalBranch) { |
203 | Result.OrphanedNodes.push_back(x: Address); |
204 | return; |
205 | } |
206 | |
207 | // Ensure this flow is acyclic. |
208 | if (OpenedNodes.count(V: Address)) |
209 | Result.OrphanedNodes.push_back(x: Address); |
210 | |
211 | // If this flow is already explored, stop here. |
212 | if (Result.IntermediateNodes.count(Val: Address)) |
213 | return; |
214 | |
215 | // Get the metadata for the node instruction. |
216 | const auto &InstrMetaPtr = Analysis.getInstruction(Address); |
217 | if (!InstrMetaPtr) { |
218 | errs() << "Failed to build flow graph for instruction at address " |
219 | << format_hex(N: Address, Width: 2) << ".\n" ; |
220 | Result.OrphanedNodes.push_back(x: Address); |
221 | return; |
222 | } |
223 | const auto &ChildMeta = *InstrMetaPtr; |
224 | |
225 | OpenedNodes.insert(V: Address); |
226 | std::set<const Instr *> CFCrossRefs = |
227 | Analysis.getDirectControlFlowXRefs(InstrMeta: ChildMeta); |
228 | |
229 | bool HasValidCrossRef = false; |
230 | |
231 | for (const auto *ParentMetaPtr : CFCrossRefs) { |
232 | assert(ParentMetaPtr && "CFCrossRefs returned nullptr." ); |
233 | const auto &ParentMeta = *ParentMetaPtr; |
234 | const auto &ParentDesc = |
235 | Analysis.getMCInstrInfo()->get(Opcode: ParentMeta.Instruction.getOpcode()); |
236 | |
237 | if (!ParentDesc.mayAffectControlFlow(MI: ParentMeta.Instruction, |
238 | RI: *Analysis.getRegisterInfo())) { |
239 | // If this cross reference doesn't affect CF, continue the graph. |
240 | buildFlowGraphImpl(Analysis, OpenedNodes, Result, Address: ParentMeta.VMAddress, |
241 | Depth: Depth + 1); |
242 | Result.IntermediateNodes[ParentMeta.VMAddress] = Address; |
243 | HasValidCrossRef = true; |
244 | continue; |
245 | } |
246 | |
247 | // Call instructions are not valid in the upwards traversal. |
248 | if (ParentDesc.isCall()) { |
249 | Result.IntermediateNodes[ParentMeta.VMAddress] = Address; |
250 | Result.OrphanedNodes.push_back(x: ParentMeta.VMAddress); |
251 | continue; |
252 | } |
253 | |
254 | // Evaluate the branch target to ascertain whether this XRef is the result |
255 | // of a fallthrough or the target of a branch. |
256 | uint64_t BranchTarget; |
257 | if (!Analysis.getMCInstrAnalysis()->evaluateBranch( |
258 | Inst: ParentMeta.Instruction, Addr: ParentMeta.VMAddress, |
259 | Size: ParentMeta.InstructionSize, Target&: BranchTarget)) { |
260 | errs() << "Failed to evaluate branch target for instruction at address " |
261 | << format_hex(N: ParentMeta.VMAddress, Width: 2) << ".\n" ; |
262 | Result.IntermediateNodes[ParentMeta.VMAddress] = Address; |
263 | Result.OrphanedNodes.push_back(x: ParentMeta.VMAddress); |
264 | continue; |
265 | } |
266 | |
267 | // Allow unconditional branches to be part of the upwards traversal. |
268 | if (ParentDesc.isUnconditionalBranch()) { |
269 | // Ensures that the unconditional branch is actually an XRef to the child. |
270 | if (BranchTarget != Address) { |
271 | errs() << "Control flow to " << format_hex(N: Address, Width: 2) |
272 | << ", but target resolution of " |
273 | << format_hex(N: ParentMeta.VMAddress, Width: 2) |
274 | << " is not this address?\n" ; |
275 | Result.IntermediateNodes[ParentMeta.VMAddress] = Address; |
276 | Result.OrphanedNodes.push_back(x: ParentMeta.VMAddress); |
277 | continue; |
278 | } |
279 | |
280 | buildFlowGraphImpl(Analysis, OpenedNodes, Result, Address: ParentMeta.VMAddress, |
281 | Depth: Depth + 1); |
282 | Result.IntermediateNodes[ParentMeta.VMAddress] = Address; |
283 | HasValidCrossRef = true; |
284 | continue; |
285 | } |
286 | |
287 | // Ensure that any unknown CFs are caught. |
288 | if (!ParentDesc.isConditionalBranch()) { |
289 | errs() << "Unknown control flow encountered when building graph at " |
290 | << format_hex(N: Address, Width: 2) << "\n." ; |
291 | Result.IntermediateNodes[ParentMeta.VMAddress] = Address; |
292 | Result.OrphanedNodes.push_back(x: ParentMeta.VMAddress); |
293 | continue; |
294 | } |
295 | |
296 | // Only direct conditional branches should be present at this point. Setup |
297 | // a conditional branch node and build flows to the ud2. |
298 | ConditionalBranchNode BranchNode; |
299 | BranchNode.Address = ParentMeta.VMAddress; |
300 | BranchNode.Target = 0; |
301 | BranchNode.Fallthrough = 0; |
302 | BranchNode.CFIProtection = false; |
303 | BranchNode.IndirectCFIsOnTargetPath = (BranchTarget == Address); |
304 | |
305 | if (BranchTarget == Address) |
306 | BranchNode.Target = Address; |
307 | else |
308 | BranchNode.Fallthrough = Address; |
309 | |
310 | HasValidCrossRef = true; |
311 | buildFlowsToUndefined(Analysis, Result, BranchNode, BranchInstrMeta: ParentMeta); |
312 | Result.ConditionalBranchNodes.push_back(x: BranchNode); |
313 | } |
314 | |
315 | // When using cross-DSO, some indirect calls are not guarded by a branch to a |
316 | // trap but instead follow a call to __cfi_slowpath. For example: |
317 | // if (!InlinedFastCheck(f)) |
318 | // call *f |
319 | // else { |
320 | // __cfi_slowpath(CallSiteTypeId, f); |
321 | // call *f |
322 | // } |
323 | // To mark the second call as protected, we recognize indirect calls that |
324 | // directly follow calls to functions that will trap on CFI violations. |
325 | if (CFCrossRefs.empty()) { |
326 | const Instr *PrevInstr = Analysis.getPrevInstructionSequential(InstrMeta: ChildMeta); |
327 | if (PrevInstr && Analysis.willTrapOnCFIViolation(InstrMeta: *PrevInstr)) { |
328 | Result.IntermediateNodes[PrevInstr->VMAddress] = Address; |
329 | HasValidCrossRef = true; |
330 | } |
331 | } |
332 | |
333 | if (!HasValidCrossRef) |
334 | Result.OrphanedNodes.push_back(x: Address); |
335 | |
336 | OpenedNodes.erase(V: Address); |
337 | } |
338 | |
339 | } // namespace cfi_verify |
340 | } // namespace llvm |
341 | |