1 | //===- DWARFCFIProgram.h ----------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_DEBUGINFO_DWARF_DWARFCFIPROGRAM_H |
10 | #define LLVM_DEBUGINFO_DWARF_DWARFCFIPROGRAM_H |
11 | |
12 | #include "llvm/ADT/ArrayRef.h" |
13 | #include "llvm/ADT/SmallString.h" |
14 | #include "llvm/ADT/iterator.h" |
15 | #include "llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h" |
16 | #include "llvm/DebugInfo/DWARF/LowLevel/DWARFExpression.h" |
17 | #include "llvm/Support/Compiler.h" |
18 | #include "llvm/Support/Error.h" |
19 | #include "llvm/TargetParser/Triple.h" |
20 | #include <map> |
21 | #include <memory> |
22 | #include <vector> |
23 | |
24 | namespace llvm { |
25 | |
26 | namespace dwarf { |
27 | |
28 | /// Represent a sequence of Call Frame Information instructions that, when read |
29 | /// in order, construct a table mapping PC to frame state. This can also be |
30 | /// referred to as "CFI rules" in DWARF literature to avoid confusion with |
31 | /// computer programs in the broader sense, and in this context each instruction |
32 | /// would be a rule to establish the mapping. Refer to pg. 172 in the DWARF5 |
33 | /// manual, "6.4.1 Structure of Call Frame Information". |
34 | class CFIProgram { |
35 | public: |
36 | static constexpr size_t MaxOperands = 3; |
37 | typedef SmallVector<uint64_t, MaxOperands> Operands; |
38 | |
39 | /// An instruction consists of a DWARF CFI opcode and an optional sequence of |
40 | /// operands. If it refers to an expression, then this expression has its own |
41 | /// sequence of operations and operands handled separately by DWARFExpression. |
42 | struct Instruction { |
43 | Instruction(uint8_t Opcode) : Opcode(Opcode) {} |
44 | |
45 | uint8_t Opcode; |
46 | Operands Ops; |
47 | // Associated DWARF expression in case this instruction refers to one |
48 | std::optional<DWARFExpression> Expression; |
49 | |
50 | LLVM_ABI Expected<uint64_t> getOperandAsUnsigned(const CFIProgram &CFIP, |
51 | uint32_t OperandIdx) const; |
52 | |
53 | LLVM_ABI Expected<int64_t> getOperandAsSigned(const CFIProgram &CFIP, |
54 | uint32_t OperandIdx) const; |
55 | }; |
56 | |
57 | using InstrList = std::vector<Instruction>; |
58 | using iterator = InstrList::iterator; |
59 | using const_iterator = InstrList::const_iterator; |
60 | |
61 | iterator begin() { return Instructions.begin(); } |
62 | const_iterator begin() const { return Instructions.begin(); } |
63 | iterator end() { return Instructions.end(); } |
64 | const_iterator end() const { return Instructions.end(); } |
65 | |
66 | unsigned size() const { return (unsigned)Instructions.size(); } |
67 | bool empty() const { return Instructions.empty(); } |
68 | uint64_t codeAlign() const { return CodeAlignmentFactor; } |
69 | int64_t dataAlign() const { return DataAlignmentFactor; } |
70 | Triple::ArchType triple() const { return Arch; } |
71 | |
72 | CFIProgram(uint64_t CodeAlignmentFactor, int64_t DataAlignmentFactor, |
73 | Triple::ArchType Arch) |
74 | : CodeAlignmentFactor(CodeAlignmentFactor), |
75 | DataAlignmentFactor(DataAlignmentFactor), Arch(Arch) {} |
76 | |
77 | /// Parse and store a sequence of CFI instructions from Data, |
78 | /// starting at *Offset and ending at EndOffset. *Offset is updated |
79 | /// to EndOffset upon successful parsing, or indicates the offset |
80 | /// where a problem occurred in case an error is returned. |
81 | template <typename T> |
82 | LLVM_ABI Error (DWARFDataExtractorBase<T> &Data, uint64_t *Offset, |
83 | uint64_t EndOffset) { |
84 | // See DWARF standard v3, section 7.23 |
85 | const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; |
86 | const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f; |
87 | |
88 | DataExtractor::Cursor C(*Offset); |
89 | while (C && C.tell() < EndOffset) { |
90 | uint8_t Opcode = Data.getRelocatedValue(C, 1); |
91 | if (!C) |
92 | break; |
93 | |
94 | // Some instructions have a primary opcode encoded in the top bits. |
95 | if (uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) { |
96 | // If it's a primary opcode, the first operand is encoded in the |
97 | // bottom bits of the opcode itself. |
98 | uint64_t Op1 = Opcode & DWARF_CFI_PRIMARY_OPERAND_MASK; |
99 | switch (Primary) { |
100 | case DW_CFA_advance_loc: |
101 | case DW_CFA_restore: |
102 | addInstruction(Opcode: Primary, Operand1: Op1); |
103 | break; |
104 | case DW_CFA_offset: |
105 | addInstruction(Primary, Op1, Data.getULEB128(C)); |
106 | break; |
107 | default: |
108 | llvm_unreachable("invalid primary CFI opcode" ); |
109 | } |
110 | continue; |
111 | } |
112 | |
113 | // Extended opcode - its value is Opcode itself. |
114 | switch (Opcode) { |
115 | default: |
116 | return createStringError(EC: errc::illegal_byte_sequence, |
117 | Fmt: "invalid extended CFI opcode 0x%" PRIx8, |
118 | Vals: Opcode); |
119 | case DW_CFA_nop: |
120 | case DW_CFA_remember_state: |
121 | case DW_CFA_restore_state: |
122 | case DW_CFA_GNU_window_save: |
123 | case DW_CFA_AARCH64_negate_ra_state_with_pc: |
124 | // No operands |
125 | addInstruction(Opcode); |
126 | break; |
127 | case DW_CFA_set_loc: |
128 | // Operands: Address |
129 | addInstruction(Opcode, Data.getRelocatedAddress(C)); |
130 | break; |
131 | case DW_CFA_advance_loc1: |
132 | // Operands: 1-byte delta |
133 | addInstruction(Opcode, Data.getRelocatedValue(C, 1)); |
134 | break; |
135 | case DW_CFA_advance_loc2: |
136 | // Operands: 2-byte delta |
137 | addInstruction(Opcode, Data.getRelocatedValue(C, 2)); |
138 | break; |
139 | case DW_CFA_advance_loc4: |
140 | // Operands: 4-byte delta |
141 | addInstruction(Opcode, Data.getRelocatedValue(C, 4)); |
142 | break; |
143 | case DW_CFA_restore_extended: |
144 | case DW_CFA_undefined: |
145 | case DW_CFA_same_value: |
146 | case DW_CFA_def_cfa_register: |
147 | case DW_CFA_def_cfa_offset: |
148 | case DW_CFA_GNU_args_size: |
149 | // Operands: ULEB128 |
150 | addInstruction(Opcode, Data.getULEB128(C)); |
151 | break; |
152 | case DW_CFA_def_cfa_offset_sf: |
153 | // Operands: SLEB128 |
154 | addInstruction(Opcode, Data.getSLEB128(C)); |
155 | break; |
156 | case DW_CFA_LLVM_def_aspace_cfa: |
157 | case DW_CFA_LLVM_def_aspace_cfa_sf: { |
158 | auto RegNum = Data.getULEB128(C); |
159 | auto CfaOffset = Opcode == DW_CFA_LLVM_def_aspace_cfa |
160 | ? Data.getULEB128(C) |
161 | : Data.getSLEB128(C); |
162 | auto AddressSpace = Data.getULEB128(C); |
163 | addInstruction(Opcode, RegNum, CfaOffset, AddressSpace); |
164 | break; |
165 | } |
166 | case DW_CFA_offset_extended: |
167 | case DW_CFA_register: |
168 | case DW_CFA_def_cfa: |
169 | case DW_CFA_val_offset: { |
170 | // Operands: ULEB128, ULEB128 |
171 | // Note: We can not embed getULEB128 directly into function |
172 | // argument list. getULEB128 changes Offset and order of evaluation |
173 | // for arguments is unspecified. |
174 | uint64_t op1 = Data.getULEB128(C); |
175 | uint64_t op2 = Data.getULEB128(C); |
176 | addInstruction(Opcode, Operand1: op1, Operand2: op2); |
177 | break; |
178 | } |
179 | case DW_CFA_offset_extended_sf: |
180 | case DW_CFA_def_cfa_sf: |
181 | case DW_CFA_val_offset_sf: { |
182 | // Operands: ULEB128, SLEB128 |
183 | // Note: see comment for the previous case |
184 | uint64_t op1 = Data.getULEB128(C); |
185 | uint64_t op2 = (uint64_t)Data.getSLEB128(C); |
186 | addInstruction(Opcode, Operand1: op1, Operand2: op2); |
187 | break; |
188 | } |
189 | case DW_CFA_def_cfa_expression: { |
190 | uint64_t ExprLength = Data.getULEB128(C); |
191 | addInstruction(Opcode, Operand1: 0); |
192 | StringRef Expression = Data.getBytes(C, ExprLength); |
193 | |
194 | DataExtractor (Expression, Data.isLittleEndian(), |
195 | Data.getAddressSize()); |
196 | // Note. We do not pass the DWARF format to DWARFExpression, because |
197 | // DW_OP_call_ref, the only operation which depends on the format, is |
198 | // prohibited in call frame instructions, see sec. 6.4.2 in DWARFv5. |
199 | Instructions.back().Expression = |
200 | DWARFExpression(Extractor, Data.getAddressSize()); |
201 | break; |
202 | } |
203 | case DW_CFA_expression: |
204 | case DW_CFA_val_expression: { |
205 | uint64_t RegNum = Data.getULEB128(C); |
206 | addInstruction(Opcode, Operand1: RegNum, Operand2: 0); |
207 | |
208 | uint64_t BlockLength = Data.getULEB128(C); |
209 | StringRef Expression = Data.getBytes(C, BlockLength); |
210 | DataExtractor (Expression, Data.isLittleEndian(), |
211 | Data.getAddressSize()); |
212 | // Note. We do not pass the DWARF format to DWARFExpression, because |
213 | // DW_OP_call_ref, the only operation which depends on the format, is |
214 | // prohibited in call frame instructions, see sec. 6.4.2 in DWARFv5. |
215 | Instructions.back().Expression = |
216 | DWARFExpression(Extractor, Data.getAddressSize()); |
217 | break; |
218 | } |
219 | } |
220 | } |
221 | |
222 | *Offset = C.tell(); |
223 | return C.takeError(); |
224 | } |
225 | |
226 | void addInstruction(const Instruction &I) { Instructions.push_back(x: I); } |
227 | |
228 | /// Get a DWARF CFI call frame string for the given DW_CFA opcode. |
229 | LLVM_ABI StringRef callFrameString(unsigned Opcode) const; |
230 | |
231 | /// Types of operands to CFI instructions |
232 | /// In DWARF, this type is implicitly tied to a CFI instruction opcode and |
233 | /// thus this type doesn't need to be explicitly written to the file (this is |
234 | /// not a DWARF encoding). The relationship of instrs to operand types can |
235 | /// be obtained from getOperandTypes() and is only used to simplify |
236 | /// instruction printing and error messages. |
237 | enum OperandType { |
238 | OT_Unset, |
239 | OT_None, |
240 | OT_Address, |
241 | OT_Offset, |
242 | OT_FactoredCodeOffset, |
243 | OT_SignedFactDataOffset, |
244 | OT_UnsignedFactDataOffset, |
245 | OT_Register, |
246 | OT_AddressSpace, |
247 | OT_Expression |
248 | }; |
249 | |
250 | /// Get the OperandType as a "const char *". |
251 | LLVM_ABI static const char *operandTypeString(OperandType OT); |
252 | |
253 | /// Retrieve the array describing the types of operands according to the enum |
254 | /// above. This is indexed by opcode. |
255 | LLVM_ABI static ArrayRef<OperandType[MaxOperands]> getOperandTypes(); |
256 | |
257 | /// Convenience method to add a new instruction with the given opcode. |
258 | void addInstruction(uint8_t Opcode) { |
259 | Instructions.push_back(x: Instruction(Opcode)); |
260 | } |
261 | |
262 | /// Add a new single-operand instruction. |
263 | void addInstruction(uint8_t Opcode, uint64_t Operand1) { |
264 | Instructions.push_back(x: Instruction(Opcode)); |
265 | Instructions.back().Ops.push_back(Elt: Operand1); |
266 | } |
267 | |
268 | /// Add a new instruction that has two operands. |
269 | void addInstruction(uint8_t Opcode, uint64_t Operand1, uint64_t Operand2) { |
270 | Instructions.push_back(x: Instruction(Opcode)); |
271 | Instructions.back().Ops.push_back(Elt: Operand1); |
272 | Instructions.back().Ops.push_back(Elt: Operand2); |
273 | } |
274 | |
275 | /// Add a new instruction that has three operands. |
276 | void addInstruction(uint8_t Opcode, uint64_t Operand1, uint64_t Operand2, |
277 | uint64_t Operand3) { |
278 | Instructions.push_back(x: Instruction(Opcode)); |
279 | Instructions.back().Ops.push_back(Elt: Operand1); |
280 | Instructions.back().Ops.push_back(Elt: Operand2); |
281 | Instructions.back().Ops.push_back(Elt: Operand3); |
282 | } |
283 | |
284 | private: |
285 | std::vector<Instruction> Instructions; |
286 | const uint64_t CodeAlignmentFactor; |
287 | const int64_t DataAlignmentFactor; |
288 | Triple::ArchType Arch; |
289 | }; |
290 | |
291 | } // end namespace dwarf |
292 | |
293 | } // end namespace llvm |
294 | |
295 | #endif // LLVM_DEBUGINFO_DWARF_DWARFCFIPROGRAM_H |
296 | |