1 | //===- FunctionInfo.cpp ---------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/DebugInfo/GSYM/FunctionInfo.h" |
10 | #include "llvm/DebugInfo/GSYM/FileWriter.h" |
11 | #include "llvm/DebugInfo/GSYM/GsymReader.h" |
12 | #include "llvm/DebugInfo/GSYM/LineTable.h" |
13 | #include "llvm/DebugInfo/GSYM/InlineInfo.h" |
14 | #include "llvm/Support/DataExtractor.h" |
15 | #include <optional> |
16 | |
17 | using namespace llvm; |
18 | using namespace gsym; |
19 | |
/// Tag identifying each optional data chunk that follows the fixed header of
/// an encoded FunctionInfo. In the encoded form every chunk is a uint32_t
/// InfoType, a uint32_t payload length in bytes, and then the payload itself.
enum InfoType : uint32_t {
  /// Marks the end of the chunk list.
  EndOfList = 0,
  /// Payload is an encoded LineTable.
  LineTableInfo = 1,
  /// Payload is an encoded InlineInfo.
  InlineInfo = 2,
  /// Payload is an encoded MergedFunctionsInfo.
  MergedFunctionsInfo = 3,
  /// Payload is an encoded CallSiteInfoCollection.
  CallSiteInfo = 4,
};
29 | |
30 | raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) { |
31 | OS << FI.Range << ": " << "Name=" << HEX32(FI.Name) << '\n'; |
32 | if (FI.OptLineTable) |
33 | OS << FI.OptLineTable << '\n'; |
34 | if (FI.Inline) |
35 | OS << FI.Inline << '\n'; |
36 | if (FI.CallSites) |
37 | OS << *FI.CallSites << '\n'; |
38 | return OS; |
39 | } |
40 | |
41 | llvm::Expected<FunctionInfo> FunctionInfo::(DataExtractor &Data, |
42 | uint64_t BaseAddr) { |
43 | FunctionInfo FI; |
44 | uint64_t Offset = 0; |
45 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 4)) |
46 | return createStringError(EC: std::errc::io_error, |
47 | Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo Size" , Vals: Offset); |
48 | FI.Range = {BaseAddr, BaseAddr + Data.getU32(offset_ptr: &Offset)}; |
49 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 4)) |
50 | return createStringError(EC: std::errc::io_error, |
51 | Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo Name" , Vals: Offset); |
52 | FI.Name = Data.getU32(offset_ptr: &Offset); |
53 | if (FI.Name == 0) |
54 | return createStringError(EC: std::errc::io_error, |
55 | Fmt: "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x%8.8x" , |
56 | Vals: Offset - 4, Vals: FI.Name); |
57 | bool Done = false; |
58 | while (!Done) { |
59 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 4)) |
60 | return createStringError(EC: std::errc::io_error, |
61 | Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo InfoType value" , Vals: Offset); |
62 | const uint32_t IT = Data.getU32(offset_ptr: &Offset); |
63 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 4)) |
64 | return createStringError(EC: std::errc::io_error, |
65 | Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo InfoType length" , Vals: Offset); |
66 | const uint32_t InfoLength = Data.getU32(offset_ptr: &Offset); |
67 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: InfoLength)) |
68 | return createStringError(EC: std::errc::io_error, |
69 | Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo data for InfoType %u" , |
70 | Vals: Offset, Vals: IT); |
71 | DataExtractor InfoData(Data.getData().substr(Start: Offset, N: InfoLength), |
72 | Data.isLittleEndian(), |
73 | Data.getAddressSize()); |
74 | switch (IT) { |
75 | case InfoType::EndOfList: |
76 | Done = true; |
77 | break; |
78 | |
79 | case InfoType::LineTableInfo: |
80 | if (Expected<LineTable> LT = LineTable::decode(Data&: InfoData, BaseAddr)) |
81 | FI.OptLineTable = std::move(LT.get()); |
82 | else |
83 | return LT.takeError(); |
84 | break; |
85 | |
86 | case InfoType::InlineInfo: |
87 | if (Expected<InlineInfo> II = InlineInfo::decode(Data&: InfoData, BaseAddr)) |
88 | FI.Inline = std::move(II.get()); |
89 | else |
90 | return II.takeError(); |
91 | break; |
92 | |
93 | case InfoType::MergedFunctionsInfo: |
94 | if (Expected<MergedFunctionsInfo> MI = |
95 | MergedFunctionsInfo::decode(Data&: InfoData, BaseAddr)) |
96 | FI.MergedFunctions = std::move(MI.get()); |
97 | else |
98 | return MI.takeError(); |
99 | break; |
100 | |
101 | case InfoType::CallSiteInfo: |
102 | if (Expected<llvm::gsym::CallSiteInfoCollection> CI = |
103 | llvm::gsym::CallSiteInfoCollection::decode(Data&: InfoData)) |
104 | FI.CallSites = std::move(CI.get()); |
105 | else |
106 | return CI.takeError(); |
107 | break; |
108 | |
109 | default: |
110 | return createStringError(EC: std::errc::io_error, |
111 | Fmt: "0x%8.8" PRIx64 ": unsupported InfoType %u" , |
112 | Vals: Offset-8, Vals: IT); |
113 | } |
114 | Offset += InfoLength; |
115 | } |
116 | return std::move(FI); |
117 | } |
118 | |
119 | uint64_t FunctionInfo::cacheEncoding() { |
120 | EncodingCache.clear(); |
121 | if (!isValid()) |
122 | return 0; |
123 | raw_svector_ostream OutStrm(EncodingCache); |
124 | FileWriter FW(OutStrm, llvm::endianness::native); |
125 | llvm::Expected<uint64_t> Result = encode(O&: FW); |
126 | if (!Result) { |
127 | EncodingCache.clear(); |
128 | consumeError(Err: Result.takeError()); |
129 | return 0; |
130 | } |
131 | return EncodingCache.size(); |
132 | } |
133 | |
134 | llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out, |
135 | bool NoPadding) const { |
136 | if (!isValid()) |
137 | return createStringError(EC: std::errc::invalid_argument, |
138 | Fmt: "attempted to encode invalid FunctionInfo object" ); |
139 | // Align FunctionInfo data to a 4 byte alignment, if padding is allowed |
140 | if (NoPadding == false) |
141 | Out.alignTo(Align: 4); |
142 | const uint64_t FuncInfoOffset = Out.tell(); |
143 | // Check if we have already encoded this function info into EncodingCache. |
144 | // This will be non empty when creating segmented GSYM files as we need to |
145 | // precompute exactly how big FunctionInfo objects encode into so we can |
146 | // accurately make segments of a specific size. |
147 | if (!EncodingCache.empty() && |
148 | llvm::endianness::native == Out.getByteOrder()) { |
149 | // We already encoded this object, just write out the bytes. |
150 | Out.writeData(Data: llvm::ArrayRef<uint8_t>((const uint8_t *)EncodingCache.data(), |
151 | EncodingCache.size())); |
152 | return FuncInfoOffset; |
153 | } |
154 | // Write the size in bytes of this function as a uint32_t. This can be zero |
155 | // if we just have a symbol from a symbol table and that symbol has no size. |
156 | Out.writeU32(Value: size()); |
157 | // Write the name of this function as a uint32_t string table offset. |
158 | Out.writeU32(Value: Name); |
159 | |
160 | if (OptLineTable) { |
161 | Out.writeU32(Value: InfoType::LineTableInfo); |
162 | // Write a uint32_t length as zero for now, we will fix this up after |
163 | // writing the LineTable out with the number of bytes that were written. |
164 | Out.writeU32(Value: 0); |
165 | const auto StartOffset = Out.tell(); |
166 | llvm::Error err = OptLineTable->encode(O&: Out, BaseAddr: Range.start()); |
167 | if (err) |
168 | return std::move(err); |
169 | const auto Length = Out.tell() - StartOffset; |
170 | if (Length > UINT32_MAX) |
171 | return createStringError(EC: std::errc::invalid_argument, |
172 | Fmt: "LineTable length is greater than UINT32_MAX" ); |
173 | // Fixup the size of the LineTable data with the correct size. |
174 | Out.fixup32(Value: static_cast<uint32_t>(Length), Offset: StartOffset - 4); |
175 | } |
176 | |
177 | // Write out the inline function info if we have any and if it is valid. |
178 | if (Inline) { |
179 | Out.writeU32(Value: InfoType::InlineInfo); |
180 | // Write a uint32_t length as zero for now, we will fix this up after |
181 | // writing the LineTable out with the number of bytes that were written. |
182 | Out.writeU32(Value: 0); |
183 | const auto StartOffset = Out.tell(); |
184 | llvm::Error err = Inline->encode(O&: Out, BaseAddr: Range.start()); |
185 | if (err) |
186 | return std::move(err); |
187 | const auto Length = Out.tell() - StartOffset; |
188 | if (Length > UINT32_MAX) |
189 | return createStringError(EC: std::errc::invalid_argument, |
190 | Fmt: "InlineInfo length is greater than UINT32_MAX" ); |
191 | // Fixup the size of the InlineInfo data with the correct size. |
192 | Out.fixup32(Value: static_cast<uint32_t>(Length), Offset: StartOffset - 4); |
193 | } |
194 | |
195 | // Write out the merged functions info if we have any and if it is valid. |
196 | if (MergedFunctions) { |
197 | Out.writeU32(Value: InfoType::MergedFunctionsInfo); |
198 | // Write a uint32_t length as zero for now, we will fix this up after |
199 | // writing the LineTable out with the number of bytes that were written. |
200 | Out.writeU32(Value: 0); |
201 | const auto StartOffset = Out.tell(); |
202 | llvm::Error err = MergedFunctions->encode(O&: Out); |
203 | if (err) |
204 | return std::move(err); |
205 | const auto Length = Out.tell() - StartOffset; |
206 | if (Length > UINT32_MAX) |
207 | return createStringError( |
208 | EC: std::errc::invalid_argument, |
209 | Fmt: "MergedFunctionsInfo length is greater than UINT32_MAX" ); |
210 | // Fixup the size of the MergedFunctionsInfo data with the correct size. |
211 | Out.fixup32(Value: static_cast<uint32_t>(Length), Offset: StartOffset - 4); |
212 | } |
213 | |
214 | // Write out the call sites if we have any and if they are valid. |
215 | if (CallSites) { |
216 | Out.writeU32(Value: InfoType::CallSiteInfo); |
217 | // Write a uint32_t length as zero for now, we will fix this up after |
218 | // writing the CallSites out with the number of bytes that were written. |
219 | Out.writeU32(Value: 0); |
220 | const auto StartOffset = Out.tell(); |
221 | Error Err = CallSites->encode(O&: Out); |
222 | if (Err) |
223 | return std::move(Err); |
224 | const auto Length = Out.tell() - StartOffset; |
225 | if (Length > UINT32_MAX) |
226 | return createStringError(EC: std::errc::invalid_argument, |
227 | Fmt: "CallSites length is greater than UINT32_MAX" ); |
228 | // Fixup the size of the CallSites data with the correct size. |
229 | Out.fixup32(Value: static_cast<uint32_t>(Length), Offset: StartOffset - 4); |
230 | } |
231 | |
232 | // Terminate the data chunks with an end of list with zero size. |
233 | Out.writeU32(Value: InfoType::EndOfList); |
234 | Out.writeU32(Value: 0); |
235 | return FuncInfoOffset; |
236 | } |
237 | |
238 | llvm::Expected<LookupResult> |
239 | FunctionInfo::(DataExtractor &Data, const GsymReader &GR, |
240 | uint64_t FuncAddr, uint64_t Addr, |
241 | std::optional<DataExtractor> *MergedFuncsData) { |
242 | LookupResult LR; |
243 | LR.LookupAddr = Addr; |
244 | uint64_t Offset = 0; |
245 | LR.FuncRange = {FuncAddr, FuncAddr + Data.getU32(offset_ptr: &Offset)}; |
246 | uint32_t NameOffset = Data.getU32(offset_ptr: &Offset); |
247 | // The "lookup" functions doesn't report errors as accurately as the "decode" |
248 | // function as it is meant to be fast. For more accurage errors we could call |
249 | // "decode". |
250 | if (!Data.isValidOffset(offset: Offset)) |
251 | return createStringError(EC: std::errc::io_error, |
252 | Fmt: "FunctionInfo data is truncated" ); |
253 | // This function will be called with the result of a binary search of the |
254 | // address table, we must still make sure the address does not fall into a |
255 | // gap between functions are after the last function. |
256 | if (LR.FuncRange.size() > 0 && !LR.FuncRange.contains(Addr)) |
257 | return createStringError(EC: std::errc::io_error, |
258 | Fmt: "address 0x%" PRIx64 " is not in GSYM" , Vals: Addr); |
259 | |
260 | if (NameOffset == 0) |
261 | return createStringError(EC: std::errc::io_error, |
262 | Fmt: "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x00000000" , |
263 | Vals: Offset - 4); |
264 | LR.FuncName = GR.getString(Offset: NameOffset); |
265 | bool Done = false; |
266 | std::optional<LineEntry> LineEntry; |
267 | std::optional<DataExtractor> InlineInfoData; |
268 | while (!Done) { |
269 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 8)) |
270 | return createStringError(EC: std::errc::io_error, |
271 | Fmt: "FunctionInfo data is truncated" ); |
272 | const uint32_t IT = Data.getU32(offset_ptr: &Offset); |
273 | const uint32_t InfoLength = Data.getU32(offset_ptr: &Offset); |
274 | const StringRef InfoBytes = Data.getData().substr(Start: Offset, N: InfoLength); |
275 | if (InfoLength != InfoBytes.size()) |
276 | return createStringError(EC: std::errc::io_error, |
277 | Fmt: "FunctionInfo data is truncated" ); |
278 | DataExtractor InfoData(InfoBytes, Data.isLittleEndian(), |
279 | Data.getAddressSize()); |
280 | switch (IT) { |
281 | case InfoType::EndOfList: |
282 | Done = true; |
283 | break; |
284 | |
285 | case InfoType::LineTableInfo: |
286 | if (auto ExpectedLE = LineTable::lookup(Data&: InfoData, BaseAddr: FuncAddr, Addr)) |
287 | LineEntry = ExpectedLE.get(); |
288 | else |
289 | return ExpectedLE.takeError(); |
290 | break; |
291 | |
292 | case InfoType::MergedFunctionsInfo: |
293 | // Store the merged functions data for later parsing, if needed. |
294 | if (MergedFuncsData) |
295 | *MergedFuncsData = InfoData; |
296 | break; |
297 | |
298 | case InfoType::InlineInfo: |
299 | // We will parse the inline info after our line table, but only if |
300 | // we have a line entry. |
301 | InlineInfoData = InfoData; |
302 | break; |
303 | |
304 | case InfoType::CallSiteInfo: |
305 | if (auto CSIC = CallSiteInfoCollection::decode(Data&: InfoData)) { |
306 | // Find matching call site based on relative offset |
307 | for (const auto &CS : CSIC->CallSites) { |
308 | // Check if the call site matches the lookup address |
309 | if (CS.ReturnOffset == Addr - FuncAddr) { |
310 | // Get regex patterns |
311 | for (uint32_t RegexOffset : CS.MatchRegex) { |
312 | LR.CallSiteFuncRegex.push_back(x: GR.getString(Offset: RegexOffset)); |
313 | } |
314 | break; |
315 | } |
316 | } |
317 | } else { |
318 | return CSIC.takeError(); |
319 | } |
320 | break; |
321 | |
322 | default: |
323 | break; |
324 | } |
325 | Offset += InfoLength; |
326 | } |
327 | |
328 | if (!LineEntry) { |
329 | // We don't have a valid line entry for our address, fill in our source |
330 | // location as best we can and return. |
331 | SourceLocation SrcLoc; |
332 | SrcLoc.Name = LR.FuncName; |
333 | SrcLoc.Offset = Addr - FuncAddr; |
334 | LR.Locations.push_back(x: SrcLoc); |
335 | return LR; |
336 | } |
337 | |
338 | std::optional<FileEntry> LineEntryFile = GR.getFile(Index: LineEntry->File); |
339 | if (!LineEntryFile) |
340 | return createStringError(EC: std::errc::invalid_argument, |
341 | Fmt: "failed to extract file[%" PRIu32 "]" , |
342 | Vals: LineEntry->File); |
343 | |
344 | SourceLocation SrcLoc; |
345 | SrcLoc.Name = LR.FuncName; |
346 | SrcLoc.Offset = Addr - FuncAddr; |
347 | SrcLoc.Dir = GR.getString(Offset: LineEntryFile->Dir); |
348 | SrcLoc.Base = GR.getString(Offset: LineEntryFile->Base); |
349 | SrcLoc.Line = LineEntry->Line; |
350 | LR.Locations.push_back(x: SrcLoc); |
351 | // If we don't have inline information, we are done. |
352 | if (!InlineInfoData) |
353 | return LR; |
354 | // We have inline information. Try to augment the lookup result with this |
355 | // data. |
356 | llvm::Error Err = InlineInfo::lookup(GR, Data&: *InlineInfoData, BaseAddr: FuncAddr, Addr, |
357 | SrcLocs&: LR.Locations); |
358 | if (Err) |
359 | return std::move(Err); |
360 | return LR; |
361 | } |
362 | |