1//===- FunctionInfo.cpp ---------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
10#include "llvm/DebugInfo/GSYM/FileWriter.h"
11#include "llvm/DebugInfo/GSYM/GsymCreator.h"
12#include "llvm/DebugInfo/GSYM/GsymDataExtractor.h"
13#include "llvm/DebugInfo/GSYM/GsymReader.h"
14#include "llvm/DebugInfo/GSYM/InlineInfo.h"
15#include "llvm/DebugInfo/GSYM/LineTable.h"
16#include <optional>
17
18using namespace llvm;
19using namespace gsym;
20
/// Tags for the optional, length-prefixed chunks of data that follow the
/// fixed part of an encoded FunctionInfo object.
enum InfoType : uint32_t {
  EndOfList = 0,           ///< Terminates the list of chunks.
  LineTableInfo = 1,       ///< Chunk payload is an encoded LineTable.
  InlineInfo = 2,          ///< Chunk payload is an encoded InlineInfo.
  MergedFunctionsInfo = 3, ///< Chunk payload is an encoded MergedFunctionsInfo.
  CallSiteInfo = 4,        ///< Chunk payload is an encoded CallSiteInfoCollection.
};
30
31raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) {
32 OS << FI.Range << ": " << "Name=" << HEX32(FI.Name) << '\n';
33 if (FI.OptLineTable)
34 OS << FI.OptLineTable << '\n';
35 if (FI.Inline)
36 OS << FI.Inline << '\n';
37 if (FI.CallSites)
38 OS << *FI.CallSites << '\n';
39 return OS;
40}
41
42llvm::Expected<FunctionInfo> FunctionInfo::decode(GsymDataExtractor &Data,
43 uint64_t BaseAddr) {
44 FunctionInfo FI;
45 uint64_t Offset = 0;
46 if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 4))
47 return createStringError(EC: std::errc::io_error,
48 Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo Size", Vals: Offset);
49 FI.Range = {BaseAddr, BaseAddr + Data.getU32(offset_ptr: &Offset)};
50 if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 4))
51 return createStringError(EC: std::errc::io_error,
52 Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo Name", Vals: Offset);
53 FI.Name = Data.getStringOffset(offset_ptr: &Offset);
54 if (FI.Name == 0)
55 return createStringError(EC: std::errc::io_error,
56 Fmt: "0x%8.8" PRIx64
57 ": invalid FunctionInfo Name value 0x%" PRIx64,
58 Vals: Offset - Data.getStringOffsetSize(), Vals: FI.Name);
59 bool Done = false;
60 while (!Done) {
61 if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 4))
62 return createStringError(EC: std::errc::io_error,
63 Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo InfoType value", Vals: Offset);
64 const uint32_t IT = Data.getU32(offset_ptr: &Offset);
65 if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 4))
66 return createStringError(EC: std::errc::io_error,
67 Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo InfoType length", Vals: Offset);
68 const uint32_t InfoLength = Data.getU32(offset_ptr: &Offset);
69 if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: InfoLength))
70 return createStringError(EC: std::errc::io_error,
71 Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo data for InfoType %u",
72 Vals: Offset, Vals: IT);
73 GsymDataExtractor InfoData(Data, Offset, InfoLength);
74 switch (IT) {
75 case InfoType::EndOfList:
76 Done = true;
77 break;
78
79 case InfoType::LineTableInfo:
80 if (Expected<LineTable> LT = LineTable::decode(Data&: InfoData, BaseAddr))
81 FI.OptLineTable = std::move(LT.get());
82 else
83 return LT.takeError();
84 break;
85
86 case InfoType::InlineInfo:
87 if (Expected<InlineInfo> II = InlineInfo::decode(Data&: InfoData, BaseAddr))
88 FI.Inline = std::move(II.get());
89 else
90 return II.takeError();
91 break;
92
93 case InfoType::MergedFunctionsInfo:
94 if (Expected<MergedFunctionsInfo> MI =
95 MergedFunctionsInfo::decode(Data&: InfoData, BaseAddr))
96 FI.MergedFunctions = std::move(MI.get());
97 else
98 return MI.takeError();
99 break;
100
101 case InfoType::CallSiteInfo:
102 if (Expected<llvm::gsym::CallSiteInfoCollection> CI =
103 llvm::gsym::CallSiteInfoCollection::decode(Data&: InfoData))
104 FI.CallSites = std::move(CI.get());
105 else
106 return CI.takeError();
107 break;
108
109 default:
110 return createStringError(EC: std::errc::io_error,
111 Fmt: "0x%8.8" PRIx64 ": unsupported InfoType %u",
112 Vals: Offset-8, Vals: IT);
113 }
114 Offset += InfoLength;
115 }
116 return std::move(FI);
117}
118
119uint64_t FunctionInfo::cacheEncoding(GsymCreator &GC) {
120 EncodingCache.clear();
121 if (!isValid())
122 return 0;
123 raw_svector_ostream OutStrm(EncodingCache);
124 FileWriter FW(OutStrm, llvm::endianness::native);
125 FW.setStringOffsetSize(GC.getStringOffsetSize());
126 llvm::Expected<uint64_t> Result = encode(O&: FW);
127 if (!Result) {
128 EncodingCache.clear();
129 consumeError(Err: Result.takeError());
130 return 0;
131 }
132 return EncodingCache.size();
133}
134
135llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out,
136 bool NoPadding) const {
137 if (!isValid())
138 return createStringError(EC: std::errc::invalid_argument,
139 Fmt: "attempted to encode invalid FunctionInfo object");
140 // Align FunctionInfo data to a 4 byte alignment, if padding is allowed
141 if (NoPadding == false)
142 Out.alignTo(Align: 4);
143 const uint64_t FuncInfoOffset = Out.tell();
144 // Check if we have already encoded this function info into EncodingCache.
145 // This will be non empty when creating segmented GSYM files as we need to
146 // precompute exactly how big FunctionInfo objects encode into so we can
147 // accurately make segments of a specific size.
148 if (!EncodingCache.empty() &&
149 llvm::endianness::native == Out.getByteOrder()) {
150 // We already encoded this object, just write out the bytes.
151 Out.writeData(Data: llvm::ArrayRef<uint8_t>((const uint8_t *)EncodingCache.data(),
152 EncodingCache.size()));
153 return FuncInfoOffset;
154 }
155 // Write the size in bytes of this function as a uint32_t. This can be zero
156 // if we just have a symbol from a symbol table and that symbol has no size.
157 Out.writeU32(Value: size());
158 // Write the name of this function as a string table offset.
159 Out.writeStringOffset(Value: Name);
160
161 if (OptLineTable) {
162 Out.writeU32(Value: InfoType::LineTableInfo);
163 // Write a uint32_t length as zero for now, we will fix this up after
164 // writing the LineTable out with the number of bytes that were written.
165 Out.writeU32(Value: 0);
166 const auto StartOffset = Out.tell();
167 llvm::Error err = OptLineTable->encode(O&: Out, BaseAddr: Range.start());
168 if (err)
169 return std::move(err);
170 const auto Length = Out.tell() - StartOffset;
171 if (Length > UINT32_MAX)
172 return createStringError(EC: std::errc::invalid_argument,
173 Fmt: "LineTable length is greater than UINT32_MAX");
174 // Fixup the size of the LineTable data with the correct size.
175 Out.fixup32(Value: static_cast<uint32_t>(Length), Offset: StartOffset - 4);
176 }
177
178 // Write out the inline function info if we have any and if it is valid.
179 if (Inline) {
180 Out.writeU32(Value: InfoType::InlineInfo);
181 // Write a uint32_t length as zero for now, we will fix this up after
182 // writing the LineTable out with the number of bytes that were written.
183 Out.writeU32(Value: 0);
184 const auto StartOffset = Out.tell();
185 llvm::Error err = Inline->encode(O&: Out, BaseAddr: Range.start());
186 if (err)
187 return std::move(err);
188 const auto Length = Out.tell() - StartOffset;
189 if (Length > UINT32_MAX)
190 return createStringError(EC: std::errc::invalid_argument,
191 Fmt: "InlineInfo length is greater than UINT32_MAX");
192 // Fixup the size of the InlineInfo data with the correct size.
193 Out.fixup32(Value: static_cast<uint32_t>(Length), Offset: StartOffset - 4);
194 }
195
196 // Write out the merged functions info if we have any and if it is valid.
197 if (MergedFunctions) {
198 Out.writeU32(Value: InfoType::MergedFunctionsInfo);
199 // Write a uint32_t length as zero for now, we will fix this up after
200 // writing the LineTable out with the number of bytes that were written.
201 Out.writeU32(Value: 0);
202 const auto StartOffset = Out.tell();
203 llvm::Error err = MergedFunctions->encode(O&: Out);
204 if (err)
205 return std::move(err);
206 const auto Length = Out.tell() - StartOffset;
207 if (Length > UINT32_MAX)
208 return createStringError(
209 EC: std::errc::invalid_argument,
210 Fmt: "MergedFunctionsInfo length is greater than UINT32_MAX");
211 // Fixup the size of the MergedFunctionsInfo data with the correct size.
212 Out.fixup32(Value: static_cast<uint32_t>(Length), Offset: StartOffset - 4);
213 }
214
215 // Write out the call sites if we have any and if they are valid.
216 if (CallSites) {
217 Out.writeU32(Value: InfoType::CallSiteInfo);
218 // Write a uint32_t length as zero for now, we will fix this up after
219 // writing the CallSites out with the number of bytes that were written.
220 Out.writeU32(Value: 0);
221 const auto StartOffset = Out.tell();
222 Error Err = CallSites->encode(O&: Out);
223 if (Err)
224 return std::move(Err);
225 const auto Length = Out.tell() - StartOffset;
226 if (Length > UINT32_MAX)
227 return createStringError(EC: std::errc::invalid_argument,
228 Fmt: "CallSites length is greater than UINT32_MAX");
229 // Fixup the size of the CallSites data with the correct size.
230 Out.fixup32(Value: static_cast<uint32_t>(Length), Offset: StartOffset - 4);
231 }
232
233 // Terminate the data chunks with an end of list with zero size.
234 Out.writeU32(Value: InfoType::EndOfList);
235 Out.writeU32(Value: 0);
236 return FuncInfoOffset;
237}
238
239llvm::Expected<LookupResult>
240FunctionInfo::lookup(GsymDataExtractor &Data, const GsymReader &GR,
241 uint64_t FuncAddr, uint64_t Addr,
242 std::optional<GsymDataExtractor> *MergedFuncsData) {
243 LookupResult LR;
244 LR.LookupAddr = Addr;
245 uint64_t Offset = 0;
246 LR.FuncRange = {FuncAddr, FuncAddr + Data.getU32(offset_ptr: &Offset)};
247 gsym_strp_t NameOffset = Data.getStringOffset(offset_ptr: &Offset);
248 // The "lookup" functions doesn't report errors as accurately as the "decode"
249 // function as it is meant to be fast. For more accurage errors we could call
250 // "decode".
251 if (!Data.isValidOffset(offset: Offset))
252 return createStringError(EC: std::errc::io_error,
253 Fmt: "FunctionInfo data is truncated");
254 // This function will be called with the result of a binary search of the
255 // address table, we must still make sure the address does not fall into a
256 // gap between functions are after the last function.
257 if (LR.FuncRange.size() > 0 && !LR.FuncRange.contains(Addr))
258 return createStringError(EC: std::errc::io_error,
259 Fmt: "address 0x%" PRIx64 " is not in GSYM", Vals: Addr);
260
261 if (NameOffset == 0)
262 return createStringError(EC: std::errc::io_error,
263 Fmt: "0x%8.8" PRIx64
264 ": invalid FunctionInfo Name value 0x0",
265 Vals: Offset - Data.getStringOffsetSize());
266 LR.FuncName = GR.getString(Offset: NameOffset);
267 bool Done = false;
268 std::optional<LineEntry> LineEntry;
269 std::optional<GsymDataExtractor> InlineInfoData;
270 while (!Done) {
271 if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 8))
272 return createStringError(EC: std::errc::io_error,
273 Fmt: "FunctionInfo data is truncated");
274 const uint32_t IT = Data.getU32(offset_ptr: &Offset);
275 const uint32_t InfoLength = Data.getU32(offset_ptr: &Offset);
276 const StringRef InfoBytes = Data.getData().substr(Start: Offset, N: InfoLength);
277 if (InfoLength != InfoBytes.size())
278 return createStringError(EC: std::errc::io_error,
279 Fmt: "FunctionInfo data is truncated");
280 GsymDataExtractor InfoData(Data, Offset, InfoLength);
281 switch (IT) {
282 case InfoType::EndOfList:
283 Done = true;
284 break;
285
286 case InfoType::LineTableInfo:
287 if (auto ExpectedLE = LineTable::lookup(Data&: InfoData, BaseAddr: FuncAddr, Addr))
288 LineEntry = ExpectedLE.get();
289 else
290 return ExpectedLE.takeError();
291 break;
292
293 case InfoType::MergedFunctionsInfo:
294 // Store the merged functions data for later parsing, if needed.
295 if (MergedFuncsData)
296 *MergedFuncsData = InfoData;
297 break;
298
299 case InfoType::InlineInfo:
300 // We will parse the inline info after our line table, but only if
301 // we have a line entry.
302 InlineInfoData = InfoData;
303 break;
304
305 case InfoType::CallSiteInfo:
306 if (auto CSIC = CallSiteInfoCollection::decode(Data&: InfoData)) {
307 // Find matching call site based on relative offset
308 for (const auto &CS : CSIC->CallSites) {
309 // Check if the call site matches the lookup address
310 if (CS.ReturnOffset == Addr - FuncAddr) {
311 // Get regex patterns
312 for (gsym_strp_t RegexOffset : CS.MatchRegex) {
313 LR.CallSiteFuncRegex.push_back(x: GR.getString(Offset: RegexOffset));
314 }
315 break;
316 }
317 }
318 } else {
319 return CSIC.takeError();
320 }
321 break;
322
323 default:
324 break;
325 }
326 Offset += InfoLength;
327 }
328
329 if (!LineEntry) {
330 // We don't have a valid line entry for our address, fill in our source
331 // location as best we can and return.
332 SourceLocation SrcLoc;
333 SrcLoc.Name = LR.FuncName;
334 SrcLoc.Offset = Addr - FuncAddr;
335 LR.Locations.push_back(x: SrcLoc);
336 return LR;
337 }
338
339 std::optional<FileEntry> LineEntryFile = GR.getFile(Index: LineEntry->File);
340 if (!LineEntryFile)
341 return createStringError(EC: std::errc::invalid_argument,
342 Fmt: "failed to extract file[%" PRIu32 "]",
343 Vals: LineEntry->File);
344
345 SourceLocation SrcLoc;
346 SrcLoc.Name = LR.FuncName;
347 SrcLoc.Offset = Addr - FuncAddr;
348 SrcLoc.Dir = GR.getString(Offset: LineEntryFile->Dir);
349 SrcLoc.Base = GR.getString(Offset: LineEntryFile->Base);
350 SrcLoc.Line = LineEntry->Line;
351 LR.Locations.push_back(x: SrcLoc);
352 // If we don't have inline information, we are done.
353 if (!InlineInfoData)
354 return LR;
355 // We have inline information. Try to augment the lookup result with this
356 // data.
357 llvm::Error Err = InlineInfo::lookup(GR, Data&: *InlineInfoData, BaseAddr: FuncAddr, Addr,
358 SrcLocs&: LR.Locations);
359 if (Err)
360 return std::move(Err);
361 return LR;
362}
363