| 1 | //===- FunctionInfo.cpp ---------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "llvm/DebugInfo/GSYM/FunctionInfo.h" |
| 10 | #include "llvm/DebugInfo/GSYM/FileWriter.h" |
| 11 | #include "llvm/DebugInfo/GSYM/GsymReader.h" |
| 12 | #include "llvm/DebugInfo/GSYM/LineTable.h" |
| 13 | #include "llvm/DebugInfo/GSYM/InlineInfo.h" |
| 14 | #include "llvm/Support/DataExtractor.h" |
| 15 | #include <optional> |
| 16 | |
| 17 | using namespace llvm; |
| 18 | using namespace gsym; |
| 19 | |
/// Tags that identify the optional data chunks associated with a
/// FunctionInfo object. In the encoded form each chunk is introduced by one of
/// these values written as a uint32_t, followed by a uint32_t payload length.
enum InfoType : uint32_t {
  EndOfList = 0,           ///< Terminates the list of chunks.
  LineTableInfo = 1,       ///< Chunk payload is an encoded LineTable.
  InlineInfo = 2,          ///< Chunk payload is an encoded InlineInfo.
  MergedFunctionsInfo = 3, ///< Chunk payload is an encoded MergedFunctionsInfo.
  CallSiteInfo = 4,        ///< Chunk payload is an encoded CallSiteInfoCollection.
};
| 29 | |
| 30 | raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) { |
| 31 | OS << FI.Range << ": " << "Name=" << HEX32(FI.Name) << '\n'; |
| 32 | if (FI.OptLineTable) |
| 33 | OS << FI.OptLineTable << '\n'; |
| 34 | if (FI.Inline) |
| 35 | OS << FI.Inline << '\n'; |
| 36 | if (FI.CallSites) |
| 37 | OS << *FI.CallSites << '\n'; |
| 38 | return OS; |
| 39 | } |
| 40 | |
| 41 | llvm::Expected<FunctionInfo> FunctionInfo::(DataExtractor &Data, |
| 42 | uint64_t BaseAddr) { |
| 43 | FunctionInfo FI; |
| 44 | uint64_t Offset = 0; |
| 45 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 4)) |
| 46 | return createStringError(EC: std::errc::io_error, |
| 47 | Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo Size" , Vals: Offset); |
| 48 | FI.Range = {BaseAddr, BaseAddr + Data.getU32(offset_ptr: &Offset)}; |
| 49 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 4)) |
| 50 | return createStringError(EC: std::errc::io_error, |
| 51 | Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo Name" , Vals: Offset); |
| 52 | FI.Name = Data.getU32(offset_ptr: &Offset); |
| 53 | if (FI.Name == 0) |
| 54 | return createStringError(EC: std::errc::io_error, |
| 55 | Fmt: "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x%8.8x" , |
| 56 | Vals: Offset - 4, Vals: FI.Name); |
| 57 | bool Done = false; |
| 58 | while (!Done) { |
| 59 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 4)) |
| 60 | return createStringError(EC: std::errc::io_error, |
| 61 | Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo InfoType value" , Vals: Offset); |
| 62 | const uint32_t IT = Data.getU32(offset_ptr: &Offset); |
| 63 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 4)) |
| 64 | return createStringError(EC: std::errc::io_error, |
| 65 | Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo InfoType length" , Vals: Offset); |
| 66 | const uint32_t InfoLength = Data.getU32(offset_ptr: &Offset); |
| 67 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: InfoLength)) |
| 68 | return createStringError(EC: std::errc::io_error, |
| 69 | Fmt: "0x%8.8" PRIx64 ": missing FunctionInfo data for InfoType %u" , |
| 70 | Vals: Offset, Vals: IT); |
| 71 | DataExtractor InfoData(Data.getData().substr(Start: Offset, N: InfoLength), |
| 72 | Data.isLittleEndian(), |
| 73 | Data.getAddressSize()); |
| 74 | switch (IT) { |
| 75 | case InfoType::EndOfList: |
| 76 | Done = true; |
| 77 | break; |
| 78 | |
| 79 | case InfoType::LineTableInfo: |
| 80 | if (Expected<LineTable> LT = LineTable::decode(Data&: InfoData, BaseAddr)) |
| 81 | FI.OptLineTable = std::move(LT.get()); |
| 82 | else |
| 83 | return LT.takeError(); |
| 84 | break; |
| 85 | |
| 86 | case InfoType::InlineInfo: |
| 87 | if (Expected<InlineInfo> II = InlineInfo::decode(Data&: InfoData, BaseAddr)) |
| 88 | FI.Inline = std::move(II.get()); |
| 89 | else |
| 90 | return II.takeError(); |
| 91 | break; |
| 92 | |
| 93 | case InfoType::MergedFunctionsInfo: |
| 94 | if (Expected<MergedFunctionsInfo> MI = |
| 95 | MergedFunctionsInfo::decode(Data&: InfoData, BaseAddr)) |
| 96 | FI.MergedFunctions = std::move(MI.get()); |
| 97 | else |
| 98 | return MI.takeError(); |
| 99 | break; |
| 100 | |
| 101 | case InfoType::CallSiteInfo: |
| 102 | if (Expected<llvm::gsym::CallSiteInfoCollection> CI = |
| 103 | llvm::gsym::CallSiteInfoCollection::decode(Data&: InfoData)) |
| 104 | FI.CallSites = std::move(CI.get()); |
| 105 | else |
| 106 | return CI.takeError(); |
| 107 | break; |
| 108 | |
| 109 | default: |
| 110 | return createStringError(EC: std::errc::io_error, |
| 111 | Fmt: "0x%8.8" PRIx64 ": unsupported InfoType %u" , |
| 112 | Vals: Offset-8, Vals: IT); |
| 113 | } |
| 114 | Offset += InfoLength; |
| 115 | } |
| 116 | return std::move(FI); |
| 117 | } |
| 118 | |
| 119 | uint64_t FunctionInfo::cacheEncoding() { |
| 120 | EncodingCache.clear(); |
| 121 | if (!isValid()) |
| 122 | return 0; |
| 123 | raw_svector_ostream OutStrm(EncodingCache); |
| 124 | FileWriter FW(OutStrm, llvm::endianness::native); |
| 125 | llvm::Expected<uint64_t> Result = encode(O&: FW); |
| 126 | if (!Result) { |
| 127 | EncodingCache.clear(); |
| 128 | consumeError(Err: Result.takeError()); |
| 129 | return 0; |
| 130 | } |
| 131 | return EncodingCache.size(); |
| 132 | } |
| 133 | |
| 134 | llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out, |
| 135 | bool NoPadding) const { |
| 136 | if (!isValid()) |
| 137 | return createStringError(EC: std::errc::invalid_argument, |
| 138 | Fmt: "attempted to encode invalid FunctionInfo object" ); |
| 139 | // Align FunctionInfo data to a 4 byte alignment, if padding is allowed |
| 140 | if (NoPadding == false) |
| 141 | Out.alignTo(Align: 4); |
| 142 | const uint64_t FuncInfoOffset = Out.tell(); |
| 143 | // Check if we have already encoded this function info into EncodingCache. |
| 144 | // This will be non empty when creating segmented GSYM files as we need to |
| 145 | // precompute exactly how big FunctionInfo objects encode into so we can |
| 146 | // accurately make segments of a specific size. |
| 147 | if (!EncodingCache.empty() && |
| 148 | llvm::endianness::native == Out.getByteOrder()) { |
| 149 | // We already encoded this object, just write out the bytes. |
| 150 | Out.writeData(Data: llvm::ArrayRef<uint8_t>((const uint8_t *)EncodingCache.data(), |
| 151 | EncodingCache.size())); |
| 152 | return FuncInfoOffset; |
| 153 | } |
| 154 | // Write the size in bytes of this function as a uint32_t. This can be zero |
| 155 | // if we just have a symbol from a symbol table and that symbol has no size. |
| 156 | Out.writeU32(Value: size()); |
| 157 | // Write the name of this function as a uint32_t string table offset. |
| 158 | Out.writeU32(Value: Name); |
| 159 | |
| 160 | if (OptLineTable) { |
| 161 | Out.writeU32(Value: InfoType::LineTableInfo); |
| 162 | // Write a uint32_t length as zero for now, we will fix this up after |
| 163 | // writing the LineTable out with the number of bytes that were written. |
| 164 | Out.writeU32(Value: 0); |
| 165 | const auto StartOffset = Out.tell(); |
| 166 | llvm::Error err = OptLineTable->encode(O&: Out, BaseAddr: Range.start()); |
| 167 | if (err) |
| 168 | return std::move(err); |
| 169 | const auto Length = Out.tell() - StartOffset; |
| 170 | if (Length > UINT32_MAX) |
| 171 | return createStringError(EC: std::errc::invalid_argument, |
| 172 | Fmt: "LineTable length is greater than UINT32_MAX" ); |
| 173 | // Fixup the size of the LineTable data with the correct size. |
| 174 | Out.fixup32(Value: static_cast<uint32_t>(Length), Offset: StartOffset - 4); |
| 175 | } |
| 176 | |
| 177 | // Write out the inline function info if we have any and if it is valid. |
| 178 | if (Inline) { |
| 179 | Out.writeU32(Value: InfoType::InlineInfo); |
| 180 | // Write a uint32_t length as zero for now, we will fix this up after |
| 181 | // writing the LineTable out with the number of bytes that were written. |
| 182 | Out.writeU32(Value: 0); |
| 183 | const auto StartOffset = Out.tell(); |
| 184 | llvm::Error err = Inline->encode(O&: Out, BaseAddr: Range.start()); |
| 185 | if (err) |
| 186 | return std::move(err); |
| 187 | const auto Length = Out.tell() - StartOffset; |
| 188 | if (Length > UINT32_MAX) |
| 189 | return createStringError(EC: std::errc::invalid_argument, |
| 190 | Fmt: "InlineInfo length is greater than UINT32_MAX" ); |
| 191 | // Fixup the size of the InlineInfo data with the correct size. |
| 192 | Out.fixup32(Value: static_cast<uint32_t>(Length), Offset: StartOffset - 4); |
| 193 | } |
| 194 | |
| 195 | // Write out the merged functions info if we have any and if it is valid. |
| 196 | if (MergedFunctions) { |
| 197 | Out.writeU32(Value: InfoType::MergedFunctionsInfo); |
| 198 | // Write a uint32_t length as zero for now, we will fix this up after |
| 199 | // writing the LineTable out with the number of bytes that were written. |
| 200 | Out.writeU32(Value: 0); |
| 201 | const auto StartOffset = Out.tell(); |
| 202 | llvm::Error err = MergedFunctions->encode(O&: Out); |
| 203 | if (err) |
| 204 | return std::move(err); |
| 205 | const auto Length = Out.tell() - StartOffset; |
| 206 | if (Length > UINT32_MAX) |
| 207 | return createStringError( |
| 208 | EC: std::errc::invalid_argument, |
| 209 | Fmt: "MergedFunctionsInfo length is greater than UINT32_MAX" ); |
| 210 | // Fixup the size of the MergedFunctionsInfo data with the correct size. |
| 211 | Out.fixup32(Value: static_cast<uint32_t>(Length), Offset: StartOffset - 4); |
| 212 | } |
| 213 | |
| 214 | // Write out the call sites if we have any and if they are valid. |
| 215 | if (CallSites) { |
| 216 | Out.writeU32(Value: InfoType::CallSiteInfo); |
| 217 | // Write a uint32_t length as zero for now, we will fix this up after |
| 218 | // writing the CallSites out with the number of bytes that were written. |
| 219 | Out.writeU32(Value: 0); |
| 220 | const auto StartOffset = Out.tell(); |
| 221 | Error Err = CallSites->encode(O&: Out); |
| 222 | if (Err) |
| 223 | return std::move(Err); |
| 224 | const auto Length = Out.tell() - StartOffset; |
| 225 | if (Length > UINT32_MAX) |
| 226 | return createStringError(EC: std::errc::invalid_argument, |
| 227 | Fmt: "CallSites length is greater than UINT32_MAX" ); |
| 228 | // Fixup the size of the CallSites data with the correct size. |
| 229 | Out.fixup32(Value: static_cast<uint32_t>(Length), Offset: StartOffset - 4); |
| 230 | } |
| 231 | |
| 232 | // Terminate the data chunks with an end of list with zero size. |
| 233 | Out.writeU32(Value: InfoType::EndOfList); |
| 234 | Out.writeU32(Value: 0); |
| 235 | return FuncInfoOffset; |
| 236 | } |
| 237 | |
| 238 | llvm::Expected<LookupResult> |
| 239 | FunctionInfo::(DataExtractor &Data, const GsymReader &GR, |
| 240 | uint64_t FuncAddr, uint64_t Addr, |
| 241 | std::optional<DataExtractor> *MergedFuncsData) { |
| 242 | LookupResult LR; |
| 243 | LR.LookupAddr = Addr; |
| 244 | uint64_t Offset = 0; |
| 245 | LR.FuncRange = {FuncAddr, FuncAddr + Data.getU32(offset_ptr: &Offset)}; |
| 246 | uint32_t NameOffset = Data.getU32(offset_ptr: &Offset); |
| 247 | // The "lookup" functions doesn't report errors as accurately as the "decode" |
| 248 | // function as it is meant to be fast. For more accurage errors we could call |
| 249 | // "decode". |
| 250 | if (!Data.isValidOffset(offset: Offset)) |
| 251 | return createStringError(EC: std::errc::io_error, |
| 252 | Fmt: "FunctionInfo data is truncated" ); |
| 253 | // This function will be called with the result of a binary search of the |
| 254 | // address table, we must still make sure the address does not fall into a |
| 255 | // gap between functions are after the last function. |
| 256 | if (LR.FuncRange.size() > 0 && !LR.FuncRange.contains(Addr)) |
| 257 | return createStringError(EC: std::errc::io_error, |
| 258 | Fmt: "address 0x%" PRIx64 " is not in GSYM" , Vals: Addr); |
| 259 | |
| 260 | if (NameOffset == 0) |
| 261 | return createStringError(EC: std::errc::io_error, |
| 262 | Fmt: "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x00000000" , |
| 263 | Vals: Offset - 4); |
| 264 | LR.FuncName = GR.getString(Offset: NameOffset); |
| 265 | bool Done = false; |
| 266 | std::optional<LineEntry> LineEntry; |
| 267 | std::optional<DataExtractor> InlineInfoData; |
| 268 | while (!Done) { |
| 269 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: 8)) |
| 270 | return createStringError(EC: std::errc::io_error, |
| 271 | Fmt: "FunctionInfo data is truncated" ); |
| 272 | const uint32_t IT = Data.getU32(offset_ptr: &Offset); |
| 273 | const uint32_t InfoLength = Data.getU32(offset_ptr: &Offset); |
| 274 | const StringRef InfoBytes = Data.getData().substr(Start: Offset, N: InfoLength); |
| 275 | if (InfoLength != InfoBytes.size()) |
| 276 | return createStringError(EC: std::errc::io_error, |
| 277 | Fmt: "FunctionInfo data is truncated" ); |
| 278 | DataExtractor InfoData(InfoBytes, Data.isLittleEndian(), |
| 279 | Data.getAddressSize()); |
| 280 | switch (IT) { |
| 281 | case InfoType::EndOfList: |
| 282 | Done = true; |
| 283 | break; |
| 284 | |
| 285 | case InfoType::LineTableInfo: |
| 286 | if (auto ExpectedLE = LineTable::lookup(Data&: InfoData, BaseAddr: FuncAddr, Addr)) |
| 287 | LineEntry = ExpectedLE.get(); |
| 288 | else |
| 289 | return ExpectedLE.takeError(); |
| 290 | break; |
| 291 | |
| 292 | case InfoType::MergedFunctionsInfo: |
| 293 | // Store the merged functions data for later parsing, if needed. |
| 294 | if (MergedFuncsData) |
| 295 | *MergedFuncsData = InfoData; |
| 296 | break; |
| 297 | |
| 298 | case InfoType::InlineInfo: |
| 299 | // We will parse the inline info after our line table, but only if |
| 300 | // we have a line entry. |
| 301 | InlineInfoData = InfoData; |
| 302 | break; |
| 303 | |
| 304 | case InfoType::CallSiteInfo: |
| 305 | if (auto CSIC = CallSiteInfoCollection::decode(Data&: InfoData)) { |
| 306 | // Find matching call site based on relative offset |
| 307 | for (const auto &CS : CSIC->CallSites) { |
| 308 | // Check if the call site matches the lookup address |
| 309 | if (CS.ReturnOffset == Addr - FuncAddr) { |
| 310 | // Get regex patterns |
| 311 | for (uint32_t RegexOffset : CS.MatchRegex) { |
| 312 | LR.CallSiteFuncRegex.push_back(x: GR.getString(Offset: RegexOffset)); |
| 313 | } |
| 314 | break; |
| 315 | } |
| 316 | } |
| 317 | } else { |
| 318 | return CSIC.takeError(); |
| 319 | } |
| 320 | break; |
| 321 | |
| 322 | default: |
| 323 | break; |
| 324 | } |
| 325 | Offset += InfoLength; |
| 326 | } |
| 327 | |
| 328 | if (!LineEntry) { |
| 329 | // We don't have a valid line entry for our address, fill in our source |
| 330 | // location as best we can and return. |
| 331 | SourceLocation SrcLoc; |
| 332 | SrcLoc.Name = LR.FuncName; |
| 333 | SrcLoc.Offset = Addr - FuncAddr; |
| 334 | LR.Locations.push_back(x: SrcLoc); |
| 335 | return LR; |
| 336 | } |
| 337 | |
| 338 | std::optional<FileEntry> LineEntryFile = GR.getFile(Index: LineEntry->File); |
| 339 | if (!LineEntryFile) |
| 340 | return createStringError(EC: std::errc::invalid_argument, |
| 341 | Fmt: "failed to extract file[%" PRIu32 "]" , |
| 342 | Vals: LineEntry->File); |
| 343 | |
| 344 | SourceLocation SrcLoc; |
| 345 | SrcLoc.Name = LR.FuncName; |
| 346 | SrcLoc.Offset = Addr - FuncAddr; |
| 347 | SrcLoc.Dir = GR.getString(Offset: LineEntryFile->Dir); |
| 348 | SrcLoc.Base = GR.getString(Offset: LineEntryFile->Base); |
| 349 | SrcLoc.Line = LineEntry->Line; |
| 350 | LR.Locations.push_back(x: SrcLoc); |
| 351 | // If we don't have inline information, we are done. |
| 352 | if (!InlineInfoData) |
| 353 | return LR; |
| 354 | // We have inline information. Try to augment the lookup result with this |
| 355 | // data. |
| 356 | llvm::Error Err = InlineInfo::lookup(GR, Data&: *InlineInfoData, BaseAddr: FuncAddr, Addr, |
| 357 | SrcLocs&: LR.Locations); |
| 358 | if (Err) |
| 359 | return std::move(Err); |
| 360 | return LR; |
| 361 | } |
| 362 | |