| 1 | //===- CallSiteInfo.cpp -----------------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "llvm/DebugInfo/GSYM/CallSiteInfo.h" |
| 10 | #include "llvm/DebugInfo/GSYM/FileWriter.h" |
| 11 | #include "llvm/DebugInfo/GSYM/FunctionInfo.h" |
| 12 | #include "llvm/DebugInfo/GSYM/GsymCreator.h" |
| 13 | #include "llvm/MC/StringTableBuilder.h" |
| 14 | #include "llvm/Support/DataExtractor.h" |
| 15 | #include "llvm/Support/InterleavedRange.h" |
| 16 | #include "llvm/Support/YAMLParser.h" |
| 17 | #include "llvm/Support/YAMLTraits.h" |
| 18 | #include "llvm/Support/raw_ostream.h" |
| 19 | #include <string> |
| 20 | #include <vector> |
| 21 | |
| 22 | using namespace llvm; |
| 23 | using namespace gsym; |
| 24 | |
| 25 | Error CallSiteInfo::encode(FileWriter &O) const { |
| 26 | O.writeU64(Value: ReturnOffset); |
| 27 | O.writeU8(Value: Flags); |
| 28 | O.writeU32(Value: MatchRegex.size()); |
| 29 | for (uint32_t Entry : MatchRegex) |
| 30 | O.writeU32(Value: Entry); |
| 31 | return Error::success(); |
| 32 | } |
| 33 | |
| 34 | Expected<CallSiteInfo> CallSiteInfo::(DataExtractor &Data, |
| 35 | uint64_t &Offset) { |
| 36 | CallSiteInfo CSI; |
| 37 | |
| 38 | // Read ReturnOffset |
| 39 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: sizeof(uint64_t))) |
| 40 | return createStringError(EC: std::errc::io_error, |
| 41 | Fmt: "0x%8.8" PRIx64 ": missing ReturnOffset" , Vals: Offset); |
| 42 | CSI.ReturnOffset = Data.getU64(offset_ptr: &Offset); |
| 43 | |
| 44 | // Read Flags |
| 45 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: sizeof(uint8_t))) |
| 46 | return createStringError(EC: std::errc::io_error, |
| 47 | Fmt: "0x%8.8" PRIx64 ": missing Flags" , Vals: Offset); |
| 48 | CSI.Flags = Data.getU8(offset_ptr: &Offset); |
| 49 | |
| 50 | // Read number of MatchRegex entries |
| 51 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: sizeof(uint32_t))) |
| 52 | return createStringError(EC: std::errc::io_error, |
| 53 | Fmt: "0x%8.8" PRIx64 ": missing MatchRegex count" , |
| 54 | Vals: Offset); |
| 55 | uint32_t NumEntries = Data.getU32(offset_ptr: &Offset); |
| 56 | |
| 57 | CSI.MatchRegex.reserve(n: NumEntries); |
| 58 | for (uint32_t i = 0; i < NumEntries; ++i) { |
| 59 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: sizeof(uint32_t))) |
| 60 | return createStringError(EC: std::errc::io_error, |
| 61 | Fmt: "0x%8.8" PRIx64 ": missing MatchRegex entry" , |
| 62 | Vals: Offset); |
| 63 | uint32_t Entry = Data.getU32(offset_ptr: &Offset); |
| 64 | CSI.MatchRegex.push_back(x: Entry); |
| 65 | } |
| 66 | |
| 67 | return CSI; |
| 68 | } |
| 69 | |
| 70 | Error CallSiteInfoCollection::encode(FileWriter &O) const { |
| 71 | O.writeU32(Value: CallSites.size()); |
| 72 | for (const CallSiteInfo &CSI : CallSites) |
| 73 | if (Error Err = CSI.encode(O)) |
| 74 | return Err; |
| 75 | |
| 76 | return Error::success(); |
| 77 | } |
| 78 | |
| 79 | Expected<CallSiteInfoCollection> |
| 80 | CallSiteInfoCollection::(DataExtractor &Data) { |
| 81 | CallSiteInfoCollection CSC; |
| 82 | uint64_t Offset = 0; |
| 83 | |
| 84 | // Read number of CallSiteInfo entries |
| 85 | if (!Data.isValidOffsetForDataOfSize(offset: Offset, length: sizeof(uint32_t))) |
| 86 | return createStringError(EC: std::errc::io_error, |
| 87 | Fmt: "0x%8.8" PRIx64 ": missing CallSiteInfo count" , |
| 88 | Vals: Offset); |
| 89 | uint32_t NumCallSites = Data.getU32(offset_ptr: &Offset); |
| 90 | |
| 91 | CSC.CallSites.reserve(n: NumCallSites); |
| 92 | for (uint32_t i = 0; i < NumCallSites; ++i) { |
| 93 | Expected<CallSiteInfo> ECSI = CallSiteInfo::decode(Data, Offset); |
| 94 | if (!ECSI) |
| 95 | return ECSI.takeError(); |
| 96 | CSC.CallSites.emplace_back(args&: *ECSI); |
| 97 | } |
| 98 | |
| 99 | return CSC; |
| 100 | } |
| 101 | |
| 102 | /// Structures necessary for reading CallSiteInfo from YAML. |
| 103 | namespace llvm { |
| 104 | namespace yaml { |
| 105 | |
| 106 | struct CallSiteYAML { |
| 107 | // The offset of the return address of the call site - relative to the start |
| 108 | // of the function. |
| 109 | Hex64 return_offset; |
| 110 | std::vector<std::string> match_regex; |
| 111 | std::vector<std::string> flags; |
| 112 | }; |
| 113 | |
| 114 | struct FunctionYAML { |
| 115 | std::string name; |
| 116 | std::vector<CallSiteYAML> callsites; |
| 117 | }; |
| 118 | |
| 119 | struct FunctionsYAML { |
| 120 | std::vector<FunctionYAML> functions; |
| 121 | }; |
| 122 | |
| 123 | template <> struct MappingTraits<CallSiteYAML> { |
| 124 | static void mapping(IO &io, CallSiteYAML &callsite) { |
| 125 | io.mapRequired(Key: "return_offset" , Val&: callsite.return_offset); |
| 126 | io.mapRequired(Key: "match_regex" , Val&: callsite.match_regex); |
| 127 | io.mapOptional(Key: "flags" , Val&: callsite.flags); |
| 128 | } |
| 129 | }; |
| 130 | |
| 131 | template <> struct MappingTraits<FunctionYAML> { |
| 132 | static void mapping(IO &io, FunctionYAML &func) { |
| 133 | io.mapRequired(Key: "name" , Val&: func.name); |
| 134 | io.mapOptional(Key: "callsites" , Val&: func.callsites); |
| 135 | } |
| 136 | }; |
| 137 | |
| 138 | template <> struct MappingTraits<FunctionsYAML> { |
| 139 | static void mapping(IO &io, FunctionsYAML &FuncYAMLs) { |
| 140 | io.mapRequired(Key: "functions" , Val&: FuncYAMLs.functions); |
| 141 | } |
| 142 | }; |
| 143 | |
| 144 | } // namespace yaml |
| 145 | } // namespace llvm |
| 146 | |
| 147 | LLVM_YAML_IS_SEQUENCE_VECTOR(CallSiteYAML) |
| 148 | LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionYAML) |
| 149 | |
| 150 | Error CallSiteInfoLoader::loadYAML(StringRef YAMLFile) { |
| 151 | // Step 1: Read YAML file |
| 152 | auto BufferOrError = MemoryBuffer::getFile(Filename: YAMLFile, /*IsText=*/true); |
| 153 | if (!BufferOrError) |
| 154 | return errorCodeToError(EC: BufferOrError.getError()); |
| 155 | |
| 156 | std::unique_ptr<MemoryBuffer> Buffer = std::move(*BufferOrError); |
| 157 | |
| 158 | // Step 2: Parse YAML content |
| 159 | yaml::FunctionsYAML FuncsYAML; |
| 160 | yaml::Input Yin(Buffer->getMemBufferRef()); |
| 161 | Yin >> FuncsYAML; |
| 162 | if (Yin.error()) |
| 163 | return createStringError(EC: Yin.error(), Fmt: "Error parsing YAML file: %s\n" , |
| 164 | Vals: Buffer->getBufferIdentifier().str().c_str()); |
| 165 | |
| 166 | // Step 3: Build function map from Funcs |
| 167 | auto FuncMap = buildFunctionMap(); |
| 168 | |
| 169 | // Step 4: Process parsed YAML functions and update FuncMap |
| 170 | return processYAMLFunctions(FuncYAMLs: FuncsYAML, FuncMap); |
| 171 | } |
| 172 | |
| 173 | StringMap<FunctionInfo *> CallSiteInfoLoader::buildFunctionMap() { |
| 174 | // If the function name is already in the map, don't update it. This way we |
| 175 | // preferentially use the first encountered function. Since symbols are |
| 176 | // loaded from dSYM first, we end up preferring keeping track of symbols |
| 177 | // from dSYM rather than from the symbol table - which is what we want to |
| 178 | // do. |
| 179 | StringMap<FunctionInfo *> FuncMap; |
| 180 | for (auto &Func : Funcs) { |
| 181 | FuncMap.try_emplace(Key: GCreator.getString(Offset: Func.Name), Args: &Func); |
| 182 | if (auto &MFuncs = Func.MergedFunctions) |
| 183 | for (auto &MFunc : MFuncs->MergedFunctions) |
| 184 | FuncMap.try_emplace(Key: GCreator.getString(Offset: MFunc.Name), Args: &MFunc); |
| 185 | } |
| 186 | return FuncMap; |
| 187 | } |
| 188 | |
| 189 | Error CallSiteInfoLoader::processYAMLFunctions( |
| 190 | const yaml::FunctionsYAML &FuncYAMLs, StringMap<FunctionInfo *> &FuncMap) { |
| 191 | // For each function in the YAML file |
| 192 | for (const auto &FuncYAML : FuncYAMLs.functions) { |
| 193 | auto It = FuncMap.find(Key: FuncYAML.name); |
| 194 | if (It == FuncMap.end()) |
| 195 | return createStringError( |
| 196 | EC: std::errc::invalid_argument, |
| 197 | Fmt: "Can't find function '%s' specified in callsite YAML\n" , |
| 198 | Vals: FuncYAML.name.c_str()); |
| 199 | |
| 200 | FunctionInfo *FuncInfo = It->second; |
| 201 | // Create a CallSiteInfoCollection if not already present |
| 202 | if (!FuncInfo->CallSites) |
| 203 | FuncInfo->CallSites = CallSiteInfoCollection(); |
| 204 | for (const auto &CallSiteYAML : FuncYAML.callsites) { |
| 205 | CallSiteInfo CSI; |
| 206 | // Since YAML has specifies relative return offsets, add the function |
| 207 | // start address to make the offset absolute. |
| 208 | CSI.ReturnOffset = CallSiteYAML.return_offset; |
| 209 | for (const auto &Regex : CallSiteYAML.match_regex) { |
| 210 | uint32_t StrOffset = GCreator.insertString(S: Regex); |
| 211 | CSI.MatchRegex.push_back(x: StrOffset); |
| 212 | } |
| 213 | |
| 214 | // Parse flags and combine them |
| 215 | for (const auto &FlagStr : CallSiteYAML.flags) { |
| 216 | if (FlagStr == "InternalCall" ) { |
| 217 | CSI.Flags |= static_cast<uint8_t>(CallSiteInfo::InternalCall); |
| 218 | } else if (FlagStr == "ExternalCall" ) { |
| 219 | CSI.Flags |= static_cast<uint8_t>(CallSiteInfo::ExternalCall); |
| 220 | } else { |
| 221 | return createStringError(EC: std::errc::invalid_argument, |
| 222 | Fmt: "Unknown flag in callsite YAML: %s\n" , |
| 223 | Vals: FlagStr.c_str()); |
| 224 | } |
| 225 | } |
| 226 | FuncInfo->CallSites->CallSites.push_back(x: CSI); |
| 227 | } |
| 228 | } |
| 229 | return Error::success(); |
| 230 | } |
| 231 | |
| 232 | raw_ostream &gsym::operator<<(raw_ostream &OS, const CallSiteInfo &CSI) { |
| 233 | OS << " Return=" << HEX64(CSI.ReturnOffset); |
| 234 | OS << " Flags=" << HEX8(CSI.Flags); |
| 235 | OS << " RegEx=" << llvm::interleaved(R: CSI.MatchRegex, Separator: "," ); |
| 236 | return OS; |
| 237 | } |
| 238 | |
| 239 | raw_ostream &gsym::operator<<(raw_ostream &OS, |
| 240 | const CallSiteInfoCollection &CSIC) { |
| 241 | for (const auto &CS : CSIC.CallSites) { |
| 242 | OS << CS; |
| 243 | OS << "\n" ; |
| 244 | } |
| 245 | return OS; |
| 246 | } |
| 247 | |