1 | //===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains support for reading MemProf profiling data. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_PROFILEDATA_MEMPROFREADER_H_ |
14 | #define LLVM_PROFILEDATA_MEMPROFREADER_H_ |
15 | |
16 | #include "llvm/ADT/DenseMap.h" |
17 | #include "llvm/ADT/MapVector.h" |
18 | #include "llvm/ADT/StringRef.h" |
19 | #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" |
20 | #include "llvm/DebugInfo/Symbolize/Symbolize.h" |
21 | #include "llvm/IR/GlobalValue.h" |
22 | #include "llvm/Object/Binary.h" |
23 | #include "llvm/Object/ObjectFile.h" |
24 | #include "llvm/ProfileData/InstrProfReader.h" |
25 | #include "llvm/ProfileData/MemProf.h" |
26 | #include "llvm/ProfileData/MemProfData.inc" |
27 | #include "llvm/Support/Error.h" |
28 | #include "llvm/Support/MemoryBuffer.h" |
29 | |
30 | #include <functional> |
31 | |
32 | namespace llvm { |
33 | namespace memprof { |
34 | // A class for memprof profile data populated directly from external |
35 | // sources. |
36 | class MemProfReader { |
37 | public: |
38 | // The MemProfReader only holds memory profile information. |
39 | InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; } |
40 | |
41 | using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>; |
42 | using Iterator = InstrProfIterator<GuidMemProfRecordPair, MemProfReader>; |
43 | Iterator end() { return Iterator(); } |
44 | Iterator begin() { |
45 | Iter = FunctionProfileData.begin(); |
46 | return Iterator(this); |
47 | } |
48 | |
49 | // Return a const reference to the internal Id to Frame mappings. |
50 | const llvm::DenseMap<FrameId, Frame> &getFrameMapping() const { |
51 | return IdToFrame; |
52 | } |
53 | |
54 | // Return a const reference to the internal Id to call stacks. |
55 | const llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> & |
56 | getCallStacks() const { |
57 | return CSIdToCallStack; |
58 | } |
59 | |
60 | // Return a const reference to the internal function profile data. |
61 | const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> & |
62 | getProfileData() const { |
63 | return FunctionProfileData; |
64 | } |
65 | |
66 | virtual Error |
67 | readNextRecord(GuidMemProfRecordPair &GuidRecord, |
68 | std::function<const Frame(const FrameId)> Callback = nullptr) { |
69 | if (FunctionProfileData.empty()) |
70 | return make_error<InstrProfError>(Args: instrprof_error::empty_raw_profile); |
71 | |
72 | if (Iter == FunctionProfileData.end()) |
73 | return make_error<InstrProfError>(Args: instrprof_error::eof); |
74 | |
75 | if (Callback == nullptr) |
76 | Callback = |
77 | std::bind(f: &MemProfReader::idToFrame, args: this, args: std::placeholders::_1); |
78 | |
79 | CallStackIdConverter<decltype(CSIdToCallStack)> CSIdConv(CSIdToCallStack, |
80 | Callback); |
81 | |
82 | const IndexedMemProfRecord &IndexedRecord = Iter->second; |
83 | GuidRecord = { |
84 | Iter->first, |
85 | IndexedRecord.toMemProfRecord(Callback: CSIdConv), |
86 | }; |
87 | if (CSIdConv.LastUnmappedId) |
88 | return make_error<InstrProfError>(Args: instrprof_error::hash_mismatch); |
89 | Iter++; |
90 | return Error::success(); |
91 | } |
92 | |
93 | // Allow default construction for derived classes which can populate the |
94 | // contents after construction. |
95 | MemProfReader() = default; |
96 | virtual ~MemProfReader() = default; |
97 | |
98 | // Initialize the MemProfReader with the frame mappings and profile contents. |
99 | MemProfReader( |
100 | llvm::DenseMap<FrameId, Frame> FrameIdMap, |
101 | llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData); |
102 | |
103 | // Initialize the MemProfReader with the frame mappings, call stack mappings, |
104 | // and profile contents. |
105 | MemProfReader( |
106 | llvm::DenseMap<FrameId, Frame> FrameIdMap, |
107 | llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CSIdMap, |
108 | llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData) |
109 | : IdToFrame(std::move(FrameIdMap)), CSIdToCallStack(std::move(CSIdMap)), |
110 | FunctionProfileData(std::move(ProfData)) {} |
111 | |
112 | protected: |
113 | // A helper method to extract the frame from the IdToFrame map. |
114 | const Frame &idToFrame(const FrameId Id) const { |
115 | auto It = IdToFrame.find(Val: Id); |
116 | assert(It != IdToFrame.end() && "Id not found in map." ); |
117 | return It->getSecond(); |
118 | } |
119 | // A mapping from FrameId (a hash of the contents) to the frame. |
120 | llvm::DenseMap<FrameId, Frame> IdToFrame; |
121 | // A mapping from CallStackId to the call stack. |
122 | llvm::DenseMap<CallStackId, llvm::SmallVector<FrameId>> CSIdToCallStack; |
123 | // A mapping from function GUID, hash of the canonical function symbol to the |
124 | // memprof profile data for that function, i.e allocation and callsite info. |
125 | llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData; |
126 | // An iterator to the internal function profile data structure. |
127 | llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter; |
128 | }; |
129 | |
130 | // Map from id (recorded from sanitizer stack depot) to virtual addresses for |
131 | // each program counter address in the callstack. |
132 | using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>; |
133 | |
134 | // Specializes the MemProfReader class to populate the contents from raw binary |
135 | // memprof profiles from instrumentation based profiling. |
136 | class RawMemProfReader final : public MemProfReader { |
137 | public: |
138 | RawMemProfReader(const RawMemProfReader &) = delete; |
139 | RawMemProfReader &operator=(const RawMemProfReader &) = delete; |
140 | virtual ~RawMemProfReader() override; |
141 | |
142 | // Prints the contents of the profile in YAML format. |
143 | void printYAML(raw_ostream &OS); |
144 | |
145 | // Return true if the \p DataBuffer starts with magic bytes indicating it is |
146 | // a raw binary memprof profile. |
147 | static bool hasFormat(const MemoryBuffer &DataBuffer); |
148 | // Return true if the file at \p Path starts with magic bytes indicating it is |
149 | // a raw binary memprof profile. |
150 | static bool hasFormat(const StringRef Path); |
151 | |
152 | // Create a RawMemProfReader after sanity checking the contents of the file at |
153 | // \p Path or the \p Buffer. The binary from which the profile has been |
154 | // collected is specified via a path in \p ProfiledBinary. |
155 | static Expected<std::unique_ptr<RawMemProfReader>> |
156 | create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false); |
157 | static Expected<std::unique_ptr<RawMemProfReader>> |
158 | create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary, |
159 | bool KeepName = false); |
160 | |
161 | // Returns a list of build ids recorded in the segment information. |
162 | static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer); |
163 | |
164 | Error |
165 | readNextRecord(GuidMemProfRecordPair &GuidRecord, |
166 | std::function<const Frame(const FrameId)> Callback) override; |
167 | |
168 | // Constructor for unittests only. |
169 | RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym, |
170 | llvm::SmallVectorImpl<SegmentEntry> &Seg, |
171 | llvm::MapVector<uint64_t, MemInfoBlock> &Prof, |
172 | CallStackMap &SM, bool KeepName = false) |
173 | : SegmentInfo(Seg.begin(), Seg.end()), CallstackProfileData(Prof), |
174 | StackMap(SM), KeepSymbolName(KeepName) { |
175 | // We don't call initialize here since there is no raw profile to read. The |
176 | // test should pass in the raw profile as structured data. |
177 | |
178 | // If there is an error here then the mock symbolizer has not been |
179 | // initialized properly. |
180 | if (Error E = symbolizeAndFilterStackFrames(Symbolizer: std::move(Sym))) |
181 | report_fatal_error(Err: std::move(E)); |
182 | if (Error E = mapRawProfileToRecords()) |
183 | report_fatal_error(Err: std::move(E)); |
184 | } |
185 | |
186 | private: |
187 | RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName) |
188 | : Binary(std::move(Bin)), KeepSymbolName(KeepName) {} |
189 | // Initializes the RawMemProfReader with the contents in `DataBuffer`. |
190 | Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer); |
191 | // Read and parse the contents of the `DataBuffer` as a binary format profile. |
192 | Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer); |
193 | // Initialize the segment mapping information for symbolization. |
194 | Error setupForSymbolization(); |
195 | // Symbolize and cache all the virtual addresses we encounter in the |
196 | // callstacks from the raw profile. Also prune callstack frames which we can't |
197 | // symbolize or those that belong to the runtime. For profile entries where |
198 | // the entire callstack is pruned, we drop the entry from the profile. |
199 | Error symbolizeAndFilterStackFrames( |
200 | std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer); |
201 | // Construct memprof records for each function and store it in the |
202 | // `FunctionProfileData` map. A function may have allocation profile data or |
203 | // callsite data or both. |
204 | Error mapRawProfileToRecords(); |
205 | |
206 | object::SectionedAddress getModuleOffset(uint64_t VirtualAddress); |
207 | |
208 | llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> |
209 | readMemInfoBlocks(const char *Ptr); |
210 | |
211 | // The profiled binary. |
212 | object::OwningBinary<object::Binary> Binary; |
213 | // Version of raw memprof binary currently being read. Defaults to most up |
214 | // to date version. |
215 | uint64_t MemprofRawVersion = MEMPROF_RAW_VERSION; |
216 | // The preferred load address of the executable segment. |
217 | uint64_t PreferredTextSegmentAddress = 0; |
218 | // The base address of the text segment in the process during profiling. |
219 | uint64_t ProfiledTextSegmentStart = 0; |
220 | // The limit address of the text segment in the process during profiling. |
221 | uint64_t ProfiledTextSegmentEnd = 0; |
222 | |
223 | // The memory mapped segment information for all executable segments in the |
224 | // profiled binary (filtered from the raw profile using the build id). |
225 | llvm::SmallVector<SegmentEntry, 2> SegmentInfo; |
226 | |
227 | // A map from callstack id (same as key in CallStackMap below) to the heap |
228 | // information recorded for that allocation context. |
229 | llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData; |
230 | CallStackMap StackMap; |
231 | |
232 | // Cached symbolization from PC to Frame. |
233 | llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame; |
234 | |
235 | // Whether to keep the symbol name for each frame after hashing. |
236 | bool KeepSymbolName = false; |
237 | // A mapping of the hash to symbol name, only used if KeepSymbolName is true. |
238 | llvm::DenseMap<uint64_t, std::string> GuidToSymbolName; |
239 | }; |
240 | } // namespace memprof |
241 | } // namespace llvm |
242 | |
243 | #endif // LLVM_PROFILEDATA_MEMPROFREADER_H_ |
244 | |