1 | #include "llvm/ProfileData/DataAccessProf.h" |
2 | #include "llvm/ADT/STLExtras.h" |
3 | #include "llvm/ProfileData/InstrProf.h" |
4 | #include "llvm/Support/Compression.h" |
5 | #include "llvm/Support/Endian.h" |
6 | #include "llvm/Support/Errc.h" |
7 | #include "llvm/Support/Error.h" |
8 | #include "llvm/Support/StringSaver.h" |
9 | #include "llvm/Support/raw_ostream.h" |
10 | |
11 | namespace llvm { |
12 | namespace memprof { |
13 | |
14 | // If `Map` has an entry keyed by `Str`, returns the entry iterator. Otherwise, |
15 | // creates an owned copy of `Str`, adds a map entry for it and returns the |
16 | // iterator. |
17 | static std::pair<StringRef, uint64_t> |
18 | saveStringToMap(DataAccessProfData::StringToIndexMap &Map, |
19 | llvm::UniqueStringSaver &Saver, StringRef Str) { |
20 | auto [Iter, Inserted] = Map.try_emplace(Key: Saver.save(S: Str), Args: Map.size()); |
21 | return *Iter; |
22 | } |
23 | |
24 | // Returns the canonical name or error. |
25 | static Expected<StringRef> getCanonicalName(StringRef Name) { |
26 | if (Name.empty()) |
27 | return make_error<StringError>(Args: "Empty symbol name" , |
28 | Args: llvm::errc::invalid_argument); |
29 | return InstrProfSymtab::getCanonicalName(PGOName: Name); |
30 | } |
31 | |
32 | std::optional<DataAccessProfRecord> |
33 | DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const { |
34 | auto Key = SymbolID; |
35 | if (std::holds_alternative<StringRef>(v: SymbolID)) { |
36 | auto NameOrErr = getCanonicalName(Name: std::get<StringRef>(v: SymbolID)); |
37 | // If name canonicalization fails, suppress the error inside. |
38 | if (!NameOrErr) { |
39 | assert( |
40 | std::get<StringRef>(SymbolID).empty() && |
41 | "Name canonicalization only fails when stringified string is empty." ); |
42 | return std::nullopt; |
43 | } |
44 | Key = *NameOrErr; |
45 | } |
46 | |
47 | auto It = Records.find(Key); |
48 | if (It != Records.end()) { |
49 | return DataAccessProfRecord(Key, It->second.AccessCount, |
50 | It->second.Locations); |
51 | } |
52 | |
53 | return std::nullopt; |
54 | } |
55 | |
56 | bool DataAccessProfData::isKnownColdSymbol(const SymbolHandleRef SymID) const { |
57 | if (std::holds_alternative<uint64_t>(v: SymID)) |
58 | return KnownColdHashes.contains(key: std::get<uint64_t>(v: SymID)); |
59 | return KnownColdSymbols.contains(key: std::get<StringRef>(v: SymID)); |
60 | } |
61 | |
62 | Error DataAccessProfData::setDataAccessProfile(SymbolHandleRef Symbol, |
63 | uint64_t AccessCount) { |
64 | uint64_t RecordID = -1; |
65 | const bool IsStringLiteral = std::holds_alternative<uint64_t>(v: Symbol); |
66 | SymbolHandleRef Key; |
67 | if (IsStringLiteral) { |
68 | RecordID = std::get<uint64_t>(v&: Symbol); |
69 | Key = RecordID; |
70 | } else { |
71 | auto CanonicalName = getCanonicalName(Name: std::get<StringRef>(v&: Symbol)); |
72 | if (!CanonicalName) |
73 | return CanonicalName.takeError(); |
74 | std::tie(args&: Key, args&: RecordID) = |
75 | saveStringToMap(Map&: StrToIndexMap, Saver, Str: *CanonicalName); |
76 | } |
77 | |
78 | auto [Iter, Inserted] = |
79 | Records.try_emplace(Key, Args&: RecordID, Args&: AccessCount, Args: IsStringLiteral); |
80 | if (!Inserted) |
81 | return make_error<StringError>(Args: "Duplicate symbol or string literal added. " |
82 | "User of DataAccessProfData should " |
83 | "aggregate count for the same symbol. " , |
84 | Args: llvm::errc::invalid_argument); |
85 | |
86 | return Error::success(); |
87 | } |
88 | |
89 | Error DataAccessProfData::setDataAccessProfile( |
90 | SymbolHandleRef SymbolID, uint64_t AccessCount, |
91 | ArrayRef<SourceLocation> Locations) { |
92 | if (Error E = setDataAccessProfile(Symbol: SymbolID, AccessCount)) |
93 | return E; |
94 | |
95 | auto &Record = Records.back().second; |
96 | for (const auto &Location : Locations) |
97 | Record.Locations.push_back( |
98 | Elt: {saveStringToMap(Map&: StrToIndexMap, Saver, Str: Location.FileName).first, |
99 | Location.Line}); |
100 | |
101 | return Error::success(); |
102 | } |
103 | |
104 | Error DataAccessProfData::addKnownSymbolWithoutSamples( |
105 | SymbolHandleRef SymbolID) { |
106 | if (std::holds_alternative<uint64_t>(v: SymbolID)) { |
107 | KnownColdHashes.insert(X: std::get<uint64_t>(v&: SymbolID)); |
108 | return Error::success(); |
109 | } |
110 | auto CanonicalName = getCanonicalName(Name: std::get<StringRef>(v&: SymbolID)); |
111 | if (!CanonicalName) |
112 | return CanonicalName.takeError(); |
113 | KnownColdSymbols.insert( |
114 | X: saveStringToMap(Map&: StrToIndexMap, Saver, Str: *CanonicalName).first); |
115 | return Error::success(); |
116 | } |
117 | |
118 | Error DataAccessProfData::deserialize(const unsigned char *&Ptr) { |
119 | uint64_t NumSampledSymbols = |
120 | support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr); |
121 | uint64_t NumColdKnownSymbols = |
122 | support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr); |
123 | if (Error E = deserializeSymbolsAndFilenames(Ptr, NumSampledSymbols, |
124 | NumColdKnownSymbols)) |
125 | return E; |
126 | |
127 | uint64_t Num = |
128 | support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr); |
129 | for (uint64_t I = 0; I < Num; ++I) |
130 | KnownColdHashes.insert( |
131 | X: support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr)); |
132 | |
133 | return deserializeRecords(Ptr); |
134 | } |
135 | |
136 | Error DataAccessProfData::serializeSymbolsAndFilenames(ProfOStream &OS) const { |
137 | OS.write(V: StrToIndexMap.size()); |
138 | OS.write(V: KnownColdSymbols.size()); |
139 | |
140 | std::vector<std::string> Strs; |
141 | Strs.reserve(n: StrToIndexMap.size() + KnownColdSymbols.size()); |
142 | for (const auto &Str : StrToIndexMap) |
143 | Strs.push_back(x: Str.first.str()); |
144 | for (const auto &Str : KnownColdSymbols) |
145 | Strs.push_back(x: Str.str()); |
146 | |
147 | std::string CompressedStrings; |
148 | if (!Strs.empty()) |
149 | if (Error E = collectGlobalObjectNameStrings( |
150 | NameStrs: Strs, doCompression: compression::zlib::isAvailable(), Result&: CompressedStrings)) |
151 | return E; |
152 | const uint64_t CompressedStringLen = CompressedStrings.length(); |
153 | // Record the length of compressed string. |
154 | OS.write(V: CompressedStringLen); |
155 | // Write the chars in compressed strings. |
156 | for (char C : CompressedStrings) |
157 | OS.writeByte(V: static_cast<uint8_t>(C)); |
158 | // Pad up to a multiple of 8. |
159 | // InstrProfReader could read bytes according to 'CompressedStringLen'. |
160 | const uint64_t PaddedLength = alignTo(Value: CompressedStringLen, Align: 8); |
161 | for (uint64_t K = CompressedStringLen; K < PaddedLength; K++) |
162 | OS.writeByte(V: 0); |
163 | return Error::success(); |
164 | } |
165 | |
166 | uint64_t |
167 | DataAccessProfData::getEncodedIndex(const SymbolHandleRef SymbolID) const { |
168 | if (std::holds_alternative<uint64_t>(v: SymbolID)) |
169 | return std::get<uint64_t>(v: SymbolID); |
170 | |
171 | auto Iter = StrToIndexMap.find(Key: std::get<StringRef>(v: SymbolID)); |
172 | assert(Iter != StrToIndexMap.end() && |
173 | "String literals not found in StrToIndexMap" ); |
174 | return Iter->second; |
175 | } |
176 | |
177 | Error DataAccessProfData::serialize(ProfOStream &OS) const { |
178 | if (Error E = serializeSymbolsAndFilenames(OS)) |
179 | return E; |
180 | OS.write(V: KnownColdHashes.size()); |
181 | for (const auto &Hash : KnownColdHashes) |
182 | OS.write(V: Hash); |
183 | OS.write(V: (uint64_t)(Records.size())); |
184 | for (const auto &[Key, Rec] : Records) { |
185 | OS.write(V: getEncodedIndex(SymbolID: Rec.SymbolID)); |
186 | OS.writeByte(V: Rec.IsStringLiteral); |
187 | OS.write(V: Rec.AccessCount); |
188 | OS.write(V: Rec.Locations.size()); |
189 | for (const auto &Loc : Rec.Locations) { |
190 | OS.write(V: getEncodedIndex(SymbolID: Loc.FileName)); |
191 | OS.write32(V: Loc.Line); |
192 | } |
193 | } |
194 | return Error::success(); |
195 | } |
196 | |
197 | Error DataAccessProfData::deserializeSymbolsAndFilenames( |
198 | const unsigned char *&Ptr, const uint64_t NumSampledSymbols, |
199 | const uint64_t NumColdKnownSymbols) { |
200 | uint64_t Len = |
201 | support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr); |
202 | |
203 | // The first NumSampledSymbols strings are symbols with samples, and next |
204 | // NumColdKnownSymbols strings are known cold symbols. |
205 | uint64_t StringCnt = 0; |
206 | std::function<Error(StringRef)> addName = [&](StringRef Name) { |
207 | if (StringCnt < NumSampledSymbols) |
208 | saveStringToMap(Map&: StrToIndexMap, Saver, Str: Name); |
209 | else |
210 | KnownColdSymbols.insert(X: Saver.save(S: Name)); |
211 | ++StringCnt; |
212 | return Error::success(); |
213 | }; |
214 | if (Error E = |
215 | readAndDecodeStrings(NameStrings: StringRef((const char *)Ptr, Len), NameCallback: addName)) |
216 | return E; |
217 | |
218 | Ptr += alignTo(Value: Len, Align: 8); |
219 | return Error::success(); |
220 | } |
221 | |
222 | Error DataAccessProfData::deserializeRecords(const unsigned char *&Ptr) { |
223 | SmallVector<StringRef> Strings = |
224 | llvm::to_vector(Range: llvm::make_first_range(c: getStrToIndexMapRef())); |
225 | |
226 | uint64_t NumRecords = |
227 | support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr); |
228 | |
229 | for (uint64_t I = 0; I < NumRecords; ++I) { |
230 | uint64_t ID = |
231 | support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr); |
232 | |
233 | bool IsStringLiteral = |
234 | support::endian::readNext<uint8_t, llvm::endianness::little>(memory&: Ptr); |
235 | |
236 | uint64_t AccessCount = |
237 | support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr); |
238 | |
239 | SymbolHandleRef SymbolID; |
240 | if (IsStringLiteral) |
241 | SymbolID = ID; |
242 | else |
243 | SymbolID = Strings[ID]; |
244 | if (Error E = setDataAccessProfile(Symbol: SymbolID, AccessCount)) |
245 | return E; |
246 | |
247 | auto &Record = Records.back().second; |
248 | |
249 | uint64_t NumLocations = |
250 | support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr); |
251 | |
252 | Record.Locations.reserve(N: NumLocations); |
253 | for (uint64_t J = 0; J < NumLocations; ++J) { |
254 | uint64_t FileNameIndex = |
255 | support::endian::readNext<uint64_t, llvm::endianness::little>(memory&: Ptr); |
256 | uint32_t Line = |
257 | support::endian::readNext<uint32_t, llvm::endianness::little>(memory&: Ptr); |
258 | Record.Locations.push_back(Elt: {Strings[FileNameIndex], Line}); |
259 | } |
260 | } |
261 | return Error::success(); |
262 | } |
263 | } // namespace memprof |
264 | } // namespace llvm |
265 | |