1 | //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/DebugInfo/PDB/Native/PDBFile.h" |
10 | #include "llvm/ADT/ArrayRef.h" |
11 | #include "llvm/DebugInfo/MSF/MSFCommon.h" |
12 | #include "llvm/DebugInfo/MSF/MappedBlockStream.h" |
13 | #include "llvm/DebugInfo/PDB/Native/DbiStream.h" |
14 | #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" |
15 | #include "llvm/DebugInfo/PDB/Native/InfoStream.h" |
16 | #include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h" |
17 | #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" |
18 | #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" |
19 | #include "llvm/DebugInfo/PDB/Native/RawError.h" |
20 | #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" |
21 | #include "llvm/DebugInfo/PDB/Native/TpiStream.h" |
22 | #include "llvm/Support/BinaryStream.h" |
23 | #include "llvm/Support/BinaryStreamArray.h" |
24 | #include "llvm/Support/BinaryStreamReader.h" |
25 | #include "llvm/Support/Endian.h" |
26 | #include "llvm/Support/Error.h" |
27 | #include "llvm/Support/Path.h" |
28 | #include <algorithm> |
29 | #include <cassert> |
30 | #include <cstdint> |
31 | |
32 | using namespace llvm; |
33 | using namespace llvm::codeview; |
34 | using namespace llvm::msf; |
35 | using namespace llvm::pdb; |
36 | |
37 | namespace { |
38 | typedef FixedStreamArray<support::ulittle32_t> ulittle_array; |
39 | } // end anonymous namespace |
40 | |
41 | PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer, |
42 | BumpPtrAllocator &Allocator) |
43 | : FilePath(std::string(Path)), Allocator(Allocator), |
44 | Buffer(std::move(PdbFileBuffer)) {} |
45 | |
46 | PDBFile::~PDBFile() = default; |
47 | |
48 | StringRef PDBFile::getFilePath() const { return FilePath; } |
49 | |
50 | StringRef PDBFile::getFileDirectory() const { |
51 | return sys::path::parent_path(path: FilePath); |
52 | } |
53 | |
54 | uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; } |
55 | |
56 | uint32_t PDBFile::getFreeBlockMapBlock() const { |
57 | return ContainerLayout.SB->FreeBlockMapBlock; |
58 | } |
59 | |
60 | uint32_t PDBFile::getBlockCount() const { |
61 | return ContainerLayout.SB->NumBlocks; |
62 | } |
63 | |
64 | uint32_t PDBFile::getNumDirectoryBytes() const { |
65 | return ContainerLayout.SB->NumDirectoryBytes; |
66 | } |
67 | |
68 | uint32_t PDBFile::getBlockMapIndex() const { |
69 | return ContainerLayout.SB->BlockMapAddr; |
70 | } |
71 | |
72 | uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; } |
73 | |
74 | uint32_t PDBFile::getNumDirectoryBlocks() const { |
75 | return msf::bytesToBlocks(NumBytes: ContainerLayout.SB->NumDirectoryBytes, |
76 | BlockSize: ContainerLayout.SB->BlockSize); |
77 | } |
78 | |
79 | uint64_t PDBFile::getBlockMapOffset() const { |
80 | return (uint64_t)ContainerLayout.SB->BlockMapAddr * |
81 | ContainerLayout.SB->BlockSize; |
82 | } |
83 | |
84 | uint32_t PDBFile::getNumStreams() const { |
85 | return ContainerLayout.StreamSizes.size(); |
86 | } |
87 | |
88 | uint32_t PDBFile::getMaxStreamSize() const { |
89 | return *llvm::max_element(Range: ContainerLayout.StreamSizes); |
90 | } |
91 | |
92 | uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const { |
93 | return ContainerLayout.StreamSizes[StreamIndex]; |
94 | } |
95 | |
96 | ArrayRef<support::ulittle32_t> |
97 | PDBFile::getStreamBlockList(uint32_t StreamIndex) const { |
98 | return ContainerLayout.StreamMap[StreamIndex]; |
99 | } |
100 | |
101 | uint64_t PDBFile::getFileSize() const { return Buffer->getLength(); } |
102 | |
103 | Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex, |
104 | uint32_t NumBytes) const { |
105 | uint64_t StreamBlockOffset = msf::blockToOffset(BlockNumber: BlockIndex, BlockSize: getBlockSize()); |
106 | |
107 | ArrayRef<uint8_t> Result; |
108 | if (auto EC = Buffer->readBytes(Offset: StreamBlockOffset, Size: NumBytes, Buffer&: Result)) |
109 | return std::move(EC); |
110 | return Result; |
111 | } |
112 | |
113 | Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset, |
114 | ArrayRef<uint8_t> Data) const { |
115 | return make_error<RawError>(Args: raw_error_code::not_writable, |
116 | Args: "PDBFile is immutable" ); |
117 | } |
118 | |
119 | Error PDBFile::() { |
120 | BinaryStreamReader Reader(*Buffer); |
121 | |
122 | // Initialize SB. |
123 | const msf::SuperBlock *SB = nullptr; |
124 | if (auto EC = Reader.readObject(Dest&: SB)) { |
125 | consumeError(Err: std::move(EC)); |
126 | return make_error<RawError>(Args: raw_error_code::corrupt_file, |
127 | Args: "MSF superblock is missing" ); |
128 | } |
129 | |
130 | if (auto EC = msf::validateSuperBlock(SB: *SB)) |
131 | return EC; |
132 | |
133 | if (Buffer->getLength() % SB->BlockSize != 0) |
134 | return make_error<RawError>(Args: raw_error_code::corrupt_file, |
135 | Args: "File size is not a multiple of block size" ); |
136 | ContainerLayout.SB = SB; |
137 | |
138 | // Initialize Free Page Map. |
139 | ContainerLayout.FreePageMap.resize(N: SB->NumBlocks); |
140 | // The Fpm exists either at block 1 or block 2 of the MSF. However, this |
141 | // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and |
142 | // thusly an equal number of total blocks in the file. For a block size |
143 | // of 4KiB (very common), this would yield 32KiB total blocks in file, for a |
144 | // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so |
145 | // the Fpm is split across the file at `getBlockSize()` intervals. As a |
146 | // result, every block whose index is of the form |{1,2} + getBlockSize() * k| |
147 | // for any non-negative integer k is an Fpm block. In theory, we only really |
148 | // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but |
149 | // current versions of the MSF format already expect the Fpm to be arranged |
150 | // at getBlockSize() intervals, so we have to be compatible. |
151 | // See the function fpmPn() for more information: |
152 | // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489 |
153 | auto FpmStream = |
154 | MappedBlockStream::createFpmStream(Layout: ContainerLayout, MsfData: *Buffer, Allocator); |
155 | BinaryStreamReader FpmReader(*FpmStream); |
156 | ArrayRef<uint8_t> FpmBytes; |
157 | if (auto EC = FpmReader.readBytes(Buffer&: FpmBytes, Size: FpmReader.bytesRemaining())) |
158 | return EC; |
159 | uint32_t BlocksRemaining = getBlockCount(); |
160 | uint32_t BI = 0; |
161 | for (auto Byte : FpmBytes) { |
162 | uint32_t BlocksThisByte = std::min(a: BlocksRemaining, b: 8U); |
163 | for (uint32_t I = 0; I < BlocksThisByte; ++I) { |
164 | if (Byte & (1 << I)) |
165 | ContainerLayout.FreePageMap[BI] = true; |
166 | --BlocksRemaining; |
167 | ++BI; |
168 | } |
169 | } |
170 | |
171 | Reader.setOffset(getBlockMapOffset()); |
172 | if (auto EC = Reader.readArray(Array&: ContainerLayout.DirectoryBlocks, |
173 | NumElements: getNumDirectoryBlocks())) |
174 | return EC; |
175 | |
176 | return Error::success(); |
177 | } |
178 | |
179 | Error PDBFile::parseStreamData() { |
180 | assert(ContainerLayout.SB); |
181 | if (DirectoryStream) |
182 | return Error::success(); |
183 | |
184 | uint32_t NumStreams = 0; |
185 | |
186 | // Normally you can't use a MappedBlockStream without having fully parsed the |
187 | // PDB file, because it accesses the directory and various other things, which |
188 | // is exactly what we are attempting to parse. By specifying a custom |
189 | // subclass of IPDBStreamData which only accesses the fields that have already |
190 | // been parsed, we can avoid this and reuse MappedBlockStream. |
191 | auto DS = MappedBlockStream::createDirectoryStream(Layout: ContainerLayout, MsfData: *Buffer, |
192 | Allocator); |
193 | BinaryStreamReader Reader(*DS); |
194 | if (auto EC = Reader.readInteger(Dest&: NumStreams)) |
195 | return EC; |
196 | |
197 | if (auto EC = Reader.readArray(Array&: ContainerLayout.StreamSizes, NumElements: NumStreams)) |
198 | return EC; |
199 | for (uint32_t I = 0; I < NumStreams; ++I) { |
200 | uint32_t StreamSize = getStreamByteSize(StreamIndex: I); |
201 | // FIXME: What does StreamSize ~0U mean? |
202 | uint64_t NumExpectedStreamBlocks = |
203 | StreamSize == UINT32_MAX |
204 | ? 0 |
205 | : msf::bytesToBlocks(NumBytes: StreamSize, BlockSize: ContainerLayout.SB->BlockSize); |
206 | |
207 | // For convenience, we store the block array contiguously. This is because |
208 | // if someone calls setStreamMap(), it is more convenient to be able to call |
209 | // it with an ArrayRef instead of setting up a StreamRef. Since the |
210 | // DirectoryStream is cached in the class and thus lives for the life of the |
211 | // class, we can be guaranteed that readArray() will return a stable |
212 | // reference, even if it has to allocate from its internal pool. |
213 | ArrayRef<support::ulittle32_t> Blocks; |
214 | if (auto EC = Reader.readArray(Array&: Blocks, NumElements: NumExpectedStreamBlocks)) |
215 | return EC; |
216 | for (uint32_t Block : Blocks) { |
217 | uint64_t BlockEndOffset = |
218 | (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize; |
219 | if (BlockEndOffset > getFileSize()) |
220 | return make_error<RawError>(Args: raw_error_code::corrupt_file, |
221 | Args: "Stream block map is corrupt." ); |
222 | } |
223 | ContainerLayout.StreamMap.push_back(x: Blocks); |
224 | } |
225 | |
226 | // We should have read exactly SB->NumDirectoryBytes bytes. |
227 | assert(Reader.bytesRemaining() == 0); |
228 | DirectoryStream = std::move(DS); |
229 | return Error::success(); |
230 | } |
231 | |
232 | ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const { |
233 | return ContainerLayout.DirectoryBlocks; |
234 | } |
235 | |
236 | std::unique_ptr<MappedBlockStream> |
237 | PDBFile::createIndexedStream(uint16_t SN) const { |
238 | if (SN == kInvalidStreamIndex) |
239 | return nullptr; |
240 | return MappedBlockStream::createIndexedStream(Layout: ContainerLayout, MsfData: *Buffer, StreamIndex: SN, |
241 | Allocator); |
242 | } |
243 | |
244 | MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const { |
245 | MSFStreamLayout Result; |
246 | auto Blocks = getStreamBlockList(StreamIndex: StreamIdx); |
247 | Result.Blocks.assign(first: Blocks.begin(), last: Blocks.end()); |
248 | Result.Length = getStreamByteSize(StreamIndex: StreamIdx); |
249 | return Result; |
250 | } |
251 | |
252 | msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const { |
253 | return msf::getFpmStreamLayout(Msf: ContainerLayout); |
254 | } |
255 | |
256 | Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() { |
257 | if (!Globals) { |
258 | auto DbiS = getPDBDbiStream(); |
259 | if (!DbiS) |
260 | return DbiS.takeError(); |
261 | |
262 | auto GlobalS = |
263 | safelyCreateIndexedStream(StreamIndex: DbiS->getGlobalSymbolStreamIndex()); |
264 | if (!GlobalS) |
265 | return GlobalS.takeError(); |
266 | auto TempGlobals = std::make_unique<GlobalsStream>(args: std::move(*GlobalS)); |
267 | if (auto EC = TempGlobals->reload()) |
268 | return std::move(EC); |
269 | Globals = std::move(TempGlobals); |
270 | } |
271 | return *Globals; |
272 | } |
273 | |
274 | Expected<InfoStream &> PDBFile::getPDBInfoStream() { |
275 | if (!Info) { |
276 | auto InfoS = safelyCreateIndexedStream(StreamIndex: StreamPDB); |
277 | if (!InfoS) |
278 | return InfoS.takeError(); |
279 | auto TempInfo = std::make_unique<InfoStream>(args: std::move(*InfoS)); |
280 | if (auto EC = TempInfo->reload()) |
281 | return std::move(EC); |
282 | Info = std::move(TempInfo); |
283 | } |
284 | return *Info; |
285 | } |
286 | |
287 | Expected<DbiStream &> PDBFile::getPDBDbiStream() { |
288 | if (!Dbi) { |
289 | auto DbiS = safelyCreateIndexedStream(StreamIndex: StreamDBI); |
290 | if (!DbiS) |
291 | return DbiS.takeError(); |
292 | auto TempDbi = std::make_unique<DbiStream>(args: std::move(*DbiS)); |
293 | if (auto EC = TempDbi->reload(Pdb: this)) |
294 | return std::move(EC); |
295 | Dbi = std::move(TempDbi); |
296 | } |
297 | return *Dbi; |
298 | } |
299 | |
300 | Expected<TpiStream &> PDBFile::getPDBTpiStream() { |
301 | if (!Tpi) { |
302 | auto TpiS = safelyCreateIndexedStream(StreamIndex: StreamTPI); |
303 | if (!TpiS) |
304 | return TpiS.takeError(); |
305 | auto TempTpi = std::make_unique<TpiStream>(args&: *this, args: std::move(*TpiS)); |
306 | if (auto EC = TempTpi->reload()) |
307 | return std::move(EC); |
308 | Tpi = std::move(TempTpi); |
309 | } |
310 | return *Tpi; |
311 | } |
312 | |
313 | Expected<TpiStream &> PDBFile::getPDBIpiStream() { |
314 | if (!Ipi) { |
315 | if (!hasPDBIpiStream()) |
316 | return make_error<RawError>(Args: raw_error_code::no_stream); |
317 | |
318 | auto IpiS = safelyCreateIndexedStream(StreamIndex: StreamIPI); |
319 | if (!IpiS) |
320 | return IpiS.takeError(); |
321 | auto TempIpi = std::make_unique<TpiStream>(args&: *this, args: std::move(*IpiS)); |
322 | if (auto EC = TempIpi->reload()) |
323 | return std::move(EC); |
324 | Ipi = std::move(TempIpi); |
325 | } |
326 | return *Ipi; |
327 | } |
328 | |
329 | Expected<PublicsStream &> PDBFile::getPDBPublicsStream() { |
330 | if (!Publics) { |
331 | auto DbiS = getPDBDbiStream(); |
332 | if (!DbiS) |
333 | return DbiS.takeError(); |
334 | |
335 | auto PublicS = |
336 | safelyCreateIndexedStream(StreamIndex: DbiS->getPublicSymbolStreamIndex()); |
337 | if (!PublicS) |
338 | return PublicS.takeError(); |
339 | auto TempPublics = std::make_unique<PublicsStream>(args: std::move(*PublicS)); |
340 | if (auto EC = TempPublics->reload()) |
341 | return std::move(EC); |
342 | Publics = std::move(TempPublics); |
343 | } |
344 | return *Publics; |
345 | } |
346 | |
347 | Expected<SymbolStream &> PDBFile::getPDBSymbolStream() { |
348 | if (!Symbols) { |
349 | auto DbiS = getPDBDbiStream(); |
350 | if (!DbiS) |
351 | return DbiS.takeError(); |
352 | |
353 | uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex(); |
354 | auto SymbolS = safelyCreateIndexedStream(StreamIndex: SymbolStreamNum); |
355 | if (!SymbolS) |
356 | return SymbolS.takeError(); |
357 | |
358 | auto TempSymbols = std::make_unique<SymbolStream>(args: std::move(*SymbolS)); |
359 | if (auto EC = TempSymbols->reload()) |
360 | return std::move(EC); |
361 | Symbols = std::move(TempSymbols); |
362 | } |
363 | return *Symbols; |
364 | } |
365 | |
366 | Expected<PDBStringTable &> PDBFile::getStringTable() { |
367 | if (!Strings) { |
368 | auto NS = safelyCreateNamedStream(Name: "/names" ); |
369 | if (!NS) |
370 | return NS.takeError(); |
371 | |
372 | auto N = std::make_unique<PDBStringTable>(); |
373 | BinaryStreamReader Reader(**NS); |
374 | if (auto EC = N->reload(Reader)) |
375 | return std::move(EC); |
376 | assert(Reader.bytesRemaining() == 0); |
377 | StringTableStream = std::move(*NS); |
378 | Strings = std::move(N); |
379 | } |
380 | return *Strings; |
381 | } |
382 | |
383 | Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() { |
384 | if (!InjectedSources) { |
385 | auto IJS = safelyCreateNamedStream(Name: "/src/headerblock" ); |
386 | if (!IJS) |
387 | return IJS.takeError(); |
388 | |
389 | auto Strings = getStringTable(); |
390 | if (!Strings) |
391 | return Strings.takeError(); |
392 | |
393 | auto IJ = std::make_unique<InjectedSourceStream>(args: std::move(*IJS)); |
394 | if (auto EC = IJ->reload(Strings: *Strings)) |
395 | return std::move(EC); |
396 | InjectedSources = std::move(IJ); |
397 | } |
398 | return *InjectedSources; |
399 | } |
400 | |
401 | uint32_t PDBFile::getPointerSize() { |
402 | auto DbiS = getPDBDbiStream(); |
403 | if (!DbiS) |
404 | return 0; |
405 | PDB_Machine Machine = DbiS->getMachineType(); |
406 | if (Machine == PDB_Machine::Amd64) |
407 | return 8; |
408 | return 4; |
409 | } |
410 | |
411 | bool PDBFile::hasPDBDbiStream() const { |
412 | return StreamDBI < getNumStreams() && getStreamByteSize(StreamIndex: StreamDBI) > 0; |
413 | } |
414 | |
415 | bool PDBFile::hasPDBGlobalsStream() { |
416 | auto DbiS = getPDBDbiStream(); |
417 | if (!DbiS) { |
418 | consumeError(Err: DbiS.takeError()); |
419 | return false; |
420 | } |
421 | |
422 | return DbiS->getGlobalSymbolStreamIndex() < getNumStreams(); |
423 | } |
424 | |
425 | bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); } |
426 | |
427 | bool PDBFile::hasPDBIpiStream() const { |
428 | if (!hasPDBInfoStream()) |
429 | return false; |
430 | |
431 | if (StreamIPI >= getNumStreams()) |
432 | return false; |
433 | |
434 | auto &InfoStream = cantFail(ValOrErr: const_cast<PDBFile *>(this)->getPDBInfoStream()); |
435 | return InfoStream.containsIdStream(); |
436 | } |
437 | |
438 | bool PDBFile::hasPDBPublicsStream() { |
439 | auto DbiS = getPDBDbiStream(); |
440 | if (!DbiS) { |
441 | consumeError(Err: DbiS.takeError()); |
442 | return false; |
443 | } |
444 | return DbiS->getPublicSymbolStreamIndex() < getNumStreams(); |
445 | } |
446 | |
447 | bool PDBFile::hasPDBSymbolStream() { |
448 | auto DbiS = getPDBDbiStream(); |
449 | if (!DbiS) |
450 | return false; |
451 | return DbiS->getSymRecordStreamIndex() < getNumStreams(); |
452 | } |
453 | |
454 | bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); } |
455 | |
456 | bool PDBFile::hasPDBStringTable() { |
457 | auto IS = getPDBInfoStream(); |
458 | if (!IS) |
459 | return false; |
460 | Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name: "/names" ); |
461 | if (!ExpectedNSI) { |
462 | consumeError(Err: ExpectedNSI.takeError()); |
463 | return false; |
464 | } |
465 | assert(*ExpectedNSI < getNumStreams()); |
466 | return true; |
467 | } |
468 | |
469 | bool PDBFile::hasPDBInjectedSourceStream() { |
470 | auto IS = getPDBInfoStream(); |
471 | if (!IS) |
472 | return false; |
473 | Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name: "/src/headerblock" ); |
474 | if (!ExpectedNSI) { |
475 | consumeError(Err: ExpectedNSI.takeError()); |
476 | return false; |
477 | } |
478 | assert(*ExpectedNSI < getNumStreams()); |
479 | return true; |
480 | } |
481 | |
482 | /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a |
483 | /// stream with that index actually exists. If it does not, the return value |
484 | /// will have an MSFError with code msf_error_code::no_stream. Else, the return |
485 | /// value will contain the stream returned by createIndexedStream(). |
486 | Expected<std::unique_ptr<MappedBlockStream>> |
487 | PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const { |
488 | if (StreamIndex >= getNumStreams()) |
489 | // This rejects kInvalidStreamIndex with an error as well. |
490 | return make_error<RawError>(Args: raw_error_code::no_stream); |
491 | return createIndexedStream(SN: StreamIndex); |
492 | } |
493 | |
494 | Expected<std::unique_ptr<MappedBlockStream>> |
495 | PDBFile::safelyCreateNamedStream(StringRef Name) { |
496 | auto IS = getPDBInfoStream(); |
497 | if (!IS) |
498 | return IS.takeError(); |
499 | |
500 | Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name); |
501 | if (!ExpectedNSI) |
502 | return ExpectedNSI.takeError(); |
503 | uint32_t NameStreamIndex = *ExpectedNSI; |
504 | |
505 | return safelyCreateIndexedStream(StreamIndex: NameStreamIndex); |
506 | } |
507 | |