1//===- ModuleFile.h - Module file description -------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This file defines the ModuleFile class, which describes a module that has
// been loaded from an AST file.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_CLANG_SERIALIZATION_MODULEFILE_H
15#define LLVM_CLANG_SERIALIZATION_MODULEFILE_H
16
17#include "clang/Basic/FileManager.h"
18#include "clang/Basic/LLVM.h"
19#include "clang/Basic/Module.h"
20#include "clang/Basic/SourceLocation.h"
21#include "clang/Serialization/ASTBitCodes.h"
22#include "clang/Serialization/ContinuousRangeMap.h"
23#include "clang/Serialization/ModuleFileExtension.h"
24#include "llvm/ADT/BitVector.h"
25#include "llvm/ADT/DenseMap.h"
26#include "llvm/ADT/PointerIntPair.h"
27#include "llvm/ADT/SetVector.h"
28#include "llvm/ADT/SmallVector.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/Bitstream/BitstreamReader.h"
31#include "llvm/Support/Endian.h"
32#include <cassert>
33#include <cstdint>
34#include <memory>
35#include <string>
36#include <vector>
37
38namespace clang {
39
40namespace serialization {
41
/// Classifies a loaded AST file by the way it was brought in and by the
/// role it plays.
enum ModuleKind {
  /// A module that was loaded implicitly.
  MK_ImplicitModule,

  /// A module that was loaded explicitly.
  MK_ExplicitModule,

  /// A PCH file that is treated as a PCH.
  MK_PCH,

  /// A PCH file that is treated as the preamble.
  MK_Preamble,

  /// A PCH file that is treated as the actual main file.
  MK_MainFile,

  /// A module that comes from a prebuilt module path.
  MK_PrebuiltModule,
};
62
63/// The input file info that has been loaded from an AST file.
64struct InputFileInfo {
65 StringRef UnresolvedImportedFilenameAsRequested;
66 StringRef UnresolvedImportedFilename;
67
68 uint64_t ContentHash;
69 off_t StoredSize;
70 time_t StoredTime;
71 bool Overridden;
72 bool Transient;
73 bool TopLevel;
74 bool ModuleMap;
75
76 bool isValid() const {
77 return !UnresolvedImportedFilenameAsRequested.empty();
78 }
79};
80
81/// The input file that has been loaded from this AST file, along with
82/// bools indicating whether this was an overridden buffer or if it was
83/// out-of-date or not-found.
84class InputFile {
85 enum {
86 Overridden = 1,
87 OutOfDate = 2,
88 NotFound = 3
89 };
90 llvm::PointerIntPair<const FileEntryRef::MapEntry *, 2, unsigned> Val;
91
92public:
93 InputFile() = default;
94
95 InputFile(FileEntryRef File, bool isOverridden = false,
96 bool isOutOfDate = false) {
97 unsigned intVal = 0;
98 // Make isOutOfDate with higher priority than isOverridden.
99 // It is possible if the recorded hash value mismatches.
100 if (isOutOfDate)
101 intVal = OutOfDate;
102 else if (isOverridden)
103 intVal = Overridden;
104 Val.setPointerAndInt(PtrVal: &File.getMapEntry(), IntVal: intVal);
105 }
106
107 static InputFile getNotFound() {
108 InputFile File;
109 File.Val.setInt(NotFound);
110 return File;
111 }
112
113 OptionalFileEntryRef getFile() const {
114 if (auto *P = Val.getPointer())
115 return FileEntryRef(*P);
116 return std::nullopt;
117 }
118 bool isOverridden() const { return Val.getInt() == Overridden; }
119 bool isOutOfDate() const { return Val.getInt() == OutOfDate; }
120 bool isNotFound() const { return Val.getInt() == NotFound; }
121};
122
123/// Information about a module that has been loaded by the ASTReader.
124///
125/// Each instance of the Module class corresponds to a single AST file, which
126/// may be a precompiled header, precompiled preamble, a module, or an AST file
127/// of some sort loaded as the main file, all of which are specific formulations
128/// of the general notion of a "module". A module may depend on any number of
129/// other modules.
130class ModuleFile {
131public:
132 ModuleFile(ModuleKind Kind, FileEntryRef File, unsigned Generation)
133 : Kind(Kind), File(File), Generation(Generation) {}
134 ~ModuleFile();
135
136 // === General information ===
137
138 /// The index of this module in the list of modules.
139 unsigned Index = 0;
140
141 /// The type of this module.
142 ModuleKind Kind;
143
144 /// The file name of the module file.
145 std::string FileName;
146
147 /// The name of the module.
148 std::string ModuleName;
149
150 /// The base directory of the module.
151 std::string BaseDirectory;
152
153 static std::string getTimestampFilename(StringRef FileName) {
154 return (FileName + ".timestamp").str();
155 }
156
157 /// The original source file name that was used to build the
158 /// primary AST file, which may have been modified for
159 /// relocatable-pch support.
160 std::string OriginalSourceFileName;
161
162 /// The actual original source file name that was used to
163 /// build this AST file.
164 std::string ActualOriginalSourceFileName;
165
166 /// The file ID for the original source file that was used to
167 /// build this AST file.
168 FileID OriginalSourceFileID;
169
170 std::string ModuleMapPath;
171
172 /// Whether this precompiled header is a relocatable PCH file.
173 bool RelocatablePCH = false;
174
175 /// Whether this module file is a standard C++ module.
176 bool StandardCXXModule = false;
177
178 /// Whether timestamps are included in this module file.
179 bool HasTimestamps = false;
180
181 /// Whether the top-level module has been read from the AST file.
182 bool DidReadTopLevelSubmodule = false;
183
184 /// The file entry for the module file.
185 FileEntryRef File;
186
187 /// The signature of the module file, which may be used instead of the size
188 /// and modification time to identify this particular file.
189 ASTFileSignature Signature;
190
191 /// The signature of the AST block of the module file, this can be used to
192 /// unique module files based on AST contents.
193 ASTFileSignature ASTBlockHash;
194
195 /// The bit vector denoting usage of each header search entry (true = used).
196 llvm::BitVector SearchPathUsage;
197
198 /// The bit vector denoting usage of each VFS entry (true = used).
199 llvm::BitVector VFSUsage;
200
201 /// Whether this module has been directly imported by the
202 /// user.
203 bool DirectlyImported = false;
204
205 /// The generation of which this module file is a part.
206 unsigned Generation;
207
208 /// The memory buffer that stores the data associated with
209 /// this AST file, owned by the InMemoryModuleCache.
210 llvm::MemoryBuffer *Buffer = nullptr;
211
212 /// The size of this file, in bits.
213 uint64_t SizeInBits = 0;
214
215 /// The global bit offset (or base) of this module
216 uint64_t GlobalBitOffset = 0;
217
218 /// The bit offset of the AST block of this module.
219 uint64_t ASTBlockStartOffset = 0;
220
221 /// The serialized bitstream data for this file.
222 StringRef Data;
223
224 /// The main bitstream cursor for the main block.
225 llvm::BitstreamCursor Stream;
226
227 /// The source location where the module was explicitly or implicitly
228 /// imported in the local translation unit.
229 ///
230 /// If module A depends on and imports module B, both modules will have the
231 /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a
232 /// source location inside module A).
233 ///
234 /// WARNING: This is largely useless. It doesn't tell you when a module was
235 /// made visible, just when the first submodule of that module was imported.
236 SourceLocation DirectImportLoc;
237
238 /// The source location where this module was first imported.
239 SourceLocation ImportLoc;
240
241 /// The first source location in this module.
242 SourceLocation FirstLoc;
243
244 /// The list of extension readers that are attached to this module
245 /// file.
246 std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders;
247
248 /// The module offset map data for this file. If non-empty, the various
249 /// ContinuousRangeMaps described below have not yet been populated.
250 StringRef ModuleOffsetMap;
251
252 // === Input Files ===
253
254 /// The cursor to the start of the input-files block.
255 llvm::BitstreamCursor InputFilesCursor;
256
257 /// Absolute offset of the start of the input-files block.
258 uint64_t InputFilesOffsetBase = 0;
259
260 /// Relative offsets for all of the input file entries in the AST file.
261 const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr;
262
263 /// The input files that have been loaded from this AST file.
264 std::vector<InputFile> InputFilesLoaded;
265
266 /// The input file infos that have been loaded from this AST file.
267 std::vector<InputFileInfo> InputFileInfosLoaded;
268
269 // All user input files reside at the index range [0, NumUserInputFiles), and
270 // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()).
271 unsigned NumUserInputFiles = 0;
272
273 /// If non-zero, specifies the time when we last validated input
274 /// files. Zero means we never validated them.
275 ///
276 /// The time is specified in seconds since the start of the Epoch.
277 uint64_t InputFilesValidationTimestamp = 0;
278
279 // === Source Locations ===
280
281 /// Cursor used to read source location entries.
282 llvm::BitstreamCursor SLocEntryCursor;
283
284 /// The bit offset to the start of the SOURCE_MANAGER_BLOCK.
285 uint64_t SourceManagerBlockStartOffset = 0;
286
287 /// The number of source location entries in this AST file.
288 unsigned LocalNumSLocEntries = 0;
289
290 /// The base ID in the source manager's view of this module.
291 int SLocEntryBaseID = 0;
292
293 /// The base offset in the source manager's view of this module.
294 SourceLocation::UIntTy SLocEntryBaseOffset = 0;
295
296 /// Base file offset for the offsets in SLocEntryOffsets. Real file offset
297 /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i].
298 uint64_t SLocEntryOffsetsBase = 0;
299
300 /// Offsets for all of the source location entries in the
301 /// AST file.
302 const uint32_t *SLocEntryOffsets = nullptr;
303
304 // === Identifiers ===
305
306 /// The number of identifiers in this AST file.
307 unsigned LocalNumIdentifiers = 0;
308
309 /// Offsets into the identifier table data.
310 ///
311 /// This array is indexed by the identifier ID (-1), and provides
312 /// the offset into IdentifierTableData where the string data is
313 /// stored.
314 const uint32_t *IdentifierOffsets = nullptr;
315
316 /// Base identifier ID for identifiers local to this module.
317 serialization::IdentifierID BaseIdentifierID = 0;
318
319 /// Actual data for the on-disk hash table of identifiers.
320 ///
321 /// This pointer points into a memory buffer, where the on-disk hash
322 /// table for identifiers actually lives.
323 const unsigned char *IdentifierTableData = nullptr;
324
325 /// A pointer to an on-disk hash table of opaque type
326 /// IdentifierHashTable.
327 void *IdentifierLookupTable = nullptr;
328
329 /// Offsets of identifiers that we're going to preload within
330 /// IdentifierTableData.
331 std::vector<unsigned> PreloadIdentifierOffsets;
332
333 // === Macros ===
334
335 /// The cursor to the start of the preprocessor block, which stores
336 /// all of the macro definitions.
337 llvm::BitstreamCursor MacroCursor;
338
339 /// The number of macros in this AST file.
340 unsigned LocalNumMacros = 0;
341
342 /// Base file offset for the offsets in MacroOffsets. Real file offset for
343 /// the entry is MacroOffsetsBase + MacroOffsets[i].
344 uint64_t MacroOffsetsBase = 0;
345
346 /// Offsets of macros in the preprocessor block.
347 ///
348 /// This array is indexed by the macro ID (-1), and provides
349 /// the offset into the preprocessor block where macro definitions are
350 /// stored.
351 const uint32_t *MacroOffsets = nullptr;
352
353 /// Base macro ID for macros local to this module.
354 serialization::MacroID BaseMacroID = 0;
355
356 /// Remapping table for macro IDs in this module.
357 ContinuousRangeMap<uint32_t, int, 2> MacroRemap;
358
359 /// The offset of the start of the set of defined macros.
360 uint64_t MacroStartOffset = 0;
361
362 // === Detailed PreprocessingRecord ===
363
364 /// The cursor to the start of the (optional) detailed preprocessing
365 /// record block.
366 llvm::BitstreamCursor PreprocessorDetailCursor;
367
368 /// The offset of the start of the preprocessor detail cursor.
369 uint64_t PreprocessorDetailStartOffset = 0;
370
371 /// Base preprocessed entity ID for preprocessed entities local to
372 /// this module.
373 serialization::PreprocessedEntityID BasePreprocessedEntityID = 0;
374
375 /// Remapping table for preprocessed entity IDs in this module.
376 ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap;
377
378 const PPEntityOffset *PreprocessedEntityOffsets = nullptr;
379 unsigned NumPreprocessedEntities = 0;
380
381 /// Base ID for preprocessed skipped ranges local to this module.
382 unsigned BasePreprocessedSkippedRangeID = 0;
383
384 const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr;
385 unsigned NumPreprocessedSkippedRanges = 0;
386
387 // === Header search information ===
388
389 /// The number of local HeaderFileInfo structures.
390 unsigned LocalNumHeaderFileInfos = 0;
391
392 /// Actual data for the on-disk hash table of header file
393 /// information.
394 ///
395 /// This pointer points into a memory buffer, where the on-disk hash
396 /// table for header file information actually lives.
397 const char *HeaderFileInfoTableData = nullptr;
398
399 /// The on-disk hash table that contains information about each of
400 /// the header files.
401 void *HeaderFileInfoTable = nullptr;
402
403 // === Submodule information ===
404
405 /// The number of submodules in this module.
406 unsigned LocalNumSubmodules = 0;
407
408 /// Base submodule ID for submodules local to this module.
409 serialization::SubmoduleID BaseSubmoduleID = 0;
410
411 /// Remapping table for submodule IDs in this module.
412 ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap;
413
414 // === Selectors ===
415
416 /// The number of selectors new to this file.
417 ///
418 /// This is the number of entries in SelectorOffsets.
419 unsigned LocalNumSelectors = 0;
420
421 /// Offsets into the selector lookup table's data array
422 /// where each selector resides.
423 const uint32_t *SelectorOffsets = nullptr;
424
425 /// Base selector ID for selectors local to this module.
426 serialization::SelectorID BaseSelectorID = 0;
427
428 /// Remapping table for selector IDs in this module.
429 ContinuousRangeMap<uint32_t, int, 2> SelectorRemap;
430
431 /// A pointer to the character data that comprises the selector table
432 ///
433 /// The SelectorOffsets table refers into this memory.
434 const unsigned char *SelectorLookupTableData = nullptr;
435
436 /// A pointer to an on-disk hash table of opaque type
437 /// ASTSelectorLookupTable.
438 ///
439 /// This hash table provides the IDs of all selectors, and the associated
440 /// instance and factory methods.
441 void *SelectorLookupTable = nullptr;
442
443 // === Declarations ===
444
445 /// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block.
446 /// It has read all the abbreviations at the start of the block and is ready
447 /// to jump around with these in context.
448 llvm::BitstreamCursor DeclsCursor;
449
450 /// The offset to the start of the DECLTYPES_BLOCK block.
451 uint64_t DeclsBlockStartOffset = 0;
452
453 /// The number of declarations in this AST file.
454 unsigned LocalNumDecls = 0;
455
456 /// Offset of each declaration within the bitstream, indexed
457 /// by the declaration ID (-1).
458 const DeclOffset *DeclOffsets = nullptr;
459
460 /// Base declaration index in ASTReader for declarations local to this module.
461 unsigned BaseDeclIndex = 0;
462
463 /// Array of file-level DeclIDs sorted by file.
464 const serialization::unaligned_decl_id_t *FileSortedDecls = nullptr;
465 unsigned NumFileSortedDecls = 0;
466
467 /// Array of category list location information within this
468 /// module file, sorted by the definition ID.
469 const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr;
470
471 /// The number of redeclaration info entries in ObjCCategoriesMap.
472 unsigned LocalNumObjCCategoriesInMap = 0;
473
474 /// The Objective-C category lists for categories known to this
475 /// module.
476 SmallVector<uint64_t, 1> ObjCCategories;
477
478 // === Types ===
479
480 /// The number of types in this AST file.
481 unsigned LocalNumTypes = 0;
482
483 /// Offset of each type within the bitstream, indexed by the
484 /// type ID, or the representation of a Type*.
485 const UnalignedUInt64 *TypeOffsets = nullptr;
486
487 /// Base type ID for types local to this module as represented in
488 /// the global type ID space.
489 serialization::TypeID BaseTypeIndex = 0;
490
491 // === Miscellaneous ===
492
493 /// Diagnostic IDs and their mappings that the user changed.
494 SmallVector<uint64_t, 8> PragmaDiagMappings;
495
496 /// List of modules which depend on this module
497 llvm::SetVector<ModuleFile *> ImportedBy;
498
499 /// List of modules which this module directly imported
500 llvm::SetVector<ModuleFile *> Imports;
501
502 /// List of modules which this modules dependent on. Different
503 /// from `Imports`, this includes indirectly imported modules too.
504 /// The order of TransitiveImports is significant. It should keep
505 /// the same order with that module file manager when we write
506 /// the current module file. The value of the member will be initialized
507 /// in `ASTReader::ReadModuleOffsetMap`.
508 llvm::SmallVector<ModuleFile *, 16> TransitiveImports;
509
510 /// Determine whether this module was directly imported at
511 /// any point during translation.
512 bool isDirectlyImported() const { return DirectlyImported; }
513
514 /// Is this a module file for a module (rather than a PCH or similar).
515 bool isModule() const {
516 return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule ||
517 Kind == MK_PrebuiltModule;
518 }
519
520 /// Dump debugging output for this module.
521 void dump();
522};
523
524} // namespace serialization
525
526} // namespace clang
527
528#endif // LLVM_CLANG_SERIALIZATION_MODULEFILE_H
529