1//===-- FileCollector.h -----------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_SUPPORT_FILECOLLECTOR_H
10#define LLVM_SUPPORT_FILECOLLECTOR_H
11
12#include "llvm/ADT/StringMap.h"
13#include "llvm/ADT/StringSet.h"
14#include "llvm/Support/Compiler.h"
15#include "llvm/Support/VirtualFileSystem.h"
16#include <mutex>
17#include <string>
18
19namespace llvm {
// Forward declarations. FileCollectorFileSystem is only named in a friend
// declaration in this header, and Twine is only taken by const reference, so
// neither needs a complete definition here.
class FileCollectorFileSystem;
class Twine;
22
23class LLVM_ABI FileCollectorBase {
24public:
25 FileCollectorBase();
26 virtual ~FileCollectorBase();
27
28 void addFile(const Twine &file);
29 void addDirectory(const Twine &Dir);
30
31protected:
32 bool markAsSeen(StringRef Path) {
33 if (Path.empty())
34 return false;
35 return Seen.insert(key: Path).second;
36 }
37
38 virtual void addFileImpl(StringRef SrcPath) = 0;
39
40 virtual llvm::vfs::directory_iterator
41 addDirectoryImpl(const llvm::Twine &Dir,
42 IntrusiveRefCntPtr<vfs::FileSystem> FS,
43 std::error_code &EC) = 0;
44
45 /// Synchronizes access to internal data structures.
46 std::mutex Mutex;
47
48 /// Tracks already seen files so they can be skipped.
49 StringSet<> Seen;
50};
51
52/// Captures file system interaction and generates data to be later replayed
53/// with the RedirectingFileSystem.
54///
55/// For any file that gets accessed we eventually create:
56/// - a copy of the file inside Root
57/// - a record in RedirectingFileSystem mapping that maps:
58/// current real path -> path to the copy in Root
59///
60/// That intent is that later when the mapping is used by RedirectingFileSystem
61/// it simulates the state of FS that we collected.
62///
63/// We generate file copies and mapping lazily - see writeMapping and copyFiles.
64/// We don't try to capture the state of the file at the exact time when it's
65/// accessed. Files might get changed, deleted ... we record only the "final"
66/// state.
67///
68/// In order to preserve the relative topology of files we use their real paths
69/// as relative paths inside of the Root.
70class LLVM_ABI FileCollector : public FileCollectorBase {
71public:
72 /// Helper utility that encapsulates the logic for canonicalizing a virtual
73 /// path and a path to copy from.
74 class PathCanonicalizer {
75 public:
76 struct PathStorage {
77 SmallString<256> CopyFrom;
78 SmallString<256> VirtualPath;
79 };
80
81 /// Canonicalize a pair of virtual and real paths.
82 LLVM_ABI PathStorage canonicalize(StringRef SrcPath);
83
84 /// Return the underlying file system.
85 vfs::FileSystem &getFileSystem() const { return *VFS; };
86
87 explicit PathCanonicalizer(IntrusiveRefCntPtr<vfs::FileSystem> VFS)
88 : VFS(std::move(VFS)) {}
89
90 private:
91 /// Replace with a (mostly) real path, or don't modify. Resolves symlinks
92 /// in the directory, using \a CachedDirs to avoid redundant lookups, but
93 /// leaves the filename as a possible symlink.
94 void updateWithRealPath(SmallVectorImpl<char> &Path);
95
96 IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS;
97
98 StringMap<std::string> CachedDirs;
99 };
100
101 /// \p Root is the directory where collected files are will be stored.
102 /// \p OverlayRoot is VFS mapping root.
103 /// \p Root directory gets created in copyFiles unless it already exists.
104 FileCollector(std::string Root, std::string OverlayRoot,
105 IntrusiveRefCntPtr<vfs::FileSystem> VFS);
106
107 /// Write the yaml mapping (for the VFS) to the given file.
108 std::error_code writeMapping(StringRef MappingFile);
109
110 /// Copy the files into the root directory.
111 ///
112 /// When StopOnError is true (the default) we abort as soon as one file
113 /// cannot be copied. This is relatively common, for example when a file was
114 /// removed after it was added to the mapping.
115 std::error_code copyFiles(bool StopOnError = true);
116
117 /// Create a VFS that uses \p Collector to collect files accessed via \p
118 /// BaseFS.
119 static IntrusiveRefCntPtr<vfs::FileSystem>
120 createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS,
121 std::shared_ptr<FileCollector> Collector);
122
123private:
124 friend FileCollectorFileSystem;
125
126 void addFileToMapping(StringRef VirtualPath, StringRef RealPath) {
127 if (sys::fs::is_directory(Path: VirtualPath))
128 VFSWriter.addDirectoryMapping(VirtualPath, RealPath);
129 else
130 VFSWriter.addFileMapping(VirtualPath, RealPath);
131 }
132
133protected:
134 void addFileImpl(StringRef SrcPath) override;
135
136 llvm::vfs::directory_iterator
137 addDirectoryImpl(const llvm::Twine &Dir,
138 IntrusiveRefCntPtr<vfs::FileSystem> FS,
139 std::error_code &EC) override;
140
141 /// The directory where collected files are copied to in copyFiles().
142 const std::string Root;
143
144 /// The root directory where the VFS overlay lives.
145 const std::string OverlayRoot;
146
147 /// The yaml mapping writer.
148 vfs::YAMLVFSWriter VFSWriter;
149
150 /// Helper utility for canonicalizing paths.
151 PathCanonicalizer Canonicalizer;
152};
153
154} // end namespace llvm
155
156#endif // LLVM_SUPPORT_FILECOLLECTOR_H
157