1//===-- FileCollector.h -----------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_SUPPORT_FILECOLLECTOR_H
10#define LLVM_SUPPORT_FILECOLLECTOR_H
11
12#include "llvm/ADT/StringMap.h"
13#include "llvm/ADT/StringSet.h"
14#include "llvm/Support/VirtualFileSystem.h"
15#include <mutex>
16#include <string>
17
18namespace llvm {
19class FileCollectorFileSystem;
20class Twine;
21
22class FileCollectorBase {
23public:
24 FileCollectorBase();
25 virtual ~FileCollectorBase();
26
27 void addFile(const Twine &file);
28 void addDirectory(const Twine &Dir);
29
30protected:
31 bool markAsSeen(StringRef Path) {
32 if (Path.empty())
33 return false;
34 return Seen.insert(key: Path).second;
35 }
36
37 virtual void addFileImpl(StringRef SrcPath) = 0;
38
39 virtual llvm::vfs::directory_iterator
40 addDirectoryImpl(const llvm::Twine &Dir,
41 IntrusiveRefCntPtr<vfs::FileSystem> FS,
42 std::error_code &EC) = 0;
43
44 /// Synchronizes access to internal data structures.
45 std::mutex Mutex;
46
47 /// Tracks already seen files so they can be skipped.
48 StringSet<> Seen;
49};
50
51/// Captures file system interaction and generates data to be later replayed
52/// with the RedirectingFileSystem.
53///
54/// For any file that gets accessed we eventually create:
55/// - a copy of the file inside Root
56/// - a record in RedirectingFileSystem mapping that maps:
57/// current real path -> path to the copy in Root
58///
59/// That intent is that later when the mapping is used by RedirectingFileSystem
60/// it simulates the state of FS that we collected.
61///
62/// We generate file copies and mapping lazily - see writeMapping and copyFiles.
63/// We don't try to capture the state of the file at the exact time when it's
64/// accessed. Files might get changed, deleted ... we record only the "final"
65/// state.
66///
67/// In order to preserve the relative topology of files we use their real paths
68/// as relative paths inside of the Root.
69class FileCollector : public FileCollectorBase {
70public:
71 /// Helper utility that encapsulates the logic for canonicalizing a virtual
72 /// path and a path to copy from.
73 class PathCanonicalizer {
74 public:
75 struct PathStorage {
76 SmallString<256> CopyFrom;
77 SmallString<256> VirtualPath;
78 };
79
80 /// Canonicalize a pair of virtual and real paths.
81 PathStorage canonicalize(StringRef SrcPath);
82
83 private:
84 /// Replace with a (mostly) real path, or don't modify. Resolves symlinks
85 /// in the directory, using \a CachedDirs to avoid redundant lookups, but
86 /// leaves the filename as a possible symlink.
87 void updateWithRealPath(SmallVectorImpl<char> &Path);
88
89 StringMap<std::string> CachedDirs;
90 };
91
92 /// \p Root is the directory where collected files are will be stored.
93 /// \p OverlayRoot is VFS mapping root.
94 /// \p Root directory gets created in copyFiles unless it already exists.
95 FileCollector(std::string Root, std::string OverlayRoot);
96
97 /// Write the yaml mapping (for the VFS) to the given file.
98 std::error_code writeMapping(StringRef MappingFile);
99
100 /// Copy the files into the root directory.
101 ///
102 /// When StopOnError is true (the default) we abort as soon as one file
103 /// cannot be copied. This is relatively common, for example when a file was
104 /// removed after it was added to the mapping.
105 std::error_code copyFiles(bool StopOnError = true);
106
107 /// Create a VFS that uses \p Collector to collect files accessed via \p
108 /// BaseFS.
109 static IntrusiveRefCntPtr<vfs::FileSystem>
110 createCollectorVFS(IntrusiveRefCntPtr<vfs::FileSystem> BaseFS,
111 std::shared_ptr<FileCollector> Collector);
112
113private:
114 friend FileCollectorFileSystem;
115
116 void addFileToMapping(StringRef VirtualPath, StringRef RealPath) {
117 if (sys::fs::is_directory(Path: VirtualPath))
118 VFSWriter.addDirectoryMapping(VirtualPath, RealPath);
119 else
120 VFSWriter.addFileMapping(VirtualPath, RealPath);
121 }
122
123protected:
124 void addFileImpl(StringRef SrcPath) override;
125
126 llvm::vfs::directory_iterator
127 addDirectoryImpl(const llvm::Twine &Dir,
128 IntrusiveRefCntPtr<vfs::FileSystem> FS,
129 std::error_code &EC) override;
130
131 /// The directory where collected files are copied to in copyFiles().
132 const std::string Root;
133
134 /// The root directory where the VFS overlay lives.
135 const std::string OverlayRoot;
136
137 /// The yaml mapping writer.
138 vfs::YAMLVFSWriter VFSWriter;
139
140 /// Helper utility for canonicalizing paths.
141 PathCanonicalizer Canonicalizer;
142};
143
144} // end namespace llvm
145
146#endif // LLVM_SUPPORT_FILECOLLECTOR_H
147