1 | //===-- TarWriter.cpp - Tar archive file creator --------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // TarWriter class provides a feature to create a tar archive file. |
10 | // |
11 | // I put emphasis on simplicity over comprehensiveness when implementing this |
12 | // class because we don't need a full-fledged archive file generator in LLVM |
13 | // at the moment. |
14 | // |
15 | // The filename field in the Unix V7 tar header is 100 bytes. Longer filenames |
16 | // are stored using the PAX extension. The PAX header is standardized in |
17 | // POSIX.1-2001. |
18 | // |
19 | // The struct definition of UstarHeader is copied from |
20 | // https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 |
21 | // |
22 | //===----------------------------------------------------------------------===// |
23 | |
24 | #include "llvm/Support/TarWriter.h" |
25 | #include "llvm/ADT/StringRef.h" |
26 | #include "llvm/Support/FileSystem.h" |
27 | #include "llvm/Support/MathExtras.h" |
28 | #include "llvm/Support/Path.h" |
29 | |
30 | using namespace llvm; |
31 | |
// Each file in an archive must be aligned to this block size.
static const int BlockSize = 512;

// On-disk layout of a ustar header block, following the field order and
// sizes given in tar(5). Every member is a plain char (array), so the struct
// contains no padding and occupies exactly one block; the static_assert
// below enforces that.
//
// Only Name, Mode, Size, Checksum, TypeFlag, Magic, Version and Prefix are
// written by the code in this file.
struct UstarHeader {
  char Name[100];     // Path, or its trailing part when Prefix is used
  char Mode[8];       // Permission bits, octal text
  char Uid[8];        // Owner user id, octal text
  char Gid[8];        // Owner group id, octal text
  char Size[12];      // Payload size in bytes, octal text
  char Mtime[12];     // Modification time, octal text
  char Checksum[8];   // Sum of all header bytes, octal text
  char TypeFlag;      // Entry type ('x' marks a PAX extended header)
  char Linkname[100]; // Link target
  char Magic[6];      // "ustar"
  char Version[2];    // "00"
  char Uname[32];     // Owner user name
  char Gname[32];     // Owner group name
  char DevMajor[8];   // Device major number
  char DevMinor[8];   // Device minor number
  char Prefix[155];   // Leading part of a path that does not fit in Name
  char Pad[12];       // Fills the header up to BlockSize bytes
};
static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header");
55 | |
56 | static UstarHeader () { |
57 | UstarHeader Hdr = {}; |
58 | memcpy(dest: Hdr.Magic, src: "ustar" , n: 5); // Ustar magic |
59 | memcpy(dest: Hdr.Version, src: "00" , n: 2); // Ustar version |
60 | return Hdr; |
61 | } |
62 | |
63 | // A PAX attribute is in the form of "<length> <key>=<value>\n" |
64 | // where <length> is the length of the entire string including |
65 | // the length field itself. An example string is this. |
66 | // |
67 | // 25 ctime=1084839148.1212\n |
68 | // |
69 | // This function create such string. |
70 | static std::string formatPax(StringRef Key, StringRef Val) { |
71 | int Len = Key.size() + Val.size() + 3; // +3 for " ", "=" and "\n" |
72 | |
73 | // We need to compute total size twice because appending |
74 | // a length field could change total size by one. |
75 | int Total = Len + Twine(Len).str().size(); |
76 | Total = Len + Twine(Total).str().size(); |
77 | return (Twine(Total) + " " + Key + "=" + Val + "\n" ).str(); |
78 | } |
79 | |
80 | // Headers in tar files must be aligned to 512 byte boundaries. |
81 | // This function forwards the current file position to the next boundary. |
82 | static void pad(raw_fd_ostream &OS) { |
83 | uint64_t Pos = OS.tell(); |
84 | OS.seek(off: alignTo(Value: Pos, Align: BlockSize)); |
85 | } |
86 | |
87 | // Computes a checksum for a tar header. |
88 | static void (UstarHeader &Hdr) { |
89 | // Before computing a checksum, checksum field must be |
90 | // filled with space characters. |
91 | memset(s: Hdr.Checksum, c: ' ', n: sizeof(Hdr.Checksum)); |
92 | |
93 | // Compute a checksum and set it to the checksum field. |
94 | unsigned Chksum = 0; |
95 | for (size_t I = 0; I < sizeof(Hdr); ++I) |
96 | Chksum += reinterpret_cast<uint8_t *>(&Hdr)[I]; |
97 | snprintf(s: Hdr.Checksum, maxlen: sizeof(Hdr.Checksum), format: "%06o" , Chksum); |
98 | } |
99 | |
100 | // Create a tar header and write it to a given output stream. |
101 | static void (raw_fd_ostream &OS, StringRef Path) { |
102 | // A PAX header consists of a 512-byte header followed |
103 | // by key-value strings. First, create key-value strings. |
104 | std::string PaxAttr = formatPax(Key: "path" , Val: Path); |
105 | |
106 | // Create a 512-byte header. |
107 | UstarHeader Hdr = makeUstarHeader(); |
108 | snprintf(s: Hdr.Size, maxlen: sizeof(Hdr.Size), format: "%011zo" , PaxAttr.size()); |
109 | Hdr.TypeFlag = 'x'; // PAX magic |
110 | computeChecksum(Hdr); |
111 | |
112 | // Write them down. |
113 | OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); |
114 | OS << PaxAttr; |
115 | pad(OS); |
116 | } |
117 | |
118 | // Path fits in a Ustar header if |
119 | // |
120 | // - Path is less than 100 characters long, or |
121 | // - Path is in the form of "<prefix>/<name>" where <prefix> is less |
122 | // than or equal to 155 characters long and <name> is less than 100 |
123 | // characters long. Both <prefix> and <name> can contain extra '/'. |
124 | // |
125 | // If Path fits in a Ustar header, updates Prefix and Name and returns true. |
126 | // Otherwise, returns false. |
127 | static bool splitUstar(StringRef Path, StringRef &Prefix, StringRef &Name) { |
128 | if (Path.size() < sizeof(UstarHeader::Name)) { |
129 | Prefix = "" ; |
130 | Name = Path; |
131 | return true; |
132 | } |
133 | |
134 | // tar 1.13 and earlier unconditionally look at the tar header interpreted |
135 | // as an 'oldgnu_header', which has an 'isextended' byte at offset 482 in the |
136 | // header, corresponding to offset 137 in the prefix. That's the version of |
137 | // tar in gnuwin, so only use 137 of the 155 bytes in the prefix. This means |
138 | // we'll need a pax header after 237 bytes of path instead of after 255, |
139 | // but in return paths up to 237 bytes work with gnuwin, instead of just |
140 | // 137 bytes of directory + 100 bytes of basename previously. |
141 | // (tar-1.13 also doesn't support pax headers, but in practice all paths in |
142 | // llvm's test suite are short enough for that to not matter.) |
143 | const int MaxPrefix = 137; |
144 | size_t Sep = Path.rfind(C: '/', From: MaxPrefix + 1); |
145 | if (Sep == StringRef::npos) |
146 | return false; |
147 | if (Path.size() - Sep - 1 >= sizeof(UstarHeader::Name)) |
148 | return false; |
149 | |
150 | Prefix = Path.substr(Start: 0, N: Sep); |
151 | Name = Path.substr(Start: Sep + 1); |
152 | return true; |
153 | } |
154 | |
155 | // The PAX header is an extended format, so a PAX header needs |
156 | // to be followed by a "real" header. |
157 | static void (raw_fd_ostream &OS, StringRef Prefix, |
158 | StringRef Name, size_t Size) { |
159 | UstarHeader Hdr = makeUstarHeader(); |
160 | memcpy(dest: Hdr.Name, src: Name.data(), n: Name.size()); |
161 | memcpy(dest: Hdr.Mode, src: "0000664" , n: 8); |
162 | snprintf(s: Hdr.Size, maxlen: sizeof(Hdr.Size), format: "%011zo" , Size); |
163 | memcpy(dest: Hdr.Prefix, src: Prefix.data(), n: Prefix.size()); |
164 | computeChecksum(Hdr); |
165 | OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); |
166 | } |
167 | |
168 | // Creates a TarWriter instance and returns it. |
169 | Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath, |
170 | StringRef BaseDir) { |
171 | using namespace sys::fs; |
172 | int FD; |
173 | if (std::error_code EC = |
174 | openFileForWrite(Name: OutputPath, ResultFD&: FD, Disp: CD_CreateAlways, Flags: OF_None)) |
175 | return make_error<StringError>(Args: "cannot open " + OutputPath, Args&: EC); |
176 | return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir)); |
177 | } |
178 | |
179 | TarWriter::TarWriter(int FD, StringRef BaseDir) |
180 | : OS(FD, /*shouldClose=*/true, /*unbuffered=*/false), |
181 | BaseDir(std::string(BaseDir)) {} |
182 | |
183 | // Append a given file to an archive. |
184 | void TarWriter::append(StringRef Path, StringRef Data) { |
185 | // Write Path and Data. |
186 | std::string Fullpath = BaseDir + "/" + sys::path::convert_to_slash(path: Path); |
187 | |
188 | // We do not want to include the same file more than once. |
189 | if (!Files.insert(key: Fullpath).second) |
190 | return; |
191 | |
192 | StringRef Prefix; |
193 | StringRef Name; |
194 | if (splitUstar(Path: Fullpath, Prefix, Name)) { |
195 | writeUstarHeader(OS, Prefix, Name, Size: Data.size()); |
196 | } else { |
197 | writePaxHeader(OS, Path: Fullpath); |
198 | writeUstarHeader(OS, Prefix: "" , Name: "" , Size: Data.size()); |
199 | } |
200 | |
201 | OS << Data; |
202 | pad(OS); |
203 | |
204 | // POSIX requires tar archives end with two null blocks. |
205 | // Here, we write the terminator and then seek back, so that |
206 | // the file being output is terminated correctly at any moment. |
207 | uint64_t Pos = OS.tell(); |
208 | OS << std::string(BlockSize * 2, '\0'); |
209 | OS.seek(off: Pos); |
210 | OS.flush(); |
211 | } |
212 | |