//===-- TarWriter.cpp - Tar archive file creator --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// TarWriter class provides a feature to create a tar archive file.
//
// I put emphasis on simplicity over comprehensiveness when implementing this
// class because we don't need a full-fledged archive file generator in LLVM
// at the moment.
//
// The filename field in the Unix V7 tar header is 100 bytes. Longer filenames
// are stored using the PAX extension. The PAX header is standardized in
// POSIX.1-2001.
//
// The struct definition of UstarHeader is copied from
// https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
//
//===----------------------------------------------------------------------===//
| 23 | |
#include "llvm/Support/TarWriter.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>

using namespace llvm;
| 31 | |
// Each file in an archive must be aligned to this block size.
static const int BlockSize = 512;

// On-disk layout of a ustar header block. Every member is a plain char field,
// so the struct has no padding and its size equals the sum of the field
// widths (512 bytes, i.e. exactly one tar block). Field order and widths
// follow the ustar description in the FreeBSD tar(5) manual page.
struct UstarHeader {
  char Name[100];     // File name (the part after Prefix, if Prefix is set)
  char Mode[8];       // File mode as octal text
  char Uid[8];        // Owner user id (left zeroed by this file)
  char Gid[8];        // Owner group id (left zeroed by this file)
  char Size[12];      // Payload size in bytes as octal text
  char Mtime[12];     // Modification time (left zeroed by this file)
  char Checksum[8];   // Sum of all header bytes as octal text
  char TypeFlag;      // Entry type; 'x' marks a PAX extended header
  char Linkname[100]; // Link target (left zeroed by this file)
  char Magic[6];      // "ustar" followed by NUL
  char Version[2];    // "00"
  char Uname[32];     // Owner user name (left zeroed by this file)
  char Gname[32];     // Owner group name (left zeroed by this file)
  char DevMajor[8];   // Device major number (left zeroed by this file)
  char DevMinor[8];   // Device minor number (left zeroed by this file)
  char Prefix[155];   // Leading directory part of a long path
  char Pad[12];       // Unused tail that rounds the header up to BlockSize
};
static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header");
| 55 | |
| 56 | static UstarHeader () { |
| 57 | UstarHeader Hdr = {}; |
| 58 | memcpy(dest: Hdr.Magic, src: "ustar" , n: 5); // Ustar magic |
| 59 | memcpy(dest: Hdr.Version, src: "00" , n: 2); // Ustar version |
| 60 | return Hdr; |
| 61 | } |
| 62 | |
| 63 | // A PAX attribute is in the form of "<length> <key>=<value>\n" |
| 64 | // where <length> is the length of the entire string including |
| 65 | // the length field itself. An example string is this. |
| 66 | // |
| 67 | // 25 ctime=1084839148.1212\n |
| 68 | // |
| 69 | // This function create such string. |
| 70 | static std::string formatPax(StringRef Key, StringRef Val) { |
| 71 | int Len = Key.size() + Val.size() + 3; // +3 for " ", "=" and "\n" |
| 72 | |
| 73 | // We need to compute total size twice because appending |
| 74 | // a length field could change total size by one. |
| 75 | int Total = Len + Twine(Len).str().size(); |
| 76 | Total = Len + Twine(Total).str().size(); |
| 77 | return (Twine(Total) + " " + Key + "=" + Val + "\n" ).str(); |
| 78 | } |
| 79 | |
| 80 | // Headers in tar files must be aligned to 512 byte boundaries. |
| 81 | // This function forwards the current file position to the next boundary. |
| 82 | static void pad(raw_fd_ostream &OS) { |
| 83 | uint64_t Pos = OS.tell(); |
| 84 | OS.seek(off: alignTo(Value: Pos, Align: BlockSize)); |
| 85 | } |
| 86 | |
| 87 | // Computes a checksum for a tar header. |
| 88 | static void (UstarHeader &Hdr) { |
| 89 | // Before computing a checksum, checksum field must be |
| 90 | // filled with space characters. |
| 91 | memset(s: Hdr.Checksum, c: ' ', n: sizeof(Hdr.Checksum)); |
| 92 | |
| 93 | // Compute a checksum and set it to the checksum field. |
| 94 | unsigned Chksum = 0; |
| 95 | for (size_t I = 0; I < sizeof(Hdr); ++I) |
| 96 | Chksum += reinterpret_cast<uint8_t *>(&Hdr)[I]; |
| 97 | snprintf(s: Hdr.Checksum, maxlen: sizeof(Hdr.Checksum), format: "%06o" , Chksum); |
| 98 | } |
| 99 | |
| 100 | // Create a tar header and write it to a given output stream. |
| 101 | static void (raw_fd_ostream &OS, StringRef Path) { |
| 102 | // A PAX header consists of a 512-byte header followed |
| 103 | // by key-value strings. First, create key-value strings. |
| 104 | std::string PaxAttr = formatPax(Key: "path" , Val: Path); |
| 105 | |
| 106 | // Create a 512-byte header. |
| 107 | UstarHeader Hdr = makeUstarHeader(); |
| 108 | snprintf(s: Hdr.Size, maxlen: sizeof(Hdr.Size), format: "%011zo" , PaxAttr.size()); |
| 109 | Hdr.TypeFlag = 'x'; // PAX magic |
| 110 | computeChecksum(Hdr); |
| 111 | |
| 112 | // Write them down. |
| 113 | OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); |
| 114 | OS << PaxAttr; |
| 115 | pad(OS); |
| 116 | } |
| 117 | |
| 118 | // Path fits in a Ustar header if |
| 119 | // |
| 120 | // - Path is less than 100 characters long, or |
| 121 | // - Path is in the form of "<prefix>/<name>" where <prefix> is less |
| 122 | // than or equal to 155 characters long and <name> is less than 100 |
| 123 | // characters long. Both <prefix> and <name> can contain extra '/'. |
| 124 | // |
| 125 | // If Path fits in a Ustar header, updates Prefix and Name and returns true. |
| 126 | // Otherwise, returns false. |
| 127 | static bool splitUstar(StringRef Path, StringRef &Prefix, StringRef &Name) { |
| 128 | if (Path.size() < sizeof(UstarHeader::Name)) { |
| 129 | Prefix = "" ; |
| 130 | Name = Path; |
| 131 | return true; |
| 132 | } |
| 133 | |
| 134 | // tar 1.13 and earlier unconditionally look at the tar header interpreted |
| 135 | // as an 'oldgnu_header', which has an 'isextended' byte at offset 482 in the |
| 136 | // header, corresponding to offset 137 in the prefix. That's the version of |
| 137 | // tar in gnuwin, so only use 137 of the 155 bytes in the prefix. This means |
| 138 | // we'll need a pax header after 237 bytes of path instead of after 255, |
| 139 | // but in return paths up to 237 bytes work with gnuwin, instead of just |
| 140 | // 137 bytes of directory + 100 bytes of basename previously. |
| 141 | // (tar-1.13 also doesn't support pax headers, but in practice all paths in |
| 142 | // llvm's test suite are short enough for that to not matter.) |
| 143 | const int MaxPrefix = 137; |
| 144 | size_t Sep = Path.rfind(C: '/', From: MaxPrefix + 1); |
| 145 | if (Sep == StringRef::npos) |
| 146 | return false; |
| 147 | if (Path.size() - Sep - 1 >= sizeof(UstarHeader::Name)) |
| 148 | return false; |
| 149 | |
| 150 | Prefix = Path.substr(Start: 0, N: Sep); |
| 151 | Name = Path.substr(Start: Sep + 1); |
| 152 | return true; |
| 153 | } |
| 154 | |
| 155 | // The PAX header is an extended format, so a PAX header needs |
| 156 | // to be followed by a "real" header. |
| 157 | static void (raw_fd_ostream &OS, StringRef Prefix, |
| 158 | StringRef Name, size_t Size) { |
| 159 | UstarHeader Hdr = makeUstarHeader(); |
| 160 | memcpy(dest: Hdr.Name, src: Name.data(), n: Name.size()); |
| 161 | memcpy(dest: Hdr.Mode, src: "0000664" , n: 8); |
| 162 | snprintf(s: Hdr.Size, maxlen: sizeof(Hdr.Size), format: "%011zo" , Size); |
| 163 | memcpy(dest: Hdr.Prefix, src: Prefix.data(), n: Prefix.size()); |
| 164 | computeChecksum(Hdr); |
| 165 | OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr)); |
| 166 | } |
| 167 | |
| 168 | // Creates a TarWriter instance and returns it. |
| 169 | Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath, |
| 170 | StringRef BaseDir) { |
| 171 | using namespace sys::fs; |
| 172 | int FD; |
| 173 | if (std::error_code EC = |
| 174 | openFileForWrite(Name: OutputPath, ResultFD&: FD, Disp: CD_CreateAlways, Flags: OF_None)) |
| 175 | return make_error<StringError>(Args: "cannot open " + OutputPath, Args&: EC); |
| 176 | return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir)); |
| 177 | } |
| 178 | |
| 179 | TarWriter::TarWriter(int FD, StringRef BaseDir) |
| 180 | : OS(FD, /*shouldClose=*/true, /*unbuffered=*/false), |
| 181 | BaseDir(std::string(BaseDir)) {} |
| 182 | |
| 183 | // Append a given file to an archive. |
| 184 | void TarWriter::append(StringRef Path, StringRef Data) { |
| 185 | // Write Path and Data. |
| 186 | std::string Fullpath = BaseDir + "/" + sys::path::convert_to_slash(path: Path); |
| 187 | |
| 188 | // We do not want to include the same file more than once. |
| 189 | if (!Files.insert(key: Fullpath).second) |
| 190 | return; |
| 191 | |
| 192 | StringRef Prefix; |
| 193 | StringRef Name; |
| 194 | if (splitUstar(Path: Fullpath, Prefix, Name)) { |
| 195 | writeUstarHeader(OS, Prefix, Name, Size: Data.size()); |
| 196 | } else { |
| 197 | writePaxHeader(OS, Path: Fullpath); |
| 198 | writeUstarHeader(OS, Prefix: "" , Name: "" , Size: Data.size()); |
| 199 | } |
| 200 | |
| 201 | OS << Data; |
| 202 | pad(OS); |
| 203 | |
| 204 | // POSIX requires tar archives end with two null blocks. |
| 205 | // Here, we write the terminator and then seek back, so that |
| 206 | // the file being output is terminated correctly at any moment. |
| 207 | uint64_t Pos = OS.tell(); |
| 208 | OS << std::string(BlockSize * 2, '\0'); |
| 209 | OS.seek(off: Pos); |
| 210 | OS.flush(); |
| 211 | } |
| 212 | |