| 1 | //===- split-file.cpp - Input splitting utility ---------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Split input into multiple parts separated by regex '^(.|//)--- ' and extract |
| 10 | // the specified part. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "llvm/ADT/DenseMap.h" |
| 15 | #include "llvm/ADT/StringExtras.h" |
| 16 | #include "llvm/ADT/StringRef.h" |
| 17 | #include "llvm/Support/CommandLine.h" |
| 18 | #include "llvm/Support/FileOutputBuffer.h" |
| 19 | #include "llvm/Support/FileSystem.h" |
| 20 | #include "llvm/Support/LineIterator.h" |
| 21 | #include "llvm/Support/MemoryBuffer.h" |
| 22 | #include "llvm/Support/Path.h" |
| 23 | #include "llvm/Support/ToolOutputFile.h" |
| 24 | #include "llvm/Support/WithColor.h" |
| 25 | #include <string> |
| 26 | #include <system_error> |
| 27 | |
| 28 | using namespace llvm; |
| 29 | |
| 30 | static cl::OptionCategory cat("split-file Options" ); |
| 31 | |
| 32 | static cl::opt<std::string> input(cl::Positional, cl::desc("filename" ), |
| 33 | cl::cat(cat)); |
| 34 | |
| 35 | static cl::opt<std::string> output(cl::Positional, cl::desc("directory" ), |
| 36 | cl::value_desc("directory" ), cl::cat(cat)); |
| 37 | |
| 38 | static cl::opt<bool> leadingLines("leading-lines" , |
| 39 | cl::desc("Preserve line numbers" ), |
| 40 | cl::cat(cat)); |
| 41 | |
| 42 | static cl::opt<bool> noLeadingLines("no-leading-lines" , |
| 43 | cl::desc("Don't preserve line numbers (default)" ), |
| 44 | cl::cat(cat)); |
| 45 | |
| 46 | static StringRef toolName; |
| 47 | static int errorCount; |
| 48 | |
| 49 | [[noreturn]] static void fatal(StringRef filename, const Twine &message) { |
| 50 | if (filename.empty()) |
| 51 | WithColor::error(OS&: errs(), Prefix: toolName) << message << '\n'; |
| 52 | else |
| 53 | WithColor::error(OS&: errs(), Prefix: toolName) << filename << ": " << message << '\n'; |
| 54 | exit(status: 1); |
| 55 | } |
| 56 | |
| 57 | static void error(StringRef filename, int64_t line, const Twine &message) { |
| 58 | ++errorCount; |
| 59 | errs() << filename << ':' << line << ": " ; |
| 60 | WithColor::error(OS&: errs()) << message << '\n'; |
| 61 | } |
| 62 | |
namespace {
/// Byte range of one extracted part, plus the padding to emit before it.
struct Part {
  /// First byte of the part's content; stays null when handle() finds no line
  /// after the separator.
  const char *begin{nullptr};
  /// One past the last byte of the part's content.
  const char *end{nullptr};
  /// Number of end-of-line sequences handle() writes before the content.
  int64_t leadingLines{0};
};
} // namespace
| 70 | |
| 71 | static int handle(MemoryBuffer &inputBuf, StringRef input) { |
| 72 | DenseMap<StringRef, Part> partToBegin; |
| 73 | StringRef lastPart, separator; |
| 74 | StringRef EOL = inputBuf.getBuffer().detectEOL(); |
| 75 | for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) { |
| 76 | const int64_t lineNo = i.line_number(); |
| 77 | const StringRef line = *i++; |
| 78 | const size_t markerLen = line.starts_with(Prefix: "//" ) ? 6 : 5; |
| 79 | if (!(line.size() >= markerLen && |
| 80 | line.substr(Start: markerLen - 4).starts_with(Prefix: "--- " ))) |
| 81 | continue; |
| 82 | separator = line.substr(Start: 0, N: markerLen); |
| 83 | const StringRef partName = line.substr(Start: markerLen); |
| 84 | if (partName.empty()) { |
| 85 | error(filename: input, line: lineNo, message: "empty part name" ); |
| 86 | continue; |
| 87 | } |
| 88 | if (isSpace(C: partName.front()) || isSpace(C: partName.back())) { |
| 89 | error(filename: input, line: lineNo, message: "part name cannot have leading or trailing space" ); |
| 90 | continue; |
| 91 | } |
| 92 | |
| 93 | auto res = partToBegin.try_emplace(Key: partName); |
| 94 | if (!res.second) { |
| 95 | error(filename: input, line: lineNo, |
| 96 | message: "'" + separator + partName + "' occurs more than once" ); |
| 97 | continue; |
| 98 | } |
| 99 | if (!lastPart.empty()) |
| 100 | partToBegin[lastPart].end = line.data(); |
| 101 | Part &cur = res.first->second; |
| 102 | if (!i.is_at_eof()) |
| 103 | cur.begin = i->data(); |
| 104 | // If --leading-lines is specified, numEmptyLines is 0. Append newlines so |
| 105 | // that the extracted part preserves line numbers. |
| 106 | cur.leadingLines = leadingLines ? i.line_number() - 1 : 0; |
| 107 | |
| 108 | lastPart = partName; |
| 109 | } |
| 110 | if (lastPart.empty()) |
| 111 | fatal(filename: input, message: "no part separator was found" ); |
| 112 | if (errorCount) |
| 113 | return 1; |
| 114 | partToBegin[lastPart].end = inputBuf.getBufferEnd(); |
| 115 | |
| 116 | std::vector<std::unique_ptr<ToolOutputFile>> outputFiles; |
| 117 | SmallString<256> partPath; |
| 118 | for (auto &keyValue : partToBegin) { |
| 119 | partPath.clear(); |
| 120 | sys::path::append(path&: partPath, a: output, b: keyValue.first); |
| 121 | std::error_code ec = |
| 122 | sys::fs::create_directories(path: sys::path::parent_path(path: partPath)); |
| 123 | if (ec) |
| 124 | fatal(filename: input, message: ec.message()); |
| 125 | auto f = std::make_unique<ToolOutputFile>(args: partPath.str(), args&: ec, |
| 126 | args: llvm::sys::fs::OF_Text); |
| 127 | if (!f) |
| 128 | fatal(filename: input, message: ec.message()); |
| 129 | |
| 130 | Part &part = keyValue.second; |
| 131 | for (int64_t i = 0; i != part.leadingLines; ++i) |
| 132 | (*f).os() << EOL; |
| 133 | if (part.begin) |
| 134 | (*f).os().write(Ptr: part.begin, Size: part.end - part.begin); |
| 135 | outputFiles.push_back(x: std::move(f)); |
| 136 | } |
| 137 | |
| 138 | for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles) |
| 139 | outputFile->keep(); |
| 140 | return 0; |
| 141 | } |
| 142 | |
| 143 | int main(int argc, const char **argv) { |
| 144 | toolName = sys::path::stem(path: argv[0]); |
| 145 | cl::HideUnrelatedOptions(Categories: {&cat}); |
| 146 | cl::ParseCommandLineOptions( |
| 147 | argc, argv, |
| 148 | Overview: "Split input into multiple parts separated by regex '^(.|//)--- ' and " |
| 149 | "extract the part specified by '^(.|//)--- <part>'\n" , |
| 150 | Errs: nullptr, |
| 151 | /*EnvVar=*/nullptr, |
| 152 | /*LongOptionsUseDoubleDash=*/true); |
| 153 | |
| 154 | if (input.empty()) |
| 155 | fatal(filename: "" , message: "input filename is not specified" ); |
| 156 | if (output.empty()) |
| 157 | fatal(filename: "" , message: "output directory is not specified" ); |
| 158 | ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr = |
| 159 | MemoryBuffer::getFileOrSTDIN(Filename: input, /*IsText=*/true); |
| 160 | if (std::error_code ec = bufferOrErr.getError()) |
| 161 | fatal(filename: input, message: ec.message()); |
| 162 | |
| 163 | // Delete output if it is a file or an empty directory, so that we can create |
| 164 | // a directory. |
| 165 | sys::fs::file_status status; |
| 166 | if (std::error_code ec = sys::fs::status(path: output, result&: status)) |
| 167 | if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory)) |
| 168 | fatal(filename: output, message: ec.message()); |
| 169 | if (status.type() != sys::fs::file_type::file_not_found && |
| 170 | status.type() != sys::fs::file_type::directory_file && |
| 171 | status.type() != sys::fs::file_type::regular_file) |
| 172 | fatal(filename: output, message: "output cannot be a special file" ); |
| 173 | if (std::error_code ec = sys::fs::remove(path: output, /*IgnoreNonExisting=*/true)) |
| 174 | if (ec.value() != static_cast<int>(std::errc::directory_not_empty) && |
| 175 | ec.value() != static_cast<int>(std::errc::file_exists)) |
| 176 | fatal(filename: output, message: ec.message()); |
| 177 | return handle(inputBuf&: **bufferOrErr, input); |
| 178 | } |
| 179 | |