1 | //===- split-file.cpp - Input splitting utility ---------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Split input into multiple parts separated by regex '^(.|//)--- ' and extract |
10 | // the specified part. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/ADT/DenseMap.h" |
15 | #include "llvm/ADT/StringExtras.h" |
16 | #include "llvm/ADT/StringRef.h" |
17 | #include "llvm/Support/CommandLine.h" |
18 | #include "llvm/Support/FileOutputBuffer.h" |
19 | #include "llvm/Support/FileSystem.h" |
20 | #include "llvm/Support/LineIterator.h" |
21 | #include "llvm/Support/MemoryBuffer.h" |
22 | #include "llvm/Support/Path.h" |
23 | #include "llvm/Support/ToolOutputFile.h" |
24 | #include "llvm/Support/WithColor.h" |
25 | #include <string> |
26 | #include <system_error> |
27 | |
28 | using namespace llvm; |
29 | |
30 | static cl::OptionCategory cat("split-file Options" ); |
31 | |
32 | static cl::opt<std::string> input(cl::Positional, cl::desc("filename" ), |
33 | cl::cat(cat)); |
34 | |
35 | static cl::opt<std::string> output(cl::Positional, cl::desc("directory" ), |
36 | cl::value_desc("directory" ), cl::cat(cat)); |
37 | |
38 | static cl::opt<bool> leadingLines("leading-lines" , |
39 | cl::desc("Preserve line numbers" ), |
40 | cl::cat(cat)); |
41 | |
42 | static cl::opt<bool> noLeadingLines("no-leading-lines" , |
43 | cl::desc("Don't preserve line numbers (default)" ), |
44 | cl::cat(cat)); |
45 | |
46 | static StringRef toolName; |
47 | static int errorCount; |
48 | |
49 | [[noreturn]] static void fatal(StringRef filename, const Twine &message) { |
50 | if (filename.empty()) |
51 | WithColor::error(OS&: errs(), Prefix: toolName) << message << '\n'; |
52 | else |
53 | WithColor::error(OS&: errs(), Prefix: toolName) << filename << ": " << message << '\n'; |
54 | exit(status: 1); |
55 | } |
56 | |
57 | static void error(StringRef filename, int64_t line, const Twine &message) { |
58 | ++errorCount; |
59 | errs() << filename << ':' << line << ": " ; |
60 | WithColor::error(OS&: errs()) << message << '\n'; |
61 | } |
62 | |
namespace {
/// Location of one named part inside the input buffer.
struct Part {
  /// First byte of the part's content; stays null when the separator is the
  /// last line of the input.
  const char *begin{nullptr};
  /// One past the last byte of the part's content.
  const char *end{nullptr};
  /// Number of line terminators to emit before the content.
  int64_t leadingLines{0};
};
} // namespace
70 | |
71 | static int handle(MemoryBuffer &inputBuf, StringRef input) { |
72 | DenseMap<StringRef, Part> partToBegin; |
73 | StringRef lastPart, separator; |
74 | StringRef EOL = inputBuf.getBuffer().detectEOL(); |
75 | for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) { |
76 | const int64_t lineNo = i.line_number(); |
77 | const StringRef line = *i++; |
78 | const size_t markerLen = line.starts_with(Prefix: "//" ) ? 6 : 5; |
79 | if (!(line.size() >= markerLen && |
80 | line.substr(Start: markerLen - 4).starts_with(Prefix: "--- " ))) |
81 | continue; |
82 | separator = line.substr(Start: 0, N: markerLen); |
83 | const StringRef partName = line.substr(Start: markerLen); |
84 | if (partName.empty()) { |
85 | error(filename: input, line: lineNo, message: "empty part name" ); |
86 | continue; |
87 | } |
88 | if (isSpace(C: partName.front()) || isSpace(C: partName.back())) { |
89 | error(filename: input, line: lineNo, message: "part name cannot have leading or trailing space" ); |
90 | continue; |
91 | } |
92 | |
93 | auto res = partToBegin.try_emplace(Key: partName); |
94 | if (!res.second) { |
95 | error(filename: input, line: lineNo, |
96 | message: "'" + separator + partName + "' occurs more than once" ); |
97 | continue; |
98 | } |
99 | if (!lastPart.empty()) |
100 | partToBegin[lastPart].end = line.data(); |
101 | Part &cur = res.first->second; |
102 | if (!i.is_at_eof()) |
103 | cur.begin = i->data(); |
104 | // If --leading-lines is specified, numEmptyLines is 0. Append newlines so |
105 | // that the extracted part preserves line numbers. |
106 | cur.leadingLines = leadingLines ? i.line_number() - 1 : 0; |
107 | |
108 | lastPart = partName; |
109 | } |
110 | if (lastPart.empty()) |
111 | fatal(filename: input, message: "no part separator was found" ); |
112 | if (errorCount) |
113 | return 1; |
114 | partToBegin[lastPart].end = inputBuf.getBufferEnd(); |
115 | |
116 | std::vector<std::unique_ptr<ToolOutputFile>> outputFiles; |
117 | SmallString<256> partPath; |
118 | for (auto &keyValue : partToBegin) { |
119 | partPath.clear(); |
120 | sys::path::append(path&: partPath, a: output, b: keyValue.first); |
121 | std::error_code ec = |
122 | sys::fs::create_directories(path: sys::path::parent_path(path: partPath)); |
123 | if (ec) |
124 | fatal(filename: input, message: ec.message()); |
125 | auto f = std::make_unique<ToolOutputFile>(args: partPath.str(), args&: ec, |
126 | args: llvm::sys::fs::OF_None); |
127 | if (!f) |
128 | fatal(filename: input, message: ec.message()); |
129 | |
130 | Part &part = keyValue.second; |
131 | for (int64_t i = 0; i != part.leadingLines; ++i) |
132 | (*f).os() << EOL; |
133 | if (part.begin) |
134 | (*f).os().write(Ptr: part.begin, Size: part.end - part.begin); |
135 | outputFiles.push_back(x: std::move(f)); |
136 | } |
137 | |
138 | for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles) |
139 | outputFile->keep(); |
140 | return 0; |
141 | } |
142 | |
143 | int main(int argc, const char **argv) { |
144 | toolName = sys::path::stem(path: argv[0]); |
145 | cl::HideUnrelatedOptions(Categories: {&cat}); |
146 | cl::ParseCommandLineOptions( |
147 | argc, argv, |
148 | Overview: "Split input into multiple parts separated by regex '^(.|//)--- ' and " |
149 | "extract the part specified by '^(.|//)--- <part>'\n" , |
150 | Errs: nullptr, |
151 | /*EnvVar=*/nullptr, |
152 | /*LongOptionsUseDoubleDash=*/true); |
153 | |
154 | if (input.empty()) |
155 | fatal(filename: "" , message: "input filename is not specified" ); |
156 | if (output.empty()) |
157 | fatal(filename: "" , message: "output directory is not specified" ); |
158 | ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr = |
159 | MemoryBuffer::getFileOrSTDIN(Filename: input); |
160 | if (std::error_code ec = bufferOrErr.getError()) |
161 | fatal(filename: input, message: ec.message()); |
162 | |
163 | // Delete output if it is a file or an empty directory, so that we can create |
164 | // a directory. |
165 | sys::fs::file_status status; |
166 | if (std::error_code ec = sys::fs::status(path: output, result&: status)) |
167 | if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory)) |
168 | fatal(filename: output, message: ec.message()); |
169 | if (status.type() != sys::fs::file_type::file_not_found && |
170 | status.type() != sys::fs::file_type::directory_file && |
171 | status.type() != sys::fs::file_type::regular_file) |
172 | fatal(filename: output, message: "output cannot be a special file" ); |
173 | if (std::error_code ec = sys::fs::remove(path: output, /*IgnoreNonExisting=*/true)) |
174 | if (ec.value() != static_cast<int>(std::errc::directory_not_empty) && |
175 | ec.value() != static_cast<int>(std::errc::file_exists)) |
176 | fatal(filename: output, message: ec.message()); |
177 | return handle(inputBuf&: **bufferOrErr, input); |
178 | } |
179 | |