1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef PATH_PARSER_H
10#define PATH_PARSER_H
11
12#include <__config>
13#include <__utility/unreachable.h>
14#include <cstddef>
15#include <filesystem>
16#include <utility>
17
18#include "format_string.h"
19
20_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
21
/// \brief Report whether \p C is a path separator character.
///
/// '/' is always a separator. When _LIBCPP_WIN32API is defined, '\\' is
/// accepted as well.
inline bool isSeparator(path::value_type C) {
#if defined(_LIBCPP_WIN32API)
  return C == '/' || C == '\\';
#else
  return C == '/';
#endif
}
31
/// \brief Report whether \p C lies in one of the ranges 'a'..'z' or 'A'..'Z'.
inline bool isDriveLetter(path::value_type C) {
  const bool IsLower = 'a' <= C && C <= 'z';
  const bool IsUpper = 'A' <= C && C <= 'Z';
  return IsLower || IsUpper;
}
33
34namespace parser {
35
// View type used throughout the parser; an alias for path's own string-view
// typedef.
using string_view_t = path::__string_view;
// Pair of views; this is the return type of separate_filename().
using string_view_pair = pair<string_view_t, string_view_t>;
// Pointer to a const path character. PathParser uses these as cursors into
// the string being parsed.
using PosPtr = path::value_type const*;
39
40struct PathParser {
41 enum ParserState : unsigned char {
42 // Zero is a special sentinel value used by default constructed iterators.
43 PS_BeforeBegin = path::iterator::_BeforeBegin,
44 PS_InRootName = path::iterator::_InRootName,
45 PS_InRootDir = path::iterator::_InRootDir,
46 PS_InFilenames = path::iterator::_InFilenames,
47 PS_InTrailingSep = path::iterator::_InTrailingSep,
48 PS_AtEnd = path::iterator::_AtEnd
49 };
50
51 const string_view_t Path;
52 string_view_t RawEntry;
53 ParserState State_;
54
55private:
56 PathParser(string_view_t P, ParserState State) noexcept : Path(P), State_(State) {}
57
58public:
59 PathParser(string_view_t P, string_view_t E, unsigned char S)
60 : Path(P), RawEntry(E), State_(static_cast<ParserState>(S)) {
61 // S cannot be '0' or PS_BeforeBegin.
62 }
63
64 static PathParser CreateBegin(string_view_t P) noexcept {
65 PathParser PP(P, PS_BeforeBegin);
66 PP.increment();
67 return PP;
68 }
69
70 static PathParser CreateEnd(string_view_t P) noexcept {
71 PathParser PP(P, PS_AtEnd);
72 return PP;
73 }
74
75 PosPtr peek() const noexcept {
76 auto TkEnd = getNextTokenStartPos();
77 auto End = getAfterBack();
78 return TkEnd == End ? nullptr : TkEnd;
79 }
80
81 void increment() noexcept {
82 const PosPtr End = getAfterBack();
83 const PosPtr Start = getNextTokenStartPos();
84 if (Start == End)
85 return makeState(NewState: PS_AtEnd);
86
87 switch (State_) {
88 case PS_BeforeBegin: {
89 PosPtr TkEnd = consumeRootName(P: Start, End);
90 if (TkEnd)
91 return makeState(NewState: PS_InRootName, Start, End: TkEnd);
92 }
93 _LIBCPP_FALLTHROUGH();
94 case PS_InRootName: {
95 PosPtr TkEnd = consumeAllSeparators(P: Start, End);
96 if (TkEnd)
97 return makeState(NewState: PS_InRootDir, Start, End: TkEnd);
98 else
99 return makeState(NewState: PS_InFilenames, Start, End: consumeName(P: Start, End));
100 }
101 case PS_InRootDir:
102 return makeState(NewState: PS_InFilenames, Start, End: consumeName(P: Start, End));
103
104 case PS_InFilenames: {
105 PosPtr SepEnd = consumeAllSeparators(P: Start, End);
106 if (SepEnd != End) {
107 PosPtr TkEnd = consumeName(P: SepEnd, End);
108 if (TkEnd)
109 return makeState(NewState: PS_InFilenames, Start: SepEnd, End: TkEnd);
110 }
111 return makeState(NewState: PS_InTrailingSep, Start, End: SepEnd);
112 }
113
114 case PS_InTrailingSep:
115 return makeState(NewState: PS_AtEnd);
116
117 case PS_AtEnd:
118 __libcpp_unreachable();
119 }
120 }
121
122 void decrement() noexcept {
123 const PosPtr REnd = getBeforeFront();
124 const PosPtr RStart = getCurrentTokenStartPos() - 1;
125 if (RStart == REnd) // we're decrementing the begin
126 return makeState(NewState: PS_BeforeBegin);
127
128 switch (State_) {
129 case PS_AtEnd: {
130 // Try to consume a trailing separator or root directory first.
131 if (PosPtr SepEnd = consumeAllSeparators(P: RStart, End: REnd)) {
132 if (SepEnd == REnd)
133 return makeState(NewState: PS_InRootDir, Start: Path.data(), End: RStart + 1);
134 PosPtr TkStart = consumeRootName(P: SepEnd, End: REnd);
135 if (TkStart == REnd)
136 return makeState(NewState: PS_InRootDir, Start: RStart, End: RStart + 1);
137 return makeState(NewState: PS_InTrailingSep, Start: SepEnd + 1, End: RStart + 1);
138 } else {
139 PosPtr TkStart = consumeRootName(P: RStart, End: REnd);
140 if (TkStart == REnd)
141 return makeState(NewState: PS_InRootName, Start: TkStart + 1, End: RStart + 1);
142 TkStart = consumeName(P: RStart, End: REnd);
143 return makeState(NewState: PS_InFilenames, Start: TkStart + 1, End: RStart + 1);
144 }
145 }
146 case PS_InTrailingSep:
147 return makeState(NewState: PS_InFilenames, Start: consumeName(P: RStart, End: REnd) + 1, End: RStart + 1);
148 case PS_InFilenames: {
149 PosPtr SepEnd = consumeAllSeparators(P: RStart, End: REnd);
150 if (SepEnd == REnd)
151 return makeState(NewState: PS_InRootDir, Start: Path.data(), End: RStart + 1);
152 PosPtr TkStart = consumeRootName(P: SepEnd ? SepEnd : RStart, End: REnd);
153 if (TkStart == REnd) {
154 if (SepEnd)
155 return makeState(NewState: PS_InRootDir, Start: SepEnd + 1, End: RStart + 1);
156 return makeState(NewState: PS_InRootName, Start: TkStart + 1, End: RStart + 1);
157 }
158 TkStart = consumeName(P: SepEnd, End: REnd);
159 return makeState(NewState: PS_InFilenames, Start: TkStart + 1, End: SepEnd + 1);
160 }
161 case PS_InRootDir:
162 return makeState(NewState: PS_InRootName, Start: Path.data(), End: RStart + 1);
163 case PS_InRootName:
164 case PS_BeforeBegin:
165 __libcpp_unreachable();
166 }
167 }
168
169 /// \brief Return a view with the "preferred representation" of the current
170 /// element. For example trailing separators are represented as a '.'
171 string_view_t operator*() const noexcept {
172 switch (State_) {
173 case PS_BeforeBegin:
174 case PS_AtEnd:
175 return PATHSTR("");
176 case PS_InRootDir:
177 if (RawEntry[0] == '\\')
178 return PATHSTR("\\");
179 else
180 return PATHSTR("/");
181 case PS_InTrailingSep:
182 return PATHSTR("");
183 case PS_InRootName:
184 case PS_InFilenames:
185 return RawEntry;
186 }
187 __libcpp_unreachable();
188 }
189
190 explicit operator bool() const noexcept { return State_ != PS_BeforeBegin && State_ != PS_AtEnd; }
191
192 PathParser& operator++() noexcept {
193 increment();
194 return *this;
195 }
196
197 PathParser& operator--() noexcept {
198 decrement();
199 return *this;
200 }
201
202 bool atEnd() const noexcept { return State_ == PS_AtEnd; }
203
204 bool inRootDir() const noexcept { return State_ == PS_InRootDir; }
205
206 bool inRootName() const noexcept { return State_ == PS_InRootName; }
207
208 bool inRootPath() const noexcept { return inRootName() || inRootDir(); }
209
210private:
211 void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept {
212 State_ = NewState;
213 RawEntry = string_view_t(Start, End - Start);
214 }
215 void makeState(ParserState NewState) noexcept {
216 State_ = NewState;
217 RawEntry = {};
218 }
219
220 PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); }
221
222 PosPtr getBeforeFront() const noexcept { return Path.data() - 1; }
223
224 /// \brief Return a pointer to the first character after the currently
225 /// lexed element.
226 PosPtr getNextTokenStartPos() const noexcept {
227 switch (State_) {
228 case PS_BeforeBegin:
229 return Path.data();
230 case PS_InRootName:
231 case PS_InRootDir:
232 case PS_InFilenames:
233 return &RawEntry.back() + 1;
234 case PS_InTrailingSep:
235 case PS_AtEnd:
236 return getAfterBack();
237 }
238 __libcpp_unreachable();
239 }
240
241 /// \brief Return a pointer to the first character in the currently lexed
242 /// element.
243 PosPtr getCurrentTokenStartPos() const noexcept {
244 switch (State_) {
245 case PS_BeforeBegin:
246 case PS_InRootName:
247 return &Path.front();
248 case PS_InRootDir:
249 case PS_InFilenames:
250 case PS_InTrailingSep:
251 return &RawEntry.front();
252 case PS_AtEnd:
253 return &Path.back() + 1;
254 }
255 __libcpp_unreachable();
256 }
257
258 // Consume all consecutive separators.
259 PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept {
260 if (P == nullptr || P == End || !isSeparator(C: *P))
261 return nullptr;
262 const int Inc = P < End ? 1 : -1;
263 P += Inc;
264 while (P != End && isSeparator(C: *P))
265 P += Inc;
266 return P;
267 }
268
269 // Consume exactly N separators, or return nullptr.
270 PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept {
271 PosPtr Ret = consumeAllSeparators(P, End);
272 if (Ret == nullptr)
273 return nullptr;
274 if (P < End) {
275 if (Ret == P + N)
276 return Ret;
277 } else {
278 if (Ret == P - N)
279 return Ret;
280 }
281 return nullptr;
282 }
283
284 PosPtr consumeName(PosPtr P, PosPtr End) const noexcept {
285 PosPtr Start = P;
286 if (P == nullptr || P == End || isSeparator(C: *P))
287 return nullptr;
288 const int Inc = P < End ? 1 : -1;
289 P += Inc;
290 while (P != End && !isSeparator(C: *P))
291 P += Inc;
292 if (P == End && Inc < 0) {
293 // Iterating backwards and consumed all the rest of the input.
294 // Check if the start of the string would have been considered
295 // a root name.
296 PosPtr RootEnd = consumeRootName(P: End + 1, End: Start);
297 if (RootEnd)
298 return RootEnd - 1;
299 }
300 return P;
301 }
302
303 PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept {
304 if (P == End)
305 return nullptr;
306 if (P < End) {
307 if (P + 1 == End || !isDriveLetter(C: P[0]) || P[1] != ':')
308 return nullptr;
309 return P + 2;
310 } else {
311 if (P - 1 == End || !isDriveLetter(C: P[-1]) || P[0] != ':')
312 return nullptr;
313 return P - 2;
314 }
315 }
316
317 PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept {
318 if (P == End)
319 return nullptr;
320 if (P < End)
321 return consumeName(P: consumeNSeparators(P, End, N: 2), End);
322 else
323 return consumeNSeparators(P: consumeName(P, End), End, N: 2);
324 }
325
326 PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept {
327#if defined(_LIBCPP_WIN32API)
328 if (PosPtr Ret = consumeDriveLetter(P, End))
329 return Ret;
330 if (PosPtr Ret = consumeNetworkRoot(P, End))
331 return Ret;
332#endif
333 return nullptr;
334 }
335};
336
337inline string_view_pair separate_filename(string_view_t const& s) {
338 if (s == PATHSTR(".") || s == PATHSTR("..") || s.empty())
339 return string_view_pair{s, PATHSTR("")};
340 auto pos = s.find_last_of(c: '.');
341 if (pos == string_view_t::npos || pos == 0)
342 return string_view_pair{s, string_view_t{}};
343 return string_view_pair{s.substr(pos: 0, n: pos), s.substr(pos: pos)};
344}
345
346inline string_view_t createView(PosPtr S, PosPtr E) noexcept { return {S, static_cast<size_t>(E - S) + 1}; }
347
348} // namespace parser
349
350_LIBCPP_END_NAMESPACE_FILESYSTEM
351
352#endif // PATH_PARSER_H
353