1 | //===----------------------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef PATH_PARSER_H |
10 | #define PATH_PARSER_H |
11 | |
12 | #include <__config> |
13 | #include <__utility/unreachable.h> |
14 | #include <cstddef> |
15 | #include <filesystem> |
16 | #include <utility> |
17 | |
18 | #include "format_string.h" |
19 | |
20 | _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM |
21 | |
/// \brief Report whether \p C is a directory separator.
///
/// '/' is always a separator. When _LIBCPP_WIN32API is defined, '\\' is
/// accepted as well.
inline bool isSeparator(path::value_type C) {
#if defined(_LIBCPP_WIN32API)
  return C == '/' || C == '\\';
#else
  return C == '/';
#endif
}
31 | |
/// \brief Report whether \p C is an ASCII letter ('a'-'z' or 'A'-'Z'), i.e. a
/// character that may appear before the ':' of a drive specifier.
inline bool isDriveLetter(path::value_type C) {
  const bool IsLower = C >= 'a' && C <= 'z';
  const bool IsUpper = C >= 'A' && C <= 'Z';
  return IsLower || IsUpper;
}
33 | |
34 | namespace parser { |
35 | |
// Alias for path::__string_view; the view type used for the parsed text and
// for every element the parser hands out.
36 | using string_view_t = path::__string_view; |
// A pair of views; this is the return type of separate_filename().
37 | using string_view_pair = pair<string_view_t, string_view_t>; |
// Pointer to a const path character, used as a position within the parsed text.
38 | using PosPtr = path::value_type const*; |
39 | |
40 | struct PathParser { |
41 | enum ParserState : unsigned char { |
42 | // Zero is a special sentinel value used by default constructed iterators. |
43 | PS_BeforeBegin = path::iterator::_BeforeBegin, |
44 | PS_InRootName = path::iterator::_InRootName, |
45 | PS_InRootDir = path::iterator::_InRootDir, |
46 | PS_InFilenames = path::iterator::_InFilenames, |
47 | PS_InTrailingSep = path::iterator::_InTrailingSep, |
48 | PS_AtEnd = path::iterator::_AtEnd |
49 | }; |
50 | |
51 | const string_view_t Path; |
52 | string_view_t RawEntry; |
53 | ParserState State_; |
54 | |
55 | private: |
56 | PathParser(string_view_t P, ParserState State) noexcept : Path(P), State_(State) {} |
57 | |
58 | public: |
59 | PathParser(string_view_t P, string_view_t E, unsigned char S) |
60 | : Path(P), RawEntry(E), State_(static_cast<ParserState>(S)) { |
61 | // S cannot be '0' or PS_BeforeBegin. |
62 | } |
63 | |
64 | static PathParser CreateBegin(string_view_t P) noexcept { |
65 | PathParser PP(P, PS_BeforeBegin); |
66 | PP.increment(); |
67 | return PP; |
68 | } |
69 | |
70 | static PathParser CreateEnd(string_view_t P) noexcept { |
71 | PathParser PP(P, PS_AtEnd); |
72 | return PP; |
73 | } |
74 | |
75 | PosPtr peek() const noexcept { |
76 | auto TkEnd = getNextTokenStartPos(); |
77 | auto End = getAfterBack(); |
78 | return TkEnd == End ? nullptr : TkEnd; |
79 | } |
80 | |
81 | void increment() noexcept { |
82 | const PosPtr End = getAfterBack(); |
83 | const PosPtr Start = getNextTokenStartPos(); |
84 | if (Start == End) |
85 | return makeState(NewState: PS_AtEnd); |
86 | |
87 | switch (State_) { |
88 | case PS_BeforeBegin: { |
89 | PosPtr TkEnd = consumeRootName(P: Start, End); |
90 | if (TkEnd) |
91 | return makeState(NewState: PS_InRootName, Start, End: TkEnd); |
92 | } |
93 | _LIBCPP_FALLTHROUGH(); |
94 | case PS_InRootName: { |
95 | PosPtr TkEnd = consumeAllSeparators(P: Start, End); |
96 | if (TkEnd) |
97 | return makeState(NewState: PS_InRootDir, Start, End: TkEnd); |
98 | else |
99 | return makeState(NewState: PS_InFilenames, Start, End: consumeName(P: Start, End)); |
100 | } |
101 | case PS_InRootDir: |
102 | return makeState(NewState: PS_InFilenames, Start, End: consumeName(P: Start, End)); |
103 | |
104 | case PS_InFilenames: { |
105 | PosPtr SepEnd = consumeAllSeparators(P: Start, End); |
106 | if (SepEnd != End) { |
107 | PosPtr TkEnd = consumeName(P: SepEnd, End); |
108 | if (TkEnd) |
109 | return makeState(NewState: PS_InFilenames, Start: SepEnd, End: TkEnd); |
110 | } |
111 | return makeState(NewState: PS_InTrailingSep, Start, End: SepEnd); |
112 | } |
113 | |
114 | case PS_InTrailingSep: |
115 | return makeState(NewState: PS_AtEnd); |
116 | |
117 | case PS_AtEnd: |
118 | __libcpp_unreachable(); |
119 | } |
120 | } |
121 | |
122 | void decrement() noexcept { |
123 | const PosPtr REnd = getBeforeFront(); |
124 | const PosPtr RStart = getCurrentTokenStartPos() - 1; |
125 | if (RStart == REnd) // we're decrementing the begin |
126 | return makeState(NewState: PS_BeforeBegin); |
127 | |
128 | switch (State_) { |
129 | case PS_AtEnd: { |
130 | // Try to consume a trailing separator or root directory first. |
131 | if (PosPtr SepEnd = consumeAllSeparators(P: RStart, End: REnd)) { |
132 | if (SepEnd == REnd) |
133 | return makeState(NewState: PS_InRootDir, Start: Path.data(), End: RStart + 1); |
134 | PosPtr TkStart = consumeRootName(P: SepEnd, End: REnd); |
135 | if (TkStart == REnd) |
136 | return makeState(NewState: PS_InRootDir, Start: RStart, End: RStart + 1); |
137 | return makeState(NewState: PS_InTrailingSep, Start: SepEnd + 1, End: RStart + 1); |
138 | } else { |
139 | PosPtr TkStart = consumeRootName(P: RStart, End: REnd); |
140 | if (TkStart == REnd) |
141 | return makeState(NewState: PS_InRootName, Start: TkStart + 1, End: RStart + 1); |
142 | TkStart = consumeName(P: RStart, End: REnd); |
143 | return makeState(NewState: PS_InFilenames, Start: TkStart + 1, End: RStart + 1); |
144 | } |
145 | } |
146 | case PS_InTrailingSep: |
147 | return makeState(NewState: PS_InFilenames, Start: consumeName(P: RStart, End: REnd) + 1, End: RStart + 1); |
148 | case PS_InFilenames: { |
149 | PosPtr SepEnd = consumeAllSeparators(P: RStart, End: REnd); |
150 | if (SepEnd == REnd) |
151 | return makeState(NewState: PS_InRootDir, Start: Path.data(), End: RStart + 1); |
152 | PosPtr TkStart = consumeRootName(P: SepEnd ? SepEnd : RStart, End: REnd); |
153 | if (TkStart == REnd) { |
154 | if (SepEnd) |
155 | return makeState(NewState: PS_InRootDir, Start: SepEnd + 1, End: RStart + 1); |
156 | return makeState(NewState: PS_InRootName, Start: TkStart + 1, End: RStart + 1); |
157 | } |
158 | TkStart = consumeName(P: SepEnd, End: REnd); |
159 | return makeState(NewState: PS_InFilenames, Start: TkStart + 1, End: SepEnd + 1); |
160 | } |
161 | case PS_InRootDir: |
162 | return makeState(NewState: PS_InRootName, Start: Path.data(), End: RStart + 1); |
163 | case PS_InRootName: |
164 | case PS_BeforeBegin: |
165 | __libcpp_unreachable(); |
166 | } |
167 | } |
168 | |
169 | /// \brief Return a view with the "preferred representation" of the current |
170 | /// element. For example trailing separators are represented as a '.' |
171 | string_view_t operator*() const noexcept { |
172 | switch (State_) { |
173 | case PS_BeforeBegin: |
174 | case PS_AtEnd: |
175 | return PATHSTR("" ); |
176 | case PS_InRootDir: |
177 | if (RawEntry[0] == '\\') |
178 | return PATHSTR("\\" ); |
179 | else |
180 | return PATHSTR("/" ); |
181 | case PS_InTrailingSep: |
182 | return PATHSTR("" ); |
183 | case PS_InRootName: |
184 | case PS_InFilenames: |
185 | return RawEntry; |
186 | } |
187 | __libcpp_unreachable(); |
188 | } |
189 | |
190 | explicit operator bool() const noexcept { return State_ != PS_BeforeBegin && State_ != PS_AtEnd; } |
191 | |
192 | PathParser& operator++() noexcept { |
193 | increment(); |
194 | return *this; |
195 | } |
196 | |
197 | PathParser& operator--() noexcept { |
198 | decrement(); |
199 | return *this; |
200 | } |
201 | |
202 | bool atEnd() const noexcept { return State_ == PS_AtEnd; } |
203 | |
204 | bool inRootDir() const noexcept { return State_ == PS_InRootDir; } |
205 | |
206 | bool inRootName() const noexcept { return State_ == PS_InRootName; } |
207 | |
208 | bool inRootPath() const noexcept { return inRootName() || inRootDir(); } |
209 | |
210 | private: |
211 | void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept { |
212 | State_ = NewState; |
213 | RawEntry = string_view_t(Start, End - Start); |
214 | } |
215 | void makeState(ParserState NewState) noexcept { |
216 | State_ = NewState; |
217 | RawEntry = {}; |
218 | } |
219 | |
220 | PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); } |
221 | |
222 | PosPtr getBeforeFront() const noexcept { return Path.data() - 1; } |
223 | |
224 | /// \brief Return a pointer to the first character after the currently |
225 | /// lexed element. |
226 | PosPtr getNextTokenStartPos() const noexcept { |
227 | switch (State_) { |
228 | case PS_BeforeBegin: |
229 | return Path.data(); |
230 | case PS_InRootName: |
231 | case PS_InRootDir: |
232 | case PS_InFilenames: |
233 | return &RawEntry.back() + 1; |
234 | case PS_InTrailingSep: |
235 | case PS_AtEnd: |
236 | return getAfterBack(); |
237 | } |
238 | __libcpp_unreachable(); |
239 | } |
240 | |
241 | /// \brief Return a pointer to the first character in the currently lexed |
242 | /// element. |
243 | PosPtr getCurrentTokenStartPos() const noexcept { |
244 | switch (State_) { |
245 | case PS_BeforeBegin: |
246 | case PS_InRootName: |
247 | return &Path.front(); |
248 | case PS_InRootDir: |
249 | case PS_InFilenames: |
250 | case PS_InTrailingSep: |
251 | return &RawEntry.front(); |
252 | case PS_AtEnd: |
253 | return &Path.back() + 1; |
254 | } |
255 | __libcpp_unreachable(); |
256 | } |
257 | |
258 | // Consume all consecutive separators. |
259 | PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept { |
260 | if (P == nullptr || P == End || !isSeparator(C: *P)) |
261 | return nullptr; |
262 | const int Inc = P < End ? 1 : -1; |
263 | P += Inc; |
264 | while (P != End && isSeparator(C: *P)) |
265 | P += Inc; |
266 | return P; |
267 | } |
268 | |
269 | // Consume exactly N separators, or return nullptr. |
270 | PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept { |
271 | PosPtr Ret = consumeAllSeparators(P, End); |
272 | if (Ret == nullptr) |
273 | return nullptr; |
274 | if (P < End) { |
275 | if (Ret == P + N) |
276 | return Ret; |
277 | } else { |
278 | if (Ret == P - N) |
279 | return Ret; |
280 | } |
281 | return nullptr; |
282 | } |
283 | |
284 | PosPtr consumeName(PosPtr P, PosPtr End) const noexcept { |
285 | PosPtr Start = P; |
286 | if (P == nullptr || P == End || isSeparator(C: *P)) |
287 | return nullptr; |
288 | const int Inc = P < End ? 1 : -1; |
289 | P += Inc; |
290 | while (P != End && !isSeparator(C: *P)) |
291 | P += Inc; |
292 | if (P == End && Inc < 0) { |
293 | // Iterating backwards and consumed all the rest of the input. |
294 | // Check if the start of the string would have been considered |
295 | // a root name. |
296 | PosPtr RootEnd = consumeRootName(P: End + 1, End: Start); |
297 | if (RootEnd) |
298 | return RootEnd - 1; |
299 | } |
300 | return P; |
301 | } |
302 | |
303 | PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept { |
304 | if (P == End) |
305 | return nullptr; |
306 | if (P < End) { |
307 | if (P + 1 == End || !isDriveLetter(C: P[0]) || P[1] != ':') |
308 | return nullptr; |
309 | return P + 2; |
310 | } else { |
311 | if (P - 1 == End || !isDriveLetter(C: P[-1]) || P[0] != ':') |
312 | return nullptr; |
313 | return P - 2; |
314 | } |
315 | } |
316 | |
317 | PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept { |
318 | if (P == End) |
319 | return nullptr; |
320 | if (P < End) |
321 | return consumeName(P: consumeNSeparators(P, End, N: 2), End); |
322 | else |
323 | return consumeNSeparators(P: consumeName(P, End), End, N: 2); |
324 | } |
325 | |
326 | PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept { |
327 | #if defined(_LIBCPP_WIN32API) |
328 | if (PosPtr Ret = consumeDriveLetter(P, End)) |
329 | return Ret; |
330 | if (PosPtr Ret = consumeNetworkRoot(P, End)) |
331 | return Ret; |
332 | #endif |
333 | return nullptr; |
334 | } |
335 | }; |
336 | |
337 | inline string_view_pair separate_filename(string_view_t const& s) { |
338 | if (s == PATHSTR("." ) || s == PATHSTR(".." ) || s.empty()) |
339 | return string_view_pair{s, PATHSTR("" )}; |
340 | auto pos = s.find_last_of(c: '.'); |
341 | if (pos == string_view_t::npos || pos == 0) |
342 | return string_view_pair{s, string_view_t{}}; |
343 | return string_view_pair{s.substr(pos: 0, n: pos), s.substr(pos: pos)}; |
344 | } |
345 | |
346 | inline string_view_t createView(PosPtr S, PosPtr E) noexcept { return {S, static_cast<size_t>(E - S) + 1}; } |
347 | |
348 | } // namespace parser |
349 | |
350 | _LIBCPP_END_NAMESPACE_FILESYSTEM |
351 | |
352 | #endif // PATH_PARSER_H |
353 | |