| 1 | //===----------------------------------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include <__config> |
| 10 | #include <filesystem> |
| 11 | #include <vector> |
| 12 | |
| 13 | #include "error.h" |
| 14 | #include "path_parser.h" |
| 15 | |
| 16 | _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM |
| 17 | |
| 18 | using detail::ErrorHandler; |
| 19 | using parser::createView; |
| 20 | using parser::PathParser; |
| 21 | using parser::string_view_t; |
| 22 | |
| 23 | /////////////////////////////////////////////////////////////////////////////// |
| 24 | // path definitions |
| 25 | /////////////////////////////////////////////////////////////////////////////// |
| 26 | |
| 27 | _LIBCPP_DIAGNOSTIC_PUSH |
| 28 | _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wdeprecated" ) |
| 29 | constexpr path::value_type path::preferred_separator; |
| 30 | _LIBCPP_DIAGNOSTIC_POP |
| 31 | |
| 32 | path& path::replace_extension(path const& replacement) { |
| 33 | path p = extension(); |
| 34 | if (not p.empty()) { |
| 35 | __pn_.erase(pos: __pn_.size() - p.native().size()); |
| 36 | } |
| 37 | if (!replacement.empty()) { |
| 38 | if (replacement.native()[0] != '.') { |
| 39 | __pn_ += PATHSTR("." ); |
| 40 | } |
| 41 | __pn_.append(str: replacement.__pn_); |
| 42 | } |
| 43 | return *this; |
| 44 | } |
| 45 | |
| 46 | /////////////////////////////////////////////////////////////////////////////// |
| 47 | // path.decompose |
| 48 | |
| 49 | string_view_t path::__root_name() const { |
| 50 | auto PP = PathParser::CreateBegin(P: __pn_); |
| 51 | if (PP.State_ == PathParser::PS_InRootName) |
| 52 | return *PP; |
| 53 | return {}; |
| 54 | } |
| 55 | |
| 56 | string_view_t path::__root_directory() const { |
| 57 | auto PP = PathParser::CreateBegin(P: __pn_); |
| 58 | if (PP.State_ == PathParser::PS_InRootName) |
| 59 | ++PP; |
| 60 | if (PP.State_ == PathParser::PS_InRootDir) |
| 61 | return *PP; |
| 62 | return {}; |
| 63 | } |
| 64 | |
| 65 | string_view_t path::__root_path_raw() const { |
| 66 | auto PP = PathParser::CreateBegin(P: __pn_); |
| 67 | if (PP.State_ == PathParser::PS_InRootName) { |
| 68 | auto NextCh = PP.peek(); |
| 69 | if (NextCh && isSeparator(C: *NextCh)) { |
| 70 | ++PP; |
| 71 | return createView(S: __pn_.data(), E: &PP.RawEntry.back()); |
| 72 | } |
| 73 | return PP.RawEntry; |
| 74 | } |
| 75 | if (PP.State_ == PathParser::PS_InRootDir) |
| 76 | return *PP; |
| 77 | return {}; |
| 78 | } |
| 79 | |
| 80 | static bool ConsumeRootName(PathParser* PP) { |
| 81 | static_assert(PathParser::PS_BeforeBegin == 1 && PathParser::PS_InRootName == 2, "Values for enums are incorrect" ); |
| 82 | while (PP->State_ <= PathParser::PS_InRootName) |
| 83 | ++(*PP); |
| 84 | return PP->State_ == PathParser::PS_AtEnd; |
| 85 | } |
| 86 | |
| 87 | static bool ConsumeRootDir(PathParser* PP) { |
| 88 | static_assert(PathParser::PS_BeforeBegin == 1 && PathParser::PS_InRootName == 2 && PathParser::PS_InRootDir == 3, |
| 89 | "Values for enums are incorrect" ); |
| 90 | while (PP->State_ <= PathParser::PS_InRootDir) |
| 91 | ++(*PP); |
| 92 | return PP->State_ == PathParser::PS_AtEnd; |
| 93 | } |
| 94 | |
| 95 | string_view_t path::__relative_path() const { |
| 96 | auto PP = PathParser::CreateBegin(P: __pn_); |
| 97 | if (ConsumeRootDir(PP: &PP)) |
| 98 | return {}; |
| 99 | return createView(S: PP.RawEntry.data(), E: &__pn_.back()); |
| 100 | } |
| 101 | |
| 102 | string_view_t path::__parent_path() const { |
| 103 | if (empty()) |
| 104 | return {}; |
| 105 | // Determine if we have a root path but not a relative path. In that case |
| 106 | // return *this. |
| 107 | { |
| 108 | auto PP = PathParser::CreateBegin(P: __pn_); |
| 109 | if (ConsumeRootDir(PP: &PP)) |
| 110 | return __pn_; |
| 111 | } |
| 112 | // Otherwise remove a single element from the end of the path, and return |
| 113 | // a string representing that path |
| 114 | { |
| 115 | auto PP = PathParser::CreateEnd(P: __pn_); |
| 116 | --PP; |
| 117 | if (PP.RawEntry.data() == __pn_.data()) |
| 118 | return {}; |
| 119 | --PP; |
| 120 | return createView(S: __pn_.data(), E: &PP.RawEntry.back()); |
| 121 | } |
| 122 | } |
| 123 | |
| 124 | string_view_t path::__filename() const { |
| 125 | if (empty()) |
| 126 | return {}; |
| 127 | { |
| 128 | PathParser PP = PathParser::CreateBegin(P: __pn_); |
| 129 | if (ConsumeRootDir(PP: &PP)) |
| 130 | return {}; |
| 131 | } |
| 132 | return *(--PathParser::CreateEnd(P: __pn_)); |
| 133 | } |
| 134 | |
| 135 | string_view_t path::__stem() const { return parser::separate_filename(s: __filename()).first; } |
| 136 | |
| 137 | string_view_t path::__extension() const { return parser::separate_filename(s: __filename()).second; } |
| 138 | |
| 139 | //////////////////////////////////////////////////////////////////////////// |
| 140 | // path.gen |
| 141 | |
| 142 | enum PathPartKind : unsigned char { PK_None, PK_RootSep, PK_Filename, PK_Dot, PK_DotDot, PK_TrailingSep }; |
| 143 | |
| 144 | static PathPartKind ClassifyPathPart(string_view_t Part) { |
| 145 | if (Part.empty()) |
| 146 | return PK_TrailingSep; |
| 147 | if (Part == PATHSTR("." )) |
| 148 | return PK_Dot; |
| 149 | if (Part == PATHSTR(".." )) |
| 150 | return PK_DotDot; |
| 151 | if (Part == PATHSTR("/" )) |
| 152 | return PK_RootSep; |
| 153 | #if defined(_LIBCPP_WIN32API) |
| 154 | if (Part == PATHSTR("\\" )) |
| 155 | return PK_RootSep; |
| 156 | #endif |
| 157 | return PK_Filename; |
| 158 | } |
| 159 | |
| 160 | path path::lexically_normal() const { |
| 161 | if (__pn_.empty()) |
| 162 | return *this; |
| 163 | |
| 164 | using PartKindPair = pair<string_view_t, PathPartKind>; |
| 165 | vector<PartKindPair> Parts; |
| 166 | // Guess as to how many elements the path has to avoid reallocating. |
| 167 | Parts.reserve(n: 32); |
| 168 | |
| 169 | // Track the total size of the parts as we collect them. This allows the |
| 170 | // resulting path to reserve the correct amount of memory. |
| 171 | size_t NewPathSize = 0; |
| 172 | auto AddPart = [&](PathPartKind K, string_view_t P) { |
| 173 | NewPathSize += P.size(); |
| 174 | Parts.emplace_back(args&: P, args&: K); |
| 175 | }; |
| 176 | auto LastPartKind = [&]() { |
| 177 | if (Parts.empty()) |
| 178 | return PK_None; |
| 179 | return Parts.back().second; |
| 180 | }; |
| 181 | |
| 182 | bool MaybeNeedTrailingSep = false; |
| 183 | // Build a stack containing the remaining elements of the path, popping off |
| 184 | // elements which occur before a '..' entry. |
| 185 | for (auto PP = PathParser::CreateBegin(P: __pn_); PP; ++PP) { |
| 186 | auto Part = *PP; |
| 187 | PathPartKind Kind = ClassifyPathPart(Part); |
| 188 | switch (Kind) { |
| 189 | case PK_Filename: |
| 190 | case PK_RootSep: { |
| 191 | // Add all non-dot and non-dot-dot elements to the stack of elements. |
| 192 | AddPart(Kind, Part); |
| 193 | MaybeNeedTrailingSep = false; |
| 194 | break; |
| 195 | } |
| 196 | case PK_DotDot: { |
| 197 | // Only push a ".." element if there are no elements preceding the "..", |
| 198 | // or if the preceding element is itself "..". |
| 199 | auto LastKind = LastPartKind(); |
| 200 | if (LastKind == PK_Filename) { |
| 201 | NewPathSize -= Parts.back().first.size(); |
| 202 | Parts.pop_back(); |
| 203 | } else if (LastKind != PK_RootSep) |
| 204 | AddPart(PK_DotDot, PATHSTR(".." )); |
| 205 | MaybeNeedTrailingSep = LastKind == PK_Filename; |
| 206 | break; |
| 207 | } |
| 208 | case PK_Dot: |
| 209 | case PK_TrailingSep: { |
| 210 | MaybeNeedTrailingSep = true; |
| 211 | break; |
| 212 | } |
| 213 | case PK_None: |
| 214 | __libcpp_unreachable(); |
| 215 | } |
| 216 | } |
| 217 | // [fs.path.generic]p6.8: If the path is empty, add a dot. |
| 218 | if (Parts.empty()) |
| 219 | return PATHSTR("." ); |
| 220 | |
| 221 | // [fs.path.generic]p6.7: If the last filename is dot-dot, remove any |
| 222 | // trailing directory-separator. |
| 223 | bool NeedTrailingSep = MaybeNeedTrailingSep && LastPartKind() == PK_Filename; |
| 224 | |
| 225 | path Result; |
| 226 | Result.__pn_.reserve(requested_capacity: Parts.size() + NewPathSize + NeedTrailingSep); |
| 227 | for (auto& PK : Parts) |
| 228 | Result /= PK.first; |
| 229 | |
| 230 | if (NeedTrailingSep) |
| 231 | Result /= PATHSTR("" ); |
| 232 | |
| 233 | Result.make_preferred(); |
| 234 | return Result; |
| 235 | } |
| 236 | |
| 237 | static int DetermineLexicalElementCount(PathParser PP) { |
| 238 | int Count = 0; |
| 239 | for (; PP; ++PP) { |
| 240 | auto Elem = *PP; |
| 241 | if (Elem == PATHSTR(".." )) |
| 242 | --Count; |
| 243 | else if (Elem != PATHSTR("." ) && Elem != PATHSTR("" )) |
| 244 | ++Count; |
| 245 | } |
| 246 | return Count; |
| 247 | } |
| 248 | |
| 249 | path path::lexically_relative(const path& base) const { |
| 250 | { // perform root-name/root-directory mismatch checks |
| 251 | auto PP = PathParser::CreateBegin(P: __pn_); |
| 252 | auto PPBase = PathParser::CreateBegin(P: base.__pn_); |
| 253 | auto CheckIterMismatchAtBase = [&]() { |
| 254 | return PP.State_ != PPBase.State_ && (PP.inRootPath() || PPBase.inRootPath()); |
| 255 | }; |
| 256 | if (PP.inRootName() && PPBase.inRootName()) { |
| 257 | if (*PP != *PPBase) |
| 258 | return {}; |
| 259 | } else if (CheckIterMismatchAtBase()) |
| 260 | return {}; |
| 261 | |
| 262 | if (PP.inRootPath()) |
| 263 | ++PP; |
| 264 | if (PPBase.inRootPath()) |
| 265 | ++PPBase; |
| 266 | if (CheckIterMismatchAtBase()) |
| 267 | return {}; |
| 268 | } |
| 269 | |
| 270 | // Find the first mismatching element |
| 271 | auto PP = PathParser::CreateBegin(P: __pn_); |
| 272 | auto PPBase = PathParser::CreateBegin(P: base.__pn_); |
| 273 | while (PP && PPBase && PP.State_ == PPBase.State_ && (*PP == *PPBase || PP.inRootDir())) { |
| 274 | ++PP; |
| 275 | ++PPBase; |
| 276 | } |
| 277 | |
| 278 | // If there is no mismatch, return ".". |
| 279 | if (!PP && !PPBase) |
| 280 | return "." ; |
| 281 | |
| 282 | // Otherwise, determine the number of elements, 'n', which are not dot or |
| 283 | // dot-dot minus the number of dot-dot elements. |
| 284 | int ElemCount = DetermineLexicalElementCount(PP: PPBase); |
| 285 | if (ElemCount < 0) |
| 286 | return {}; |
| 287 | |
| 288 | // if n == 0 and (a == end() || a->empty()), returns path("."); otherwise |
| 289 | if (ElemCount == 0 && (PP.atEnd() || *PP == PATHSTR("" ))) |
| 290 | return PATHSTR("." ); |
| 291 | |
| 292 | // return a path constructed with 'n' dot-dot elements, followed by the |
| 293 | // elements of '*this' after the mismatch. |
| 294 | path Result; |
| 295 | // FIXME: Reserve enough room in Result that it won't have to re-allocate. |
| 296 | while (ElemCount--) |
| 297 | Result /= PATHSTR(".." ); |
| 298 | for (; PP; ++PP) |
| 299 | Result /= *PP; |
| 300 | return Result; |
| 301 | } |
| 302 | |
| 303 | //////////////////////////////////////////////////////////////////////////// |
| 304 | // path.comparisons |
| 305 | static int CompareRootName(PathParser* LHS, PathParser* RHS) { |
| 306 | if (!LHS->inRootName() && !RHS->inRootName()) |
| 307 | return 0; |
| 308 | |
| 309 | auto GetRootName = [](PathParser* Parser) -> string_view_t { return Parser->inRootName() ? **Parser : PATHSTR("" ); }; |
| 310 | int res = GetRootName(LHS).compare(sv: GetRootName(RHS)); |
| 311 | ConsumeRootName(PP: LHS); |
| 312 | ConsumeRootName(PP: RHS); |
| 313 | return res; |
| 314 | } |
| 315 | |
| 316 | static int CompareRootDir(PathParser* LHS, PathParser* RHS) { |
| 317 | if (!LHS->inRootDir() && RHS->inRootDir()) |
| 318 | return -1; |
| 319 | else if (LHS->inRootDir() && !RHS->inRootDir()) |
| 320 | return 1; |
| 321 | else { |
| 322 | ConsumeRootDir(PP: LHS); |
| 323 | ConsumeRootDir(PP: RHS); |
| 324 | return 0; |
| 325 | } |
| 326 | } |
| 327 | |
| 328 | static int CompareRelative(PathParser* LHSPtr, PathParser* RHSPtr) { |
| 329 | auto& LHS = *LHSPtr; |
| 330 | auto& RHS = *RHSPtr; |
| 331 | |
| 332 | int res; |
| 333 | while (LHS && RHS) { |
| 334 | if ((res = (*LHS).compare(sv: *RHS)) != 0) |
| 335 | return res; |
| 336 | ++LHS; |
| 337 | ++RHS; |
| 338 | } |
| 339 | return 0; |
| 340 | } |
| 341 | |
| 342 | static int CompareEndState(PathParser* LHS, PathParser* RHS) { |
| 343 | if (LHS->atEnd() && !RHS->atEnd()) |
| 344 | return -1; |
| 345 | else if (!LHS->atEnd() && RHS->atEnd()) |
| 346 | return 1; |
| 347 | return 0; |
| 348 | } |
| 349 | |
| 350 | int path::__compare(string_view_t __s) const { |
| 351 | auto LHS = PathParser::CreateBegin(P: __pn_); |
| 352 | auto RHS = PathParser::CreateBegin(P: __s); |
| 353 | int res; |
| 354 | |
| 355 | if ((res = CompareRootName(LHS: &LHS, RHS: &RHS)) != 0) |
| 356 | return res; |
| 357 | |
| 358 | if ((res = CompareRootDir(LHS: &LHS, RHS: &RHS)) != 0) |
| 359 | return res; |
| 360 | |
| 361 | if ((res = CompareRelative(LHSPtr: &LHS, RHSPtr: &RHS)) != 0) |
| 362 | return res; |
| 363 | |
| 364 | return CompareEndState(LHS: &LHS, RHS: &RHS); |
| 365 | } |
| 366 | |
| 367 | //////////////////////////////////////////////////////////////////////////// |
| 368 | // path.nonmembers |
| 369 | size_t hash_value(const path& __p) noexcept { |
| 370 | auto PP = PathParser::CreateBegin(P: __p.native()); |
| 371 | size_t hash_value = 0; |
| 372 | hash<string_view_t> hasher; |
| 373 | while (PP) { |
| 374 | string_view_t Part = PP.inRootDir() ? PATHSTR("/" ) : *PP; |
| 375 | hash_value = __hash_combine(lhs: hash_value, rhs: hasher(Part)); |
| 376 | ++PP; |
| 377 | } |
| 378 | return hash_value; |
| 379 | } |
| 380 | |
| 381 | //////////////////////////////////////////////////////////////////////////// |
| 382 | // path.itr |
| 383 | path::iterator path::begin() const { |
| 384 | auto PP = PathParser::CreateBegin(P: __pn_); |
| 385 | iterator it; |
| 386 | it.__path_ptr_ = this; |
| 387 | it.__state_ = static_cast<path::iterator::_ParserState>(PP.State_); |
| 388 | it.__entry_ = PP.RawEntry; |
| 389 | it.__stashed_elem_.__assign_view(s: *PP); |
| 390 | return it; |
| 391 | } |
| 392 | |
| 393 | path::iterator path::end() const { |
| 394 | iterator it{}; |
| 395 | it.__state_ = path::iterator::_AtEnd; |
| 396 | it.__path_ptr_ = this; |
| 397 | return it; |
| 398 | } |
| 399 | |
| 400 | path::iterator& path::iterator::__increment() { |
| 401 | PathParser PP(__path_ptr_->native(), __entry_, __state_); |
| 402 | ++PP; |
| 403 | __state_ = static_cast<_ParserState>(PP.State_); |
| 404 | __entry_ = PP.RawEntry; |
| 405 | __stashed_elem_.__assign_view(s: *PP); |
| 406 | return *this; |
| 407 | } |
| 408 | |
| 409 | path::iterator& path::iterator::__decrement() { |
| 410 | PathParser PP(__path_ptr_->native(), __entry_, __state_); |
| 411 | --PP; |
| 412 | __state_ = static_cast<_ParserState>(PP.State_); |
| 413 | __entry_ = PP.RawEntry; |
| 414 | __stashed_elem_.__assign_view(s: *PP); |
| 415 | return *this; |
| 416 | } |
| 417 | |
| 418 | #if defined(_LIBCPP_WIN32API) |
| 419 | //////////////////////////////////////////////////////////////////////////// |
| 420 | // Windows path conversions |
| 421 | size_t __wide_to_char(const wstring& str, char* out, size_t outlen) { |
| 422 | if (str.empty()) |
| 423 | return 0; |
| 424 | ErrorHandler<size_t> err("__wide_to_char" , nullptr); |
| 425 | UINT codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP; |
| 426 | BOOL used_default = FALSE; |
| 427 | int ret = WideCharToMultiByte(codepage, 0, str.data(), str.size(), out, outlen, nullptr, &used_default); |
| 428 | if (ret <= 0 || used_default) |
| 429 | return err.report(errc::illegal_byte_sequence); |
| 430 | return ret; |
| 431 | } |
| 432 | |
| 433 | size_t __char_to_wide(const string& str, wchar_t* out, size_t outlen) { |
| 434 | if (str.empty()) |
| 435 | return 0; |
| 436 | ErrorHandler<size_t> err("__char_to_wide" , nullptr); |
| 437 | UINT codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP; |
| 438 | int ret = MultiByteToWideChar(codepage, MB_ERR_INVALID_CHARS, str.data(), str.size(), out, outlen); |
| 439 | if (ret <= 0) |
| 440 | return err.report(errc::illegal_byte_sequence); |
| 441 | return ret; |
| 442 | } |
| 443 | #endif |
| 444 | |
| 445 | _LIBCPP_END_NAMESPACE_FILESYSTEM |
| 446 | |