1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html
10
11// TODO TZDB look at optimizations
12//
13// The current algorithm is correct but not efficient. For example, in a named
14// rule based continuation finding the next rule does quite a bit of work,
15// returns the next rule and "forgets" its state. This could be better.
16//
17// It would be possible to cache lookups. If a time for a zone is calculated its
18// sys_info could be kept and the next lookup could test whether the time is in
19// a "known" sys_info. The wording in the Standard hints at this slowness by
20// "suggesting" this could be implemented on the user's side.
21
22// TODO TZDB look at removing quirks
23//
24// The code has some special rules to adjust the timing at the continuation
25// switches. This works correctly, but some of the places feel odd. It would be
26// good to investigate this further and see whether all quirks are needed or
27// that there are better fixes.
28//
29// These quirks often use a 12h interval; this is the scan interval of zdump,
30// which implies there are no sys_info objects with a duration of less than 12h.
31
32// Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120502
33
34#include <__config>
35
36// TODO(LLVM 23): When upgrading to GCC 16 this can be removed
37#ifdef _LIBCPP_COMPILER_GCC
38# pragma GCC optimize("-O0")
39#endif
40
41#include <algorithm>
42#include <cctype>
43#include <chrono>
44#include <expected>
45#include <map>
46#include <numeric>
47#include <ranges>
48
49#include "include/tzdb/time_zone_private.h"
50#include "include/tzdb/tzdb_list_private.h"
51
52// TODO TZDB remove debug printing
53#ifdef PRINT
54# include <print>
55#endif
56
57_LIBCPP_BEGIN_NAMESPACE_STD
58
59#ifdef PRINT
// Debug-only formatter (only compiled when PRINT is defined). Writes a
// chrono::sys_info as "[begin, end) offset save abbrev".
template <>
struct formatter<chrono::sys_info, char> {
  // No format specification is consumed; the start of the parse range is
  // returned unchanged.
  template <class _ParseCtx>
  constexpr typename _ParseCtx::iterator parse(_ParseCtx& __parse_ctx) {
    return __parse_ctx.begin();
  }

  // Writes the five fields of __info to the output iterator of __format_ctx
  // and returns the advanced iterator.
  template <class _FormatCtx>
  typename _FormatCtx::iterator format(const chrono::sys_info& __info, _FormatCtx& __format_ctx) const {
    return std::format_to(
        __format_ctx.out(),
        "[{}, {}) {:%Q%q} {:%Q%q} {}",
        __info.begin,
        __info.end,
        __info.offset,
        __info.save,
        __info.abbrev);
  }
};
73#endif
74
75namespace chrono {
76
77//===----------------------------------------------------------------------===//
78// Details
79//===----------------------------------------------------------------------===//
80
// A sys_info together with the flag telling whether it may be merged with the
// result of the next search step.
struct __sys_info {
  sys_info __info;
  // Can the returned sys_info object be merged with the result found for the
  // next continuation?
  bool __can_merge;
};

// Return type for helper function to get a sys_info.
// - The expected result returns the "best" sys_info object. This object can be
//   before the requested time. Sometimes sys_info objects from different
//   continuations share their offset, save, and abbrev and these objects are
//   merged to one sys_info object. The __can_merge flag determines whether the
//   current result can be merged with the next result.
// - The unexpected result means no sys_info object was found and the time is
//   the time to be used for the next search iteration.
using __sys_info_result = expected<__sys_info, sys_seconds>;
95
96template <ranges::forward_range _Range,
97 class _Type,
98 class _Proj = identity,
99 indirect_strict_weak_order<const _Type*, projected<ranges::iterator_t<_Range>, _Proj>> _Comp = ranges::less>
100[[nodiscard]] static ranges::borrowed_iterator_t<_Range>
101__binary_find(_Range&& __r, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) {
102 auto __end = ranges::end(__r);
103 auto __ret = ranges::lower_bound(ranges::begin(__r), __end, __value, __comp, __proj);
104 if (__ret == __end)
105 return __end;
106
107 // When the value does not match the predicate it's equal and a valid result
108 // was found.
109 return !std::invoke(__comp, __value, std::invoke(__proj, *__ret)) ? __ret : __end;
110}
111
112// Format based on https://data.iana.org/time-zones/tz-how-to.html
113//
114// 1 a time zone abbreviation that is a string of three or more characters that
115// are either ASCII alphanumerics, "+", or "-"
116// 2 the string "%z", in which case the "%z" will be replaced by a numeric time
117// zone abbreviation
118// 3 a pair of time zone abbreviations separated by a slash ('/'), in which
119// case the first string is the abbreviation for the standard time name and
120// the second string is the abbreviation for the daylight saving time name
121// 4 a string containing "%s", in which case the "%s" will be replaced by the
122// text in the appropriate Rule's LETTER column, and the resulting string
123// should be a time zone abbreviation
124//
125// Rule 1 is not strictly validated since America/Barbados uses a two letter
126// abbreviation AT.
// Expands the FORMAT field of __continuation to a time zone abbreviation.
//
// - __letters replaces every "%s" in the FORMAT field.
// - __save selects the part after the '/' when it is not zero and is added to
//   the continuation's STDOFF when expanding "%z".
//
// Throws a runtime_error when the FORMAT field is malformed or the resulting
// abbreviation is empty.
[[nodiscard]] static string
__format(const __tz::__continuation& __continuation, const string& __letters, seconds __save) {
  bool __shift = false; // true when the previous character was '%'
  string __result;
  for (char __c : __continuation.__format) {
    if (__shift) {
      switch (__c) {
      case 's':
        std::ranges::copy(__letters, std::back_inserter(__result));
        break;

      case 'z': {
        if (__continuation.__format.size() != 2)
          std::__throw_runtime_error(
              std::format("corrupt tzdb FORMAT field: %z should be the entire contents, instead contains '{}'",
                          __continuation.__format)
                  .c_str());
        chrono::hh_mm_ss __offset{__continuation.__stdoff + __save};
        if (__offset.is_negative()) {
          // Write the sign by hand and continue with the absolute value.
          __result += '-';
          __offset = chrono::hh_mm_ss{-(__continuation.__stdoff + __save)};
        } else
          __result += '+';

        // The minutes are only written when they are not zero.
        if (__offset.minutes() != 0min)
          std::format_to(std::back_inserter(__result), "{:%H%M}", __offset);
        else
          std::format_to(std::back_inserter(__result), "{:%H}", __offset);
      } break;

      default:
        std::__throw_runtime_error(
            std::format("corrupt tzdb FORMAT field: invalid sequence '%{}' found, expected %s or %z", __c).c_str());
      }
      __shift = false;

    } else if (__c == '/') {
      // "STD/DST": with a non-zero SAVE the standard name collected so far is
      // dropped and the part after the slash is used, otherwise the part
      // before the slash is the result.
      if (__save != 0s)
        __result.clear();
      else
        break;

    } else if (__c == '%') {
      __shift = true;
    } else if (__c == '+' || __c == '-' || std::isalnum(static_cast<unsigned char>(__c))) {
      // The cast is required: passing a negative char value to isalnum is
      // undefined behaviour, which could happen with a corrupt database.
      __result.push_back(__c);
    } else {
      std::__throw_runtime_error(
          std::format(
              "corrupt tzdb FORMAT field: invalid character '{}' found, expected +, -, or an alphanumeric value", __c)
              .c_str());
    }
  }

  if (__shift)
    std::__throw_runtime_error("corrupt tzdb FORMAT field: input ended with the start of the escape sequence '%'");

  if (__result.empty())
    std::__throw_runtime_error("corrupt tzdb FORMAT field: result is empty");

  return __result;
}
189
190[[nodiscard]] static sys_seconds __to_sys_seconds(year_month_day __ymd, seconds __seconds) {
191 seconds __result = static_cast<sys_days>(__ymd).time_since_epoch() + __seconds;
192 return sys_seconds{__result};
193}
194
195[[nodiscard]] static seconds __at_to_sys_seconds(const __tz::__continuation& __continuation) {
196 switch (__continuation.__at.__clock) {
197 case __tz::__clock::__local:
198 return __continuation.__at.__time - __continuation.__stdoff -
199 std::visit(
200 visitor: [](const auto& __value) {
201 using _Tp = decay_t<decltype(__value)>;
202 if constexpr (same_as<_Tp, monostate>)
203 return chrono::seconds{0};
204 else if constexpr (same_as<_Tp, __tz::__save>)
205 return chrono::duration_cast<seconds>(__value.__time);
206 else if constexpr (same_as<_Tp, std::string>)
207 // For a named rule based continuation the SAVE depends on the RULE
208 // active at the end. This should be determined separately.
209 return chrono::seconds{0};
210 else
211 static_assert(false);
212
213 std::__libcpp_unreachable();
214 },
215 vs: __continuation.__rules);
216
217 case __tz::__clock::__universal:
218 return __continuation.__at.__time;
219
220 case __tz::__clock::__standard:
221 return __continuation.__at.__time - __continuation.__stdoff;
222 }
223 std::__libcpp_unreachable();
224}
225
226[[nodiscard]] static year_month_day __to_year_month_day(year __year, month __month, __tz::__on __on) {
227 return std::visit(
228 visitor: [&](const auto& __value) {
229 using _Tp = decay_t<decltype(__value)>;
230 if constexpr (same_as<_Tp, chrono::day>)
231 return year_month_day{__year, __month, __value};
232 else if constexpr (same_as<_Tp, weekday_last>)
233 return year_month_day{static_cast<sys_days>(year_month_weekday_last{__year, __month, __value})};
234 else if constexpr (same_as<_Tp, __tz::__constrained_weekday>)
235 return __value(__year, __month);
236 else
237 static_assert(false);
238
239 std::__libcpp_unreachable();
240 },
241 vs&: __on);
242}
243
244[[nodiscard]] static sys_seconds __until_to_sys_seconds(const __tz::__continuation& __continuation) {
245 // Does UNTIL contain the magic value for the last continuation?
246 if (__continuation.__year == chrono::year::min())
247 return sys_seconds::max();
248
249 year_month_day __ymd = chrono::__to_year_month_day(year: __continuation.__year, month: __continuation.__in, on: __continuation.__on);
250 return chrono::__to_sys_seconds(__ymd, seconds: chrono::__at_to_sys_seconds(__continuation));
251}
252
// Holds the UNTIL time for a continuation with a named rule.
//
// Unlike continuations with a fixed SAVE, named rules have a variable SAVE.
// This means when the UNTIL uses the local wall time the actual UNTIL value can
// only be determined when the SAVE is known. This class holds that abstraction.
258class __named_rule_until {
259public:
260 explicit __named_rule_until(const __tz::__continuation& __continuation)
261 : __until_{chrono::__until_to_sys_seconds(__continuation)},
262 __needs_adjustment_{
263 // The last continuation of a ZONE has no UNTIL which basically is
264 // until the end of _local_ time. This value is expressed by
265 // sys_seconds::max(). Subtracting the SAVE leaves large value.
266 // However SAVE can be negative, which would add a value to maximum
267 // leading to undefined behaviour. In practice this often results in
268 // an overflow to a very small value.
269 __until_ != sys_seconds::max() && __continuation.__at.__clock == __tz::__clock::__local} {}
270
271 // Gives the unadjusted until value, this is useful when the SAVE is not known
272 // at all.
273 sys_seconds __until() const noexcept { return __until_; }
274
275 bool __needs_adjustment() const noexcept { return __needs_adjustment_; }
276
277 // Returns the UNTIL adjusted for SAVE.
278 sys_seconds operator()(seconds __save) const noexcept { return __until_ - __needs_adjustment_ * __save; }
279
280private:
281 sys_seconds __until_;
282 bool __needs_adjustment_;
283};
284
285[[nodiscard]] static seconds __at_to_seconds(seconds __stdoff, const __tz::__rule& __rule) {
286 switch (__rule.__at.__clock) {
287 case __tz::__clock::__local:
288 // Local time and standard time behave the same. This is not
289 // correct. Local time needs to adjust for the current saved time.
290 // To know the saved time the rules need to be known and sorted.
291 // This needs a time so to avoid the chicken and egg adjust the
292 // saving of the local time later.
293 return __rule.__at.__time - __stdoff;
294
295 case __tz::__clock::__universal:
296 return __rule.__at.__time;
297
298 case __tz::__clock::__standard:
299 return __rule.__at.__time - __stdoff;
300 }
301 std::__libcpp_unreachable();
302}
303
304[[nodiscard]] static sys_seconds __from_to_sys_seconds(seconds __stdoff, const __tz::__rule& __rule, year __year) {
305 year_month_day __ymd = chrono::__to_year_month_day(__year, month: __rule.__in, on: __rule.__on);
306
307 seconds __at = chrono::__at_to_seconds(__stdoff, __rule);
308 return chrono::__to_sys_seconds(__ymd, seconds: __at);
309}
310
311[[nodiscard]] static sys_seconds __from_to_sys_seconds(seconds __stdoff, const __tz::__rule& __rule) {
312 return chrono::__from_to_sys_seconds(__stdoff, __rule, year: __rule.__from);
313}
314
315[[nodiscard]] static const vector<__tz::__rule>&
316__get_rules(const __tz::__rules_storage_type& __rules_db, const string& __rule_name) {
317 auto __result = chrono::__binary_find(r: __rules_db, value: __rule_name, comp: {}, proj: [](const auto& __p) { return __p.first; });
318 if (__result == std::end(c: __rules_db))
319 std::__throw_runtime_error(("corrupt tzdb: rule '" + __rule_name + " 'does not exist").c_str());
320
321 return __result->second;
322}
323
324// Returns the letters field for a time before the first rule.
325//
326// Per https://data.iana.org/time-zones/tz-how-to.html
327// One wrinkle, not fully explained in zic.8.txt, is what happens when switching
328// to a named rule. To what values should the SAVE and LETTER data be
329// initialized?
330//
331// 1 If at least one transition has happened, use the SAVE and LETTER data from
332// the most recent.
333// 2 If switching to a named rule before any transition has happened, assume
334// standard time (SAVE zero), and use the LETTER data from the earliest
335// transition with a SAVE of zero.
336//
337// This function implements case 2.
338[[nodiscard]] static string __letters_before_first_rule(const vector<__tz::__rule>& __rules) {
339 auto __letters =
340 __rules //
341 | views::filter([](const __tz::__rule& __rule) { return __rule.__save.__time == 0s; }) //
342 | views::transform([](const __tz::__rule& __rule) { return __rule.__letters; }) //
343 | views::take(1);
344
345 if (__letters.empty())
346 std::__throw_runtime_error("corrupt tzdb: rule has zero entries");
347
348 return __letters.front();
349}
350
351// Determines the information based on the continuation and the rules.
352//
353// There are several special cases to take into account
354//
355// === Entries before the first rule becomes active ===
356// Asia/Hong_Kong
357// 9 - JST 1945 N 18 2 // (1)
358// 8 HK HK%sT // (2)
359// R HK 1946 o - Ap 21 0 1 S // (3)
//   There (1) is active until November 18th 1945 at 02:00, after this time
//   (2) becomes active. The first rule entry for HK (3) becomes active
//   from April 21st 1946 at 01:00. In the period between (2) is active.
//   This entry has an offset.
//   This entry has no save, letters, or dst flag. So in the period
//   after (1) and until (3) no rule entry is associated with the time.
366
367[[nodiscard]] static sys_info __get_sys_info_before_first_rule(
368 sys_seconds __begin,
369 sys_seconds __end,
370 const __tz::__continuation& __continuation,
371 const vector<__tz::__rule>& __rules) {
372 return sys_info{
373 .begin: __begin,
374 .end: __end,
375 .offset: __continuation.__stdoff,
376 .save: chrono::minutes(0),
377 .abbrev: chrono::__format(__continuation, letters: __letters_before_first_rule(__rules), save: 0s)};
378}
379
380// Returns the sys_info object for a time before the first rule.
381// When this first rule has a SAVE of 0s the sys_info for the time before the
382// first rule and for the first rule are identical and will be merged.
383[[nodiscard]] static sys_info __get_sys_info_before_first_rule(
384 sys_seconds __begin,
385 sys_seconds __rule_end, // The end used when SAVE != 0s
386 sys_seconds __next_end, // The end used when SAVE == 0s the times are merged
387 const __tz::__continuation& __continuation,
388 const vector<__tz::__rule>& __rules,
389 vector<__tz::__rule>::const_iterator __rule) {
390 if (__rule->__save.__time != 0s)
391 return __get_sys_info_before_first_rule(__begin, end: __rule_end, __continuation, __rules);
392
393 return sys_info{
394 .begin: __begin, .end: __next_end, .offset: __continuation.__stdoff, .save: 0min, .abbrev: chrono::__format(__continuation, letters: __rule->__letters, save: 0s)};
395}
396
397[[nodiscard]] static seconds __at_to_seconds(seconds __stdoff, seconds __save, const __tz::__rule& __rule) {
398 switch (__rule.__at.__clock) {
399 case __tz::__clock::__local:
400 return __rule.__at.__time - __stdoff - __save;
401
402 case __tz::__clock::__universal:
403 return __rule.__at.__time;
404
405 case __tz::__clock::__standard:
406 return __rule.__at.__time - __stdoff;
407 }
408 std::__libcpp_unreachable();
409}
410
411[[nodiscard]] static sys_seconds
412__rule_to_sys_seconds(seconds __stdoff, seconds __save, const __tz::__rule& __rule, year __year) {
413 year_month_day __ymd = chrono::__to_year_month_day(__year, month: __rule.__in, on: __rule.__on);
414
415 seconds __at = chrono::__at_to_seconds(__stdoff, __save, __rule);
416 return chrono::__to_sys_seconds(__ymd, seconds: __at);
417}
418
419// Returns the first rule after __time.
420// Note that a rule can be "active" in multiple years, this may result in an
421// infinite loop where the same rule is returned every time, use __current to
422// guard against that.
423//
424// When no next rule exists the returned time will be sys_seconds::max(). This
425// can happen in practice. For example,
426//
427// R So 1945 o - May 24 2 2 M
428// R So 1945 o - S 24 3 1 S
429// R So 1945 o - N 18 2s 0 -
430//
431// Has 3 rules that are all only active in 1945.
432[[nodiscard]] static pair<sys_seconds, vector<__tz::__rule>::const_iterator>
433__next_rule(sys_seconds __time,
434 seconds __stdoff,
435 seconds __save,
436 const vector<__tz::__rule>& __rules,
437 vector<__tz::__rule>::const_iterator __current) {
438 year __year = year_month_day{chrono::floor<days>(t: __time)}.year();
439
440 // Note it would probably be better to store the pairs in a vector and then
441 // use min() to get the smallest element
442 map<sys_seconds, vector<__tz::__rule>::const_iterator> __candidates;
443 // Note this evaluates all rules which is a waste of effort; when the entries
444 // are beyond the current year's "next year" (where "next year" is not always
445 // year + 1) the algorithm should end.
446 for (auto __it = __rules.begin(); __it != __rules.end(); ++__it) {
447 for (year __y = __it->__from; __y <= __it->__to; ++__y) {
448 // Adding the current entry for the current year may lead to infinite
449 // loops due to the SAVE adjustment. Skip these entries.
450 if (__y == __year && __it == __current)
451 continue;
452
453 sys_seconds __t = chrono::__rule_to_sys_seconds(__stdoff, __save, rule: *__it, year: __y);
454 if (__t <= __time)
455 continue;
456
457 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(!__candidates.contains(__t), "duplicated rule");
458 __candidates[__t] = __it;
459 break;
460 }
461 }
462
463 if (!__candidates.empty()) [[likely]] {
464 auto __it = __candidates.begin();
465
466 // When no rule is selected the time before the first rule and the first rule
467 // should not be merged.
468 if (__time == sys_seconds::min())
469 return *__it;
470
471 // There can be two constitutive rules that are the same. For example,
472 // Hong Kong
473 //
474 // R HK 1973 o - D 30 3:30 1 S (R1)
475 // R HK 1965 1976 - Ap Su>=16 3:30 1 S (R2)
476 //
477 // 1973-12-29 19:30:00 R1 becomes active.
478 // 1974-04-20 18:30:00 R2 becomes active.
479 // Both rules have a SAVE of 1 hour and LETTERS are S for both of them.
480 while (__it != __candidates.end()) {
481 if (__current->__save.__time != __it->second->__save.__time || __current->__letters != __it->second->__letters)
482 return *__it;
483
484 ++__it;
485 }
486 }
487
488 return {sys_seconds::max(), __rules.end()};
489}
490
491// Returns the first rule of a set of rules.
492// This is not always the first of the listed rules. For example
493// R Sa 2008 2009 - Mar Su>=8 0 0 -
494// R Sa 2007 2008 - O Su>=8 0 1 -
495// The transition in October 2007 happens before the transition in March 2008.
496[[nodiscard]] static vector<__tz::__rule>::const_iterator
497__first_rule(seconds __stdoff, const vector<__tz::__rule>& __rules) {
498 return chrono::__next_rule(time: sys_seconds::min(), __stdoff, save: 0s, __rules, current: __rules.end()).second;
499}
500
// Returns the sys_info for __time in a continuation that uses the named rules
// in __rules. The continuation starts at __continuation_begin.
[[nodiscard]] static __sys_info_result __get_sys_info_rule(
    sys_seconds __time,
    sys_seconds __continuation_begin,
    const __tz::__continuation& __continuation,
    const vector<__tz::__rule>& __rules) {
  auto __rule = chrono::__first_rule(__continuation.__stdoff, __rules);
  _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__rule != __rules.end(), "the set of rules has no first rule");

  // Creates the sys_info for the range [__from, __to) using the SAVE and
  // LETTERS of the rule __rule refers to at the moment of the call.
  const auto __info_for_rule = [&](sys_seconds __from, sys_seconds __to) {
    const seconds __rule_save = __rule->__save.__time;
    return sys_info{__from,
                    __to,
                    __continuation.__stdoff + __rule_save,
                    chrono::duration_cast<minutes>(__rule_save),
                    chrono::__format(__continuation, __rule->__letters, __rule_save)};
  };

  // Avoid selecting a time before the start of the continuation
  __time = std::max(__time, __continuation_begin);

  sys_seconds __rule_begin = chrono::__from_to_sys_seconds(__continuation.__stdoff, *__rule);

  // The time sought is very likely inside the current rule.
  // When the continuation's UNTIL uses the local clock there are edge cases
  // where this is not true.
  //
  // Start to walk the rules to find the proper one.
  //
  // For now we just walk all the rules TODO TZDB investigate whether a smarter
  // algorithm would work.
  auto __next = chrono::__next_rule(__rule_begin, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);

  // Ignore small steps, this happens with America/Punta_Arenas for the
  // transition
  // -4:42:46 - SMT 1927 S
  // -5 x -05/-04 1932 S
  // ...
  //
  // R x 1927 1931 - S 1 0 1 -
  // R x 1928 1932 - Ap 1 0 0 -
  //
  // America/Punta_Arenas  Thu Sep  1 04:42:45 1927 UT = Thu Sep  1 00:42:45 1927 -04 isdst=1 gmtoff=-14400
  // America/Punta_Arenas  Sun Apr  1 03:59:59 1928 UT = Sat Mar 31 23:59:59 1928 -04 isdst=1 gmtoff=-14400
  // America/Punta_Arenas  Sun Apr  1 04:00:00 1928 UT = Sat Mar 31 23:00:00 1928 -05 isdst=0 gmtoff=-18000
  //
  // Without this there will be a transition
  //   [1927-09-01 04:42:45, 1927-09-01 05:00:00) -05:00:00 0min -05

  // A first rule with a SAVE of zero is merged with the period before it, in
  // that case the merged period ends at the next rule.
  const sys_seconds __first_begin = __rule->__save.__time != 0s ? __rule_begin : __next.first;
  if (__time < __first_begin) {
    // The subtraction is only evaluated when __continuation_begin is not the
    // minimum value.
    const bool __is_long_period =
        __continuation_begin == sys_seconds::min() || __first_begin - __continuation_begin > 12h;
    if (__is_long_period)
      return __sys_info{
          chrono::__get_sys_info_before_first_rule(
              __continuation_begin, __rule_begin, __next.first, __continuation, __rules, __rule),
          false};

    // Europe/Berlin
    // 1 c CE%sT 1945 May 24 2          (C1)
    // 1 So CE%sT 1946                  (C2)
    //
    // R c 1944 1945 - Ap M>=1 2s 1 S   (R1)
    //
    // R So 1945 o - May 24 2 2 M       (R2)
    //
    // When C2 becomes active the time would be before the first rule R2,
    // giving a 1 hour sys_info.
    __named_rule_until __continuation_end{__continuation};
    const sys_seconds __until    = __continuation_end(__rule->__save.__time);
    const sys_seconds __info_end = std::min(__until, __next.first);
    return __sys_info{__info_for_rule(__continuation_begin, __info_end), __info_end == __until};
  }

  // The first rule has a SAVE of zero and __time is before the next rule; the
  // result starts at the begin of the continuation.
  if (__rule->__save.__time == 0s && __time < __next.first)
    return __sys_info{__info_for_rule(__continuation_begin, __next.first), false};

  if (__rule->__save.__time != 0s) {
    // another fix for America/Punta_Arenas when not at the start of the
    // sys_info object.
    const seconds __first_save = __rule->__save.__time;
    if (__continuation_begin >= __rule_begin - __first_save && __time < __next.first)
      return __sys_info{__info_for_rule(__continuation_begin, __next.first), false};
  }

  __named_rule_until __continuation_end{__continuation};
  while (__next.second != __rules.end()) {
#ifdef PRINT
    std::print(
        stderr,
        "Rule for {}: [{}, {}) off={} save={} duration={}\n",
        __time,
        __rule_begin,
        __next.first,
        __continuation.__stdoff,
        __rule->__save.__time,
        __next.first - __rule_begin);
#endif

    sys_seconds __end = __continuation_end(__rule->__save.__time);

    // The length of the sys_info the current rule would give.
    const seconds __duration =
        chrono::abs(std::min(__end, __next.first) - std::max(__continuation_begin, __rule_begin));

    if (__duration < 12h) {
      // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31
      // -4:16:48 - CMT 1920 May
      // -4 - -04 1930 D
      // -4 A -04/-03 1969 O 5
      // -3 A -03/-02 1999 O 3
      // -4 A -04/-03 2000 Mar 3
      // ...
      //
      // ...
      // R A 1989 1992 - O Su>=15 0 1 -
      // R A 1999 o - O Su>=1 0 1 -
      // R A 2000 o - Mar 3 0 0 -
      // R A 2007 o - D 30 0 1 -
      // ...

      // The 1999 switch uses the same rule, but with a different stdoff.
      //   R A 1999 o - O Su>=1 0 1 -
      //     stdoff -3 -> 1999-10-03 03:00:00
      //     stdoff -4 -> 1999-10-03 04:00:00
      // This generates an invalid entry and this is evaluated as a transition.
      // Looking at the zdump like output in libc++ this generates jumps in
      // the UTC time.
      //
      // Move on to the next rule, __rule_begin is intentionally not changed.
      __rule = __next.second;
      __next = chrono::__next_rule(__next.first, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);
      __end  = __continuation_end(__rule->__save.__time);
    }

    const bool __time_in_rule = __time >= __rule_begin && __time < __next.first;
    if (__time_in_rule || __next.first >= __end) {
      const sys_seconds __info_begin = std::max(__continuation_begin, __rule_begin);
      const sys_seconds __info_end   = std::min(__end, __next.first);

      // The result can only be merged with the next continuation when it
      // lasts until the end of this continuation.
      return __sys_info{__info_for_rule(__info_begin, __info_end), __info_end == __end};
    }

    __rule_begin = __next.first;
    __rule       = __next.second;
    __next       = chrono::__next_rule(__rule_begin, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);
  }

  // There is no next rule; the current rule lasts until the end of the
  // continuation.
  return __sys_info{
      __info_for_rule(std::max(__continuation_begin, __rule_begin), __continuation_end(__rule->__save.__time)), true};
}
671
// Returns the sys_info of a continuation with a fixed SAVE. The result covers
// the entire continuation, so __time is not used, and it can always be merged.
[[nodiscard]] static __sys_info_result __get_sys_info_basic(
    sys_seconds __time, sys_seconds __continuation_begin, const __tz::__continuation& __continuation, seconds __save) {
  const sys_seconds __until = chrono::__until_to_sys_seconds(__continuation);

  // Note the FORMAT field itself is passed as the letters argument.
  sys_info __info{__continuation_begin,
                  __until,
                  __continuation.__stdoff + __save,
                  chrono::duration_cast<minutes>(__save),
                  chrono::__format(__continuation, __continuation.__format, __save)};
  return __sys_info{std::move(__info), true};
}
683
// Returns the sys_info for __time in __continuation. The RULES field of the
// continuation determines how the sys_info is calculated.
[[nodiscard]] static __sys_info_result
__get_sys_info(sys_seconds __time,
               sys_seconds __continuation_begin,
               const __tz::__continuation& __continuation,
               const __tz::__rules_storage_type& __rules_db) {
  const auto __dispatch = [&](const auto& __value) {
    using _Alt = decay_t<decltype(__value)>;
    if constexpr (same_as<_Alt, monostate>)
      // No SAVE is stored, a SAVE of zero is used.
      return chrono::__get_sys_info_basic(__time, __continuation_begin, __continuation, chrono::seconds(0));
    else if constexpr (same_as<_Alt, __tz::__save>)
      // A fixed SAVE.
      return chrono::__get_sys_info_basic(__time, __continuation_begin, __continuation, __value.__time);
    else if constexpr (same_as<_Alt, std::string>)
      // The name of a set of rules in the database.
      return chrono::__get_sys_info_rule(
          __time, __continuation_begin, __continuation, chrono::__get_rules(__rules_db, __value));
    else
      static_assert(false);

    std::__libcpp_unreachable();
  };
  return std::visit(__dispatch, __continuation.__rules);
}
706
// The transition from one continuation to the next continuation may result in
// two consecutive continuations with the same "offset" information.
709// [time.zone.info.sys]/3
710// The begin and end data members indicate that, for the associated time_zone
711// and time_point, the offset and abbrev are in effect in the range
712// [begin, end). This information can be used to efficiently iterate the
713// transitions of a time_zone.
714//
// Note that the wording quoted above does not consider a change in only the
// SAVE field to be a different sys_info; zdump does consider this different.
//   LWG XXXX The sys_info range should be affected by save
// The comparison below includes save, so a change in SAVE is treated as a
// different sys_info, which matches zdump.
719//
720// Iff the "offsets" are the same '__current.__end' is replaced with
721// '__next.__end', which effectively merges the two objects in one object. The
722// function returns true if a merge occurred.
723[[nodiscard]] bool __merge_continuation(sys_info& __current, const sys_info& __next) {
724 if (__current.end != __next.begin)
725 return false;
726
727 if (__current.offset != __next.offset || __current.abbrev != __next.abbrev || __current.save != __next.save)
728 return false;
729
730 __current.end = __next.end;
731 return true;
732}
733
734//===----------------------------------------------------------------------===//
735// Public API
736//===----------------------------------------------------------------------===//
737
738[[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI time_zone time_zone::__create(unique_ptr<time_zone::__impl>&& __p) {
739 _LIBCPP_ASSERT_NON_NULL(__p != nullptr, "initialized time_zone without a valid pimpl object");
740 time_zone result;
741 result.__impl_ = std::move(__p);
742 return result;
743}
744
745_LIBCPP_EXPORTED_FROM_ABI time_zone::~time_zone() = default;
746
747[[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI string_view time_zone::__name() const noexcept { return __impl_->__name(); }
748
// Returns the sys_info that is in effect at the UTC time point __time.
//
// The continuations of the zone are visited in order. Each continuation either
// yields a sys_info candidate or, when it is not applicable, the time at which
// it ends. Contiguous candidates carrying the "same" information are merged
// into one range. When no continuation yields a range that contains __time the
// database is considered corrupt and std::__throw_runtime_error is called.
[[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI sys_info
time_zone::__get_info(sys_seconds __time) const {
  optional<sys_info> __result;
  bool __valid_result = false; // true iff __result.has_value() is true and
                               // __result.begin <= __time < __result.end is true.
  bool __can_merge                 = false;
  sys_seconds __continuation_begin = sys_seconds::min();
  // Iterates over the Zone entry and its continuations. Internally the Zone
  // entry is split in a Zone information and the first continuation. The last
  // continuation has no UNTIL field. This means the loop should always find a
  // continuation.
  //
  // For more information on background of zone information please consult the
  // following information
  //   [zic manual](https://www.man7.org/linux/man-pages/man8/zic.8.html)
  //   [tz source info](https://data.iana.org/time-zones/tz-how-to.html)
  //   On POSIX systems the zdump tool can be useful:
  //     zdump -v Asia/Hong_Kong
  //   Gives all transitions in the Hong Kong time zone.
  //
  // During iteration the result for the current continuation is returned. If
  // no continuation is applicable it will return the end time as "error". When
  // two continuations are contiguous and contain the "same" information these
  // ranges are merged as one range.
  // The merging requires keeping any result that occurs before __time,
  // likewise when a valid result is found the algorithm needs to test the next
  // continuation to see whether it can be merged. For example, Africa/Ceuta
  // Continuations
  //  0 s WE%sT 1929                   (C1)
  //  0 - WET 1967                     (C2)
  //  0 Sp WE%sT 1984 Mar 16           (C3)
  //
  // Rules
  //  R s 1926 1929 - O Sa>=1 24s 0 -  (R1)
  //
  //  R Sp 1967 o - Jun 3 12 1 S       (R2)
  //
  // The rule R1 is the last rule used in C1. The rule R2 is the first rule in
  // C3. Since R2 is the first rule this means when a continuation uses this
  // rule its value prior to R2 will be SAVE 0 LETTERS of the first entry with a
  // SAVE of 0, in this case WET.
  // This gives the following changes in the information.
  //   1928-10-07 00:00:00 C1 R1 becomes active: offset 0 save 0 abbrev WET
  //   1929-01-01 00:00:00 C2    becomes active: offset 0 save 0 abbrev WET
  //   1967-01-01 00:00:00 C3    becomes active: offset 0 save 0 abbrev WET
  //   1967-06-03 12:00:00 C3 R2 becomes active: offset 0 save 1 abbrev WEST
  //
  // The first 3 entries are contiguous and contain the same information, this
  // means the period [1928-10-07 00:00:00, 1967-06-03 12:00:00) should be
  // returned in one sys_info object.

  const auto& __continuations                  = __impl_->__continuations();
  const __tz::__rules_storage_type& __rules_db = __impl_->__rules_db();
  for (auto __it = __continuations.begin(); __it != __continuations.end(); ++__it) {
    const auto& __continuation   = *__it;
    __sys_info_result __sys_info = chrono::__get_sys_info(__time, __continuation_begin, __continuation, __rules_db);

    if (!__sys_info) {
      // The continuation is not applicable. Its "error" is the time it ends,
      // which is used as the begin of the next continuation.
      __continuation_begin = __sys_info.error();
      continue;
    }

    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
        __sys_info->__info.begin < __sys_info->__info.end, "invalid sys_info range");

    // Filters out dummy entries
    // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31
    // ...
    // -4 A -04/-03 2000 Mar 3 (C1)
    // -3 A -03/-02            (C2)
    //
    // ...
    // R A 2000 o - Mar 3 0 0 -
    // R A 2007 o - D 30 0 1 -
    // ...
    //
    // This results in an entry
    //   [2000-03-03 03:00:00, 2000-03-03 04:00:00) -10800s 60min -03
    // for [C1 & R1, C1 & R2) which due to the end of the continuation is a
    // one hour "sys_info". Instead the entry should be ignored and replaced
    // by [C2 & R1, C2 & R2) which is the proper range
    //   [2000-03-03 03:00:00, 2007-12-30 03:00:00) -02:00:00 60min -02
    //
    // The 12h limit matches the quirk described at the top of this file: no
    // sys_info object is expected to last less than 12h.
    if (std::holds_alternative<string>(__continuation.__rules) && __sys_info->__can_merge &&
        __sys_info->__info.begin + 12h > __sys_info->__info.end) {
      __continuation_begin = __sys_info->__info.begin;
      continue;
    }

    if (!__result) {
      // First entry found, always keep it.
      __result = __sys_info->__info;
    } else if (__can_merge && chrono::__merge_continuation(*__result, __sys_info->__info)) {
      // The results are merged into __result. This may "overwrite" a valid
      // sys_info object with another valid sys_info object.
    } else if (__valid_result) {
      // The results could not be merged and the result we already have
      // contains __time; that result is the final result.
      return *__result;
    } else {
      // The results could not be merged and the result we had is before
      // __time. The result we got is at a later time (possibly valid), so it
      // is always better than the previous result.
      //
      // Here things get interesting:
      // For example, America/Argentina/San_Luis
      //
      //   -3 A -03/-02 2008 Ja 21           (C1)
      //   -4 Sa -04/-03 2009 O 11           (C2)
      //
      //   R A 2007 o - D 30 0 1 -           (R1)
      //
      //   R Sa 2007 2008 - O Su>=8 0 1 -    (R2)
      //
      // Based on C1 & R1 the end time of C1 is 2008-01-21 03:00:00
      // Based on C2 & R2 the end time of C1 is 2008-01-21 02:00:00
      // In this case the earlier time is the real time of the transition.
      // However the algorithm used gives 2008-01-21 03:00:00.
      //
      // So we need to calculate the previous UNTIL in the current context and
      // see whether it's earlier.
      _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
          __it != __continuations.begin(), "the first rule should always seed the result");
      const auto& __last = *(__it - 1);

      // Europe/Berlin
      //   1 c CE%sT 1945 May 24 2           (C1)
      //   1 So CE%sT 1946                   (C2)
      //
      //   R c 1944 1945 - Ap M>=1 2s 1 S    (R1)
      //
      //   R So 1945 o - May 24 2 2 M        (R2)
      //
      // When C2 becomes active the time would be before the first rule R2,
      // giving a 1 hour sys_info. This is not valid and the results need
      // merging.
      if (std::holds_alternative<string>(__last.__rules) && __result->end != __sys_info->__info.begin) {
        // When the UTC gap between the rules is due to the change of
        // offsets adjust the new time to remove the gap.
        sys_seconds __end   = __result->end - __result->offset;
        sys_seconds __begin = __sys_info->__info.begin - __sys_info->__info.offset;
        if (__end == __begin)
          __sys_info->__info.begin = __result->end;
      }

      __result = __sys_info->__info;
    }

    // Every path reaching this point has stored or updated __result; refresh
    // the state that depends on it.
    __valid_result       = __time >= __result->begin && __time < __result->end;
    __can_merge          = __sys_info->__can_merge;
    __continuation_begin = __result->end;
  }

  if (__valid_result)
    return *__result;

  std::__throw_runtime_error("tzdb: corrupt db");
}
915
916// Is the "__local_time" present in "__first" and "__second". If so the
917// local_info has an ambiguous result.
918[[nodiscard]] static bool
919__is_ambiguous(local_seconds __local_time, const sys_info& __first, const sys_info& __second) {
920 std::chrono::local_seconds __end_first{__first.end.time_since_epoch() + __first.offset};
921 std::chrono::local_seconds __begin_second{__second.begin.time_since_epoch() + __second.offset};
922
923 return __local_time < __end_first && __local_time >= __begin_second;
924}
925
926// Determines the result of the "__local_time". This expects the object
927// "__first" to be earlier in time than "__second".
928[[nodiscard]] static local_info
929__get_info(local_seconds __local_time, const sys_info& __first, const sys_info& __second) {
930 std::chrono::local_seconds __end_first{__first.end.time_since_epoch() + __first.offset};
931 std::chrono::local_seconds __begin_second{__second.begin.time_since_epoch() + __second.offset};
932
933 if (__local_time < __end_first) {
934 if (__local_time >= __begin_second)
935 // |--------|
936 // |------|
937 // ^
938 return {.result: local_info::ambiguous, .first: __first, .second: __second};
939
940 // |--------|
941 // |------|
942 // ^
943 return {.result: local_info::unique, .first: __first, .second: sys_info{}};
944 }
945
946 if (__local_time < __begin_second)
947 // |--------|
948 // |------|
949 // ^
950 return {.result: local_info::nonexistent, .first: __first, .second: __second};
951
952 // |--------|
953 // |------|
954 // ^
955 return {.result: local_info::unique, .first: __second, .second: sys_info{}};
956}
957
958[[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI local_info
959time_zone::__get_info(local_seconds __local_time) const {
960 seconds __local_seconds = __local_time.time_since_epoch();
961
962 /* An example of a typical year with a DST switch displayed in local time.
963 *
964 * At the first of April the time goes forward one hour. This means the
965 * time marked with ~~ is not a valid local time. This is represented by the
966 * nonexistent value in local_info.result.
967 *
968 * At the first of November the time goes backward one hour. This means the
969 * time marked with ^^ happens twice. This is represented by the ambiguous
970 * value in local_info.result.
971 *
972 * 2020.11.01 2021.04.01 2021.11.01
973 * offset +05 offset +05 offset +05
974 * save 0s save 1h save 0s
975 * |------------//----------|
976 * |---------//--------------|
977 * |-------------
978 * ~~ ^^
979 *
980 * These shifts can happen due to changes in the current time zone for a
981 * location. For example, Indian/Kerguelen switched only once. In 1950 from an
982 * offset of 0 hours to an offset of +05 hours.
983 *
984 * During all these shifts the UTC time will not have gaps.
985 */
986
987 // The code needs to determine the system time for the local time. There is no
988 // information available. Assume the offset between system time and local time
989 // is 0s. This gives an initial estimate.
990 sys_seconds __guess{__local_seconds};
991 sys_info __info = __get_info(time: __guess);
992
993 // At this point the offset can be used to determine an estimate for the local
994 // time. Before doing that, determine the offset and validate whether the
995 // local time is the range [chrono::local_seconds::min(),
996 // chrono::local_seconds::max()).
997 if (__local_seconds < 0s && __info.offset > 0s)
998 if (__local_seconds - chrono::local_seconds::min().time_since_epoch() < __info.offset)
999 return {.result: -1, .first: __info, .second: {}};
1000
1001 if (__local_seconds > 0s && __info.offset < 0s)
1002 if (chrono::local_seconds::max().time_since_epoch() - __local_seconds < -__info.offset)
1003 return {.result: -2, .first: __info, .second: {}};
1004
1005 // Based on the information found in the sys_info, the local time can be
1006 // converted to a system time. This resulting time can be in the following
1007 // locations of the sys_info:
1008 //
1009 // |---------//--------------|
1010 // 1 2.1 2.2 2.3 3
1011 //
1012 // 1. The estimate is before the returned sys_info object.
1013 // The result is either non-existent or unique in the previous sys_info.
1014 // 2. The estimate is in the sys_info object
1015 // - If the sys_info begin is not sys_seconds::min(), then it might be at
1016 // 2.1 and could be ambiguous with the previous or unique.
1017 // - If sys_info end is not sys_seconds::max(), then it might be at 2.3
1018 // and could be ambiguous with the next or unique.
1019 // - Else it is at 2.2 and always unique. This case happens when a
1020 // time zone has no transitions. For example, UTC or GMT+1.
1021 // 3. The estimate is after the returned sys_info object.
1022 // The result is either non-existent or unique in the next sys_info.
1023 //
1024 // There is no specification where the "middle" starts. Similar issues can
1025 // happen when sys_info objects are "short", then "unique in the next" could
1026 // become "ambiguous in the next and the one following". Theoretically there
1027 // is the option of the following time-line
1028 //
1029 // |------------|
1030 // |----|
1031 // |-----------------|
1032 //
1033 // However the local_info object only has 2 sys_info objects, so this option
1034 // is not tested.
1035
1036 sys_seconds __sys_time{__local_seconds - __info.offset};
1037 if (__sys_time < __info.begin)
1038 // Case 1 before __info
1039 return chrono::__get_info(__local_time, first: __get_info(time: __info.begin - 1s), second: __info);
1040
1041 if (__sys_time >= __info.end)
1042 // Case 3 after __info
1043 return chrono::__get_info(__local_time, first: __info, second: __get_info(time: __info.end));
1044
1045 // Case 2 in __info
1046 if (__info.begin != sys_seconds::min()) {
1047 // Case 2.1 Not at the beginning, when not ambiguous the result should test
1048 // case 2.3.
1049 sys_info __prev = __get_info(time: __info.begin - 1s);
1050 if (__is_ambiguous(__local_time, first: __prev, second: __info))
1051 return {.result: local_info::ambiguous, .first: __prev, .second: __info};
1052 }
1053
1054 if (__info.end == sys_seconds::max())
1055 // At the end so it's case 2.2
1056 return {.result: local_info::unique, .first: __info, .second: sys_info{}};
1057
1058 // This tests case 2.2 or case 2.3.
1059 return chrono::__get_info(__local_time, first: __info, second: __get_info(time: __info.end));
1060}
1061
1062} // namespace chrono
1063
1064_LIBCPP_END_NAMESPACE_STD
1065