1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html
10
11// TODO TZDB look at optimizations
12//
13// The current algorithm is correct but not efficient. For example, in a named
14// rule based continuation finding the next rule does quite a bit of work,
15// returns the next rule and "forgets" its state. This could be better.
16//
17// It would be possible to cache lookups. If a time for a zone is calculated its
18// sys_info could be kept and the next lookup could test whether the time is in
19// a "known" sys_info. The wording in the Standard hints at this slowness by
20// "suggesting" this could be implemented on the user's side.
21
22// TODO TZDB look at removing quirks
23//
24// The code has some special rules to adjust the timing at the continuation
25// switches. This works correctly, but some of the places feel odd. It would be
26// good to investigate this further and see whether all quirks are needed or
27// that there are better fixes.
28//
29// These quirks often use a 12h interval; this is the scan interval of zdump,
30// which implies there are no sys_info objects with a duration of less than 12h.
31
32// Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120502
33
34#include <__config>
35
36// TODO(LLVM 23): When upgrading to GCC 16 this can be removed
37#ifdef _LIBCPP_COMPILER_GCC
38# pragma GCC optimize("-O0")
39#endif
40
41#include <algorithm>
42#include <cctype>
43#include <chrono>
44#include <expected>
45#include <map>
46#include <numeric>
47#include <ranges>
48
49#include "include/tzdb/time_zone_private.h"
50#include "include/tzdb/tzdb_list_private.h"
51
52// TODO TZDB remove debug printing
53#ifdef PRINT
54# include <print>
55#endif
56
57_LIBCPP_BEGIN_NAMESPACE_STD
58
59#ifdef PRINT
60template <>
61struct formatter<chrono::sys_info, char> {
62 template <class ParseContext>
63 constexpr typename ParseContext::iterator parse(ParseContext& ctx) {
64 return ctx.begin();
65 }
66
67 template <class FormatContext>
68 typename FormatContext::iterator format(const chrono::sys_info& info, FormatContext& ctx) const {
69 return std::format_to(
70 ctx.out(), "[{}, {}) {:%Q%q} {:%Q%q} {}", info.begin, info.end, info.offset, info.save, info.abbrev);
71 }
72};
73#endif
74
75namespace chrono {
76
77//===----------------------------------------------------------------------===//
78// Details
79//===----------------------------------------------------------------------===//
80
81struct __sys_info {
82 sys_info __info;
83 bool __can_merge; // Can the returned sys_info object be merged with
84};
85
86// Return type for helper function to get a sys_info.
87// - The expected result returns the "best" sys_info object. This object can be
88// before the requested time. Sometimes sys_info objects from different
89// continuations share their offset, save, and abbrev and these objects are
90// merged to one sys_info object. The __can_merge flag determines whether the
91// current result can be merged with the next result.
92// - The unexpected result means no sys_info object was found and the time is
93// the time to be used for the next search iteration.
94using __sys_info_result = expected<__sys_info, sys_seconds>;
95
96template <ranges::forward_range _Range,
97 class _Type,
98 class _Proj = identity,
99 indirect_strict_weak_order<const _Type*, projected<ranges::iterator_t<_Range>, _Proj>> _Comp = ranges::less>
100[[nodiscard]] static ranges::borrowed_iterator_t<_Range>
101__binary_find(_Range&& __r, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) {
102 auto __end = ranges::end(__r);
103 auto __ret = ranges::lower_bound(ranges::begin(__r), __end, __value, __comp, __proj);
104 if (__ret == __end)
105 return __end;
106
107 // When the value does not match the predicate it's equal and a valid result
108 // was found.
109 return !std::invoke(__comp, __value, std::invoke(__proj, *__ret)) ? __ret : __end;
110}
111
112// Format based on https://data.iana.org/time-zones/tz-how-to.html
113//
114// 1 a time zone abbreviation that is a string of three or more characters that
115// are either ASCII alphanumerics, "+", or "-"
116// 2 the string "%z", in which case the "%z" will be replaced by a numeric time
117// zone abbreviation
118// 3 a pair of time zone abbreviations separated by a slash ('/'), in which
119// case the first string is the abbreviation for the standard time name and
120// the second string is the abbreviation for the daylight saving time name
121// 4 a string containing "%s", in which case the "%s" will be replaced by the
122// text in the appropriate Rule's LETTER column, and the resulting string
123// should be a time zone abbreviation
124//
125// Rule 1 is not strictly validated since America/Barbados uses a two letter
126// abbreviation AT.
127[[nodiscard]] static string
128__format(const __tz::__continuation& __continuation, const string& __letters, seconds __save) {
129 bool __shift = false;
130 string __result;
131 for (char __c : __continuation.__format) {
132 if (__shift) {
133 switch (__c) {
134 case 's':
135 std::ranges::copy(__letters, std::back_inserter(x&: __result));
136 break;
137
138 case 'z': {
139 if (__continuation.__format.size() != 2)
140 std::__throw_runtime_error(
141 std::format(fmt: "corrupt tzdb FORMAT field: %z should be the entire contents, instead contains '{}'",
142 args: __continuation.__format)
143 .c_str());
144 chrono::hh_mm_ss __offset{__continuation.__stdoff + __save};
145 if (__offset.is_negative()) {
146 __result += '-';
147 __offset = chrono::hh_mm_ss{-(__continuation.__stdoff + __save)};
148 } else
149 __result += '+';
150
151 if (__offset.minutes() != 0min)
152 std::format_to(out_it: std::back_inserter(x&: __result), fmt: "{:%H%M}", args&: __offset);
153 else
154 std::format_to(out_it: std::back_inserter(x&: __result), fmt: "{:%H}", args&: __offset);
155 } break;
156
157 default:
158 std::__throw_runtime_error(
159 std::format(fmt: "corrupt tzdb FORMAT field: invalid sequence '%{}' found, expected %s or %z", args&: __c).c_str());
160 }
161 __shift = false;
162
163 } else if (__c == '/') {
164 if (__save != 0s)
165 __result.clear();
166 else
167 break;
168
169 } else if (__c == '%') {
170 __shift = true;
171 } else if (__c == '+' || __c == '-' || std::isalnum(__c)) {
172 __result.push_back(__c);
173 } else {
174 std::__throw_runtime_error(
175 std::format(
176 fmt: "corrupt tzdb FORMAT field: invalid character '{}' found, expected +, -, or an alphanumeric value", args&: __c)
177 .c_str());
178 }
179 }
180
181 if (__shift)
182 std::__throw_runtime_error("corrupt tzdb FORMAT field: input ended with the start of the escape sequence '%'");
183
184 if (__result.empty())
185 std::__throw_runtime_error("corrupt tzdb FORMAT field: result is empty");
186
187 return __result;
188}
189
190[[nodiscard]] static sys_seconds __to_sys_seconds(year_month_day __ymd, seconds __seconds) {
191 seconds __result = static_cast<sys_days>(__ymd).time_since_epoch() + __seconds;
192 return sys_seconds{__result};
193}
194
195[[nodiscard]] static seconds __at_to_sys_seconds(const __tz::__continuation& __continuation) {
196 switch (__continuation.__at.__clock) {
197 case __tz::__clock::__local:
198 return __continuation.__at.__time - __continuation.__stdoff -
199 std::visit(
200 visitor: [](const auto& __value) {
201 using _Tp = decay_t<decltype(__value)>;
202 if constexpr (same_as<_Tp, monostate>)
203 return chrono::seconds{0};
204 else if constexpr (same_as<_Tp, __tz::__save>)
205 return chrono::duration_cast<seconds>(__value.__time);
206 else if constexpr (same_as<_Tp, std::string>)
207 // For a named rule based continuation the SAVE depends on the RULE
208 // active at the end. This should be determined separately.
209 return chrono::seconds{0};
210 else
211 static_assert(false);
212 },
213 vs: __continuation.__rules);
214
215 case __tz::__clock::__universal:
216 return __continuation.__at.__time;
217
218 case __tz::__clock::__standard:
219 return __continuation.__at.__time - __continuation.__stdoff;
220 }
221 std::__libcpp_unreachable();
222}
223
224[[nodiscard]] static year_month_day __to_year_month_day(year __year, month __month, __tz::__on __on) {
225 return std::visit(
226 visitor: [&](const auto& __value) {
227 using _Tp = decay_t<decltype(__value)>;
228 if constexpr (same_as<_Tp, chrono::day>)
229 return year_month_day{__year, __month, __value};
230 else if constexpr (same_as<_Tp, weekday_last>)
231 return year_month_day{static_cast<sys_days>(year_month_weekday_last{__year, __month, __value})};
232 else if constexpr (same_as<_Tp, __tz::__constrained_weekday>)
233 return __value(__year, __month);
234 else
235 static_assert(false);
236 },
237 vs&: __on);
238}
239
240[[nodiscard]] static sys_seconds __until_to_sys_seconds(const __tz::__continuation& __continuation) {
241 // Does UNTIL contain the magic value for the last continuation?
242 if (__continuation.__year == chrono::year::min())
243 return sys_seconds::max();
244
245 year_month_day __ymd = chrono::__to_year_month_day(year: __continuation.__year, month: __continuation.__in, on: __continuation.__on);
246 return chrono::__to_sys_seconds(__ymd, seconds: chrono::__at_to_sys_seconds(__continuation));
247}
248
249// Holds the UNTIL time for a continuation with a named rule.
250//
251// Unlike continuations with an fixed SAVE named rules have a variable SAVE.
252// This means when the UNTIL uses the local wall time the actual UNTIL value can
253// only be determined when the SAVE is known. This class holds that abstraction.
254class __named_rule_until {
255public:
256 explicit __named_rule_until(const __tz::__continuation& __continuation)
257 : __until_{chrono::__until_to_sys_seconds(__continuation)},
258 __needs_adjustment_{
259 // The last continuation of a ZONE has no UNTIL which basically is
260 // until the end of _local_ time. This value is expressed by
261 // sys_seconds::max(). Subtracting the SAVE leaves large value.
262 // However SAVE can be negative, which would add a value to maximum
263 // leading to undefined behaviour. In practice this often results in
264 // an overflow to a very small value.
265 __until_ != sys_seconds::max() && __continuation.__at.__clock == __tz::__clock::__local} {}
266
267 // Gives the unadjusted until value, this is useful when the SAVE is not known
268 // at all.
269 sys_seconds __until() const noexcept { return __until_; }
270
271 bool __needs_adjustment() const noexcept { return __needs_adjustment_; }
272
273 // Returns the UNTIL adjusted for SAVE.
274 sys_seconds operator()(seconds __save) const noexcept { return __until_ - __needs_adjustment_ * __save; }
275
276private:
277 sys_seconds __until_;
278 bool __needs_adjustment_;
279};
280
281[[nodiscard]] static seconds __at_to_seconds(seconds __stdoff, const __tz::__rule& __rule) {
282 switch (__rule.__at.__clock) {
283 case __tz::__clock::__local:
284 // Local time and standard time behave the same. This is not
285 // correct. Local time needs to adjust for the current saved time.
286 // To know the saved time the rules need to be known and sorted.
287 // This needs a time so to avoid the chicken and egg adjust the
288 // saving of the local time later.
289 return __rule.__at.__time - __stdoff;
290
291 case __tz::__clock::__universal:
292 return __rule.__at.__time;
293
294 case __tz::__clock::__standard:
295 return __rule.__at.__time - __stdoff;
296 }
297 std::__libcpp_unreachable();
298}
299
300[[nodiscard]] static sys_seconds __from_to_sys_seconds(seconds __stdoff, const __tz::__rule& __rule, year __year) {
301 year_month_day __ymd = chrono::__to_year_month_day(__year, month: __rule.__in, on: __rule.__on);
302
303 seconds __at = chrono::__at_to_seconds(__stdoff, __rule);
304 return chrono::__to_sys_seconds(__ymd, seconds: __at);
305}
306
307[[nodiscard]] static sys_seconds __from_to_sys_seconds(seconds __stdoff, const __tz::__rule& __rule) {
308 return chrono::__from_to_sys_seconds(__stdoff, __rule, year: __rule.__from);
309}
310
311[[nodiscard]] static const vector<__tz::__rule>&
312__get_rules(const __tz::__rules_storage_type& __rules_db, const string& __rule_name) {
313 auto __result = chrono::__binary_find(r: __rules_db, value: __rule_name, comp: {}, proj: [](const auto& __p) { return __p.first; });
314 if (__result == std::end(c: __rules_db))
315 std::__throw_runtime_error(("corrupt tzdb: rule '" + __rule_name + " 'does not exist").c_str());
316
317 return __result->second;
318}
319
320// Returns the letters field for a time before the first rule.
321//
322// Per https://data.iana.org/time-zones/tz-how-to.html
323// One wrinkle, not fully explained in zic.8.txt, is what happens when switching
324// to a named rule. To what values should the SAVE and LETTER data be
325// initialized?
326//
327// 1 If at least one transition has happened, use the SAVE and LETTER data from
328// the most recent.
329// 2 If switching to a named rule before any transition has happened, assume
330// standard time (SAVE zero), and use the LETTER data from the earliest
331// transition with a SAVE of zero.
332//
333// This function implements case 2.
334[[nodiscard]] static string __letters_before_first_rule(const vector<__tz::__rule>& __rules) {
335 auto __letters =
336 __rules //
337 | views::filter([](const __tz::__rule& __rule) { return __rule.__save.__time == 0s; }) //
338 | views::transform([](const __tz::__rule& __rule) { return __rule.__letters; }) //
339 | views::take(1);
340
341 if (__letters.empty())
342 std::__throw_runtime_error("corrupt tzdb: rule has zero entries");
343
344 return __letters.front();
345}
346
347// Determines the information based on the continuation and the rules.
348//
349// There are several special cases to take into account
350//
351// === Entries before the first rule becomes active ===
352// Asia/Hong_Kong
353// 9 - JST 1945 N 18 2 // (1)
354// 8 HK HK%sT // (2)
355// R HK 1946 o - Ap 21 0 1 S // (3)
356// There (1) is active until Novemer 18th 1945 at 02:00, after this time
357// (2) becomes active. The first rule entry for HK (3) becomes active
358// from April 21st 1945 at 01:00. In the period between (2) is active.
359// This entry has an offset.
360// This entry has no save, letters, or dst flag. So in the period
361// after (1) and until (3) no rule entry is associated with the time.
362
363[[nodiscard]] static sys_info __get_sys_info_before_first_rule(
364 sys_seconds __begin,
365 sys_seconds __end,
366 const __tz::__continuation& __continuation,
367 const vector<__tz::__rule>& __rules) {
368 return sys_info{
369 .begin: __begin,
370 .end: __end,
371 .offset: __continuation.__stdoff,
372 .save: chrono::minutes(0),
373 .abbrev: chrono::__format(__continuation, letters: __letters_before_first_rule(__rules), save: 0s)};
374}
375
376// Returns the sys_info object for a time before the first rule.
377// When this first rule has a SAVE of 0s the sys_info for the time before the
378// first rule and for the first rule are identical and will be merged.
379[[nodiscard]] static sys_info __get_sys_info_before_first_rule(
380 sys_seconds __begin,
381 sys_seconds __rule_end, // The end used when SAVE != 0s
382 sys_seconds __next_end, // The end used when SAVE == 0s the times are merged
383 const __tz::__continuation& __continuation,
384 const vector<__tz::__rule>& __rules,
385 vector<__tz::__rule>::const_iterator __rule) {
386 if (__rule->__save.__time != 0s)
387 return __get_sys_info_before_first_rule(__begin, end: __rule_end, __continuation, __rules);
388
389 return sys_info{
390 .begin: __begin, .end: __next_end, .offset: __continuation.__stdoff, .save: 0min, .abbrev: chrono::__format(__continuation, letters: __rule->__letters, save: 0s)};
391}
392
393[[nodiscard]] static seconds __at_to_seconds(seconds __stdoff, seconds __save, const __tz::__rule& __rule) {
394 switch (__rule.__at.__clock) {
395 case __tz::__clock::__local:
396 return __rule.__at.__time - __stdoff - __save;
397
398 case __tz::__clock::__universal:
399 return __rule.__at.__time;
400
401 case __tz::__clock::__standard:
402 return __rule.__at.__time - __stdoff;
403 }
404 std::__libcpp_unreachable();
405}
406
407[[nodiscard]] static sys_seconds
408__rule_to_sys_seconds(seconds __stdoff, seconds __save, const __tz::__rule& __rule, year __year) {
409 year_month_day __ymd = chrono::__to_year_month_day(__year, month: __rule.__in, on: __rule.__on);
410
411 seconds __at = chrono::__at_to_seconds(__stdoff, __save, __rule);
412 return chrono::__to_sys_seconds(__ymd, seconds: __at);
413}
414
415// Returns the first rule after __time.
416// Note that a rule can be "active" in multiple years, this may result in an
417// infinite loop where the same rule is returned every time, use __current to
418// guard against that.
419//
420// When no next rule exists the returned time will be sys_seconds::max(). This
421// can happen in practice. For example,
422//
423// R So 1945 o - May 24 2 2 M
424// R So 1945 o - S 24 3 1 S
425// R So 1945 o - N 18 2s 0 -
426//
427// Has 3 rules that are all only active in 1945.
428[[nodiscard]] static pair<sys_seconds, vector<__tz::__rule>::const_iterator>
429__next_rule(sys_seconds __time,
430 seconds __stdoff,
431 seconds __save,
432 const vector<__tz::__rule>& __rules,
433 vector<__tz::__rule>::const_iterator __current) {
434 year __year = year_month_day{chrono::floor<days>(t: __time)}.year();
435
436 // Note it would probably be better to store the pairs in a vector and then
437 // use min() to get the smallest element
438 map<sys_seconds, vector<__tz::__rule>::const_iterator> __candidates;
439 // Note this evaluates all rules which is a waste of effort; when the entries
440 // are beyond the current year's "next year" (where "next year" is not always
441 // year + 1) the algorithm should end.
442 for (auto __it = __rules.begin(); __it != __rules.end(); ++__it) {
443 for (year __y = __it->__from; __y <= __it->__to; ++__y) {
444 // Adding the current entry for the current year may lead to infinite
445 // loops due to the SAVE adjustment. Skip these entries.
446 if (__y == __year && __it == __current)
447 continue;
448
449 sys_seconds __t = chrono::__rule_to_sys_seconds(__stdoff, __save, rule: *__it, year: __y);
450 if (__t <= __time)
451 continue;
452
453 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(!__candidates.contains(__t), "duplicated rule");
454 __candidates[__t] = __it;
455 break;
456 }
457 }
458
459 if (!__candidates.empty()) [[likely]] {
460 auto __it = __candidates.begin();
461
462 // When no rule is selected the time before the first rule and the first rule
463 // should not be merged.
464 if (__time == sys_seconds::min())
465 return *__it;
466
467 // There can be two constitutive rules that are the same. For example,
468 // Hong Kong
469 //
470 // R HK 1973 o - D 30 3:30 1 S (R1)
471 // R HK 1965 1976 - Ap Su>=16 3:30 1 S (R2)
472 //
473 // 1973-12-29 19:30:00 R1 becomes active.
474 // 1974-04-20 18:30:00 R2 becomes active.
475 // Both rules have a SAVE of 1 hour and LETTERS are S for both of them.
476 while (__it != __candidates.end()) {
477 if (__current->__save.__time != __it->second->__save.__time || __current->__letters != __it->second->__letters)
478 return *__it;
479
480 ++__it;
481 }
482 }
483
484 return {sys_seconds::max(), __rules.end()};
485}
486
487// Returns the first rule of a set of rules.
488// This is not always the first of the listed rules. For example
489// R Sa 2008 2009 - Mar Su>=8 0 0 -
490// R Sa 2007 2008 - O Su>=8 0 1 -
491// The transition in October 2007 happens before the transition in March 2008.
492[[nodiscard]] static vector<__tz::__rule>::const_iterator
493__first_rule(seconds __stdoff, const vector<__tz::__rule>& __rules) {
494 return chrono::__next_rule(time: sys_seconds::min(), __stdoff, save: 0s, __rules, current: __rules.end()).second;
495}
496
497[[nodiscard]] static __sys_info_result __get_sys_info_rule(
498 sys_seconds __time,
499 sys_seconds __continuation_begin,
500 const __tz::__continuation& __continuation,
501 const vector<__tz::__rule>& __rules) {
502 auto __rule = chrono::__first_rule(stdoff: __continuation.__stdoff, __rules);
503 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__rule != __rules.end(), "the set of rules has no first rule");
504
505 // Avoid selecting a time before the start of the continuation
506 __time = std::max(a: __time, b: __continuation_begin);
507
508 sys_seconds __rule_begin = chrono::__from_to_sys_seconds(stdoff: __continuation.__stdoff, rule: *__rule);
509
510 // The time sought is very likely inside the current rule.
511 // When the continuation's UNTIL uses the local clock there are edge cases
512 // where this is not true.
513 //
514 // Start to walk the rules to find the proper one.
515 //
516 // For now we just walk all the rules TODO TZDB investigate whether a smarter
517 // algorithm would work.
518 auto __next = chrono::__next_rule(time: __rule_begin, stdoff: __continuation.__stdoff, save: __rule->__save.__time, __rules, current: __rule);
519
520 // Ignore small steps, this happens with America/Punta_Arenas for the
521 // transition
522 // -4:42:46 - SMT 1927 S
523 // -5 x -05/-04 1932 S
524 // ...
525 //
526 // R x 1927 1931 - S 1 0 1 -
527 // R x 1928 1932 - Ap 1 0 0 -
528 //
529 // America/Punta_Arenas Thu Sep 1 04:42:45 1927 UT = Thu Sep 1 00:42:45 1927 -04 isdst=1 gmtoff=-14400
530 // America/Punta_Arenas Sun Apr 1 03:59:59 1928 UT = Sat Mar 31 23:59:59 1928 -04 isdst=1 gmtoff=-14400
531 // America/Punta_Arenas Sun Apr 1 04:00:00 1928 UT = Sat Mar 31 23:00:00 1928 -05 isdst=0 gmtoff=-18000
532 //
533 // Without this there will be a transition
534 // [1927-09-01 04:42:45, 1927-09-01 05:00:00) -05:00:00 0min -05
535
536 if (sys_seconds __begin = __rule->__save.__time != 0s ? __rule_begin : __next.first; __time < __begin) {
537 if (__continuation_begin == sys_seconds::min() || __begin - __continuation_begin > 12h)
538 return __sys_info{__get_sys_info_before_first_rule(
539 begin: __continuation_begin, rule_end: __rule_begin, next_end: __next.first, __continuation, __rules, __rule),
540 false};
541
542 // Europe/Berlin
543 // 1 c CE%sT 1945 May 24 2 (C1)
544 // 1 So CE%sT 1946 (C2)
545 //
546 // R c 1944 1945 - Ap M>=1 2s 1 S (R1)
547 //
548 // R So 1945 o - May 24 2 2 M (R2)
549 //
550 // When C2 becomes active the time would be before the first rule R2,
551 // giving a 1 hour sys_info.
552 seconds __save = __rule->__save.__time;
553 __named_rule_until __continuation_end{__continuation};
554 sys_seconds __sys_info_end = std::min(a: __continuation_end(__save), b: __next.first);
555
556 return __sys_info{
557 sys_info{.begin: __continuation_begin,
558 .end: __sys_info_end,
559 .offset: __continuation.__stdoff + __save,
560 .save: chrono::duration_cast<minutes>(fd: __save),
561 .abbrev: chrono::__format(__continuation, letters: __rule->__letters, __save)},
562 __sys_info_end == __continuation_end(__save)};
563 }
564
565 // See above for America/Asuncion
566 if (__rule->__save.__time == 0s && __time < __next.first) {
567 return __sys_info{
568 sys_info{.begin: __continuation_begin,
569 .end: __next.first,
570 .offset: __continuation.__stdoff,
571 .save: 0min,
572 .abbrev: chrono::__format(__continuation, letters: __rule->__letters, save: 0s)},
573 false};
574 }
575
576 if (__rule->__save.__time != 0s) {
577 // another fix for America/Punta_Arenas when not at the start of the
578 // sys_info object.
579 seconds __save = __rule->__save.__time;
580 if (__continuation_begin >= __rule_begin - __save && __time < __next.first) {
581 return __sys_info{
582 sys_info{.begin: __continuation_begin,
583 .end: __next.first,
584 .offset: __continuation.__stdoff + __save,
585 .save: chrono::duration_cast<minutes>(fd: __save),
586 .abbrev: chrono::__format(__continuation, letters: __rule->__letters, __save)},
587 false};
588 }
589 }
590
591 __named_rule_until __continuation_end{__continuation};
592 while (__next.second != __rules.end()) {
593#ifdef PRINT
594 std::print(
595 stderr,
596 "Rule for {}: [{}, {}) off={} save={} duration={}\n",
597 __time,
598 __rule_begin,
599 __next.first,
600 __continuation.__stdoff,
601 __rule->__save.__time,
602 __next.first - __rule_begin);
603#endif
604
605 sys_seconds __end = __continuation_end(__rule->__save.__time);
606
607 sys_seconds __sys_info_begin = std::max(a: __continuation_begin, b: __rule_begin);
608 sys_seconds __sys_info_end = std::min(a: __end, b: __next.first);
609 seconds __diff = chrono::abs(d: __sys_info_end - __sys_info_begin);
610
611 if (__diff < 12h) {
612 // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31
613 // -4:16:48 - CMT 1920 May
614 // -4 - -04 1930 D
615 // -4 A -04/-03 1969 O 5
616 // -3 A -03/-02 1999 O 3
617 // -4 A -04/-03 2000 Mar 3
618 // ...
619 //
620 // ...
621 // R A 1989 1992 - O Su>=15 0 1 -
622 // R A 1999 o - O Su>=1 0 1 -
623 // R A 2000 o - Mar 3 0 0 -
624 // R A 2007 o - D 30 0 1 -
625 // ...
626
627 // The 1999 switch uses the same rule, but with a different stdoff.
628 // R A 1999 o - O Su>=1 0 1 -
629 // stdoff -3 -> 1999-10-03 03:00:00
630 // stdoff -4 -> 1999-10-03 04:00:00
631 // This generates an invalid entry and this is evaluated as a transition.
632 // Looking at the zdump like output in libc++ this generates jumps in
633 // the UTC time.
634
635 __rule = __next.second;
636 __next = __next_rule(time: __next.first, stdoff: __continuation.__stdoff, save: __rule->__save.__time, __rules, current: __rule);
637 __end = __continuation_end(__rule->__save.__time);
638 __sys_info_end = std::min(a: __end, b: __next.first);
639 }
640
641 if ((__time >= __rule_begin && __time < __next.first) || __next.first >= __end) {
642 __sys_info_begin = std::max(a: __continuation_begin, b: __rule_begin);
643 __sys_info_end = std::min(a: __end, b: __next.first);
644
645 return __sys_info{
646 sys_info{.begin: __sys_info_begin,
647 .end: __sys_info_end,
648 .offset: __continuation.__stdoff + __rule->__save.__time,
649 .save: chrono::duration_cast<minutes>(fd: __rule->__save.__time),
650 .abbrev: chrono::__format(__continuation, letters: __rule->__letters, save: __rule->__save.__time)},
651 __sys_info_end == __end};
652 }
653
654 __rule_begin = __next.first;
655 __rule = __next.second;
656 __next = __next_rule(time: __rule_begin, stdoff: __continuation.__stdoff, save: __rule->__save.__time, __rules, current: __rule);
657 }
658
659 return __sys_info{
660 sys_info{.begin: std::max(a: __continuation_begin, b: __rule_begin),
661 .end: __continuation_end(__rule->__save.__time),
662 .offset: __continuation.__stdoff + __rule->__save.__time,
663 .save: chrono::duration_cast<minutes>(fd: __rule->__save.__time),
664 .abbrev: chrono::__format(__continuation, letters: __rule->__letters, save: __rule->__save.__time)},
665 true};
666}
667
668[[nodiscard]] static __sys_info_result __get_sys_info_basic(
669 sys_seconds __time, sys_seconds __continuation_begin, const __tz::__continuation& __continuation, seconds __save) {
670 sys_seconds __continuation_end = chrono::__until_to_sys_seconds(__continuation);
671 return __sys_info{
672 sys_info{.begin: __continuation_begin,
673 .end: __continuation_end,
674 .offset: __continuation.__stdoff + __save,
675 .save: chrono::duration_cast<minutes>(fd: __save),
676 .abbrev: chrono::__format(__continuation, letters: __continuation.__format, __save)},
677 true};
678}
679
680[[nodiscard]] static __sys_info_result
681__get_sys_info(sys_seconds __time,
682 sys_seconds __continuation_begin,
683 const __tz::__continuation& __continuation,
684 const __tz::__rules_storage_type& __rules_db) {
685 return std::visit(
686 visitor: [&](const auto& __value) {
687 using _Tp = decay_t<decltype(__value)>;
688 if constexpr (same_as<_Tp, std::string>)
689 return chrono::__get_sys_info_rule(
690 __time, __continuation_begin, __continuation, rules: __get_rules(__rules_db, __value));
691 else if constexpr (same_as<_Tp, monostate>)
692 return chrono::__get_sys_info_basic(__time, __continuation_begin, __continuation, save: chrono::seconds(0));
693 else if constexpr (same_as<_Tp, __tz::__save>)
694 return chrono::__get_sys_info_basic(__time, __continuation_begin, __continuation, save: __value.__time);
695 else
696 static_assert(false);
697 },
698 vs: __continuation.__rules);
699}
700
701// The transition from one continuation to the next continuation may result in
702// two constitutive continuations with the same "offset" information.
703// [time.zone.info.sys]/3
704// The begin and end data members indicate that, for the associated time_zone
705// and time_point, the offset and abbrev are in effect in the range
706// [begin, end). This information can be used to efficiently iterate the
707// transitions of a time_zone.
708//
709// Note that this does considers a change in the SAVE field not to be a
710// different sys_info, zdump does consider this different.
711// LWG XXXX The sys_info range should be affected by save
712// matches the behaviour of the Standard and zdump.
713//
714// Iff the "offsets" are the same '__current.__end' is replaced with
715// '__next.__end', which effectively merges the two objects in one object. The
716// function returns true if a merge occurred.
717[[nodiscard]] static bool __merge_continuation(sys_info& __current, const sys_info& __next) {
718 if (__current.end != __next.begin)
719 return false;
720
721 if (__current.offset != __next.offset || __current.abbrev != __next.abbrev || __current.save != __next.save)
722 return false;
723
724 __current.end = __next.end;
725 return true;
726}
727
728//===----------------------------------------------------------------------===//
729// Public API
730//===----------------------------------------------------------------------===//
731
732[[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI time_zone time_zone::__create(unique_ptr<time_zone::__impl>&& __p) {
733 _LIBCPP_ASSERT_NON_NULL(__p != nullptr, "initialized time_zone without a valid pimpl object");
734 time_zone result;
735 result.__impl_ = std::move(__p);
736 return result;
737}
738
739_LIBCPP_EXPORTED_FROM_ABI time_zone::~time_zone() = default;
740
741[[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI string_view time_zone::__name() const noexcept { return __impl_->__name(); }
742
743[[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI sys_info
744time_zone::__get_info(sys_seconds __time) const {
745 optional<sys_info> __result;
746 bool __valid_result = false; // true iff __result.has_value() is true and
747 // __result.begin <= __time < __result.end is true.
748 bool __can_merge = false;
749 sys_seconds __continuation_begin = sys_seconds::min();
750 // Iterates over the Zone entry and its continuations. Internally the Zone
751 // entry is split in a Zone information and the first continuation. The last
752 // continuation has no UNTIL field. This means the loop should always find a
753 // continuation.
754 //
755 // For more information on background of zone information please consult the
756 // following information
757 // [zic manual](https://www.man7.org/linux/man-pages/man8/zic.8.html)
758 // [tz source info](https://data.iana.org/time-zones/tz-how-to.html)
759 // On POSIX systems the zdump tool can be useful:
760 // zdump -v Asia/Hong_Kong
761 // Gives all transitions in the Hong Kong time zone.
762 //
763 // During iteration the result for the current continuation is returned. If
764 // no continuation is applicable it will return the end time as "error". When
765 // two continuations are contiguous and contain the "same" information these
766 // ranges are merged as one range.
767 // The merging requires keeping any result that occurs before __time,
768 // likewise when a valid result is found the algorithm needs to test the next
769 // continuation to see whether it can be merged. For example, Africa/Ceuta
770 // Continuations
771 // 0 s WE%sT 1929 (C1)
772 // 0 - WET 1967 (C2)
773 // 0 Sp WE%sT 1984 Mar 16 (C3)
774 //
775 // Rules
776 // R s 1926 1929 - O Sa>=1 24s 0 - (R1)
777 //
778 // R Sp 1967 o - Jun 3 12 1 S (R2)
779 //
780 // The rule R1 is the last rule used in C1. The rule R2 is the first rule in
781 // C3. Since R2 is the first rule this means when a continuation uses this
782 // rule its value prior to R2 will be SAVE 0 LETTERS of the first entry with a
783 // SAVE of 0, in this case WET.
784 // This gives the following changes in the information.
785 // 1928-10-07 00:00:00 C1 R1 becomes active: offset 0 save 0 abbrev WET
786 // 1929-01-01 00:00:00 C2 becomes active: offset 0 save 0 abbrev WET
787 // 1967-01-01 00:00:00 C3 becomes active: offset 0 save 0 abbrev WET
788 // 1967-06-03 12:00:00 C3 R2 becomes active: offset 0 save 1 abbrev WEST
789 //
790 // The first 3 entries are contiguous and contain the same information, this
791 // means the period [1928-10-07 00:00:00, 1967-06-03 12:00:00) should be
792 // returned in one sys_info object.
793
794 const auto& __continuations = __impl_->__continuations();
795 const __tz::__rules_storage_type& __rules_db = __impl_->__rules_db();
796 for (auto __it = __continuations.begin(); __it != __continuations.end(); ++__it) {
797 const auto& __continuation = *__it;
798 __sys_info_result __sys_info = chrono::__get_sys_info(__time, __continuation_begin, __continuation, __rules_db);
799
800 if (__sys_info) {
801 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
802 __sys_info->__info.begin < __sys_info->__info.end, "invalid sys_info range");
803
804 // Filters out dummy entries
805 // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31
806 // ...
807 // -4 A -04/-03 2000 Mar 3 (C1)
808 // -3 A -03/-02 (C2)
809 //
810 // ...
811 // R A 2000 o - Mar 3 0 0 -
812 // R A 2007 o - D 30 0 1 -
813 // ...
814 //
815 // This results in an entry
816 // [2000-03-03 03:00:00, 2000-03-03 04:00:00) -10800s 60min -03
817 // for [C1 & R1, C1, R2) which due to the end of the continuation is an
818 // one hour "sys_info". Instead the entry should be ignored and replaced
819 // by [C2 & R1, C2 & R2) which is the proper range
820 // "[2000-03-03 03:00:00, 2007-12-30 03:00:00) -02:00:00 60min -02
821
822 if (std::holds_alternative<string>(v: __continuation.__rules) && __sys_info->__can_merge &&
823 __sys_info->__info.begin + 12h > __sys_info->__info.end) {
824 __continuation_begin = __sys_info->__info.begin;
825 continue;
826 }
827
828 if (!__result) {
829 // First entry found, always keep it.
830 __result = __sys_info->__info;
831
832 __valid_result = __time >= __result->begin && __time < __result->end;
833 __can_merge = __sys_info->__can_merge;
834 } else if (__can_merge && chrono::__merge_continuation(current&: *__result, next: __sys_info->__info)) {
835 // The results are merged, update the result state. This may
836 // "overwrite" a valid sys_info object with another valid sys_info
837 // object.
838 __valid_result = __time >= __result->begin && __time < __result->end;
839 __can_merge = __sys_info->__can_merge;
840 } else {
841 // Here things get interesting:
842 // For example, America/Argentina/San_Luis
843 //
844 // -3 A -03/-02 2008 Ja 21 (C1)
845 // -4 Sa -04/-03 2009 O 11 (C2)
846 //
847 // R A 2007 o - D 30 0 1 - (R1)
848 //
849 // R Sa 2007 2008 - O Su>=8 0 1 - (R2)
850 //
851 // Based on C1 & R1 the end time of C1 is 2008-01-21 03:00:00
852 // Based on C2 & R2 the end time of C1 is 2008-01-21 02:00:00
853 // In this case the earlier time is the real time of the transition.
854 // However the algorithm used gives 2008-01-21 03:00:00.
855 //
856 // So we need to calculate the previous UNTIL in the current context and
857 // see whether it's earlier.
858
859 // The results could not be merged.
860 // - When we have a valid result that result is the final result.
861 // - Otherwise the result we had is before __time and the result we got
862 // is at a later time (possibly valid). This result is always better
863 // than the previous result.
864 if (__valid_result) {
865 return *__result;
866 } else {
867 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
868 __it != __continuations.begin(), "the first rule should always seed the result");
869 const auto& __last = *(__it - 1);
870 if (std::holds_alternative<string>(v: __last.__rules)) {
871 // Europe/Berlin
872 // 1 c CE%sT 1945 May 24 2 (C1)
873 // 1 So CE%sT 1946 (C2)
874 //
875 // R c 1944 1945 - Ap M>=1 2s 1 S (R1)
876 //
877 // R So 1945 o - May 24 2 2 M (R2)
878 //
879 // When C2 becomes active the time would be before the first rule R2,
880 // giving a 1 hour sys_info. This is not valid and the results need
881 // merging.
882
883 if (__result->end != __sys_info->__info.begin) {
884 // When the UTC gap between the rules is due to the change of
885 // offsets adjust the new time to remove the gap.
886 sys_seconds __end = __result->end - __result->offset;
887 sys_seconds __begin = __sys_info->__info.begin - __sys_info->__info.offset;
888 if (__end == __begin) {
889 __sys_info->__info.begin = __result->end;
890 }
891 }
892 }
893
894 __result = __sys_info->__info;
895 __valid_result = __time >= __result->begin && __time < __result->end;
896 __can_merge = __sys_info->__can_merge;
897 }
898 }
899 __continuation_begin = __result->end;
900 } else {
901 __continuation_begin = __sys_info.error();
902 }
903 }
904 if (__valid_result)
905 return *__result;
906
907 std::__throw_runtime_error("tzdb: corrupt db");
908}
909
910// Is the "__local_time" present in "__first" and "__second". If so the
911// local_info has an ambiguous result.
912[[nodiscard]] static bool
913__is_ambiguous(local_seconds __local_time, const sys_info& __first, const sys_info& __second) {
914 std::chrono::local_seconds __end_first{__first.end.time_since_epoch() + __first.offset};
915 std::chrono::local_seconds __begin_second{__second.begin.time_since_epoch() + __second.offset};
916
917 return __local_time < __end_first && __local_time >= __begin_second;
918}
919
920// Determines the result of the "__local_time". This expects the object
921// "__first" to be earlier in time than "__second".
922[[nodiscard]] static local_info
923__get_info(local_seconds __local_time, const sys_info& __first, const sys_info& __second) {
924 std::chrono::local_seconds __end_first{__first.end.time_since_epoch() + __first.offset};
925 std::chrono::local_seconds __begin_second{__second.begin.time_since_epoch() + __second.offset};
926
927 if (__local_time < __end_first) {
928 if (__local_time >= __begin_second)
929 // |--------|
930 // |------|
931 // ^
932 return {.result: local_info::ambiguous, .first: __first, .second: __second};
933
934 // |--------|
935 // |------|
936 // ^
937 return {.result: local_info::unique, .first: __first, .second: sys_info{}};
938 }
939
940 if (__local_time < __begin_second)
941 // |--------|
942 // |------|
943 // ^
944 return {.result: local_info::nonexistent, .first: __first, .second: __second};
945
946 // |--------|
947 // |------|
948 // ^
949 return {.result: local_info::unique, .first: __second, .second: sys_info{}};
950}
951
952[[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI local_info
953time_zone::__get_info(local_seconds __local_time) const {
954 seconds __local_seconds = __local_time.time_since_epoch();
955
956 /* An example of a typical year with a DST switch displayed in local time.
957 *
958 * At the first of April the time goes forward one hour. This means the
959 * time marked with ~~ is not a valid local time. This is represented by the
960 * nonexistent value in local_info.result.
961 *
962 * At the first of November the time goes backward one hour. This means the
963 * time marked with ^^ happens twice. This is represented by the ambiguous
964 * value in local_info.result.
965 *
966 * 2020.11.01 2021.04.01 2021.11.01
967 * offset +05 offset +05 offset +05
968 * save 0s save 1h save 0s
969 * |------------//----------|
970 * |---------//--------------|
971 * |-------------
972 * ~~ ^^
973 *
974 * These shifts can happen due to changes in the current time zone for a
975 * location. For example, Indian/Kerguelen switched only once. In 1950 from an
976 * offset of 0 hours to an offset of +05 hours.
977 *
978 * During all these shifts the UTC time will not have gaps.
979 */
980
981 // The code needs to determine the system time for the local time. There is no
982 // information available. Assume the offset between system time and local time
983 // is 0s. This gives an initial estimate.
984 sys_seconds __guess{__local_seconds};
985 sys_info __info = __get_info(time: __guess);
986
987 // At this point the offset can be used to determine an estimate for the local
988 // time. Before doing that, determine the offset and validate whether the
989 // local time is the range [chrono::local_seconds::min(),
990 // chrono::local_seconds::max()).
991 if (__local_seconds < 0s && __info.offset > 0s)
992 if (__local_seconds - chrono::local_seconds::min().time_since_epoch() < __info.offset)
993 return {.result: -1, .first: __info, .second: {}};
994
995 if (__local_seconds > 0s && __info.offset < 0s)
996 if (chrono::local_seconds::max().time_since_epoch() - __local_seconds < -__info.offset)
997 return {.result: -2, .first: __info, .second: {}};
998
999 // Based on the information found in the sys_info, the local time can be
1000 // converted to a system time. This resulting time can be in the following
1001 // locations of the sys_info:
1002 //
1003 // |---------//--------------|
1004 // 1 2.1 2.2 2.3 3
1005 //
1006 // 1. The estimate is before the returned sys_info object.
1007 // The result is either non-existent or unique in the previous sys_info.
1008 // 2. The estimate is in the sys_info object
1009 // - If the sys_info begin is not sys_seconds::min(), then it might be at
1010 // 2.1 and could be ambiguous with the previous or unique.
1011 // - If sys_info end is not sys_seconds::max(), then it might be at 2.3
1012 // and could be ambiguous with the next or unique.
1013 // - Else it is at 2.2 and always unique. This case happens when a
1014 // time zone has no transitions. For example, UTC or GMT+1.
1015 // 3. The estimate is after the returned sys_info object.
1016 // The result is either non-existent or unique in the next sys_info.
1017 //
1018 // There is no specification where the "middle" starts. Similar issues can
1019 // happen when sys_info objects are "short", then "unique in the next" could
1020 // become "ambiguous in the next and the one following". Theoretically there
1021 // is the option of the following time-line
1022 //
1023 // |------------|
1024 // |----|
1025 // |-----------------|
1026 //
1027 // However the local_info object only has 2 sys_info objects, so this option
1028 // is not tested.
1029
1030 sys_seconds __sys_time{__local_seconds - __info.offset};
1031 if (__sys_time < __info.begin)
1032 // Case 1 before __info
1033 return chrono::__get_info(__local_time, first: __get_info(time: __info.begin - 1s), second: __info);
1034
1035 if (__sys_time >= __info.end)
1036 // Case 3 after __info
1037 return chrono::__get_info(__local_time, first: __info, second: __get_info(time: __info.end));
1038
1039 // Case 2 in __info
1040 if (__info.begin != sys_seconds::min()) {
1041 // Case 2.1 Not at the beginning, when not ambiguous the result should test
1042 // case 2.3.
1043 sys_info __prev = __get_info(time: __info.begin - 1s);
1044 if (__is_ambiguous(__local_time, first: __prev, second: __info))
1045 return {.result: local_info::ambiguous, .first: __prev, .second: __info};
1046 }
1047
1048 if (__info.end == sys_seconds::max())
1049 // At the end so it's case 2.2
1050 return {.result: local_info::unique, .first: __info, .second: sys_info{}};
1051
1052 // This tests case 2.2 or case 2.3.
1053 return chrono::__get_info(__local_time, first: __info, second: __get_info(time: __info.end));
1054}
1055
1056} // namespace chrono
1057
1058_LIBCPP_END_NAMESPACE_STD
1059