1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html
10
11// TODO TZDB look at optimizations
12//
13// The current algorithm is correct but not efficient. For example, in a named
14// rule based continuation finding the next rule does quite a bit of work,
15// returns the next rule and "forgets" its state. This could be better.
16//
17// It would be possible to cache lookups. If a time for a zone is calculated its
18// sys_info could be kept and the next lookup could test whether the time is in
19// a "known" sys_info. The wording in the Standard hints at this slowness by
20// "suggesting" this could be implemented on the user's side.
21
22// TODO TZDB look at removing quirks
23//
24// The code has some special rules to adjust the timing at the continuation
25// switches. This works correctly, but some of the places feel odd. It would be
26// good to investigate this further and see whether all quirks are needed or
27// that there are better fixes.
28//
29// These quirks often use a 12h interval; this is the scan interval of zdump,
30// which implies there are no sys_info objects with a duration of less than 12h.
31
32#include <algorithm>
33#include <cctype>
34#include <chrono>
35#include <expected>
36#include <map>
37#include <numeric>
38#include <ranges>
39
40#include "include/tzdb/time_zone_private.h"
41#include "include/tzdb/tzdb_list_private.h"
42
43// TODO TZDB remove debug printing
44#ifdef PRINT
45# include <print>
46#endif
47
48_LIBCPP_BEGIN_NAMESPACE_STD
49
50#ifdef PRINT
// Debug-only formatter (compiled when PRINT is defined) that lets a
// chrono::sys_info be passed to std::print / std::format.
// The output has the shape "[begin, end) offset save abbrev".
template <>
struct formatter<chrono::sys_info, char> {
  // No format-spec is interpreted; the parse position is returned unchanged.
  template <class ParseContext>
  constexpr auto parse(ParseContext& ctx) -> typename ParseContext::iterator {
    return ctx.begin();
  }

  // Writes all five sys_info members; offset and save are printed as a count
  // followed by its unit suffix (%Q%q).
  template <class FormatContext>
  auto format(const chrono::sys_info& info, FormatContext& ctx) const -> typename FormatContext::iterator {
    return std::format_to(
        ctx.out(), "[{}, {}) {:%Q%q} {:%Q%q} {}", info.begin, info.end, info.offset, info.save, info.abbrev);
  }
};
64#endif
65
66namespace chrono {
67
68//===----------------------------------------------------------------------===//
69// Details
70//===----------------------------------------------------------------------===//
71
// Result of a successful sys_info lookup in one continuation.
struct __sys_info {
  sys_info __info;  // The "best" sys_info object found by the lookup.
  bool __can_merge; // Can the returned sys_info object be merged with the
                    // result of the next lookup?
};
76
// Return type for the helper functions that look up a sys_info.
// - The expected result holds the "best" sys_info object. This object can lie
//   before the requested time. Sometimes sys_info objects from different
//   continuations share their offset, save, and abbrev and these objects are
//   merged into one sys_info object. The __can_merge flag determines whether
//   the current result can be merged with the next result.
// - The unexpected result means no sys_info object was found; the stored time
//   is the time to be used for the next search iteration.
using __sys_info_result = expected<__sys_info, sys_seconds>;
86
87template <ranges::forward_range _Range,
88 class _Type,
89 class _Proj = identity,
90 indirect_strict_weak_order<const _Type*, projected<ranges::iterator_t<_Range>, _Proj>> _Comp = ranges::less>
91[[nodiscard]] static ranges::borrowed_iterator_t<_Range>
92__binary_find(_Range&& __r, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) {
93 auto __end = ranges::end(__r);
94 auto __ret = ranges::lower_bound(ranges::begin(__r), __end, __value, __comp, __proj);
95 if (__ret == __end)
96 return __end;
97
98 // When the value does not match the predicate it's equal and a valid result
99 // was found.
100 return !std::invoke(__comp, __value, std::invoke(__proj, *__ret)) ? __ret : __end;
101}
102
103// Format based on https://data.iana.org/time-zones/tz-how-to.html
104//
105// 1 a time zone abbreviation that is a string of three or more characters that
106// are either ASCII alphanumerics, "+", or "-"
107// 2 the string "%z", in which case the "%z" will be replaced by a numeric time
108// zone abbreviation
109// 3 a pair of time zone abbreviations separated by a slash ('/'), in which
110// case the first string is the abbreviation for the standard time name and
111// the second string is the abbreviation for the daylight saving time name
112// 4 a string containing "%s", in which case the "%s" will be replaced by the
113// text in the appropriate Rule's LETTER column, and the resulting string
114// should be a time zone abbreviation
115//
116// Rule 1 is not strictly validated since America/Barbados uses a two letter
117// abbreviation AT.
// Builds the time zone abbreviation of a continuation from its FORMAT field.
//
// __continuation  provides the FORMAT string and, for "%z", the STDOFF.
// __letters       the text that replaces "%s".
// __save          the SAVE in effect. A non-zero value selects the part after
//                 a '/' separator; the value is added to STDOFF for "%z".
//
// Errors are reported through std::__throw_runtime_error when FORMAT holds an
// unknown escape sequence, an invalid character, a trailing '%', a "%z" that
// is not the entire field, or when the resulting abbreviation is empty.
[[nodiscard]] static string
__format(const __tz::__continuation& __continuation, const string& __letters, seconds __save) {
  bool __shift = false; // true directly after a '%' has been read
  string __result;
  for (char __c : __continuation.__format) {
    if (__shift) {
      switch (__c) {
      case 's':
        std::ranges::copy(__letters, std::back_inserter(__result));
        break;

      case 'z': {
        if (__continuation.__format.size() != 2)
          std::__throw_runtime_error(
              std::format("corrupt tzdb FORMAT field: %z should be the entire contents, instead contains '{}'",
                          __continuation.__format)
                  .c_str());
        // The sign is written separately, hh_mm_ss then holds the magnitude.
        chrono::hh_mm_ss __offset{__continuation.__stdoff + __save};
        if (__offset.is_negative()) {
          __result += '-';
          __offset = chrono::hh_mm_ss{-(__continuation.__stdoff + __save)};
        } else
          __result += '+';

        // The minutes are only written when they are not zero.
        if (__offset.minutes() != 0min)
          std::format_to(std::back_inserter(__result), "{:%H%M}", __offset);
        else
          std::format_to(std::back_inserter(__result), "{:%H}", __offset);
      } break;

      default:
        std::__throw_runtime_error(
            std::format("corrupt tzdb FORMAT field: invalid sequence '%{}' found, expected %s or %z", __c).c_str());
      }
      __shift = false;

    } else if (__c == '/') {
      // "STD/DST": with a SAVE the text read so far (STD) is discarded and the
      // DST part is collected, without a SAVE the STD part is complete.
      if (__save != 0s)
        __result.clear();
      else
        break;

    } else if (__c == '%') {
      __shift = true;
    } else if (__c == '+' || __c == '-' || std::isalnum(static_cast<unsigned char>(__c))) {
      // The cast is required: passing a negative char value to isalnum is
      // undefined behaviour.
      __result.push_back(__c);
    } else {
      std::__throw_runtime_error(
          std::format(
              "corrupt tzdb FORMAT field: invalid character '{}' found, expected +, -, or an alphanumeric value", __c)
              .c_str());
    }
  }

  if (__shift)
    std::__throw_runtime_error("corrupt tzdb FORMAT field: input ended with the start of the escape sequence '%'");

  if (__result.empty())
    std::__throw_runtime_error("corrupt tzdb FORMAT field: result is empty");

  return __result;
}
180
181[[nodiscard]] static sys_seconds __to_sys_seconds(year_month_day __ymd, seconds __seconds) {
182 seconds __result = static_cast<sys_days>(__ymd).time_since_epoch() + __seconds;
183 return sys_seconds{__result};
184}
185
186[[nodiscard]] static seconds __at_to_sys_seconds(const __tz::__continuation& __continuation) {
187 switch (__continuation.__at.__clock) {
188 case __tz::__clock::__local:
189 return __continuation.__at.__time - __continuation.__stdoff -
190 std::visit(
191 visitor: [](const auto& __value) {
192 using _Tp = decay_t<decltype(__value)>;
193 if constexpr (same_as<_Tp, monostate>)
194 return chrono::seconds{0};
195 else if constexpr (same_as<_Tp, __tz::__save>)
196 return chrono::duration_cast<seconds>(__value.__time);
197 else if constexpr (same_as<_Tp, std::string>)
198 // For a named rule based continuation the SAVE depends on the RULE
199 // active at the end. This should be determined separately.
200 return chrono::seconds{0};
201 else
202 static_assert(sizeof(_Tp) == 0); // TODO TZDB static_assert(false); after droping clang-16 support
203
204 std::__libcpp_unreachable();
205 },
206 vs: __continuation.__rules);
207
208 case __tz::__clock::__universal:
209 return __continuation.__at.__time;
210
211 case __tz::__clock::__standard:
212 return __continuation.__at.__time - __continuation.__stdoff;
213 }
214 std::__libcpp_unreachable();
215}
216
217[[nodiscard]] static year_month_day __to_year_month_day(year __year, month __month, __tz::__on __on) {
218 return std::visit(
219 visitor: [&](const auto& __value) {
220 using _Tp = decay_t<decltype(__value)>;
221 if constexpr (same_as<_Tp, chrono::day>)
222 return year_month_day{__year, __month, __value};
223 else if constexpr (same_as<_Tp, weekday_last>)
224 return year_month_day{static_cast<sys_days>(year_month_weekday_last{__year, __month, __value})};
225 else if constexpr (same_as<_Tp, __tz::__constrained_weekday>)
226 return __value(__year, __month);
227 else
228 static_assert(sizeof(_Tp) == 0); // TODO TZDB static_assert(false); after droping clang-16 support
229
230 std::__libcpp_unreachable();
231 },
232 vs&: __on);
233}
234
235[[nodiscard]] static sys_seconds __until_to_sys_seconds(const __tz::__continuation& __continuation) {
236 // Does UNTIL contain the magic value for the last continuation?
237 if (__continuation.__year == chrono::year::min())
238 return sys_seconds::max();
239
240 year_month_day __ymd = chrono::__to_year_month_day(year: __continuation.__year, month: __continuation.__in, on: __continuation.__on);
241 return chrono::__to_sys_seconds(__ymd, seconds: chrono::__at_to_sys_seconds(__continuation));
242}
243
244// Holds the UNTIL time for a continuation with a named rule.
245//
// Unlike continuations with a fixed SAVE, named rules have a variable SAVE.
247// This means when the UNTIL uses the local wall time the actual UNTIL value can
248// only be determined when the SAVE is known. This class holds that abstraction.
249class __named_rule_until {
250public:
251 explicit __named_rule_until(const __tz::__continuation& __continuation)
252 : __until_{chrono::__until_to_sys_seconds(__continuation)},
253 __needs_adjustment_{
254 // The last continuation of a ZONE has no UNTIL which basically is
255 // until the end of _local_ time. This value is expressed by
256 // sys_seconds::max(). Subtracting the SAVE leaves large value.
257 // However SAVE can be negative, which would add a value to maximum
258 // leading to undefined behaviour. In practice this often results in
259 // an overflow to a very small value.
260 __until_ != sys_seconds::max() && __continuation.__at.__clock == __tz::__clock::__local} {}
261
262 // Gives the unadjusted until value, this is useful when the SAVE is not known
263 // at all.
264 sys_seconds __until() const noexcept { return __until_; }
265
266 bool __needs_adjustment() const noexcept { return __needs_adjustment_; }
267
268 // Returns the UNTIL adjusted for SAVE.
269 sys_seconds operator()(seconds __save) const noexcept { return __until_ - __needs_adjustment_ * __save; }
270
271private:
272 sys_seconds __until_;
273 bool __needs_adjustment_;
274};
275
276[[nodiscard]] static seconds __at_to_seconds(seconds __stdoff, const __tz::__rule& __rule) {
277 switch (__rule.__at.__clock) {
278 case __tz::__clock::__local:
279 // Local time and standard time behave the same. This is not
280 // correct. Local time needs to adjust for the current saved time.
281 // To know the saved time the rules need to be known and sorted.
282 // This needs a time so to avoid the chicken and egg adjust the
283 // saving of the local time later.
284 return __rule.__at.__time - __stdoff;
285
286 case __tz::__clock::__universal:
287 return __rule.__at.__time;
288
289 case __tz::__clock::__standard:
290 return __rule.__at.__time - __stdoff;
291 }
292 std::__libcpp_unreachable();
293}
294
295[[nodiscard]] static sys_seconds __from_to_sys_seconds(seconds __stdoff, const __tz::__rule& __rule, year __year) {
296 year_month_day __ymd = chrono::__to_year_month_day(__year, month: __rule.__in, on: __rule.__on);
297
298 seconds __at = chrono::__at_to_seconds(__stdoff, __rule);
299 return chrono::__to_sys_seconds(__ymd, seconds: __at);
300}
301
302[[nodiscard]] static sys_seconds __from_to_sys_seconds(seconds __stdoff, const __tz::__rule& __rule) {
303 return chrono::__from_to_sys_seconds(__stdoff, __rule, year: __rule.__from);
304}
305
306[[nodiscard]] static const vector<__tz::__rule>&
307__get_rules(const __tz::__rules_storage_type& __rules_db, const string& __rule_name) {
308 auto __result = chrono::__binary_find(r: __rules_db, value: __rule_name, comp: {}, proj: [](const auto& __p) { return __p.first; });
309 if (__result == std::end(c: __rules_db))
310 std::__throw_runtime_error(("corrupt tzdb: rule '" + __rule_name + " 'does not exist").c_str());
311
312 return __result->second;
313}
314
315// Returns the letters field for a time before the first rule.
316//
317// Per https://data.iana.org/time-zones/tz-how-to.html
318// One wrinkle, not fully explained in zic.8.txt, is what happens when switching
319// to a named rule. To what values should the SAVE and LETTER data be
320// initialized?
321//
322// 1 If at least one transition has happened, use the SAVE and LETTER data from
323// the most recent.
324// 2 If switching to a named rule before any transition has happened, assume
325// standard time (SAVE zero), and use the LETTER data from the earliest
326// transition with a SAVE of zero.
327//
328// This function implements case 2.
329[[nodiscard]] static string __letters_before_first_rule(const vector<__tz::__rule>& __rules) {
330 auto __letters =
331 __rules //
332 | views::filter([](const __tz::__rule& __rule) { return __rule.__save.__time == 0s; }) //
333 | views::transform([](const __tz::__rule& __rule) { return __rule.__letters; }) //
334 | views::take(1);
335
336 if (__letters.empty())
337 std::__throw_runtime_error("corrupt tzdb: rule has zero entries");
338
339 return __letters.front();
340}
341
342// Determines the information based on the continuation and the rules.
343//
344// There are several special cases to take into account
345//
346// === Entries before the first rule becomes active ===
347// Asia/Hong_Kong
348// 9 - JST 1945 N 18 2 // (1)
349// 8 HK HK%sT // (2)
350// R HK 1946 o - Ap 21 0 1 S // (3)
// There (1) is active until November 18th 1945 at 02:00, after this time
// (2) becomes active. The first rule entry for HK (3) becomes active
// from April 21st 1946 at 01:00. In the period between (2) is active.
354// This entry has an offset.
355// This entry has no save, letters, or dst flag. So in the period
356// after (1) and until (3) no rule entry is associated with the time.
357
358[[nodiscard]] static sys_info __get_sys_info_before_first_rule(
359 sys_seconds __begin,
360 sys_seconds __end,
361 const __tz::__continuation& __continuation,
362 const vector<__tz::__rule>& __rules) {
363 return sys_info{
364 .begin: __begin,
365 .end: __end,
366 .offset: __continuation.__stdoff,
367 .save: chrono::minutes(0),
368 .abbrev: chrono::__format(__continuation, letters: __letters_before_first_rule(__rules), save: 0s)};
369}
370
371// Returns the sys_info object for a time before the first rule.
372// When this first rule has a SAVE of 0s the sys_info for the time before the
373// first rule and for the first rule are identical and will be merged.
374[[nodiscard]] static sys_info __get_sys_info_before_first_rule(
375 sys_seconds __begin,
376 sys_seconds __rule_end, // The end used when SAVE != 0s
377 sys_seconds __next_end, // The end used when SAVE == 0s the times are merged
378 const __tz::__continuation& __continuation,
379 const vector<__tz::__rule>& __rules,
380 vector<__tz::__rule>::const_iterator __rule) {
381 if (__rule->__save.__time != 0s)
382 return __get_sys_info_before_first_rule(__begin, end: __rule_end, __continuation, __rules);
383
384 return sys_info{
385 .begin: __begin, .end: __next_end, .offset: __continuation.__stdoff, .save: 0min, .abbrev: chrono::__format(__continuation, letters: __rule->__letters, save: 0s)};
386}
387
388[[nodiscard]] static seconds __at_to_seconds(seconds __stdoff, seconds __save, const __tz::__rule& __rule) {
389 switch (__rule.__at.__clock) {
390 case __tz::__clock::__local:
391 return __rule.__at.__time - __stdoff - __save;
392
393 case __tz::__clock::__universal:
394 return __rule.__at.__time;
395
396 case __tz::__clock::__standard:
397 return __rule.__at.__time - __stdoff;
398 }
399 std::__libcpp_unreachable();
400}
401
402[[nodiscard]] static sys_seconds
403__rule_to_sys_seconds(seconds __stdoff, seconds __save, const __tz::__rule& __rule, year __year) {
404 year_month_day __ymd = chrono::__to_year_month_day(__year, month: __rule.__in, on: __rule.__on);
405
406 seconds __at = chrono::__at_to_seconds(__stdoff, __save, __rule);
407 return chrono::__to_sys_seconds(__ymd, seconds: __at);
408}
409
410// Returns the first rule after __time.
411// Note that a rule can be "active" in multiple years, this may result in an
412// infinite loop where the same rule is returned every time, use __current to
413// guard against that.
414//
415// When no next rule exists the returned time will be sys_seconds::max(). This
416// can happen in practice. For example,
417//
418// R So 1945 o - May 24 2 2 M
419// R So 1945 o - S 24 3 1 S
420// R So 1945 o - N 18 2s 0 -
421//
422// Has 3 rules that are all only active in 1945.
423[[nodiscard]] static pair<sys_seconds, vector<__tz::__rule>::const_iterator>
424__next_rule(sys_seconds __time,
425 seconds __stdoff,
426 seconds __save,
427 const vector<__tz::__rule>& __rules,
428 vector<__tz::__rule>::const_iterator __current) {
429 year __year = year_month_day{chrono::floor<days>(t: __time)}.year();
430
431 // Note it would probably be better to store the pairs in a vector and then
432 // use min() to get the smallest element
433 map<sys_seconds, vector<__tz::__rule>::const_iterator> __candidates;
434 // Note this evaluates all rules which is a waste of effort; when the entries
435 // are beyond the current year's "next year" (where "next year" is not always
436 // year + 1) the algorithm should end.
437 for (auto __it = __rules.begin(); __it != __rules.end(); ++__it) {
438 for (year __y = __it->__from; __y <= __it->__to; ++__y) {
439 // Adding the current entry for the current year may lead to infinite
440 // loops due to the SAVE adjustment. Skip these entries.
441 if (__y == __year && __it == __current)
442 continue;
443
444 sys_seconds __t = chrono::__rule_to_sys_seconds(__stdoff, __save, rule: *__it, year: __y);
445 if (__t <= __time)
446 continue;
447
448 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(!__candidates.contains(__t), "duplicated rule");
449 __candidates[__t] = __it;
450 break;
451 }
452 }
453
454 if (!__candidates.empty()) [[likely]] {
455 auto __it = __candidates.begin();
456
457 // When no rule is selected the time before the first rule and the first rule
458 // should not be merged.
459 if (__time == sys_seconds::min())
460 return *__it;
461
462 // There can be two constitutive rules that are the same. For example,
463 // Hong Kong
464 //
465 // R HK 1973 o - D 30 3:30 1 S (R1)
466 // R HK 1965 1976 - Ap Su>=16 3:30 1 S (R2)
467 //
468 // 1973-12-29 19:30:00 R1 becomes active.
469 // 1974-04-20 18:30:00 R2 becomes active.
470 // Both rules have a SAVE of 1 hour and LETTERS are S for both of them.
471 while (__it != __candidates.end()) {
472 if (__current->__save.__time != __it->second->__save.__time || __current->__letters != __it->second->__letters)
473 return *__it;
474
475 ++__it;
476 }
477 }
478
479 return {sys_seconds::max(), __rules.end()};
480}
481
482// Returns the first rule of a set of rules.
483// This is not always the first of the listed rules. For example
484// R Sa 2008 2009 - Mar Su>=8 0 0 -
485// R Sa 2007 2008 - O Su>=8 0 1 -
486// The transition in October 2007 happens before the transition in March 2008.
487[[nodiscard]] static vector<__tz::__rule>::const_iterator
488__first_rule(seconds __stdoff, const vector<__tz::__rule>& __rules) {
489 return chrono::__next_rule(time: sys_seconds::min(), __stdoff, save: 0s, __rules, current: __rules.end()).second;
490}
491
// Returns the sys_info for __time in a continuation that uses a named rule.
//
// __time                the time sought; it is clamped to __continuation_begin.
// __continuation_begin  the time at which the continuation becomes active.
// __continuation        the continuation that is evaluated.
// __rules               the rules of the named rule set of the continuation.
//
// The function always returns an expected value. Its __can_merge flag is true
// when the returned sys_info ends at the (SAVE adjusted) end of the
// continuation.
[[nodiscard]] static __sys_info_result __get_sys_info_rule(
    sys_seconds __time,
    sys_seconds __continuation_begin,
    const __tz::__continuation& __continuation,
    const vector<__tz::__rule>& __rules) {
  auto __rule = chrono::__first_rule(__continuation.__stdoff, __rules);
  _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__rule != __rules.end(), "the set of rules has no first rule");

  // Avoid selecting a time before the start of the continuation
  __time = std::max(__time, __continuation_begin);

  sys_seconds __rule_begin = chrono::__from_to_sys_seconds(__continuation.__stdoff, *__rule);

  // The time sought is very likely inside the current rule.
  // When the continuation's UNTIL uses the local clock there are edge cases
  // where this is not true.
  //
  // Start to walk the rules to find the proper one.
  //
  // For now we just walk all the rules TODO TZDB investigate whether a smarter
  // algorithm would work.
  auto __next = chrono::__next_rule(__rule_begin, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);

  // Ignore small steps, this happens with America/Punta_Arenas for the
  // transition
  // -4:42:46 - SMT 1927 S
  // -5 x -05/-04 1932 S
  // ...
  //
  // R x 1927 1931 - S 1 0 1 -
  // R x 1928 1932 - Ap 1 0 0 -
  //
  // America/Punta_Arenas Thu Sep 1 04:42:45 1927 UT = Thu Sep 1 00:42:45 1927 -04 isdst=1 gmtoff=-14400
  // America/Punta_Arenas Sun Apr 1 03:59:59 1928 UT = Sat Mar 31 23:59:59 1928 -04 isdst=1 gmtoff=-14400
  // America/Punta_Arenas Sun Apr 1 04:00:00 1928 UT = Sat Mar 31 23:00:00 1928 -05 isdst=0 gmtoff=-18000
  //
  // Without this there will be a transition
  // [1927-09-01 04:42:45, 1927-09-01 05:00:00) -05:00:00 0min -05

  // The time sought lies before the first rule with a SAVE becomes active.
  if (sys_seconds __begin = __rule->__save.__time != 0s ? __rule_begin : __next.first; __time < __begin) {
    if (__continuation_begin == sys_seconds::min() || __begin - __continuation_begin > 12h)
      return __sys_info{__get_sys_info_before_first_rule(
                            __continuation_begin, __rule_begin, __next.first, __continuation, __rules, __rule),
                        false};

    // Europe/Berlin
    // 1 c CE%sT 1945 May 24 2 (C1)
    // 1 So CE%sT 1946 (C2)
    //
    // R c 1944 1945 - Ap M>=1 2s 1 S (R1)
    //
    // R So 1945 o - May 24 2 2 M (R2)
    //
    // When C2 becomes active the time would be before the first rule R2,
    // giving a 1 hour sys_info.
    seconds __save = __rule->__save.__time;
    __named_rule_until __continuation_end{__continuation};
    sys_seconds __sys_info_end = std::min(__continuation_end(__save), __next.first);

    return __sys_info{
        sys_info{__continuation_begin,
                 __sys_info_end,
                 __continuation.__stdoff + __save,
                 chrono::duration_cast<minutes>(__save),
                 chrono::__format(__continuation, __rule->__letters, __save)},
        __sys_info_end == __continuation_end(__save)};
  }

  // The first rule has a SAVE of zero; the time before the rule and the rule
  // itself are reported as one sys_info that starts at the continuation.
  // NOTE(review): the original comment here read "See above for
  // America/Asuncion", but no such example is present above; the related
  // example above is America/Punta_Arenas — confirm.
  if (__rule->__save.__time == 0s && __time < __next.first) {
    return __sys_info{
        sys_info{__continuation_begin,
                 __next.first,
                 __continuation.__stdoff,
                 0min,
                 chrono::__format(__continuation, __rule->__letters, 0s)},
        false};
  }

  if (__rule->__save.__time != 0s) {
    // another fix for America/Punta_Arenas when not at the start of the
    // sys_info object.
    seconds __save = __rule->__save.__time;
    if (__continuation_begin >= __rule_begin - __save && __time < __next.first) {
      return __sys_info{
          sys_info{__continuation_begin,
                   __next.first,
                   __continuation.__stdoff + __save,
                   chrono::duration_cast<minutes>(__save),
                   chrono::__format(__continuation, __rule->__letters, __save)},
          false};
    }
  }

  // Walk the rules in order of activation until the rule containing __time,
  // or the end of the continuation, is reached.
  __named_rule_until __continuation_end{__continuation};
  while (__next.second != __rules.end()) {
#ifdef PRINT
    std::print(
        stderr,
        "Rule for {}: [{}, {}) off={} save={} duration={}\n",
        __time,
        __rule_begin,
        __next.first,
        __continuation.__stdoff,
        __rule->__save.__time,
        __next.first - __rule_begin);
#endif

    sys_seconds __end = __continuation_end(__rule->__save.__time);

    sys_seconds __sys_info_begin = std::max(__continuation_begin, __rule_begin);
    sys_seconds __sys_info_end   = std::min(__end, __next.first);
    seconds __diff               = chrono::abs(__sys_info_end - __sys_info_begin);

    if (__diff < 12h) {
      // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31
      // -4:16:48 - CMT 1920 May
      // -4 - -04 1930 D
      // -4 A -04/-03 1969 O 5
      // -3 A -03/-02 1999 O 3
      // -4 A -04/-03 2000 Mar 3
      // ...
      //
      // ...
      // R A 1989 1992 - O Su>=15 0 1 -
      // R A 1999 o - O Su>=1 0 1 -
      // R A 2000 o - Mar 3 0 0 -
      // R A 2007 o - D 30 0 1 -
      // ...

      // The 1999 switch uses the same rule, but with a different stdoff.
      // R A 1999 o - O Su>=1 0 1 -
      // stdoff -3 -> 1999-10-03 03:00:00
      // stdoff -4 -> 1999-10-03 04:00:00
      // This generates an invalid entry and this is evaluated as a transition.
      // Looking at the zdump like output in libc++ this generates jumps in
      // the UTC time.

      // Skip the short entry by advancing to the next rule.
      __rule         = __next.second;
      __next         = __next_rule(__next.first, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);
      __end          = __continuation_end(__rule->__save.__time);
      __sys_info_end = std::min(__end, __next.first);
    }

    if ((__time >= __rule_begin && __time < __next.first) || __next.first >= __end) {
      __sys_info_begin = std::max(__continuation_begin, __rule_begin);
      __sys_info_end   = std::min(__end, __next.first);

      return __sys_info{
          sys_info{__sys_info_begin,
                   __sys_info_end,
                   __continuation.__stdoff + __rule->__save.__time,
                   chrono::duration_cast<minutes>(__rule->__save.__time),
                   chrono::__format(__continuation, __rule->__letters, __rule->__save.__time)},
          __sys_info_end == __end};
    }

    __rule_begin = __next.first;
    __rule       = __next.second;
    __next       = __next_rule(__rule_begin, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);
  }

  // No further rule exists; the current rule stays active until the end of
  // the continuation.
  return __sys_info{
      sys_info{std::max(__continuation_begin, __rule_begin),
               __continuation_end(__rule->__save.__time),
               __continuation.__stdoff + __rule->__save.__time,
               chrono::duration_cast<minutes>(__rule->__save.__time),
               chrono::__format(__continuation, __rule->__letters, __rule->__save.__time)},
      true};
}
662
// Returns the sys_info of a continuation without a named rule.
// The result spans the entire continuation, uses the FORMAT field unmodified
// as abbreviation and can always be merged. __time is not used.
[[nodiscard]] static __sys_info_result __get_sys_info_basic(
    sys_seconds __time, sys_seconds __continuation_begin, const __tz::__continuation& __continuation, seconds __save) {
  return __sys_info{
      sys_info{.begin  = __continuation_begin,
               .end    = chrono::__until_to_sys_seconds(__continuation),
               .offset = __continuation.__stdoff + __save,
               .save   = chrono::duration_cast<minutes>(__save),
               .abbrev = __continuation.__format},
      true};
}
674
// Dispatches on the RULES field of the continuation:
// - no rule:     basic lookup with a SAVE of zero,
// - fixed SAVE:  basic lookup with that SAVE,
// - named rule:  rule based lookup using the rules stored in __rules_db.
[[nodiscard]] static __sys_info_result
__get_sys_info(sys_seconds __time,
               sys_seconds __continuation_begin,
               const __tz::__continuation& __continuation,
               const __tz::__rules_storage_type& __rules_db) {
  return std::visit(
      [&](const auto& __rules_field) {
        using _Tp = decay_t<decltype(__rules_field)>;
        if constexpr (same_as<_Tp, monostate>)
          return chrono::__get_sys_info_basic(__time, __continuation_begin, __continuation, chrono::seconds(0));
        else if constexpr (same_as<_Tp, __tz::__save>)
          return chrono::__get_sys_info_basic(__time, __continuation_begin, __continuation, __rules_field.__time);
        else if constexpr (same_as<_Tp, std::string>)
          return chrono::__get_sys_info_rule(
              __time, __continuation_begin, __continuation, __get_rules(__rules_db, __rules_field));
        else
          static_assert(sizeof(_Tp) == 0); // TODO TZDB static_assert(false); after dropping clang-16 support

        std::__libcpp_unreachable();
      },
      __continuation.__rules);
}
697
// The transition from one continuation to the next continuation may result in
// two consecutive continuations with the same "offset" information.
700// [time.zone.info.sys]/3
701// The begin and end data members indicate that, for the associated time_zone
702// and time_point, the offset and abbrev are in effect in the range
703// [begin, end). This information can be used to efficiently iterate the
704// transitions of a time_zone.
705//
// Note that the wording above does not consider a change in the SAVE field to
// be a different sys_info, while zdump does consider this different. The
// function below also compares SAVE, so a change in SAVE prevents a merge.
// LWG XXXX The sys_info range should be affected by save
// matches the behaviour of the Standard and zdump.
710//
711// Iff the "offsets" are the same '__current.__end' is replaced with
712// '__next.__end', which effectively merges the two objects in one object. The
713// function returns true if a merge occurred.
714[[nodiscard]] bool __merge_continuation(sys_info& __current, const sys_info& __next) {
715 if (__current.end != __next.begin)
716 return false;
717
718 if (__current.offset != __next.offset || __current.abbrev != __next.abbrev || __current.save != __next.save)
719 return false;
720
721 __current.end = __next.end;
722 return true;
723}
724
725//===----------------------------------------------------------------------===//
726// Public API
727//===----------------------------------------------------------------------===//
728
729[[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI time_zone time_zone::__create(unique_ptr<time_zone::__impl>&& __p) {
730 _LIBCPP_ASSERT_NON_NULL(__p != nullptr, "initialized time_zone without a valid pimpl object");
731 time_zone result;
732 result.__impl_ = std::move(__p);
733 return result;
734}
735
// The destructor is defaulted in this translation unit.
// NOTE(review): presumably so the unique_ptr to __impl is destroyed where
// __impl is a complete type — confirm against the header.
_LIBCPP_EXPORTED_FROM_ABI time_zone::~time_zone() = default;
737
// Returns the name reported by the implementation object.
[[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI string_view time_zone::__name() const noexcept { return __impl_->__name(); }
739
740[[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI sys_info
741time_zone::__get_info(sys_seconds __time) const {
742 optional<sys_info> __result;
743 bool __valid_result = false; // true iff __result.has_value() is true and
744 // __result.begin <= __time < __result.end is true.
745 bool __can_merge = false;
746 sys_seconds __continuation_begin = sys_seconds::min();
747 // Iterates over the Zone entry and its continuations. Internally the Zone
748 // entry is split in a Zone information and the first continuation. The last
749 // continuation has no UNTIL field. This means the loop should always find a
750 // continuation.
751 //
752 // For more information on background of zone information please consult the
753 // following information
754 // [zic manual](https://www.man7.org/linux/man-pages/man8/zic.8.html)
755 // [tz source info](https://data.iana.org/time-zones/tz-how-to.html)
756 // On POSIX systems the zdump tool can be useful:
757 // zdump -v Asia/Hong_Kong
758 // Gives all transitions in the Hong Kong time zone.
759 //
760 // During iteration the result for the current continuation is returned. If
761 // no continuation is applicable it will return the end time as "error". When
762 // two continuations are contiguous and contain the "same" information these
763 // ranges are merged as one range.
764 // The merging requires keeping any result that occurs before __time,
765 // likewise when a valid result is found the algorithm needs to test the next
766 // continuation to see whether it can be merged. For example, Africa/Ceuta
767 // Continuations
768 // 0 s WE%sT 1929 (C1)
769 // 0 - WET 1967 (C2)
770 // 0 Sp WE%sT 1984 Mar 16 (C3)
771 //
772 // Rules
773 // R s 1926 1929 - O Sa>=1 24s 0 - (R1)
774 //
775 // R Sp 1967 o - Jun 3 12 1 S (R2)
776 //
777 // The rule R1 is the last rule used in C1. The rule R2 is the first rule in
778 // C3. Since R2 is the first rule this means when a continuation uses this
779 // rule its value prior to R2 will be SAVE 0 LETTERS of the first entry with a
780 // SAVE of 0, in this case WET.
781 // This gives the following changes in the information.
782 // 1928-10-07 00:00:00 C1 R1 becomes active: offset 0 save 0 abbrev WET
783 // 1929-01-01 00:00:00 C2 becomes active: offset 0 save 0 abbrev WET
784 // 1967-01-01 00:00:00 C3 becomes active: offset 0 save 0 abbrev WET
785 // 1967-06-03 12:00:00 C3 R2 becomes active: offset 0 save 1 abbrev WEST
786 //
787 // The first 3 entries are contiguous and contain the same information, this
788 // means the period [1928-10-07 00:00:00, 1967-06-03 12:00:00) should be
789 // returned in one sys_info object.
790
791 const auto& __continuations = __impl_->__continuations();
792 const __tz::__rules_storage_type& __rules_db = __impl_->__rules_db();
793 for (auto __it = __continuations.begin(); __it != __continuations.end(); ++__it) {
794 const auto& __continuation = *__it;
795 __sys_info_result __sys_info = chrono::__get_sys_info(__time, __continuation_begin, __continuation, __rules_db);
796
797 if (__sys_info) {
798 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
799 __sys_info->__info.begin < __sys_info->__info.end, "invalid sys_info range");
800
801 // Filters out dummy entries
802 // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31
803 // ...
804 // -4 A -04/-03 2000 Mar 3 (C1)
805 // -3 A -03/-02 (C2)
806 //
807 // ...
808 // R A 2000 o - Mar 3 0 0 -
809 // R A 2007 o - D 30 0 1 -
810 // ...
811 //
812 // This results in an entry
813 // [2000-03-03 03:00:00, 2000-03-03 04:00:00) -10800s 60min -03
814 // for [C1 & R1, C1, R2) which due to the end of the continuation is an
815 // one hour "sys_info". Instead the entry should be ignored and replaced
816 // by [C2 & R1, C2 & R2) which is the proper range
817 // "[2000-03-03 03:00:00, 2007-12-30 03:00:00) -02:00:00 60min -02
818
819 if (std::holds_alternative<string>(v: __continuation.__rules) && __sys_info->__can_merge &&
820 __sys_info->__info.begin + 12h > __sys_info->__info.end) {
821 __continuation_begin = __sys_info->__info.begin;
822 continue;
823 }
824
825 if (!__result) {
826 // First entry found, always keep it.
827 __result = __sys_info->__info;
828
829 __valid_result = __time >= __result->begin && __time < __result->end;
830 __can_merge = __sys_info->__can_merge;
831 } else if (__can_merge && chrono::__merge_continuation(current&: *__result, next: __sys_info->__info)) {
832 // The results are merged, update the result state. This may
833 // "overwrite" a valid sys_info object with another valid sys_info
834 // object.
835 __valid_result = __time >= __result->begin && __time < __result->end;
836 __can_merge = __sys_info->__can_merge;
837 } else {
838 // Here things get interesting:
839 // For example, America/Argentina/San_Luis
840 //
841 // -3 A -03/-02 2008 Ja 21 (C1)
842 // -4 Sa -04/-03 2009 O 11 (C2)
843 //
844 // R A 2007 o - D 30 0 1 - (R1)
845 //
846 // R Sa 2007 2008 - O Su>=8 0 1 - (R2)
847 //
848 // Based on C1 & R1 the end time of C1 is 2008-01-21 03:00:00
849 // Based on C2 & R2 the end time of C1 is 2008-01-21 02:00:00
850 // In this case the earlier time is the real time of the transition.
851 // However the algorithm used gives 2008-01-21 03:00:00.
852 //
853 // So we need to calculate the previous UNTIL in the current context and
854 // see whether it's earlier.
855
856 // The results could not be merged.
857 // - When we have a valid result that result is the final result.
858 // - Otherwise the result we had is before __time and the result we got
859 // is at a later time (possibly valid). This result is always better
860 // than the previous result.
861 if (__valid_result) {
862 return *__result;
863 } else {
864 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
865 __it != __continuations.begin(), "the first rule should always seed the result");
866 const auto& __last = *(__it - 1);
867 if (std::holds_alternative<string>(v: __last.__rules)) {
868 // Europe/Berlin
869 // 1 c CE%sT 1945 May 24 2 (C1)
870 // 1 So CE%sT 1946 (C2)
871 //
872 // R c 1944 1945 - Ap M>=1 2s 1 S (R1)
873 //
874 // R So 1945 o - May 24 2 2 M (R2)
875 //
876 // When C2 becomes active the time would be before the first rule R2,
877 // giving a 1 hour sys_info. This is not valid and the results need
878 // merging.
879
880 if (__result->end != __sys_info->__info.begin) {
881 // When the UTC gap between the rules is due to the change of
882 // offsets adjust the new time to remove the gap.
883 sys_seconds __end = __result->end - __result->offset;
884 sys_seconds __begin = __sys_info->__info.begin - __sys_info->__info.offset;
885 if (__end == __begin) {
886 __sys_info->__info.begin = __result->end;
887 }
888 }
889 }
890
891 __result = __sys_info->__info;
892 __valid_result = __time >= __result->begin && __time < __result->end;
893 __can_merge = __sys_info->__can_merge;
894 }
895 }
896 __continuation_begin = __result->end;
897 } else {
898 __continuation_begin = __sys_info.error();
899 }
900 }
901 if (__valid_result)
902 return *__result;
903
904 std::__throw_runtime_error("tzdb: corrupt db");
905}
906
907// Is the "__local_time" present in "__first" and "__second". If so the
908// local_info has an ambiguous result.
909[[nodiscard]] static bool
910__is_ambiguous(local_seconds __local_time, const sys_info& __first, const sys_info& __second) {
911 std::chrono::local_seconds __end_first{__first.end.time_since_epoch() + __first.offset};
912 std::chrono::local_seconds __begin_second{__second.begin.time_since_epoch() + __second.offset};
913
914 return __local_time < __end_first && __local_time >= __begin_second;
915}
916
917// Determines the result of the "__local_time". This expects the object
918// "__first" to be earlier in time than "__second".
919[[nodiscard]] static local_info
920__get_info(local_seconds __local_time, const sys_info& __first, const sys_info& __second) {
921 std::chrono::local_seconds __end_first{__first.end.time_since_epoch() + __first.offset};
922 std::chrono::local_seconds __begin_second{__second.begin.time_since_epoch() + __second.offset};
923
924 if (__local_time < __end_first) {
925 if (__local_time >= __begin_second)
926 // |--------|
927 // |------|
928 // ^
929 return {.result: local_info::ambiguous, .first: __first, .second: __second};
930
931 // |--------|
932 // |------|
933 // ^
934 return {.result: local_info::unique, .first: __first, .second: sys_info{}};
935 }
936
937 if (__local_time < __begin_second)
938 // |--------|
939 // |------|
940 // ^
941 return {.result: local_info::nonexistent, .first: __first, .second: __second};
942
943 // |--------|
944 // |------|
945 // ^
946 return {.result: local_info::unique, .first: __second, .second: sys_info{}};
947}
948
949[[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI local_info
950time_zone::__get_info(local_seconds __local_time) const {
951 seconds __local_seconds = __local_time.time_since_epoch();
952
953 /* An example of a typical year with a DST switch displayed in local time.
954 *
955 * At the first of April the time goes forward one hour. This means the
956 * time marked with ~~ is not a valid local time. This is represented by the
957 * nonexistent value in local_info.result.
958 *
959 * At the first of November the time goes backward one hour. This means the
960 * time marked with ^^ happens twice. This is represented by the ambiguous
961 * value in local_info.result.
962 *
963 * 2020.11.01 2021.04.01 2021.11.01
964 * offset +05 offset +05 offset +05
965 * save 0s save 1h save 0s
966 * |------------//----------|
967 * |---------//--------------|
968 * |-------------
969 * ~~ ^^
970 *
971 * These shifts can happen due to changes in the current time zone for a
972 * location. For example, Indian/Kerguelen switched only once. In 1950 from an
973 * offset of 0 hours to an offset of +05 hours.
974 *
975 * During all these shifts the UTC time will not have gaps.
976 */
977
978 // The code needs to determine the system time for the local time. There is no
979 // information available. Assume the offset between system time and local time
980 // is 0s. This gives an initial estimate.
981 sys_seconds __guess{__local_seconds};
982 sys_info __info = __get_info(time: __guess);
983
984 // At this point the offset can be used to determine an estimate for the local
985 // time. Before doing that, determine the offset and validate whether the
986 // local time is the range [chrono::local_seconds::min(),
987 // chrono::local_seconds::max()).
988 if (__local_seconds < 0s && __info.offset > 0s)
989 if (__local_seconds - chrono::local_seconds::min().time_since_epoch() < __info.offset)
990 return {.result: -1, .first: __info, .second: {}};
991
992 if (__local_seconds > 0s && __info.offset < 0s)
993 if (chrono::local_seconds::max().time_since_epoch() - __local_seconds < -__info.offset)
994 return {.result: -2, .first: __info, .second: {}};
995
996 // Based on the information found in the sys_info, the local time can be
997 // converted to a system time. This resulting time can be in the following
998 // locations of the sys_info:
999 //
1000 // |---------//--------------|
1001 // 1 2.1 2.2 2.3 3
1002 //
1003 // 1. The estimate is before the returned sys_info object.
1004 // The result is either non-existent or unique in the previous sys_info.
1005 // 2. The estimate is in the sys_info object
1006 // - If the sys_info begin is not sys_seconds::min(), then it might be at
1007 // 2.1 and could be ambiguous with the previous or unique.
1008 // - If sys_info end is not sys_seconds::max(), then it might be at 2.3
1009 // and could be ambiguous with the next or unique.
1010 // - Else it is at 2.2 and always unique. This case happens when a
1011 // time zone has no transitions. For example, UTC or GMT+1.
1012 // 3. The estimate is after the returned sys_info object.
1013 // The result is either non-existent or unique in the next sys_info.
1014 //
1015 // There is no specification where the "middle" starts. Similar issues can
1016 // happen when sys_info objects are "short", then "unique in the next" could
1017 // become "ambiguous in the next and the one following". Theoretically there
1018 // is the option of the following time-line
1019 //
1020 // |------------|
1021 // |----|
1022 // |-----------------|
1023 //
1024 // However the local_info object only has 2 sys_info objects, so this option
1025 // is not tested.
1026
1027 sys_seconds __sys_time{__local_seconds - __info.offset};
1028 if (__sys_time < __info.begin)
1029 // Case 1 before __info
1030 return chrono::__get_info(__local_time, first: __get_info(time: __info.begin - 1s), second: __info);
1031
1032 if (__sys_time >= __info.end)
1033 // Case 3 after __info
1034 return chrono::__get_info(__local_time, first: __info, second: __get_info(time: __info.end));
1035
1036 // Case 2 in __info
1037 if (__info.begin != sys_seconds::min()) {
1038 // Case 2.1 Not at the beginning, when not ambiguous the result should test
1039 // case 2.3.
1040 sys_info __prev = __get_info(time: __info.begin - 1s);
1041 if (__is_ambiguous(__local_time, first: __prev, second: __info))
1042 return {.result: local_info::ambiguous, .first: __prev, .second: __info};
1043 }
1044
1045 if (__info.end == sys_seconds::max())
1046 // At the end so it's case 2.2
1047 return {.result: local_info::unique, .first: __info, .second: sys_info{}};
1048
1049 // This tests case 2.2 or case 2.3.
1050 return chrono::__get_info(__local_time, first: __info, second: __get_info(time: __info.end));
1051}
1052
1053} // namespace chrono
1054
1055_LIBCPP_END_NAMESPACE_STD
1056