1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 // For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html
10
11 #include <algorithm>
12 #include <chrono>
13 #include <filesystem>
14 #include <fstream>
15 #include <stdexcept>
16 #include <string>
17
18 #include "include/tzdb/time_zone_link_private.h"
19 #include "include/tzdb/time_zone_private.h"
20 #include "include/tzdb/types_private.h"
21 #include "include/tzdb/tzdb_list_private.h"
22 #include "include/tzdb/tzdb_private.h"
23
24 // Contains a parser for the IANA time zone data files.
25 //
26 // These files can be found at https://data.iana.org/time-zones/ and are in the
27 // public domain. Information regarding the input can be found at
28 // https://data.iana.org/time-zones/tz-how-to.html and
29 // https://man7.org/linux/man-pages/man8/zic.8.html.
30 //
31 // As indicated at https://howardhinnant.github.io/date/tz.html#Installation
32 // For Windows another file seems to be required
33 // https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml
34 // This file seems to contain the mapping of Windows time zone name to IANA
35 // time zone names.
36 //
37 // However this article mentions another way to do the mapping on Windows
38 // https://devblogs.microsoft.com/oldnewthing/20210527-00/?p=105255
39 // This requires Windows 10 Version 1903, which was released in May of 2019
40 // and considered end of life in December 2020
41 // https://learn.microsoft.com/en-us/lifecycle/announcements/windows-10-1903-end-of-servicing
42 //
43 // TODO TZDB Implement the Windows mapping in tzdb::current_zone
44
45 _LIBCPP_BEGIN_NAMESPACE_STD
46
47 namespace chrono {
48
49 // This function is weak so it can be overriden in the tests. The
50 // declaration is in the test header test/support/test_tzdb.h
__libcpp_tzdb_directory()51 _LIBCPP_WEAK string_view __libcpp_tzdb_directory() {
52 #if defined(__linux__)
53 return "/usr/share/zoneinfo/";
54 #else
55 # error "unknown path to the IANA Time Zone Database"
56 #endif
57 }
58
59 //===----------------------------------------------------------------------===//
60 // Details
61 //===----------------------------------------------------------------------===//
62
// Returns whether __c is a character the parser treats as whitespace within
// a line: a space or a horizontal tab. A newline is not whitespace here.
[[nodiscard]] static bool __is_whitespace(int __c) {
  switch (__c) {
  case ' ':
  case '\t':
    return true;

  default:
    return false;
  }
}
64
__skip_optional_whitespace(istream & __input)65 static void __skip_optional_whitespace(istream& __input) {
66 while (chrono::__is_whitespace(__input.peek()))
67 __input.get();
68 }
69
__skip_mandatory_whitespace(istream & __input)70 static void __skip_mandatory_whitespace(istream& __input) {
71 if (!chrono::__is_whitespace(__input.get()))
72 std::__throw_runtime_error("corrupt tzdb: expected whitespace");
73
74 chrono::__skip_optional_whitespace(__input);
75 }
76
// Returns whether __c terminates a line: either a newline or the
// end-of-file value of the char stream.
[[nodiscard]] static bool __is_eol(int __c) {
  if (__c == '\n')
    return true;

  return __c == std::char_traits<char>::eof();
}
78
__skip_line(istream & __input)79 static void __skip_line(istream& __input) {
80 while (!chrono::__is_eol(__input.peek())) {
81 __input.get();
82 }
83 __input.get();
84 }
85
// Consumes the next character of __input when, converted to lower case, it
// equals __suffix. Otherwise the stream is left untouched.
static void __skip(istream& __input, char __suffix) {
  const int __next = std::tolower(__input.peek());
  if (__next != __suffix)
    return;

  __input.get();
}
90
// Consumes the longest prefix of __suffix that is present at the front of
// __input. The comparison converts the input to lower case, so __suffix is
// expected to be lower case.
//
// The callers use this to accept abbreviated keywords: after the leading
// character(s) identified the keyword, the rest of its spelling is optional.
//
// The scan stops at the first character that does not match. Without that
// stop a later character of __suffix could still match and be consumed,
// which would accept input that is not a prefix, for example "rday" for
// "riday".
static void __skip(istream& __input, string_view __suffix) {
  for (auto __c : __suffix) {
    if (std::tolower(__input.peek()) != __c)
      return;

    __input.get();
  }
}
96
// Consumes one character of __input and requires that, converted to lower
// case, it equals __expected.
//
// Throws a runtime_error naming the expected character on a mismatch.
static void __matches(istream& __input, char __expected) {
  const int __actual = std::tolower(__input.get());
  if (__actual == __expected)
    return;

  string __message = "corrupt tzdb: expected character '";
  __message += __expected;
  __message += '\'';
  std::__throw_runtime_error(__message.c_str());
}
101
// Consumes __expected.size() characters of __input and requires that each,
// converted to lower case, equals the corresponding character of __expected.
//
// Throws a runtime_error naming the expected string at the first mismatch;
// the mismatching character has been consumed by then.
static void __matches(istream& __input, string_view __expected) {
  for (const char __want : __expected) {
    if (std::tolower(__input.get()) == __want)
      continue;

    string __message = "corrupt tzdb: expected string '";
    __message.append(__expected);
    __message += '\'';
    std::__throw_runtime_error(__message.c_str());
  }
}
107
// Reads characters up to the next space, tab, newline, or end of file and
// returns them. The terminating space, tab, or newline is put back in the
// stream.
//
// Throws a runtime_error when no character was read before the terminator.
[[nodiscard]] static string __parse_string(istream& __input) {
  string __token;
  for (;;) {
    const int __ch     = __input.get();
    const bool __at_end = __ch == istream::traits_type::eof();
    if (!__at_end && __ch != ' ' && __ch != '\t' && __ch != '\n') {
      __token.push_back(__ch);
      continue;
    }

    // A real character terminated the token, leave it for the caller.
    if (!__at_end)
      __input.unget();

    if (__token.empty())
      std::__throw_runtime_error("corrupt tzdb: expected a string");

    return __token;
  }
}
129
// Parses an unsigned decimal number from the front of __input and leaves the
// first character that is not a digit in the stream.
//
// __leading_zero_allowed selects whether the first digit may be '0'.
//
// Throws a runtime_error when the first character is not an accepted digit
// or when the value exceeds the accepted range.
[[nodiscard]] static int64_t __parse_integral(istream& __input, bool __leading_zero_allowed) {
  const int __first   = __input.get();
  const char __lowest = __leading_zero_allowed ? '0' : '1';
  if (__first < __lowest || __first > '9')
    std::__throw_runtime_error(
        __leading_zero_allowed ? "corrupt tzdb: expected a digit" : "corrupt tzdb: expected a non-zero digit");

  // The accepted range is limited to avoid overflows. Most parsed values are
  // small (hours, days, years), but the leap seconds use seconds since the
  // epoch, which no longer fits in an int in 2038. A 64-bit value covers
  // billions of years, so giving up a part of that range to keep the check
  // simple is not an issue.
  constexpr int64_t __limit = std::numeric_limits<int64_t>::max() / 16;

  int64_t __value = __first - '0';
  for (int __next = __input.peek(); __next >= '0' && __next <= '9'; __next = __input.peek()) {
    if (__value > __limit)
      std::__throw_runtime_error("corrupt tzdb: integral too large");

    __value = __value * 10 + (__input.get() - '0');
  }
  return __value;
}
162
163 //===----------------------------------------------------------------------===//
164 // Calendar
165 //===----------------------------------------------------------------------===//
166
__parse_day(istream & __input)167 [[nodiscard]] static day __parse_day(istream& __input) {
168 unsigned __result = chrono::__parse_integral(__input, false);
169 if (__result > 31)
170 std::__throw_runtime_error("corrupt tzdb day: value too large");
171 return day{__result};
172 }
173
__parse_weekday(istream & __input)174 [[nodiscard]] static weekday __parse_weekday(istream& __input) {
175 // TZDB allows the shortest unique name.
176 switch (std::tolower(__input.get())) {
177 case 'f':
178 chrono::__skip(__input, "riday");
179 return Friday;
180
181 case 'm':
182 chrono::__skip(__input, "onday");
183 return Monday;
184
185 case 's':
186 switch (std::tolower(__input.get())) {
187 case 'a':
188 chrono::__skip(__input, "turday");
189 return Saturday;
190
191 case 'u':
192 chrono::__skip(__input, "nday");
193 return Sunday;
194 }
195 break;
196
197 case 't':
198 switch (std::tolower(__input.get())) {
199 case 'h':
200 chrono::__skip(__input, "ursday");
201 return Thursday;
202
203 case 'u':
204 chrono::__skip(__input, "esday");
205 return Tuesday;
206 }
207 break;
208 case 'w':
209 chrono::__skip(__input, "ednesday");
210 return Wednesday;
211 }
212
213 std::__throw_runtime_error("corrupt tzdb weekday: invalid name");
214 }
215
__parse_month(istream & __input)216 [[nodiscard]] static month __parse_month(istream& __input) {
217 // TZDB allows the shortest unique name.
218 switch (std::tolower(__input.get())) {
219 case 'a':
220 switch (std::tolower(__input.get())) {
221 case 'p':
222 chrono::__skip(__input, "ril");
223 return April;
224
225 case 'u':
226 chrono::__skip(__input, "gust");
227 return August;
228 }
229 break;
230
231 case 'd':
232 chrono::__skip(__input, "ecember");
233 return December;
234
235 case 'f':
236 chrono::__skip(__input, "ebruary");
237 return February;
238
239 case 'j':
240 switch (std::tolower(__input.get())) {
241 case 'a':
242 chrono::__skip(__input, "nuary");
243 return January;
244
245 case 'u':
246 switch (std::tolower(__input.get())) {
247 case 'n':
248 chrono::__skip(__input, 'e');
249 return June;
250
251 case 'l':
252 chrono::__skip(__input, 'y');
253 return July;
254 }
255 }
256 break;
257
258 case 'm':
259 if (std::tolower(__input.get()) == 'a')
260 switch (std::tolower(__input.get())) {
261 case 'y':
262 return May;
263
264 case 'r':
265 chrono::__skip(__input, "ch");
266 return March;
267 }
268 break;
269
270 case 'n':
271 chrono::__skip(__input, "ovember");
272 return November;
273
274 case 'o':
275 chrono::__skip(__input, "ctober");
276 return October;
277
278 case 's':
279 chrono::__skip(__input, "eptember");
280 return September;
281 }
282 std::__throw_runtime_error("corrupt tzdb month: invalid name");
283 }
284
__parse_year_value(istream & __input)285 [[nodiscard]] static year __parse_year_value(istream& __input) {
286 bool __negative = __input.peek() == '-';
287 if (__negative) [[unlikely]]
288 __input.get();
289
290 int64_t __result = __parse_integral(__input, true);
291 if (__result > static_cast<int>(year::max())) {
292 if (__negative)
293 std::__throw_runtime_error("corrupt tzdb year: year is less than the minimum");
294
295 std::__throw_runtime_error("corrupt tzdb year: year is greater than the maximum");
296 }
297
298 return year{static_cast<int>(__negative ? -__result : __result)};
299 }
300
__parse_year(istream & __input)301 [[nodiscard]] static year __parse_year(istream& __input) {
302 if (std::tolower(__input.peek()) != 'm') [[likely]]
303 return chrono::__parse_year_value(__input);
304
305 __input.get();
306 switch (std::tolower(__input.peek())) {
307 case 'i':
308 __input.get();
309 chrono::__skip(__input, 'n');
310 [[fallthrough]];
311
312 case ' ':
313 // The m is minimum, even when that is ambiguous.
314 return year::min();
315
316 case 'a':
317 __input.get();
318 chrono::__skip(__input, 'x');
319 return year::max();
320 }
321
322 std::__throw_runtime_error("corrupt tzdb year: expected 'min' or 'max'");
323 }
324
325 //===----------------------------------------------------------------------===//
326 // TZDB fields
327 //===----------------------------------------------------------------------===//
328
__parse_to(istream & __input,year __only)329 [[nodiscard]] static year __parse_to(istream& __input, year __only) {
330 if (std::tolower(__input.peek()) != 'o')
331 return chrono::__parse_year(__input);
332
333 __input.get();
334 chrono::__skip(__input, "nly");
335 return __only;
336 }
337
__parse_comparison(istream & __input)338 [[nodiscard]] static __tz::__constrained_weekday::__comparison_t __parse_comparison(istream& __input) {
339 switch (__input.get()) {
340 case '>':
341 chrono::__matches(__input, '=');
342 return __tz::__constrained_weekday::__ge;
343
344 case '<':
345 chrono::__matches(__input, '=');
346 return __tz::__constrained_weekday::__le;
347 }
348 std::__throw_runtime_error("corrupt tzdb on: expected '>=' or '<='");
349 }
350
__parse_on(istream & __input)351 [[nodiscard]] static __tz::__on __parse_on(istream& __input) {
352 if (std::isdigit(__input.peek()))
353 return chrono::__parse_day(__input);
354
355 if (std::tolower(__input.peek()) == 'l') {
356 chrono::__matches(__input, "last");
357 return weekday_last(chrono::__parse_weekday(__input));
358 }
359
360 return __tz::__constrained_weekday{
361 chrono::__parse_weekday(__input), chrono::__parse_comparison(__input), chrono::__parse_day(__input)};
362 }
363
__parse_duration(istream & __input)364 [[nodiscard]] static seconds __parse_duration(istream& __input) {
365 seconds __result{0};
366 int __c = __input.peek();
367 bool __negative = __c == '-';
368 if (__negative) {
369 __input.get();
370 // Negative is either a negative value or a single -.
371 // The latter means 0 and the parsing is complete.
372 if (!std::isdigit(__input.peek()))
373 return __result;
374 }
375
376 __result += hours(__parse_integral(__input, true));
377 if (__input.peek() != ':')
378 return __negative ? -__result : __result;
379
380 __input.get();
381 __result += minutes(__parse_integral(__input, true));
382 if (__input.peek() != ':')
383 return __negative ? -__result : __result;
384
385 __input.get();
386 __result += seconds(__parse_integral(__input, true));
387 if (__input.peek() != '.')
388 return __negative ? -__result : __result;
389
390 __input.get();
391 (void)__parse_integral(__input, true); // Truncate the digits.
392
393 return __negative ? -__result : __result;
394 }
395
__parse_clock(istream & __input)396 [[nodiscard]] static __tz::__clock __parse_clock(istream& __input) {
397 switch (__input.get()) { // case sensitive
398 case 'w':
399 return __tz::__clock::__local;
400 case 's':
401 return __tz::__clock::__standard;
402
403 case 'u':
404 case 'g':
405 case 'z':
406 return __tz::__clock::__universal;
407 }
408
409 __input.unget();
410 return __tz::__clock::__local;
411 }
412
// Parses the optional, case-sensitive suffix letter of a SAVE field: 's'
// yields false and 'd' yields true. Any other character is put back and the
// result is whether __offset differs from zero.
[[nodiscard]] static bool __parse_dst(istream& __input, seconds __offset) {
  const int __suffix = __input.get();

  if (__suffix == 's')
    return false;

  if (__suffix == 'd')
    return true;

  __input.unget();
  return __offset != seconds::zero();
}
425
__parse_at(istream & __input)426 [[nodiscard]] static __tz::__at __parse_at(istream& __input) {
427 return {__parse_duration(__input), __parse_clock(__input)};
428 }
429
__parse_save(istream & __input)430 [[nodiscard]] static __tz::__save __parse_save(istream& __input) {
431 seconds __time = chrono::__parse_duration(__input);
432 return {__time, chrono::__parse_dst(__input, __time)};
433 }
434
__parse_letters(istream & __input)435 [[nodiscard]] static string __parse_letters(istream& __input) {
436 string __result = __parse_string(__input);
437 // Canonicalize "-" to "" since they are equivalent in the specification.
438 return __result != "-" ? __result : "";
439 }
440
__parse_rules(istream & __input)441 [[nodiscard]] static __tz::__continuation::__rules_t __parse_rules(istream& __input) {
442 int __c = __input.peek();
443 // A single - is not a SAVE but a special case.
444 if (__c == '-') {
445 __input.get();
446 if (chrono::__is_whitespace(__input.peek()))
447 return monostate{};
448 __input.unget();
449 return chrono::__parse_save(__input);
450 }
451
452 if (std::isdigit(__c) || __c == '+')
453 return chrono::__parse_save(__input);
454
455 return chrono::__parse_string(__input);
456 }
457
__parse_continuation(__tz::__rules_storage_type & __rules,istream & __input)458 [[nodiscard]] static __tz::__continuation __parse_continuation(__tz::__rules_storage_type& __rules, istream& __input) {
459 __tz::__continuation __result;
460
461 __result.__rule_database_ = std::addressof(__rules);
462
463 // Note STDOFF is specified as
464 // This field has the same format as the AT and SAVE fields of rule lines;
465 // These fields have different suffix letters, these letters seem
466 // not to be used so do not allow any of them.
467
468 __result.__stdoff = chrono::__parse_duration(__input);
469 chrono::__skip_mandatory_whitespace(__input);
470 __result.__rules = chrono::__parse_rules(__input);
471 chrono::__skip_mandatory_whitespace(__input);
472 __result.__format = chrono::__parse_string(__input);
473 chrono::__skip_optional_whitespace(__input);
474
475 if (chrono::__is_eol(__input.peek()))
476 return __result;
477 __result.__year = chrono::__parse_year(__input);
478 chrono::__skip_optional_whitespace(__input);
479
480 if (chrono::__is_eol(__input.peek()))
481 return __result;
482 __result.__in = chrono::__parse_month(__input);
483 chrono::__skip_optional_whitespace(__input);
484
485 if (chrono::__is_eol(__input.peek()))
486 return __result;
487 __result.__on = chrono::__parse_on(__input);
488 chrono::__skip_optional_whitespace(__input);
489
490 if (chrono::__is_eol(__input.peek()))
491 return __result;
492 __result.__at = __parse_at(__input);
493
494 return __result;
495 }
496
497 //===----------------------------------------------------------------------===//
498 // Time Zone Database entries
499 //===----------------------------------------------------------------------===//
500
__parse_version(istream & __input)501 static string __parse_version(istream& __input) {
502 // The first line in tzdata.zi contains
503 // # version YYYYw
504 // The parser expects this pattern
505 // #\s*version\s*\(.*)
506 // This part is not documented.
507 chrono::__matches(__input, '#');
508 chrono::__skip_optional_whitespace(__input);
509 chrono::__matches(__input, "version");
510 chrono::__skip_mandatory_whitespace(__input);
511 return chrono::__parse_string(__input);
512 }
513
514 [[nodiscard]]
__create_entry(__tz::__rules_storage_type & __rules,const string & __name)515 static __tz::__rule& __create_entry(__tz::__rules_storage_type& __rules, const string& __name) {
516 auto __result = [&]() -> __tz::__rule& {
517 auto& __rule = __rules.emplace_back(__name, vector<__tz::__rule>{});
518 return __rule.second.emplace_back();
519 };
520
521 if (__rules.empty())
522 return __result();
523
524 // Typically rules are in contiguous order in the database.
525 // But there are exceptions, some rules are interleaved.
526 if (__rules.back().first == __name)
527 return __rules.back().second.emplace_back();
528
529 if (auto __it = ranges::find(__rules, __name, [](const auto& __r) { return __r.first; });
530 __it != ranges::end(__rules))
531 return __it->second.emplace_back();
532
533 return __result();
534 }
535
__parse_rule(tzdb & __tzdb,__tz::__rules_storage_type & __rules,istream & __input)536 static void __parse_rule(tzdb& __tzdb, __tz::__rules_storage_type& __rules, istream& __input) {
537 chrono::__skip_mandatory_whitespace(__input);
538 string __name = chrono::__parse_string(__input);
539
540 __tz::__rule& __rule = __create_entry(__rules, __name);
541
542 chrono::__skip_mandatory_whitespace(__input);
543 __rule.__from = chrono::__parse_year(__input);
544 chrono::__skip_mandatory_whitespace(__input);
545 __rule.__to = chrono::__parse_to(__input, __rule.__from);
546 chrono::__skip_mandatory_whitespace(__input);
547 chrono::__matches(__input, '-');
548 chrono::__skip_mandatory_whitespace(__input);
549 __rule.__in = chrono::__parse_month(__input);
550 chrono::__skip_mandatory_whitespace(__input);
551 __rule.__on = chrono::__parse_on(__input);
552 chrono::__skip_mandatory_whitespace(__input);
553 __rule.__at = __parse_at(__input);
554 chrono::__skip_mandatory_whitespace(__input);
555 __rule.__save = __parse_save(__input);
556 chrono::__skip_mandatory_whitespace(__input);
557 __rule.__letters = chrono::__parse_letters(__input);
558 chrono::__skip_line(__input);
559 }
560
__parse_zone(tzdb & __tzdb,__tz::__rules_storage_type & __rules,istream & __input)561 static void __parse_zone(tzdb& __tzdb, __tz::__rules_storage_type& __rules, istream& __input) {
562 chrono::__skip_mandatory_whitespace(__input);
563 auto __p = std::make_unique<time_zone::__impl>(chrono::__parse_string(__input));
564 vector<__tz::__continuation>& __continuations = __p->__continuations();
565 chrono::__skip_mandatory_whitespace(__input);
566
567 do {
568 // The first line must be valid, continuations are optional.
569 __continuations.emplace_back(__parse_continuation(__rules, __input));
570 chrono::__skip_line(__input);
571 chrono::__skip_optional_whitespace(__input);
572 } while (std::isdigit(__input.peek()) || __input.peek() == '-');
573
574 __tzdb.zones.emplace_back(time_zone::__create(std::move(__p)));
575 }
576
__parse_link(tzdb & __tzdb,istream & __input)577 static void __parse_link(tzdb& __tzdb, istream& __input) {
578 chrono::__skip_mandatory_whitespace(__input);
579 string __target = chrono::__parse_string(__input);
580 chrono::__skip_mandatory_whitespace(__input);
581 string __name = chrono::__parse_string(__input);
582 chrono::__skip_line(__input);
583
584 __tzdb.links.emplace_back(time_zone_link::__constructor_tag{}, std::move(__name), std::move(__target));
585 }
586
__parse_tzdata(tzdb & __db,__tz::__rules_storage_type & __rules,istream & __input)587 static void __parse_tzdata(tzdb& __db, __tz::__rules_storage_type& __rules, istream& __input) {
588 while (true) {
589 int __c = std::tolower(__input.get());
590
591 switch (__c) {
592 case istream::traits_type::eof():
593 return;
594
595 case ' ':
596 case '\t':
597 case '\n':
598 break;
599
600 case '#':
601 chrono::__skip_line(__input);
602 break;
603
604 case 'r':
605 chrono::__skip(__input, "ule");
606 chrono::__parse_rule(__db, __rules, __input);
607 break;
608
609 case 'z':
610 chrono::__skip(__input, "one");
611 chrono::__parse_zone(__db, __rules, __input);
612 break;
613
614 case 'l':
615 chrono::__skip(__input, "ink");
616 chrono::__parse_link(__db, __input);
617 break;
618
619 default:
620 std::__throw_runtime_error("corrupt tzdb: unexpected input");
621 }
622 }
623 }
624
__init_tzdb(tzdb & __tzdb,__tz::__rules_storage_type & __rules)625 void __init_tzdb(tzdb& __tzdb, __tz::__rules_storage_type& __rules) {
626 filesystem::path __root = chrono::__libcpp_tzdb_directory();
627 ifstream __tzdata{__root / "tzdata.zi"};
628
629 __tzdb.version = chrono::__parse_version(__tzdata);
630 chrono::__parse_tzdata(__tzdb, __rules, __tzdata);
631 std::ranges::sort(__tzdb.zones);
632 std::ranges::sort(__tzdb.links);
633 std::ranges::sort(__rules, {}, [](const auto& p) { return p.first; });
634 }
635
636 //===----------------------------------------------------------------------===//
637 // Public API
638 //===----------------------------------------------------------------------===//
639
get_tzdb_list()640 _LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI tzdb_list& get_tzdb_list() {
641 static tzdb_list __result{new tzdb_list::__impl()};
642 return __result;
643 }
644
reload_tzdb()645 _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const tzdb& reload_tzdb() {
646 if (chrono::remote_version() == chrono::get_tzdb().version)
647 return chrono::get_tzdb();
648
649 return chrono::get_tzdb_list().__implementation().__load();
650 }
651
remote_version()652 _LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI string remote_version() {
653 filesystem::path __root = chrono::__libcpp_tzdb_directory();
654 ifstream __tzdata{__root / "tzdata.zi"};
655 return chrono::__parse_version(__tzdata);
656 }
657
658 } // namespace chrono
659
660 _LIBCPP_END_NAMESPACE_STD
661