xref: /aosp_15_r20/external/cronet/third_party/libc++/src/src/tzdb.cpp (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html
10 
11 #include <algorithm>
12 #include <chrono>
13 #include <filesystem>
14 #include <fstream>
15 #include <stdexcept>
16 #include <string>
17 
18 #include "include/tzdb/time_zone_link_private.h"
19 #include "include/tzdb/time_zone_private.h"
20 #include "include/tzdb/types_private.h"
21 #include "include/tzdb/tzdb_list_private.h"
22 #include "include/tzdb/tzdb_private.h"
23 
24 // Contains a parser for the IANA time zone data files.
25 //
26 // These files can be found at https://data.iana.org/time-zones/ and are in the
27 // public domain. Information regarding the input can be found at
28 // https://data.iana.org/time-zones/tz-how-to.html and
29 // https://man7.org/linux/man-pages/man8/zic.8.html.
30 //
31 // As indicated at https://howardhinnant.github.io/date/tz.html#Installation
32 // For Windows another file seems to be required
33 // https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml
34 // This file seems to contain the mapping of Windows time zone name to IANA
35 // time zone names.
36 //
37 // However this article mentions another way to do the mapping on Windows
38 // https://devblogs.microsoft.com/oldnewthing/20210527-00/?p=105255
39 // This requires Windows 10 Version 1903, which was released in May of 2019
40 // and considered end of life in December 2020
41 // https://learn.microsoft.com/en-us/lifecycle/announcements/windows-10-1903-end-of-servicing
42 //
43 // TODO TZDB Implement the Windows mapping in tzdb::current_zone
44 
45 _LIBCPP_BEGIN_NAMESPACE_STD
46 
47 namespace chrono {
48 
49 // This function is weak so it can be overriden in the tests. The
50 // declaration is in the test header test/support/test_tzdb.h
__libcpp_tzdb_directory()51 _LIBCPP_WEAK string_view __libcpp_tzdb_directory() {
52 #if defined(__linux__)
53   return "/usr/share/zoneinfo/";
54 #else
55 #  error "unknown path to the IANA Time Zone Database"
56 #endif
57 }
58 
59 //===----------------------------------------------------------------------===//
60 //                           Details
61 //===----------------------------------------------------------------------===//
62 
// Returns whether __c separates fields within a line: a space or a tab.
// A newline is not considered whitespace by this helper.
[[nodiscard]] static bool __is_whitespace(int __c) {
  switch (__c) {
  case ' ':
  case '\t':
    return true;

  default:
    return false;
  }
}
64 
__skip_optional_whitespace(istream & __input)65 static void __skip_optional_whitespace(istream& __input) {
66   while (chrono::__is_whitespace(__input.peek()))
67     __input.get();
68 }
69 
__skip_mandatory_whitespace(istream & __input)70 static void __skip_mandatory_whitespace(istream& __input) {
71   if (!chrono::__is_whitespace(__input.get()))
72     std::__throw_runtime_error("corrupt tzdb: expected whitespace");
73 
74   chrono::__skip_optional_whitespace(__input);
75 }
76 
// Returns whether __c terminates a line: a newline or the end-of-file value.
[[nodiscard]] static bool __is_eol(int __c) {
  if (__c == '\n')
    return true;

  return __c == std::char_traits<char>::eof();
}
78 
__skip_line(istream & __input)79 static void __skip_line(istream& __input) {
80   while (!chrono::__is_eol(__input.peek())) {
81     __input.get();
82   }
83   __input.get();
84 }
85 
__skip(istream & __input,char __suffix)86 static void __skip(istream& __input, char __suffix) {
87   if (std::tolower(__input.peek()) == __suffix)
88     __input.get();
89 }
90 
__skip(istream & __input,string_view __suffix)91 static void __skip(istream& __input, string_view __suffix) {
92   for (auto __c : __suffix)
93     if (std::tolower(__input.peek()) == __c)
94       __input.get();
95 }
96 
__matches(istream & __input,char __expected)97 static void __matches(istream& __input, char __expected) {
98   if (std::tolower(__input.get()) != __expected)
99     std::__throw_runtime_error((string("corrupt tzdb: expected character '") + __expected + '\'').c_str());
100 }
101 
__matches(istream & __input,string_view __expected)102 static void __matches(istream& __input, string_view __expected) {
103   for (auto __c : __expected)
104     if (std::tolower(__input.get()) != __c)
105       std::__throw_runtime_error((string("corrupt tzdb: expected string '") + string(__expected) + '\'').c_str());
106 }
107 
__parse_string(istream & __input)108 [[nodiscard]] static string __parse_string(istream& __input) {
109   string __result;
110   while (true) {
111     int __c = __input.get();
112     switch (__c) {
113     case ' ':
114     case '\t':
115     case '\n':
116       __input.unget();
117       [[fallthrough]];
118     case istream::traits_type::eof():
119       if (__result.empty())
120         std::__throw_runtime_error("corrupt tzdb: expected a string");
121 
122       return __result;
123 
124     default:
125       __result.push_back(__c);
126     }
127   }
128 }
129 
__parse_integral(istream & __input,bool __leading_zero_allowed)130 [[nodiscard]] static int64_t __parse_integral(istream& __input, bool __leading_zero_allowed) {
131   int64_t __result = __input.get();
132   if (__leading_zero_allowed) {
133     if (__result < '0' || __result > '9')
134       std::__throw_runtime_error("corrupt tzdb: expected a digit");
135   } else {
136     if (__result < '1' || __result > '9')
137       std::__throw_runtime_error("corrupt tzdb: expected a non-zero digit");
138   }
139   __result -= '0';
140   while (true) {
141     if (__input.peek() < '0' || __input.peek() > '9')
142       return __result;
143 
144     // In order to avoid possible overflows we limit the accepted range.
145     // Most values parsed are expected to be very small:
146     // - 8784 hours in a year
147     // - 31 days in a month
148     // - year no real maximum, these values are expected to be less than
149     //   the range of the year type.
150     //
151     // However the leapseconds use a seconds after epoch value. Using an
152     // int would run into an overflow in 2038. By using a 64-bit value
153     // the range is large enough for the bilions of years. Limiting that
154     // range slightly to make the code easier is not an issue.
155     if (__result > (std::numeric_limits<int64_t>::max() / 16))
156       std::__throw_runtime_error("corrupt tzdb: integral too large");
157 
158     __result *= 10;
159     __result += __input.get() - '0';
160   }
161 }
162 
163 //===----------------------------------------------------------------------===//
164 //                          Calendar
165 //===----------------------------------------------------------------------===//
166 
__parse_day(istream & __input)167 [[nodiscard]] static day __parse_day(istream& __input) {
168   unsigned __result = chrono::__parse_integral(__input, false);
169   if (__result > 31)
170     std::__throw_runtime_error("corrupt tzdb day: value too large");
171   return day{__result};
172 }
173 
__parse_weekday(istream & __input)174 [[nodiscard]] static weekday __parse_weekday(istream& __input) {
175   // TZDB allows the shortest unique name.
176   switch (std::tolower(__input.get())) {
177   case 'f':
178     chrono::__skip(__input, "riday");
179     return Friday;
180 
181   case 'm':
182     chrono::__skip(__input, "onday");
183     return Monday;
184 
185   case 's':
186     switch (std::tolower(__input.get())) {
187     case 'a':
188       chrono::__skip(__input, "turday");
189       return Saturday;
190 
191     case 'u':
192       chrono::__skip(__input, "nday");
193       return Sunday;
194     }
195     break;
196 
197   case 't':
198     switch (std::tolower(__input.get())) {
199     case 'h':
200       chrono::__skip(__input, "ursday");
201       return Thursday;
202 
203     case 'u':
204       chrono::__skip(__input, "esday");
205       return Tuesday;
206     }
207     break;
208   case 'w':
209     chrono::__skip(__input, "ednesday");
210     return Wednesday;
211   }
212 
213   std::__throw_runtime_error("corrupt tzdb weekday: invalid name");
214 }
215 
__parse_month(istream & __input)216 [[nodiscard]] static month __parse_month(istream& __input) {
217   // TZDB allows the shortest unique name.
218   switch (std::tolower(__input.get())) {
219   case 'a':
220     switch (std::tolower(__input.get())) {
221     case 'p':
222       chrono::__skip(__input, "ril");
223       return April;
224 
225     case 'u':
226       chrono::__skip(__input, "gust");
227       return August;
228     }
229     break;
230 
231   case 'd':
232     chrono::__skip(__input, "ecember");
233     return December;
234 
235   case 'f':
236     chrono::__skip(__input, "ebruary");
237     return February;
238 
239   case 'j':
240     switch (std::tolower(__input.get())) {
241     case 'a':
242       chrono::__skip(__input, "nuary");
243       return January;
244 
245     case 'u':
246       switch (std::tolower(__input.get())) {
247       case 'n':
248         chrono::__skip(__input, 'e');
249         return June;
250 
251       case 'l':
252         chrono::__skip(__input, 'y');
253         return July;
254       }
255     }
256     break;
257 
258   case 'm':
259     if (std::tolower(__input.get()) == 'a')
260       switch (std::tolower(__input.get())) {
261       case 'y':
262         return May;
263 
264       case 'r':
265         chrono::__skip(__input, "ch");
266         return March;
267       }
268     break;
269 
270   case 'n':
271     chrono::__skip(__input, "ovember");
272     return November;
273 
274   case 'o':
275     chrono::__skip(__input, "ctober");
276     return October;
277 
278   case 's':
279     chrono::__skip(__input, "eptember");
280     return September;
281   }
282   std::__throw_runtime_error("corrupt tzdb month: invalid name");
283 }
284 
__parse_year_value(istream & __input)285 [[nodiscard]] static year __parse_year_value(istream& __input) {
286   bool __negative = __input.peek() == '-';
287   if (__negative) [[unlikely]]
288     __input.get();
289 
290   int64_t __result = __parse_integral(__input, true);
291   if (__result > static_cast<int>(year::max())) {
292     if (__negative)
293       std::__throw_runtime_error("corrupt tzdb year: year is less than the minimum");
294 
295     std::__throw_runtime_error("corrupt tzdb year: year is greater than the maximum");
296   }
297 
298   return year{static_cast<int>(__negative ? -__result : __result)};
299 }
300 
__parse_year(istream & __input)301 [[nodiscard]] static year __parse_year(istream& __input) {
302   if (std::tolower(__input.peek()) != 'm') [[likely]]
303     return chrono::__parse_year_value(__input);
304 
305   __input.get();
306   switch (std::tolower(__input.peek())) {
307   case 'i':
308     __input.get();
309     chrono::__skip(__input, 'n');
310     [[fallthrough]];
311 
312   case ' ':
313     // The m is minimum, even when that is ambiguous.
314     return year::min();
315 
316   case 'a':
317     __input.get();
318     chrono::__skip(__input, 'x');
319     return year::max();
320   }
321 
322   std::__throw_runtime_error("corrupt tzdb year: expected 'min' or 'max'");
323 }
324 
325 //===----------------------------------------------------------------------===//
326 //                        TZDB fields
327 //===----------------------------------------------------------------------===//
328 
__parse_to(istream & __input,year __only)329 [[nodiscard]] static year __parse_to(istream& __input, year __only) {
330   if (std::tolower(__input.peek()) != 'o')
331     return chrono::__parse_year(__input);
332 
333   __input.get();
334   chrono::__skip(__input, "nly");
335   return __only;
336 }
337 
__parse_comparison(istream & __input)338 [[nodiscard]] static __tz::__constrained_weekday::__comparison_t __parse_comparison(istream& __input) {
339   switch (__input.get()) {
340   case '>':
341     chrono::__matches(__input, '=');
342     return __tz::__constrained_weekday::__ge;
343 
344   case '<':
345     chrono::__matches(__input, '=');
346     return __tz::__constrained_weekday::__le;
347   }
348   std::__throw_runtime_error("corrupt tzdb on: expected '>=' or '<='");
349 }
350 
__parse_on(istream & __input)351 [[nodiscard]] static __tz::__on __parse_on(istream& __input) {
352   if (std::isdigit(__input.peek()))
353     return chrono::__parse_day(__input);
354 
355   if (std::tolower(__input.peek()) == 'l') {
356     chrono::__matches(__input, "last");
357     return weekday_last(chrono::__parse_weekday(__input));
358   }
359 
360   return __tz::__constrained_weekday{
361       chrono::__parse_weekday(__input), chrono::__parse_comparison(__input), chrono::__parse_day(__input)};
362 }
363 
__parse_duration(istream & __input)364 [[nodiscard]] static seconds __parse_duration(istream& __input) {
365   seconds __result{0};
366   int __c         = __input.peek();
367   bool __negative = __c == '-';
368   if (__negative) {
369     __input.get();
370     // Negative is either a negative value or a single -.
371     // The latter means 0 and the parsing is complete.
372     if (!std::isdigit(__input.peek()))
373       return __result;
374   }
375 
376   __result += hours(__parse_integral(__input, true));
377   if (__input.peek() != ':')
378     return __negative ? -__result : __result;
379 
380   __input.get();
381   __result += minutes(__parse_integral(__input, true));
382   if (__input.peek() != ':')
383     return __negative ? -__result : __result;
384 
385   __input.get();
386   __result += seconds(__parse_integral(__input, true));
387   if (__input.peek() != '.')
388     return __negative ? -__result : __result;
389 
390   __input.get();
391   (void)__parse_integral(__input, true); // Truncate the digits.
392 
393   return __negative ? -__result : __result;
394 }
395 
__parse_clock(istream & __input)396 [[nodiscard]] static __tz::__clock __parse_clock(istream& __input) {
397   switch (__input.get()) { // case sensitive
398   case 'w':
399     return __tz::__clock::__local;
400   case 's':
401     return __tz::__clock::__standard;
402 
403   case 'u':
404   case 'g':
405   case 'z':
406     return __tz::__clock::__universal;
407   }
408 
409   __input.unget();
410   return __tz::__clock::__local;
411 }
412 
__parse_dst(istream & __input,seconds __offset)413 [[nodiscard]] static bool __parse_dst(istream& __input, seconds __offset) {
414   switch (__input.get()) { // case sensitive
415   case 's':
416     return false;
417 
418   case 'd':
419     return true;
420   }
421 
422   __input.unget();
423   return __offset != 0s;
424 }
425 
__parse_at(istream & __input)426 [[nodiscard]] static __tz::__at __parse_at(istream& __input) {
427   return {__parse_duration(__input), __parse_clock(__input)};
428 }
429 
__parse_save(istream & __input)430 [[nodiscard]] static __tz::__save __parse_save(istream& __input) {
431   seconds __time = chrono::__parse_duration(__input);
432   return {__time, chrono::__parse_dst(__input, __time)};
433 }
434 
__parse_letters(istream & __input)435 [[nodiscard]] static string __parse_letters(istream& __input) {
436   string __result = __parse_string(__input);
437   // Canonicalize "-" to "" since they are equivalent in the specification.
438   return __result != "-" ? __result : "";
439 }
440 
__parse_rules(istream & __input)441 [[nodiscard]] static __tz::__continuation::__rules_t __parse_rules(istream& __input) {
442   int __c = __input.peek();
443   // A single -  is not a SAVE but a special case.
444   if (__c == '-') {
445     __input.get();
446     if (chrono::__is_whitespace(__input.peek()))
447       return monostate{};
448     __input.unget();
449     return chrono::__parse_save(__input);
450   }
451 
452   if (std::isdigit(__c) || __c == '+')
453     return chrono::__parse_save(__input);
454 
455   return chrono::__parse_string(__input);
456 }
457 
__parse_continuation(__tz::__rules_storage_type & __rules,istream & __input)458 [[nodiscard]] static __tz::__continuation __parse_continuation(__tz::__rules_storage_type& __rules, istream& __input) {
459   __tz::__continuation __result;
460 
461   __result.__rule_database_ = std::addressof(__rules);
462 
463   // Note STDOFF is specified as
464   //   This field has the same format as the AT and SAVE fields of rule lines;
465   // These fields have different suffix letters, these letters seem
466   // not to be used so do not allow any of them.
467 
468   __result.__stdoff = chrono::__parse_duration(__input);
469   chrono::__skip_mandatory_whitespace(__input);
470   __result.__rules = chrono::__parse_rules(__input);
471   chrono::__skip_mandatory_whitespace(__input);
472   __result.__format = chrono::__parse_string(__input);
473   chrono::__skip_optional_whitespace(__input);
474 
475   if (chrono::__is_eol(__input.peek()))
476     return __result;
477   __result.__year = chrono::__parse_year(__input);
478   chrono::__skip_optional_whitespace(__input);
479 
480   if (chrono::__is_eol(__input.peek()))
481     return __result;
482   __result.__in = chrono::__parse_month(__input);
483   chrono::__skip_optional_whitespace(__input);
484 
485   if (chrono::__is_eol(__input.peek()))
486     return __result;
487   __result.__on = chrono::__parse_on(__input);
488   chrono::__skip_optional_whitespace(__input);
489 
490   if (chrono::__is_eol(__input.peek()))
491     return __result;
492   __result.__at = __parse_at(__input);
493 
494   return __result;
495 }
496 
497 //===----------------------------------------------------------------------===//
498 //                   Time Zone Database entries
499 //===----------------------------------------------------------------------===//
500 
__parse_version(istream & __input)501 static string __parse_version(istream& __input) {
502   // The first line in tzdata.zi contains
503   //    # version YYYYw
504   // The parser expects this pattern
505   // #\s*version\s*\(.*)
506   // This part is not documented.
507   chrono::__matches(__input, '#');
508   chrono::__skip_optional_whitespace(__input);
509   chrono::__matches(__input, "version");
510   chrono::__skip_mandatory_whitespace(__input);
511   return chrono::__parse_string(__input);
512 }
513 
514 [[nodiscard]]
__create_entry(__tz::__rules_storage_type & __rules,const string & __name)515 static __tz::__rule& __create_entry(__tz::__rules_storage_type& __rules, const string& __name) {
516   auto __result = [&]() -> __tz::__rule& {
517     auto& __rule = __rules.emplace_back(__name, vector<__tz::__rule>{});
518     return __rule.second.emplace_back();
519   };
520 
521   if (__rules.empty())
522     return __result();
523 
524   // Typically rules are in contiguous order in the database.
525   // But there are exceptions, some rules are interleaved.
526   if (__rules.back().first == __name)
527     return __rules.back().second.emplace_back();
528 
529   if (auto __it = ranges::find(__rules, __name, [](const auto& __r) { return __r.first; });
530       __it != ranges::end(__rules))
531     return __it->second.emplace_back();
532 
533   return __result();
534 }
535 
__parse_rule(tzdb & __tzdb,__tz::__rules_storage_type & __rules,istream & __input)536 static void __parse_rule(tzdb& __tzdb, __tz::__rules_storage_type& __rules, istream& __input) {
537   chrono::__skip_mandatory_whitespace(__input);
538   string __name = chrono::__parse_string(__input);
539 
540   __tz::__rule& __rule = __create_entry(__rules, __name);
541 
542   chrono::__skip_mandatory_whitespace(__input);
543   __rule.__from = chrono::__parse_year(__input);
544   chrono::__skip_mandatory_whitespace(__input);
545   __rule.__to = chrono::__parse_to(__input, __rule.__from);
546   chrono::__skip_mandatory_whitespace(__input);
547   chrono::__matches(__input, '-');
548   chrono::__skip_mandatory_whitespace(__input);
549   __rule.__in = chrono::__parse_month(__input);
550   chrono::__skip_mandatory_whitespace(__input);
551   __rule.__on = chrono::__parse_on(__input);
552   chrono::__skip_mandatory_whitespace(__input);
553   __rule.__at = __parse_at(__input);
554   chrono::__skip_mandatory_whitespace(__input);
555   __rule.__save = __parse_save(__input);
556   chrono::__skip_mandatory_whitespace(__input);
557   __rule.__letters = chrono::__parse_letters(__input);
558   chrono::__skip_line(__input);
559 }
560 
__parse_zone(tzdb & __tzdb,__tz::__rules_storage_type & __rules,istream & __input)561 static void __parse_zone(tzdb& __tzdb, __tz::__rules_storage_type& __rules, istream& __input) {
562   chrono::__skip_mandatory_whitespace(__input);
563   auto __p                                      = std::make_unique<time_zone::__impl>(chrono::__parse_string(__input));
564   vector<__tz::__continuation>& __continuations = __p->__continuations();
565   chrono::__skip_mandatory_whitespace(__input);
566 
567   do {
568     // The first line must be valid, continuations are optional.
569     __continuations.emplace_back(__parse_continuation(__rules, __input));
570     chrono::__skip_line(__input);
571     chrono::__skip_optional_whitespace(__input);
572   } while (std::isdigit(__input.peek()) || __input.peek() == '-');
573 
574   __tzdb.zones.emplace_back(time_zone::__create(std::move(__p)));
575 }
576 
__parse_link(tzdb & __tzdb,istream & __input)577 static void __parse_link(tzdb& __tzdb, istream& __input) {
578   chrono::__skip_mandatory_whitespace(__input);
579   string __target = chrono::__parse_string(__input);
580   chrono::__skip_mandatory_whitespace(__input);
581   string __name = chrono::__parse_string(__input);
582   chrono::__skip_line(__input);
583 
584   __tzdb.links.emplace_back(time_zone_link::__constructor_tag{}, std::move(__name), std::move(__target));
585 }
586 
__parse_tzdata(tzdb & __db,__tz::__rules_storage_type & __rules,istream & __input)587 static void __parse_tzdata(tzdb& __db, __tz::__rules_storage_type& __rules, istream& __input) {
588   while (true) {
589     int __c = std::tolower(__input.get());
590 
591     switch (__c) {
592     case istream::traits_type::eof():
593       return;
594 
595     case ' ':
596     case '\t':
597     case '\n':
598       break;
599 
600     case '#':
601       chrono::__skip_line(__input);
602       break;
603 
604     case 'r':
605       chrono::__skip(__input, "ule");
606       chrono::__parse_rule(__db, __rules, __input);
607       break;
608 
609     case 'z':
610       chrono::__skip(__input, "one");
611       chrono::__parse_zone(__db, __rules, __input);
612       break;
613 
614     case 'l':
615       chrono::__skip(__input, "ink");
616       chrono::__parse_link(__db, __input);
617       break;
618 
619     default:
620       std::__throw_runtime_error("corrupt tzdb: unexpected input");
621     }
622   }
623 }
624 
__init_tzdb(tzdb & __tzdb,__tz::__rules_storage_type & __rules)625 void __init_tzdb(tzdb& __tzdb, __tz::__rules_storage_type& __rules) {
626   filesystem::path __root = chrono::__libcpp_tzdb_directory();
627   ifstream __tzdata{__root / "tzdata.zi"};
628 
629   __tzdb.version = chrono::__parse_version(__tzdata);
630   chrono::__parse_tzdata(__tzdb, __rules, __tzdata);
631   std::ranges::sort(__tzdb.zones);
632   std::ranges::sort(__tzdb.links);
633   std::ranges::sort(__rules, {}, [](const auto& p) { return p.first; });
634 }
635 
636 //===----------------------------------------------------------------------===//
637 //                           Public API
638 //===----------------------------------------------------------------------===//
639 
get_tzdb_list()640 _LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI tzdb_list& get_tzdb_list() {
641   static tzdb_list __result{new tzdb_list::__impl()};
642   return __result;
643 }
644 
reload_tzdb()645 _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const tzdb& reload_tzdb() {
646   if (chrono::remote_version() == chrono::get_tzdb().version)
647     return chrono::get_tzdb();
648 
649   return chrono::get_tzdb_list().__implementation().__load();
650 }
651 
remote_version()652 _LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI string remote_version() {
653   filesystem::path __root = chrono::__libcpp_tzdb_directory();
654   ifstream __tzdata{__root / "tzdata.zi"};
655   return chrono::__parse_version(__tzdata);
656 }
657 
658 } // namespace chrono
659 
660 _LIBCPP_END_NAMESPACE_STD
661