xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/ZoneParser.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.util;
2 
3 import com.ibm.icu.util.ICUUncheckedIOException;
4 import java.io.BufferedReader;
5 import java.io.IOException;
6 import java.util.ArrayList;
7 import java.util.Arrays;
8 import java.util.Collections;
9 import java.util.Comparator;
10 import java.util.HashSet;
11 import java.util.Iterator;
12 import java.util.List;
13 import java.util.Locale;
14 import java.util.Map;
15 import java.util.Set;
16 import java.util.TreeMap;
17 import java.util.TreeSet;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20 
21 public class ZoneParser {
22     static final boolean DEBUG = false;
23 
24     private String version;
25 
26     private Map<String, String> zone_to_country;
27 
28     private Map<String, Set<String>> country_to_zoneSet;
29 
30     /**
31      * @return mapping from zone id to country. If a zone has no country, then XX is used.
32      */
getZoneToCounty()33     public Map<String, String> getZoneToCounty() {
34         if (zone_to_country == null) make_zone_to_country();
35         return zone_to_country;
36     }
37 
38     /**
39      * @return mapping from country to zoneid. If a zone has no country, then XX is used.
40      */
getCountryToZoneSet()41     public Map<String, Set<String>> getCountryToZoneSet() {
42         if (country_to_zoneSet == null) make_zone_to_country();
43         return country_to_zoneSet;
44     }
45 
46     /**
47      * @return map from tzids to a list: latitude, longitude, country, comment?. + = N or E
48      */
getZoneData()49     public Map<String, List<String>> getZoneData() {
50         if (zoneData == null) makeZoneData();
51         return zoneData;
52     }
53 
getDeprecatedZoneIDs()54     public List<String> getDeprecatedZoneIDs() {
55         return Arrays.asList(FIX_DEPRECATED_ZONE_DATA);
56     }
57 
58     /** */
make_zone_to_country()59     private void make_zone_to_country() {
60         zone_to_country = new TreeMap<>(TZIDComparator);
61         country_to_zoneSet = new TreeMap<>();
62         // Map aliasMap = getAliasMap();
63         Map<String, List<String>> zoneData = getZoneData();
64         for (String zone : zoneData.keySet()) {
65             String country = zoneData.get(zone).get(2);
66             zone_to_country.put(zone, country);
67             Set<String> s = country_to_zoneSet.get(country);
68             if (s == null) country_to_zoneSet.put(country, s = new TreeSet<>());
69             s.add(zone);
70         }
71         /*
72          * Set territories = getAvailableCodes("territory"); for (Iterator it =
73          * territories.iterator(); it.hasNext();) { String code = (String)
74          * it.next(); String[] zones = TimeZone.getAvailableIDs(code); for (int i =
75          * 0; i < zones.length; ++i) { if (aliasMap.get(zones[i]) != null) continue;
76          * zone_to_country.put(zones[i], code); } } String[] zones =
77          * TimeZone.getAvailableIDs(); for (int i = 0; i < zones.length; ++i) { if
78          * (aliasMap.get(zones[i]) != null) continue; if
79          * (zone_to_country.get(zones[i]) == null) { zone_to_country.put(zones[i],
80          * NO_COUNTRY); } } for (Iterator it = zone_to_country.keySet().iterator();
81          * it.hasNext();) { String tzid = (String) it.next(); String country =
82          * (String) zone_to_country.get(tzid); Set s = (Set)
83          * country_to_zoneSet.get(country); if (s == null)
84          * country_to_zoneSet.put(country, s = new TreeSet()); s.add(tzid); }
85          */
86         // protect
87         zone_to_country = Collections.unmodifiableMap(zone_to_country);
88         country_to_zoneSet = CldrUtility.protectCollection(country_to_zoneSet);
89     }
90 
91     /**
92      * private Map bogusZones = null;
93      *
94      * <p>private Map getAliasMap() { if (bogusZones == null) { try { bogusZones = new TreeMap();
     * BufferedReader in = Utility.getUTF8Data("TimeZoneAliases.txt"); while (true) { String line =
96      * in.readLine(); if (line == null) break; line = line.trim(); int pos = line.indexOf('#'); if
97      * (pos >= 0) { skippedAliases.add(line); line = line.substring(0,pos).trim(); } if
98      * (line.length() == 0) continue; List pieces = Utility.splitList(line,';', true);
99      * bogusZones.put(pieces.get(0), pieces.get(1)); } in.close(); } catch (IOException e) { throw
100      * new IllegalArgumentException("Can't find timezone aliases"); } } return bogusZones; }
101      */
    // Filled in by makeZoneData() the first time getZoneData() is called; null until then.
    Map<String, List<String>> zoneData;

    // Trimmed zone.tab lines that contained a '#'; collected by makeZoneData() before the
    // comment part is stripped off.
    Set<String> skippedAliases = new TreeSet<>();
105 
106     /*
107      * # This file contains a table with the following columns: # 1. ISO 3166
108      * 2-character country code. See the file `iso3166.tab'. # 2. Latitude and
109      * longitude of the zone's principal location # in ISO 6709
110      * sign-degrees-minutes-seconds format, # either +-DDMM+-DDDMM or
111      * +-DDMMSS+-DDDMMSS, # first latitude (+ is north), then longitude (+ is
112      * east). # 3. Zone name used in value of TZ environment variable. # 4.
113      * Comments; present if and only if the country has multiple rows. # # Columns
114      * are separated by a single tab.
115      */
parseYear(String year, int defaultValue)116     static int parseYear(String year, int defaultValue) {
117         if ("only".startsWith(year)) return defaultValue;
118         if ("minimum".startsWith(year)) return Integer.MIN_VALUE;
119         if ("maximum".startsWith(year)) return Integer.MAX_VALUE;
120         return Integer.parseInt(year);
121     }
122 
123     public static class Time {
124         public int seconds;
125         public byte type;
126         static final byte WALL = 0, STANDARD = 1, UNIVERSAL = 2;
127 
Time(String in)128         Time(String in) {
129             if (in.equals("-")) return; // zero/WALL is the default
130             char suffix = in.charAt(in.length() - 1);
131             switch (suffix) {
132                 case 'w':
133                     in = in.substring(0, in.length() - 1);
134                     break;
135                 case 's':
136                     in = in.substring(0, in.length() - 1);
137                     type = STANDARD;
138                     break;
139                 case 'u':
140                 case 'g':
141                 case 'z':
142                     in = in.substring(0, in.length() - 1);
143                     type = UNIVERSAL;
144                     break;
145             }
146             seconds = parseSeconds(in, false);
147         }
148 
parseSeconds(String in, boolean allowNegative)149         public static int parseSeconds(String in, boolean allowNegative) {
150             boolean negative = false;
151             if (in.startsWith("-")) {
152                 assert (allowNegative);
153                 negative = true;
154                 in = in.substring(1);
155             }
156             String[] pieces = in.split(":");
157             int multiplier = 3600;
158             int result = 0;
159             for (int i = 0; i < pieces.length; ++i) {
160                 result += multiplier * Integer.parseInt(pieces[i]);
161                 multiplier /= 60;
162                 assert (multiplier >= 0);
163             }
164             if (negative) result = -result;
165             return result;
166         }
167 
168         @Override
toString()169         public String toString() {
170             return BoilerplateUtilities.toStringHelper(this);
171         }
172     }
173 
174     static final String[] months = {
175         "january",
176         "february",
177         "march",
178         "april",
179         "may",
180         "june",
181         "july",
182         "august",
183         "september",
184         "october",
185         "november",
186         "december"
187     };
188     static final String[] weekdays = {
189         "sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday"
190     };
191 
findStartsWith(String value, String[] array, boolean exact)192     static int findStartsWith(String value, String[] array, boolean exact) {
193         value = value.toLowerCase(Locale.ENGLISH);
194         for (int i = 0; i < array.length; ++i) {
195             if (array[i].startsWith(value)) return i;
196         }
197         throw new IllegalArgumentException("Can't find " + value + " in " + Arrays.asList(months));
198     }
199 
200     static Pattern dayPattern =
201             PatternCache.get("([0-9]+)|(last)([a-z]+)|([a-z]+)([<=>]+)([0-9]+)");
202     static final String[] relations = {"<=", ">="};
203 
204     public static class Day implements Comparable<Object> {
205         public int number;
206         public byte relation;
207         public int weekDay;
208         static final byte NONE = 0, LEQ = 2, GEQ = 4;
209 
Day(String value)210         Day(String value) {
211             value = value.toLowerCase();
212             Matcher matcher = dayPattern.matcher(value);
213             if (!matcher.matches()) {
214                 throw new IllegalArgumentException();
215             }
216             if (matcher.group(1) != null) {
217                 number = Integer.parseInt(matcher.group(1));
218                 return;
219             }
220             if (matcher.group(2) != null) {
221                 weekDay = findStartsWith(matcher.group(3), weekdays, false);
222                 number = 31;
223                 relation = LEQ;
224                 return;
225             }
226             if (matcher.group(4) != null) {
227                 weekDay = findStartsWith(matcher.group(4), weekdays, false);
228                 relation = (byte) findStartsWith(matcher.group(5), relations, false);
229                 number = Integer.parseInt(matcher.group(6));
230                 return;
231             }
232             throw new IllegalArgumentException();
233         }
234 
235         @Override
toString()236         public String toString() {
237             return BoilerplateUtilities.toStringHelper(this);
238         }
239 
240         @Override
compareTo(Object other)241         public int compareTo(Object other) {
242             return toString().compareTo(other.toString());
243         }
244     }
245 
246     /**
247      * A rule line has the form
248      *
249      * <p>Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
250      *
251      * <p>For example:
252      *
253      * <p>Rule US 1967 1973 - Apr lastSun 2:00 1:00 D
254      *
255      * <p>The fields that make up a rule line are:
256      *
257      * <p>NAME Gives the (arbitrary) name of the set of rules this rule is part of.
258      *
259      * <p>FROM Gives the first year in which the rule applies. Any integer year can be supplied; the
260      * Gregorian calendar is assumed. The word minimum (or an abbreviation) means the minimum year
261      * representable as an integer. The word maximum (or an abbreviation) means the maximum year
262      * representable as an integer. Rules can describe times that are not representable as time
263      * values, with the unrepresentable times ignored; this allows rules to be portable among hosts
264      * with differing time value types.
265      *
266      * <p>TO Gives the final year in which the rule applies. In addition to minimum and maximum (as
267      * above), the word only (or an abbreviation) may be used to repeat the value of the FROM field.
268      *
269      * <p>TYPE Gives the type of year in which the rule applies. If TYPE is - then the rule applies
270      * in all years between FROM and TO inclusive. If TYPE is something else, then zic executes the
271      * command yearistype year type to check the type of a year: an exit status of zero is taken to
272      * mean that the year is of the given type; an exit status of one is taken to mean that the year
273      * is not of the given type.
274      *
275      * <p>IN Names the month in which the rule takes effect. Month names may be abbreviated.
276      *
277      * <p>ON Gives the day on which the rule takes effect. Recognized forms include:
278      *
279      * <p>5 the fifth of the month lastSun the last Sunday in the month lastMon the last Monday in
280      * the month Sun>=8 first Sunday on or after the eighth Sun<=25 last Sunday on or before the
281      * 25th
282      *
283      * <p>Names of days of the week may be abbreviated or spelled out in full. Note that there must
284      * be no spaces within the ON field.
285      *
286      * <p>AT Gives the time of day at which the rule takes effect. Recognized forms include:
287      *
288      * <p>2 time in hours 2:00 time in hours and minutes 15:00 24-hour format time (for times after
289      * noon) 1:28:14 time in hours, minutes, and seconds - equivalent to 0
290      *
291      * <p>where hour 0 is midnight at the start of the day, and hour 24 is midnight at the end of
292      * the day. Any of these forms may be followed by the letter w if the given time is local "wall
293      * clock" time, s if the given time is local "standard" time, or u (or g or z) if the given time
294      * is universal time; in the absence of an indicator, wall clock time is assumed. ** cannot be
295      * negative
296      *
297      * <p>SAVE Gives the amount of time to be added to local standard time when the rule is in
298      * effect. This field has the same format as the AT field (although, of course, the w and s
299      * suffixes are not used). ** can be positive or negative
300      *
301      * <p>LETTER/S Gives the "variable part" (for example, the "S" or "D" in "EST" or "EDT") of time
302      * zone abbreviations to be used when this rule is in effect. If this field is -, the variable
303      * part is null.
304      */
305     public static class RuleLine {
306         public static Set<String> types = new TreeSet<>();
307         public static Set<Day> days = new TreeSet<>();
308         static Set<Integer> saves = new TreeSet<>();
309 
RuleLine(List<String> l)310         RuleLine(List<String> l) {
311             fromYear = parseYear(l.get(0), 0);
312             toYear = parseYear(l.get(1), fromYear);
313             type = l.get(2);
314             if (type.equals("-")) type = null;
315             month = 1 + findStartsWith(l.get(3), months, false);
316             day = new Day(l.get(4));
317             time = new Time(l.get(5));
318             save = Time.parseSeconds(l.get(6), true);
319             letter = l.get(7);
320             if (letter.equals("-")) letter = null;
321             if (type != null) types.add(type);
322             days.add(day);
323         }
324 
325         @Override
toString()326         public String toString() {
327             return BoilerplateUtilities.toStringHelper(this);
328         }
329 
330         public int fromYear;
331 
332         public int toYear;
333 
334         public String type;
335 
336         public int month;
337 
338         public Day day;
339 
340         public Time time;
341 
342         public int save;
343 
344         public String letter;
345 
346         public static final int FIELD_COUNT = 8; // excluding Rule, Name
347     }
348 
349     /**
350      * A zone line has the form
351      *
352      * <p>Zone NAME GMTOFF RULES/SAVE FORMAT [UNTIL]
353      *
354      * <p>For example:
355      *
356      * <p>Zone Australia/Adelaide 9:30 Aus CST 1971 Oct 31 2:00
357      *
358      * <p>The fields that make up a zone line are:
359      *
360      * <p>NAME The name of the time zone. This is the name used in creating the time conversion
361      * information file for the zone.
362      *
363      * <p>GMTOFF The amount of time to add to UTC to get standard time in this zone. This field has
364      * the same format as the AT and SAVE fields of rule lines; begin the field with a minus sign if
365      * time must be subtracted from UTC.
366      *
367      * <p>RULES/SAVE The name of the rule(s) that apply in the time zone or, alternately, an amount
368      * of time to add to local standard time. If this field is - then standard time always applies
369      * in the time zone.
370      *
371      * <p>FORMAT The format for time zone abbreviations in this time zone. The pair of characters %s
372      * is used to show where the "variable part" of the time zone abbreviation goes. Alternately, a
373      * slash (/) separates standard and daylight abbreviations.
374      *
375      * <p>UNTIL The time at which the UTC offset or the rule(s) change for a location. It is
376      * specified as a year, a month, a day, and a time of day. If this is specified, the time zone
377      * information is generated from the given UTC offset and rule change until the time specified.
378      * The month, day, and time of day have the same format as the IN, ON, and AT columns of a rule;
379      * trailing columns can be omitted, and default to the earliest possible value for the missing
380      * columns.
381      *
382      * <p>The next line must be a "continuation" line; this has the same form as a zone line except
383      * that the string "Zone" and the name are omitted, as the continuation line will place
384      * information starting at the time specified as the UNTIL field in the previous line in the
385      * file used by the previous line. Continuation lines may contain an UNTIL field, just as zone
386      * lines do, indicating that the next line is a further continuation.
387      */
388     public static class ZoneLine {
389         public static Set<Day> untilDays = new TreeSet<>();
390         public static Set<String> rulesSaves = new TreeSet<>();
391 
ZoneLine(List<String> l)392         ZoneLine(List<String> l) {
393             gmtOff = Time.parseSeconds(l.get(0), true);
394             rulesSave = l.get(1);
395             if (rulesSave.equals("-")) rulesSave = "0";
396             else if (rulesSave.charAt(0) < 'A')
397                 rulesSave = "" + Time.parseSeconds(rulesSave, false);
398 
399             format = l.get(2);
400             switch (l.size()) {
401                 case 7:
402                     untilTime = new Time(l.get(6)); // fall through
403                 case 6:
404                     untilDay = new Day(l.get(5)); // fall through
405                     untilDays.add(untilDay);
406                 case 5:
407                     untilMonth = 1 + findStartsWith(l.get(4), months, false); // fall through
408                 case 4:
409                     untilYear = parseYear(l.get(3), Integer.MAX_VALUE); // fall through
410                 case 3:
411                     break; // ok
412                 default:
413                     throw new IllegalArgumentException("Wrong field count: " + l);
414             }
415             rulesSaves.add(rulesSave);
416         }
417 
418         @Override
toString()419         public String toString() {
420             return BoilerplateUtilities.toStringHelper(this);
421         }
422 
423         public int gmtOff;
424 
425         public String rulesSave;
426 
427         public String format;
428 
429         public int untilYear = Integer.MAX_VALUE; // indicating continuation
430 
431         public int untilMonth;
432 
433         public Day untilDay;
434 
435         public Time untilTime;
436 
437         public String comment;
438 
439         public static final int FIELD_COUNT = 3; // excluding Zone, Name
440 
441         public static final int FIELD_COUNT_UNTIL = 7; // excluding Zone, Name
442     }
443 
    // Rule lines grouped under a string key; presumably the rule NAME — confirm against the
    // code that fills this map.
    Map<String, List<RuleLine>> ruleID_rules = new TreeMap<>();

    // Zone lines keyed by zone ID; getTransitions() looks zones up here.
    Map<String, List<ZoneLine>> zone_rules = new TreeMap<>();

    // Consulted by TZIDComparator when an ID has no zone data of its own. Presumably maps an
    // old (linked) ID to its new ID — confirm against the code that fills this map.
    Map<String, String> linkold_new = new TreeMap<>();

    // Presumably the inverse of linkold_new: new ID -> set of old IDs — TODO confirm.
    Map<String, Set<String>> linkNew_oldSet = new TreeMap<>();
451 
    // Plain holder for one transition (date, offset, abbreviation).
    // NOTE(review): units of date/offset are not established by the code shown here — confirm.
    public class Transition {
        public long date;
        public long offset;
        public String abbreviation;
    }
457 
458     public class TransitionList {
459 
addTransitions(ZoneLine lastZoneLine, ZoneLine zoneLine, int startYear, int endYear)460         void addTransitions(ZoneLine lastZoneLine, ZoneLine zoneLine, int startYear, int endYear) {
461             // add everything between the zonelines
462             if (lastZoneLine == null) {
463                 return;
464             }
465             startYear = Math.max(startYear, lastZoneLine.untilYear);
466             endYear = Math.min(endYear, zoneLine.untilYear);
467             int gmtOffset = lastZoneLine.gmtOff;
468             for (int year = startYear; year <= endYear; ++year) {
469                 resolveTime(
470                         gmtOffset,
471                         lastZoneLine.untilYear,
472                         lastZoneLine.untilMonth,
473                         lastZoneLine.untilDay,
474                         lastZoneLine.untilTime);
475             }
476         }
477 
resolveTime( int gmtOffset, int untilYear, int untilMonth, Day untilDay, Time untilTime)478         private long resolveTime(
479                 int gmtOffset, int untilYear, int untilMonth, Day untilDay, Time untilTime) {
480             return 0;
481         }
482     }
483 
getTransitions(String zoneID, int startYear, int endYear)484     public TransitionList getTransitions(String zoneID, int startYear, int endYear) {
485         TransitionList results = new TransitionList();
486         List<ZoneLine> rules = zone_rules.get(zoneID);
487         ZoneLine lastZoneLine = null;
488         for (ZoneLine zoneLine : rules) {
489             results.addTransitions(lastZoneLine, zoneLine, startYear, endYear);
490             lastZoneLine = zoneLine;
491         }
492         return results;
493     }
494 
getTZIDComparator()495     public Comparator<String> getTZIDComparator() {
496         return TZIDComparator;
497     }
498 
499     private static List<String> errorData =
500             Arrays.asList(
501                     new String[] {
502                         String.valueOf(Double.MIN_VALUE), String.valueOf(Double.MIN_VALUE), ""
503                     });
504 
505     private Comparator<String> TZIDComparator =
506             new Comparator<>() {
507                 Map<String, List<String>> data = getZoneData();
508 
509                 @Override
510                 public int compare(String s1, String s2) {
511                     List<String> data1 = getData(s1);
512                     List<String> data2 = getData(s2);
513                     int result;
514                     // country
515                     String country1 = data1.get(2);
516                     String country2 = data2.get(2);
517 
518                     if ((result = country1.compareTo(country2)) != 0) return result;
519                     // longitude
520                     Double d1 = Double.valueOf(data1.get(1));
521                     Double d2 = Double.valueOf(data2.get(1));
522                     if ((result = d1.compareTo(d2)) != 0) return result;
523                     // latitude
524                     d1 = Double.valueOf(data1.get(0));
525                     d2 = Double.valueOf(data2.get(0));
526                     if ((result = d1.compareTo(d2)) != 0) return result;
527                     // name
528                     return s1.compareTo(s2);
529                 }
530 
531                 /**
532                  * Get timezone data for the given location Include work-arounds for missing time
533                  * zones
534                  *
535                  * @param s the string like "Australia/Currie"
536                  * @return a list of 4 strings for latitude, longitude, country, city
537                  *     <p>Reference: https://unicode-org.atlassian.net/browse/CLDR-14428
538                  */
539                 private List<String> getData(String s) {
540                     List<String> d = data.get(s);
541                     if (d == null) {
542                         String sNew = linkold_new.get(s);
543                         if (sNew != null) {
544                             d = data.get(sNew);
545                         }
546                         if (d == null) {
547                             d = errorData;
548                         }
549                     }
550                     return d;
551                 }
552             };
553 
554     public static MapComparator<String> regionalCompare = new MapComparator<>();
555 
556     static {
557         regionalCompare.add("America");
558         regionalCompare.add("Atlantic");
559         regionalCompare.add("Europe");
560         regionalCompare.add("Africa");
561         regionalCompare.add("Asia");
562         regionalCompare.add("Indian");
563         regionalCompare.add("Australia");
564         regionalCompare.add("Pacific");
565         regionalCompare.add("Arctic");
566         regionalCompare.add("Antarctica");
567         regionalCompare.add("Etc");
568     }
569 
570     private static String[] TZFiles = {
571         "africa",
572         "antarctica",
573         "asia",
574         "australasia",
575         "backward",
576         "etcetera",
577         "europe",
578         "northamerica",
579         "southamerica"
580     };
581 
582     private static Map<String, String> FIX_UNSTABLE_TZIDS;
583 
584     private static Set<String> SKIP_LINKS =
585             new HashSet<>(
586                     Arrays.asList(
587                             new String[] {
588                                 "America/Montreal", "America/Toronto",
589                                 "America/Santa_Isabel", "America/Tijuana"
590                             }));
591 
592     private static Set<String> PREFERRED_BASES =
593             new HashSet<>(Arrays.asList(new String[] {"Europe/London"}));
594 
595     private static String[][] ADD_ZONE_ALIASES_DATA = {
596         {"Etc/UCT", "Etc/UTC"},
597         {"EST", "Etc/GMT+5"},
598         {"MST", "Etc/GMT+7"},
599         {"HST", "Etc/GMT+10"},
600         {"SystemV/AST4", "Etc/GMT+4"},
601         {"SystemV/EST5", "Etc/GMT+5"},
602         {"SystemV/CST6", "Etc/GMT+6"},
603         {"SystemV/MST7", "Etc/GMT+7"},
604         {"SystemV/PST8", "Etc/GMT+8"},
605         {"SystemV/YST9", "Etc/GMT+9"},
606         {"SystemV/HST10", "Etc/GMT+10"},
607     };
608 
609     static String[] FIX_DEPRECATED_ZONE_DATA = {
610         "Africa/Timbuktu",
611         "America/Argentina/ComodRivadavia",
612         "America/Santa_Isabel",
613         "Europe/Belfast",
614         "Pacific/Yap",
615         "Antarctica/South_Pole",
616         "America/Shiprock",
617         "America/Montreal",
618         "Asia/Chongqing",
619         "Asia/Harbin",
620         "Asia/Kashgar"
621     };
622 
623     static {
624         // The format is <new name>, <old name>
625         String[][] FIX_UNSTABLE_TZID_DATA =
626                 new String[][] {
627                     {"America/Atikokan", "America/Coral_Harbour"},
628                     {"America/Argentina/Buenos_Aires", "America/Buenos_Aires"},
629                     {"America/Argentina/Catamarca", "America/Catamarca"},
630                     {"America/Argentina/Cordoba", "America/Cordoba"},
631                     {"America/Argentina/Jujuy", "America/Jujuy"},
632                     {"America/Argentina/Mendoza", "America/Mendoza"},
633                     {"America/Nuuk", "America/Godthab"},
634                     {"America/Kentucky/Louisville", "America/Louisville"},
635                     {"America/Indiana/Indianapolis", "America/Indianapolis"},
636                     {"Africa/Asmara", "Africa/Asmera"},
637                     {"Atlantic/Faroe", "Atlantic/Faeroe"},
638                     {"Asia/Kolkata", "Asia/Calcutta"},
639                     {"Asia/Ho_Chi_Minh", "Asia/Saigon"},
640                     {"Asia/Yangon", "Asia/Rangoon"},
641                     {"Asia/Kathmandu", "Asia/Katmandu"},
642                     {"Europe/Kyiv", "Europe/Kiev"},
643                     {"Pacific/Pohnpei", "Pacific/Ponape"},
644                     {"Pacific/Chuuk", "Pacific/Truk"},
645                     {"Pacific/Honolulu", "Pacific/Johnston"}
646                 };
647         FIX_UNSTABLE_TZIDS = CldrUtility.asMap(FIX_UNSTABLE_TZID_DATA);
648     }
649 
650     // CLDR canonical zone IDs removed from zone.tab are defined here.
651     // When these zones are deprecated in CLDR, remove them from this array.
652     // See CLDR-16049
653     static final String[][] SUPPLEMENTAL_ZONE_ID_DATA = {
654         {"Europe/Uzhgorod", "UA", "+4837+02218"}, // 2022d
655         {"Europe/Zaporozhye", "UA", "+4750+03510"}, // 2022d
656         {"America/Nipigon", "CA", "+4901-08816"}, // 2022f
657         {"America/Rainy_River", "CA", "+4843-09434"}, // 2022f
658         {"America/Thunder_Bay", "CA", "+4823-08915"}, // 2022f
659         {"America/Pangnirtung", "CA", "+6608-06544"}, // 2022g
660     };
661 
662     /** */
makeZoneData()663     private void makeZoneData() {
664         try {
665             // get version
666             BufferedReader versionIn = CldrUtility.getUTF8Data("tzdb-version.txt");
667             version = versionIn.readLine();
668             if (!version.matches("[0-9]{4}[a-z]")) {
669                 throw new IllegalArgumentException(
670                         String.format(
671                                 "Bad Version number: %s, should be of the form 2007x", version));
672             }
673             versionIn.close();
674 
675             // String deg = "([+-][0-9]+)";//
676             String deg = "([+-])([0-9][0-9][0-9]?)([0-9][0-9])([0-9][0-9])?"; //
677             Matcher m = PatternCache.get(deg + deg).matcher("");
678             zoneData = new TreeMap<>();
679             BufferedReader in = CldrUtility.getUTF8Data("zone.tab");
680             while (true) {
681                 String line = in.readLine();
682                 if (line == null) break;
683                 line = line.trim();
684                 int pos = line.indexOf('#');
685                 if (pos >= 0) {
686                     skippedAliases.add(line);
687                     line = line.substring(0, pos).trim();
688                 }
689                 if (line.length() == 0) continue;
690                 List<String> pieces = CldrUtility.splitList(line, '\t', true);
691                 String country = pieces.get(0);
692                 String latLong = pieces.get(1);
693                 String tzid = pieces.get(2);
694                 String ntzid = FIX_UNSTABLE_TZIDS.get(tzid);
695                 if (ntzid != null) tzid = ntzid;
696                 String comment = pieces.size() < 4 ? null : (String) pieces.get(3);
697                 pieces.clear();
698                 if (!m.reset(latLong).matches())
699                     throw new IllegalArgumentException("Bad zone.tab, lat/long format: " + line);
700 
701                 pieces.add(getDegrees(m, true).toString());
702                 pieces.add(getDegrees(m, false).toString());
703                 pieces.add(country);
704                 if (comment != null) pieces.add(comment);
705                 if (zoneData.containsKey(tzid))
706                     throw new IllegalArgumentException("Bad zone.tab, duplicate entry: " + line);
707                 zoneData.put(tzid, pieces);
708             }
709             in.close();
710             // add Etcs
711             for (int i = -14; i <= 12; ++i) {
712                 List<String> pieces = new ArrayList<>();
713                 int latitude = 0;
714                 int longitude = i * 15;
715                 if (longitude <= -180) {
716                     longitude += 360;
717                 }
718                 pieces.add(Double.toString(latitude)); // lat
719                 // remember that the sign of the TZIDs is wrong
720                 pieces.add(Double.toString(-longitude)); // long
721                 pieces.add(StandardCodes.NO_COUNTRY); // country
722 
723                 zoneData.put("Etc/GMT" + (i == 0 ? "" : i < 0 ? "" + i : "+" + i), pieces);
724             }
725             // add Unknown / UTC
726             List<String> pieces = new ArrayList<>();
727             pieces.add(Double.toString(0)); // lat
728             pieces.add(Double.toString(0)); // long
729             pieces.add(StandardCodes.NO_COUNTRY); // country
730             zoneData.put("Etc/Unknown", pieces);
731             zoneData.put("Etc/UTC", pieces);
732 
733             // add extra zones
734             for (String[] zoneEntry : SUPPLEMENTAL_ZONE_ID_DATA) {
735                 List<String> zarray = new ArrayList<>();
736                 if (!m.reset(zoneEntry[2]).matches()) {
737                     throw new IllegalArgumentException(
738                             "Bad zone.tab, lat/long format: " + zoneEntry[2]);
739                 }
740                 zarray.add(getDegrees(m, true).toString());
741                 zarray.add(getDegrees(m, false).toString());
742                 zarray.add(zoneEntry[1]);
743                 zoneData.put(zoneEntry[0], zarray);
744             }
745 
746             zoneData = CldrUtility.protectCollection(zoneData); // protect for later
747 
748             // now get links
749             Pattern whitespace = PatternCache.get("\\s+");
750             XEquivalenceClass<String, String> linkedItems = new XEquivalenceClass<>("None");
751             for (int i = 0; i < TZFiles.length; ++i) {
752                 in = CldrUtility.getUTF8Data(TZFiles[i]);
753                 String zoneID = null;
754                 while (true) {
755                     String line = in.readLine();
756                     if (line == null) break;
757                     String originalLine = line;
758                     int commentPos = line.indexOf("#");
759                     String comment = null;
760                     if (commentPos >= 0) {
761                         comment = line.substring(commentPos + 1).trim();
762                         line = line.substring(0, commentPos);
763                     }
764                     line = line.trim();
765                     if (line.length() == 0) continue;
766                     String[] items = whitespace.split(line);
767                     if (zoneID != null || items[0].equals("Zone")) {
768                         List<String> l = new ArrayList<>();
769                         l.addAll(Arrays.asList(items));
770 
771                         // Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 15 0:01
772                         // 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time
773                         if (zoneID == null) {
774                             l.remove(0); // "Zone"
775                             zoneID = l.get(0);
776                             String ntzid = FIX_UNSTABLE_TZIDS.get(zoneID);
777                             if (ntzid != null) zoneID = ntzid;
778                             l.remove(0);
779                         }
780                         List<ZoneLine> zoneRules = zone_rules.get(zoneID);
781                         if (zoneRules == null) {
782                             zoneRules = new ArrayList<>();
783                             zone_rules.put(zoneID, zoneRules);
784                         }
785 
786                         if (l.size() < ZoneLine.FIELD_COUNT
787                                 || l.size() > ZoneLine.FIELD_COUNT_UNTIL) {
788                             System.out.println("***Zone incorrect field count:");
789                             System.out.println(l);
790                             System.out.println(originalLine);
791                         }
792 
793                         ZoneLine zoneLine = new ZoneLine(l);
794                         zoneLine.comment = comment;
795                         zoneRules.add(zoneLine);
796                         if (l.size() == ZoneLine.FIELD_COUNT) {
797                             zoneID = null; // no continuation line
798                         }
799                     } else if (items[0].equals("Rule")) {
800                         // # Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
801                         // Rule Algeria 1916 only - Jun 14 23:00s 1:00 S
802 
803                         String ruleID = items[1];
804                         List<RuleLine> ruleList = ruleID_rules.get(ruleID);
805                         if (ruleList == null) {
806                             ruleList = new ArrayList<>();
807                             ruleID_rules.put(ruleID, ruleList);
808                         }
809                         List<String> l = new ArrayList<>();
810                         l.addAll(Arrays.asList(items));
811                         l.remove(0);
812                         l.remove(0);
813                         if (l.size() != RuleLine.FIELD_COUNT) {
814                             System.out.println("***Rule incorrect field count:");
815                             System.out.println(l);
816                         }
817                         if (comment != null) l.add(comment);
818                         RuleLine ruleLine = new RuleLine(l);
819                         ruleList.add(ruleLine);
820 
821                     } else if (items[0].equals("Link")) {
822                         String old = items[2];
823                         String newOne = items[1];
824                         if (!(SKIP_LINKS.contains(old) && SKIP_LINKS.contains(newOne))) {
825                             // System.out.println("Original " + old + "\t=>\t" + newOne);
826                             linkedItems.add(old, newOne);
827                         }
828                         /*
829                          * String conflict = (String) linkold_new.get(old); if (conflict !=
830                          * null) { System.out.println("Conflict with old: " + old + " => " +
831                          * conflict + ", " + newOne); } System.out.println(old + "\t=>\t" +
832                          * newOne); linkold_new.put(old, newOne);
833                          */
834                     } else {
835                         if (DEBUG) System.out.println("Unknown zone line: " + line);
836                     }
837                 }
838                 in.close();
839             }
840             // add in stuff that should be links
841             for (int i = 0; i < ADD_ZONE_ALIASES_DATA.length; ++i) {
842                 linkedItems.add(ADD_ZONE_ALIASES_DATA[i][0], ADD_ZONE_ALIASES_DATA[i][1]);
843             }
844 
845             Set<String> isCanonical = zoneData.keySet();
846 
847             // walk through the sets, and
848             // if any set contains two canonical items, split it.
849             // if any contains one, make it the primary
850             // if any contains zero, problem!
851             for (Set<String> equivalents : linkedItems.getEquivalenceSets()) {
852                 Set<String> canonicals = new TreeSet<>(equivalents);
853                 canonicals.retainAll(isCanonical);
854                 if (canonicals.size() == 0)
855                     throw new IllegalArgumentException("No canonicals in: " + equivalents);
856                 if (canonicals.size() > 1) {
857                     if (DEBUG) {
858                         System.out.println("Too many canonicals in: " + equivalents);
859                         System.out.println(
860                                 "\t*Don't* put these into the same equivalence class: "
861                                         + canonicals);
862                     }
863                     Set<String> remainder = new TreeSet<>(equivalents);
864                     remainder.removeAll(isCanonical);
865                     if (remainder.size() != 0) {
866                         if (DEBUG) {
867                             System.out.println(
868                                     "\tThe following should be equivalent to others: " + remainder);
869                         }
870                     }
871                 }
872                 {
873                     String newOne;
874                     // get the item that we want to hang all the aliases off of.
875                     // normally this is the first (alphabetically) one, but
876                     // it may be overridden with PREFERRED_BASES
877                     Set<String> preferredItems = new HashSet<>(PREFERRED_BASES);
878                     preferredItems.retainAll(canonicals);
879                     if (preferredItems.size() > 0) {
880                         newOne = preferredItems.iterator().next();
881                     } else {
882                         newOne = canonicals.iterator().next();
883                     }
884                     for (String oldOne : equivalents) {
885                         if (canonicals.contains(oldOne)) continue;
886                         // System.out.println("Mapping " + oldOne + "\t=>\t" + newOne);
887                         linkold_new.put(oldOne, newOne);
888                     }
889                 }
890             }
891 
892             /*
893              * // fix the links from old to new, to remove chains for (Iterator it =
894              * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem =
895              * it.next(); Object newItem = linkold_new.get(oldItem); while (true) {
896              * Object linkItem = linkold_new.get(newItem); if (linkItem == null)
897              * break; if (true) System.out.println("Connecting link chain: " + oldItem +
898              * "\t=> " + newItem + "\t=> " + linkItem); newItem = linkItem;
899              * linkold_new.put(oldItem, newItem); } }
900              * // reverse the links *from* canonical names for (Iterator it =
901              * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem =
902              * it.next(); if (!isCanonical.contains(oldItem)) continue; Object newItem =
903              * linkold_new.get(oldItem); }
904              *
905              * // fix unstable TZIDs Set itemsToRemove = new HashSet(); Map
906              * itemsToAdd = new HashMap(); for (Iterator it =
907              * linkold_new.keySet().iterator(); it.hasNext();) { Object oldItem =
908              * it.next(); Object newItem = linkold_new.get(oldItem); Object modOldItem =
909              * RESTORE_UNSTABLE_TZIDS.get(oldItem); Object modNewItem =
910              * FIX_UNSTABLE_TZIDS.get(newItem); if (modOldItem == null && modNewItem ==
911              * null) continue; if (modOldItem == null) { // just fix old entry
912              * itemsToAdd.put(oldItem, modNewItem); continue; } // otherwise have to
913              * nuke and redo itemsToRemove.add(oldItem); if (modNewItem == null)
914              * modNewItem = newItem; itemsToAdd.put(modOldItem, modNewItem); } // now
915              * make fixes (we couldn't earlier because we were iterating
916              * Utility.removeAll(linkold_new, itemsToRemove);
917              * linkold_new.putAll(itemsToAdd);
918              * // now remove all links that are from canonical zones
919              * Utility.removeAll(linkold_new, zoneData.keySet());
920              */
921 
922             // generate list of new to old
923             for (Iterator<String> it = linkold_new.keySet().iterator(); it.hasNext(); ) {
924                 String oldZone = it.next();
925                 String newZone = linkold_new.get(oldZone);
926                 Set<String> s = linkNew_oldSet.get(newZone);
927                 if (s == null) linkNew_oldSet.put(newZone, s = new HashSet<>());
928                 s.add(oldZone);
929             }
930 
931             // PROTECT EVERYTHING
932             linkNew_oldSet = CldrUtility.protectCollection(linkNew_oldSet);
933             linkold_new = CldrUtility.protectCollection(linkold_new);
934             ruleID_rules = CldrUtility.protectCollection(ruleID_rules);
935             zone_rules = CldrUtility.protectCollection(zone_rules);
936             // TODO protect zone info later
937         } catch (IOException e) {
938             throw new ICUUncheckedIOException("Can't find timezone aliases: " + e.toString(), e);
939         }
940     }
941 
942     /**
943      * @param m
944      */
945     private Double getDegrees(Matcher m, boolean lat) {
946         int startIndex = lat ? 1 : 5;
947         double amount =
948                 Integer.parseInt(m.group(startIndex + 1))
949                         + Integer.parseInt(m.group(startIndex + 2)) / 60.0;
950         if (m.group(startIndex + 3) != null)
951             amount += Integer.parseInt(m.group(startIndex + 3)) / 3600.0;
952         if (m.group(startIndex).equals("-")) amount = -amount;
953         return amount;
954     }
955 
956     /**
957      * @return Returns the linkold_new.
958      */
959     public Map<String, String> getZoneLinkold_new() {
960         getZoneData();
961         return linkold_new;
962     }
963 
964     /**
965      * @return Returns the linkold_new.
966      */
967     public Map<String, Set<String>> getZoneLinkNew_OldSet() {
968         getZoneData();
969         return linkNew_oldSet;
970     }
971 
972     /**
973      * @return Returns the ruleID_rules.
974      */
975     public Map<String, List<RuleLine>> getZoneRuleID_rules() {
976         getZoneData();
977         return ruleID_rules;
978     }
979 
980     /**
981      * @return Returns the zone_rules.
982      */
983     public Map<String, List<ZoneLine>> getZone_rules() {
984         getZoneData();
985         return zone_rules;
986     }
987 
988     public String getVersion() {
989         return version;
990     }
991 }
992