xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/test/OutdatedPaths.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.test;
2 
3 import com.ibm.icu.util.ICUUncheckedIOException;
4 import java.io.DataInputStream;
5 import java.io.File;
6 import java.io.FileNotFoundException;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.lang.ref.Reference;
10 import java.lang.ref.SoftReference;
11 import java.util.Collections;
12 import java.util.HashMap;
13 import java.util.HashSet;
14 import java.util.Map;
15 import java.util.Set;
16 import org.unicode.cldr.tool.CldrVersion;
17 import org.unicode.cldr.util.CLDRConfig;
18 import org.unicode.cldr.util.CLDRFile;
19 import org.unicode.cldr.util.CldrUtility;
20 import org.unicode.cldr.util.Factory;
21 import org.unicode.cldr.util.InputStreamFactory;
22 import org.unicode.cldr.util.Pair;
23 import org.unicode.cldr.util.PathHeader;
24 import org.unicode.cldr.util.RegexLookup;
25 import org.unicode.cldr.util.StringId;
26 
27 /**
28  * This class should be used to detect when a path should be included in the set of outdated items,
29  * because the value in the locale has not changed since the last time the English changed. For
30  * efficiency, it only keeps a record of those values in trunk that are out of date.
31  *
32  * <p>That is, to get the set of outdated values, the caller should do the following:
33  *
34  * <ol>
35  *   <li>Test to see if the user has voted for a value for the path. If so, don't include.
36  *   <li>Test to see if the winning value for the path is different from the trunk value. If so,
37  *       don't include.
38  *   <li>Test with isOutdated(path) to see if the trunk value was outdated. If not, don't include.
39  *   <li>Otherwise, include this path in the set of outdated items.
40  * </ol>
41  *
42  * <p>To update the data file, use GenerateBirth.java.
43  */
44 public class OutdatedPaths {
45     public static String FORMAT_KEY = "odp-1";
46     public static final String NO_VALUE = "�";
47 
48     public static final String OUTDATED_DIR = "births/";
49     public static final String OUTDATED_ENGLISH_DATA = "outdatedEnglish.data";
50     public static final String OUTDATED_DATA = "outdated.data";
51 
52     private static final boolean DEBUG = CldrUtility.getProperty("OutdatedPathsDebug", false);
53 
54     private final Map<String, Set<Long>> localeToData = new HashMap<>();
55     private final Map<Long, Pair<CldrVersion, String>> pathToBirthNPrevious = new HashMap<>();
56 
57     /**
58      * Creates a new OutdatedPaths, using the data file "outdated.data" in the same directory as
59      * this class.
60      *
61      * @param version
62      */
OutdatedPaths()63     public OutdatedPaths() {
64         this(null);
65     }
66 
67     /**
68      * Loads the data from the specified directory, using the data file "outdated.data".
69      *
70      * @param directory
71      */
OutdatedPaths(String directory)72     public OutdatedPaths(String directory) {
73         Map<Long, PathHeader> id2header = new HashMap<>(); // for debugging
74 
75         readLocaleToPaths(directory, id2header);
76 
77         // now previous English
78 
79         readBirthValues(directory, id2header, pathToBirthNPrevious);
80     }
81 
readLocaleToPaths(String directory, Map<Long, PathHeader> id2header)82     private void readLocaleToPaths(String directory, Map<Long, PathHeader> id2header) {
83         try {
84             DataInputStream dataIn = openDataInput(directory, OUTDATED_DATA);
85             String key = dataIn.readUTF();
86             if (!OutdatedPaths.FORMAT_KEY.equals(key)) {
87                 throw new IllegalArgumentException(
88                         "Mismatch in FORMAT_KEY: expected="
89                                 + OutdatedPaths.FORMAT_KEY
90                                 + ", read="
91                                 + key);
92             }
93             if (DEBUG) {
94                 Factory factory = CLDRConfig.getInstance().getMainAndAnnotationsFactory();
95                 id2header = getIdToPath(factory);
96             }
97             while (true) {
98                 String locale = dataIn.readUTF();
99                 if (locale.equals("$END$")) {
100                     break;
101                 }
102                 if (DEBUG) {
103                     System.out.println("OutdatedPaths: Locale: " + locale);
104                 }
105                 final HashSet<Long> data = new HashSet<>();
106                 int size = dataIn.readInt();
107                 for (int i = 0; i < size; ++i) {
108                     long item = dataIn.readLong();
109                     data.add(item);
110                     if (DEBUG) {
111                         System.out.println(locale + "\t" + id2header.get(item));
112                     }
113                 }
114                 localeToData.put(locale, Collections.unmodifiableSet(data));
115             }
116             dataIn.close();
117         } catch (IOException e) {
118             throw new ICUUncheckedIOException("Data Not Available", e);
119         }
120     }
121 
readBirthValues( String outdatedDirectory, Map<Long, PathHeader> id2header, Map<Long, Pair<CldrVersion, String>> pathToBirthNPrevious2)122     public static void readBirthValues(
123             String outdatedDirectory,
124             Map<Long, PathHeader> id2header,
125             Map<Long, Pair<CldrVersion, String>> pathToBirthNPrevious2) {
126         try {
127             DataInputStream dataIn = openDataInput(outdatedDirectory, OUTDATED_ENGLISH_DATA);
128             String key = dataIn.readUTF();
129             if (!OutdatedPaths.FORMAT_KEY.equals(key)) {
130                 throw new IllegalArgumentException(
131                         "Mismatch in FORMAT_KEY: expected="
132                                 + OutdatedPaths.FORMAT_KEY
133                                 + ", read="
134                                 + key);
135             }
136 
137             int size = dataIn.readInt();
138             if (DEBUG) {
139                 System.out.println("English Data");
140             }
141             for (int i = 0; i < size; ++i) {
142                 long pathId = dataIn.readLong();
143                 String previous = dataIn.readUTF();
144                 CldrVersion birth = CldrVersion.from(dataIn.readUTF());
145 
146                 if (DEBUG) {
147                     System.out.println(
148                             "en\t("
149                                     + previous
150                                     + ")"
151                                     + (id2header == null ? "" : "\t" + id2header.get(pathId)));
152                 }
153                 pathToBirthNPrevious2.put(pathId, Pair.of(birth, previous).freeze());
154             }
155             String finalCheck = dataIn.readUTF();
156             if (!finalCheck.equals("$END$")) {
157                 throw new IllegalArgumentException("Corrupted " + OUTDATED_ENGLISH_DATA);
158             }
159             dataIn.close();
160         } catch (IOException e) {
161             throw new ICUUncheckedIOException("Data Not Available", e);
162         }
163     }
164 
getIdToPath(Factory factory)165     public Map<Long, PathHeader> getIdToPath(Factory factory) {
166         Map<Long, PathHeader> result = new HashMap<>();
167         CLDRFile english = factory.make("en", true);
168         PathHeader.Factory pathHeaders = PathHeader.getFactory(english);
169         for (String s : english) {
170             long id = StringId.getId(s);
171             PathHeader pathHeader = pathHeaders.fromPath(s);
172             result.put(id, pathHeader);
173         }
174         return result;
175     }
176 
openDataInput(String directory, String filename)177     private static DataInputStream openDataInput(String directory, String filename)
178             throws FileNotFoundException {
179         String dataFileName = filename;
180         InputStream fileInputStream =
181                 directory == null
182                         ? CldrUtility.getInputStream(OUTDATED_DIR + dataFileName)
183                         :
184                         // : new FileInputStream(new File(directory, dataFileName));
185                         InputStreamFactory.createInputStream(new File(directory, dataFileName));
186         DataInputStream dataIn = new DataInputStream(fileInputStream);
187         return dataIn;
188     }
189 
190     /**
191      * Returns true if the value for the path is outdated in trunk. See class description for more
192      * info.
193      *
194      * @param distinguishedPath
195      * @return true if the string is outdated
196      */
isOutdated(String locale, String distinguishedPath)197     public boolean isOutdated(String locale, String distinguishedPath) {
198         Set<Long> data = localeToData.get(locale);
199         if (data == null) {
200             return false;
201         }
202         long id = StringId.getId(distinguishedPath);
203         boolean result = data.contains(id);
204         if (result == false) {
205             return false;
206         }
207         Boolean toSkip = SKIP_PATHS.get(distinguishedPath);
208         if (toSkip != null) {
209             return false;
210         }
211         return result;
212     }
213 
214     /**
215      * The same as isOutdated, but also returns paths that aren't skipped.
216      *
217      * @param locale
218      * @param distinguishedPath
219      * @return
220      */
isRawOutdated(String locale, String distinguishedPath)221     public boolean isRawOutdated(String locale, String distinguishedPath) {
222         Set<Long> data = localeToData.get(locale);
223         if (data == null) {
224             return false;
225         }
226         long id = StringId.getId(distinguishedPath);
227         return data.contains(id);
228     }
229 
230     /**
231      * Is this path to be skipped? (because the English is normally irrelevant).
232      *
233      * @param distinguishedPath
234      * @return
235      */
isSkipped(String distinguishedPath)236     public boolean isSkipped(String distinguishedPath) {
237         return SKIP_PATHS.get(distinguishedPath) != null;
238     }
239 
240     /**
241      * Returns true if the value for the path is outdated in trunk. See class description for more
242      * info.
243      *
244      * @param distinguishedPath
245      * @return true if the string is outdated
246      */
getPreviousEnglish(String distinguishedPath)247     public String getPreviousEnglish(String distinguishedPath) {
248         long id = StringId.getId(distinguishedPath);
249         Pair<CldrVersion, String> value = pathToBirthNPrevious.get(id);
250         return value == null ? null : value.getSecond();
251     }
252 
getEnglishBirth(String distinguishedPath)253     public CldrVersion getEnglishBirth(String distinguishedPath) {
254         long id = StringId.getId(distinguishedPath);
255         Pair<CldrVersion, String> value = pathToBirthNPrevious.get(id);
256         return value == null ? null : value.getFirst();
257     }
258 
259     static RegexLookup<Boolean> SKIP_PATHS =
260             new RegexLookup<Boolean>()
261                     .add("/exemplarCharacters", true)
262                     .add("/references", true)
263                     .add("/delimiters/[^/]*uotation", true)
264                     .add("/posix", true)
265                     .add("/pattern", true)
266                     .add("/fields/field[^/]*/displayName", true)
267                     .add("/dateFormatItem", true)
268                     .add("/numbers/symbols", true)
269                     .add("/fallback", true)
270                     .add("/quarters", true)
271                     .add("/months", true);
272 
273     /**
274      * Returns the number of outdated paths.
275      *
276      * @param locale
277      * @return number of outdated paths.
278      */
countOutdated(String locale)279     public int countOutdated(String locale) {
280         Set<Long> data = localeToData.get(locale);
281         return data == null ? 0 : data.size();
282     }
283 
getInstance()284     public static OutdatedPaths getInstance() {
285         OutdatedPaths outdatedPaths = SINGLETON.get();
286         if (outdatedPaths == null) {
287             outdatedPaths = new OutdatedPaths();
288             SINGLETON = new SoftReference<>(outdatedPaths);
289         }
290         return outdatedPaths;
291     }
292 
293     private static Reference<OutdatedPaths> SINGLETON = new SoftReference<>(null);
294 }
295