1 package org.unicode.cldr.test; 2 3 import com.ibm.icu.util.ICUUncheckedIOException; 4 import java.io.DataInputStream; 5 import java.io.File; 6 import java.io.FileNotFoundException; 7 import java.io.IOException; 8 import java.io.InputStream; 9 import java.lang.ref.Reference; 10 import java.lang.ref.SoftReference; 11 import java.util.Collections; 12 import java.util.HashMap; 13 import java.util.HashSet; 14 import java.util.Map; 15 import java.util.Set; 16 import org.unicode.cldr.tool.CldrVersion; 17 import org.unicode.cldr.util.CLDRConfig; 18 import org.unicode.cldr.util.CLDRFile; 19 import org.unicode.cldr.util.CldrUtility; 20 import org.unicode.cldr.util.Factory; 21 import org.unicode.cldr.util.InputStreamFactory; 22 import org.unicode.cldr.util.Pair; 23 import org.unicode.cldr.util.PathHeader; 24 import org.unicode.cldr.util.RegexLookup; 25 import org.unicode.cldr.util.StringId; 26 27 /** 28 * This class should be used to detect when a path should be included in the set of outdated items, 29 * because the value in the locale has not changed since the last time the English changed. For 30 * efficiency, it only keeps a record of those values in trunk that are out of date. 31 * 32 * <p>That is, to get the set of outdated values, the caller should do the following: 33 * 34 * <ol> 35 * <li>Test to see if the user has voted for a value for the path. If so, don't include. 36 * <li>Test to see if the winning value for the path is different from the trunk value. If so, 37 * don't include. 38 * <li>Test with isOutdated(path) to see if the trunk value was outdated. If not, don't include. 39 * <li>Otherwise, include this path in the set of outdated items. 40 * </ol> 41 * 42 * <p>To update the data file, use GenerateBirth.java. 43 */ 44 public class OutdatedPaths { 45 public static String FORMAT_KEY = "odp-1"; 46 public static final String NO_VALUE = "�"; 47 48 public static final String OUTDATED_DIR = "births/"; 49 public static final String OUTDATED_ENGLISH_DATA = "outdatedEnglish.data"; 50 public static final String OUTDATED_DATA = "outdated.data"; 51 52 private static final boolean DEBUG = CldrUtility.getProperty("OutdatedPathsDebug", false); 53 54 private final Map<String, Set<Long>> localeToData = new HashMap<>(); 55 private final Map<Long, Pair<CldrVersion, String>> pathToBirthNPrevious = new HashMap<>(); 56 57 /** 58 * Creates a new OutdatedPaths, using the data file "outdated.data" in the same directory as 59 * this class. 60 * 61 * @param version 62 */ OutdatedPaths()63 public OutdatedPaths() { 64 this(null); 65 } 66 67 /** 68 * Loads the data from the specified directory, using the data file "outdated.data". 69 * 70 * @param directory 71 */ OutdatedPaths(String directory)72 public OutdatedPaths(String directory) { 73 Map<Long, PathHeader> id2header = new HashMap<>(); // for debugging 74 75 readLocaleToPaths(directory, id2header); 76 77 // now previous English 78 79 readBirthValues(directory, id2header, pathToBirthNPrevious); 80 } 81 readLocaleToPaths(String directory, Map<Long, PathHeader> id2header)82 private void readLocaleToPaths(String directory, Map<Long, PathHeader> id2header) { 83 try { 84 DataInputStream dataIn = openDataInput(directory, OUTDATED_DATA); 85 String key = dataIn.readUTF(); 86 if (!OutdatedPaths.FORMAT_KEY.equals(key)) { 87 throw new IllegalArgumentException( 88 "Mismatch in FORMAT_KEY: expected=" 89 + OutdatedPaths.FORMAT_KEY 90 + ", read=" 91 + key); 92 } 93 if (DEBUG) { 94 Factory factory = CLDRConfig.getInstance().getMainAndAnnotationsFactory(); 95 id2header = getIdToPath(factory); 96 } 97 while (true) { 98 String locale = dataIn.readUTF(); 99 if (locale.equals("$END$")) { 100 break; 101 } 102 if (DEBUG) { 103 System.out.println("OutdatedPaths: Locale: " + locale); 104 } 105 final HashSet<Long> data = new HashSet<>(); 106 int size = dataIn.readInt(); 107 for (int i = 0; i < size; ++i) { 108 long item = dataIn.readLong(); 109 data.add(item); 110 if (DEBUG) { 111 System.out.println(locale + "\t" + id2header.get(item)); 112 } 113 } 114 localeToData.put(locale, Collections.unmodifiableSet(data)); 115 } 116 dataIn.close(); 117 } catch (IOException e) { 118 throw new ICUUncheckedIOException("Data Not Available", e); 119 } 120 } 121 readBirthValues( String outdatedDirectory, Map<Long, PathHeader> id2header, Map<Long, Pair<CldrVersion, String>> pathToBirthNPrevious2)122 public static void readBirthValues( 123 String outdatedDirectory, 124 Map<Long, PathHeader> id2header, 125 Map<Long, Pair<CldrVersion, String>> pathToBirthNPrevious2) { 126 try { 127 DataInputStream dataIn = openDataInput(outdatedDirectory, OUTDATED_ENGLISH_DATA); 128 String key = dataIn.readUTF(); 129 if (!OutdatedPaths.FORMAT_KEY.equals(key)) { 130 throw new IllegalArgumentException( 131 "Mismatch in FORMAT_KEY: expected=" 132 + OutdatedPaths.FORMAT_KEY 133 + ", read=" 134 + key); 135 } 136 137 int size = dataIn.readInt(); 138 if (DEBUG) { 139 System.out.println("English Data"); 140 } 141 for (int i = 0; i < size; ++i) { 142 long pathId = dataIn.readLong(); 143 String previous = dataIn.readUTF(); 144 CldrVersion birth = CldrVersion.from(dataIn.readUTF()); 145 146 if (DEBUG) { 147 System.out.println( 148 "en\t(" 149 + previous 150 + ")" 151 + (id2header == null ? "" : "\t" + id2header.get(pathId))); 152 } 153 pathToBirthNPrevious2.put(pathId, Pair.of(birth, previous).freeze()); 154 } 155 String finalCheck = dataIn.readUTF(); 156 if (!finalCheck.equals("$END$")) { 157 throw new IllegalArgumentException("Corrupted " + OUTDATED_ENGLISH_DATA); 158 } 159 dataIn.close(); 160 } catch (IOException e) { 161 throw new ICUUncheckedIOException("Data Not Available", e); 162 } 163 } 164 getIdToPath(Factory factory)165 public Map<Long, PathHeader> getIdToPath(Factory factory) { 166 Map<Long, PathHeader> result = new HashMap<>(); 167 CLDRFile english = factory.make("en", true); 168 PathHeader.Factory pathHeaders = PathHeader.getFactory(english); 169 for (String s : english) { 170 long id = StringId.getId(s); 171 PathHeader pathHeader = pathHeaders.fromPath(s); 172 result.put(id, pathHeader); 173 } 174 return result; 175 } 176 openDataInput(String directory, String filename)177 private static DataInputStream openDataInput(String directory, String filename) 178 throws FileNotFoundException { 179 String dataFileName = filename; 180 InputStream fileInputStream = 181 directory == null 182 ? CldrUtility.getInputStream(OUTDATED_DIR + dataFileName) 183 : 184 // : new FileInputStream(new File(directory, dataFileName)); 185 InputStreamFactory.createInputStream(new File(directory, dataFileName)); 186 DataInputStream dataIn = new DataInputStream(fileInputStream); 187 return dataIn; 188 } 189 190 /** 191 * Returns true if the value for the path is outdated in trunk. See class description for more 192 * info. 193 * 194 * @param distinguishedPath 195 * @return true if the string is outdated 196 */ isOutdated(String locale, String distinguishedPath)197 public boolean isOutdated(String locale, String distinguishedPath) { 198 Set<Long> data = localeToData.get(locale); 199 if (data == null) { 200 return false; 201 } 202 long id = StringId.getId(distinguishedPath); 203 boolean result = data.contains(id); 204 if (result == false) { 205 return false; 206 } 207 Boolean toSkip = SKIP_PATHS.get(distinguishedPath); 208 if (toSkip != null) { 209 return false; 210 } 211 return result; 212 } 213 214 /** 215 * The same as isOutdated, but also returns paths that aren't skipped. 216 * 217 * @param locale 218 * @param distinguishedPath 219 * @return 220 */ isRawOutdated(String locale, String distinguishedPath)221 public boolean isRawOutdated(String locale, String distinguishedPath) { 222 Set<Long> data = localeToData.get(locale); 223 if (data == null) { 224 return false; 225 } 226 long id = StringId.getId(distinguishedPath); 227 return data.contains(id); 228 } 229 230 /** 231 * Is this path to be skipped? (because the English is normally irrelevant). 232 * 233 * @param distinguishedPath 234 * @return 235 */ isSkipped(String distinguishedPath)236 public boolean isSkipped(String distinguishedPath) { 237 return SKIP_PATHS.get(distinguishedPath) != null; 238 } 239 240 /** 241 * Returns true if the value for the path is outdated in trunk. See class description for more 242 * info. 243 * 244 * @param distinguishedPath 245 * @return true if the string is outdated 246 */ getPreviousEnglish(String distinguishedPath)247 public String getPreviousEnglish(String distinguishedPath) { 248 long id = StringId.getId(distinguishedPath); 249 Pair<CldrVersion, String> value = pathToBirthNPrevious.get(id); 250 return value == null ? null : value.getSecond(); 251 } 252 getEnglishBirth(String distinguishedPath)253 public CldrVersion getEnglishBirth(String distinguishedPath) { 254 long id = StringId.getId(distinguishedPath); 255 Pair<CldrVersion, String> value = pathToBirthNPrevious.get(id); 256 return value == null ? null : value.getFirst(); 257 } 258 259 static RegexLookup<Boolean> SKIP_PATHS = 260 new RegexLookup<Boolean>() 261 .add("/exemplarCharacters", true) 262 .add("/references", true) 263 .add("/delimiters/[^/]*uotation", true) 264 .add("/posix", true) 265 .add("/pattern", true) 266 .add("/fields/field[^/]*/displayName", true) 267 .add("/dateFormatItem", true) 268 .add("/numbers/symbols", true) 269 .add("/fallback", true) 270 .add("/quarters", true) 271 .add("/months", true); 272 273 /** 274 * Returns the number of outdated paths. 275 * 276 * @param locale 277 * @return number of outdated paths. 278 */ countOutdated(String locale)279 public int countOutdated(String locale) { 280 Set<Long> data = localeToData.get(locale); 281 return data == null ? 0 : data.size(); 282 } 283 getInstance()284 public static OutdatedPaths getInstance() { 285 OutdatedPaths outdatedPaths = SINGLETON.get(); 286 if (outdatedPaths == null) { 287 outdatedPaths = new OutdatedPaths(); 288 SINGLETON = new SoftReference<>(outdatedPaths); 289 } 290 return outdatedPaths; 291 } 292 293 private static Reference<OutdatedPaths> SINGLETON = new SoftReference<>(null); 294 } 295