1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2013, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import com.google.common.base.Splitter; 12 import com.google.common.collect.ImmutableMap; 13 import com.google.common.collect.ImmutableMultimap; 14 import com.google.common.collect.ImmutableSet; 15 import com.google.common.collect.Multimap; 16 import com.ibm.icu.impl.Utility; 17 import com.ibm.icu.text.DateFormat; 18 import com.ibm.icu.text.SimpleDateFormat; 19 import com.ibm.icu.text.Transform; 20 import com.ibm.icu.text.Transliterator; 21 import com.ibm.icu.text.UTF16; 22 import com.ibm.icu.text.UnicodeSet; 23 import com.ibm.icu.text.UnicodeSetIterator; 24 import com.ibm.icu.util.Freezable; 25 import com.ibm.icu.util.TimeZone; 26 import java.io.BufferedReader; 27 import java.io.File; 28 import java.io.FileReader; 29 import java.io.IOException; 30 import java.io.InputStream; 31 import java.io.InputStreamReader; 32 import java.io.PrintWriter; 33 import java.lang.reflect.Constructor; 34 import java.lang.reflect.Method; 35 import java.nio.file.Files; 36 import java.nio.file.Path; 37 import java.nio.file.Paths; 38 import java.util.ArrayList; 39 import java.util.Arrays; 40 import java.util.Calendar; 41 import java.util.Collection; 42 import java.util.Collections; 43 import java.util.Comparator; 44 import java.util.Date; 45 import java.util.EnumSet; 46 import java.util.HashMap; 47 import java.util.HashSet; 48 import java.util.Iterator; 49 import java.util.LinkedHashMap; 50 import java.util.LinkedHashSet; 51 import java.util.List; 52 import java.util.Map; 53 import java.util.Map.Entry; 54 import java.util.Objects; 55 import java.util.Set; 56 import java.util.SortedMap; 57 import java.util.SortedSet; 58 import java.util.TreeMap; 59 import java.util.TreeSet; 60 import java.util.concurrent.ConcurrentHashMap; 61 import java.util.concurrent.TimeUnit; 62 import java.util.regex.Matcher; 63 import java.util.regex.Pattern; 64 import org.unicode.cldr.draft.FileUtilities; 65 import org.unicode.cldr.tool.Chart; 66 67 public class CldrUtility { 68 /** 69 * These need to be consistent with "CLDR-Code-Git-Commit" in tools/cldr-code/pom.xml 70 * 71 * <p>If and when "CLDR-Apps-Git-Commit" in tools/cldr-apps/pom.xml becomes usable for the 72 * cldr-apps war file, we may add APPS_SLUG = "CLDR-Apps" here, and in some contexts use 73 * APPS_SLUG in addition to, or instead of, CODE_SLUG 74 */ 75 public static final String CODE_SLUG = "CLDR-Code"; 76 77 public static final String GIT_COMMIT_SUFFIX = "-Git-Commit"; 78 79 public static final String HOME_KEY = "CLDRHOME"; 80 public static final String DIR_KEY = "CLDR_DIR"; 81 public static final String MAIN_KEY = "CLDR_MAIN"; 82 83 public static final boolean DEBUG_MISSING_DIRECTORIES = false; 84 85 public static final boolean BETA = false; 86 87 public static final String LINE_SEPARATOR = "\n"; 88 public static final Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*"); 89 90 private static final boolean HANDLEFILE_SHOW_SKIP = false; 91 /** 92 * Constant for "∅∅∅". Indicates that a child locale has no value for a path even though a 93 * parent does. 94 */ 95 public static final String NO_INHERITANCE_MARKER = 96 new String(new char[] {0x2205, 0x2205, 0x2205}); 97 98 /** 99 * Define the constant INHERITANCE_MARKER for "↑↑↑", used by Survey Tool to indicate a 100 * "passthru" vote to the parent locale. If CLDRFile ever finds this value in a data field, 101 * writing of the field should be suppressed. 102 */ 103 public static final String INHERITANCE_MARKER = new String(new char[] {0x2191, 0x2191, 0x2191}); 104 105 public static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze(); 106 107 /** 108 * Very simple class, used to replace variables in a string. For example 109 * 110 * <p> 111 * 112 * <pre> 113 * static VariableReplacer langTag = new VariableReplacer() 114 * .add("$alpha", "[a-zA-Z]") 115 * .add("$digit", "[0-9]") 116 * .add("$alphanum", "[a-zA-Z0-9]") 117 * .add("$x", "[xX]"); 118 * ... 119 * String langTagPattern = langTag.replace(...); 120 * </pre> 121 */ 122 public static class VariableReplacer { 123 // simple implementation for now 124 private Map<String, String> m = new TreeMap<>(Collections.reverseOrder()); 125 add(String variable, String value)126 public VariableReplacer add(String variable, String value) { 127 m.put(variable, value); 128 return this; 129 } 130 replace(String source)131 public String replace(String source) { 132 String oldSource; 133 do { 134 oldSource = source; 135 for (Iterator<String> it = m.keySet().iterator(); it.hasNext(); ) { 136 String variable = it.next(); 137 String value = m.get(variable); 138 source = replaceAll(source, variable, value); 139 } 140 } while (!source.equals(oldSource)); 141 return source; 142 } 143 replaceAll(String source, String key, String value)144 public String replaceAll(String source, String key, String value) { 145 while (true) { 146 int pos = source.indexOf(key); 147 if (pos < 0) return source; 148 source = source.substring(0, pos) + value + source.substring(pos + key.length()); 149 } 150 } 151 } 152 153 public interface LineHandler { 154 /** 155 * Return false if line was skipped 156 * 157 * @param line 158 * @return 159 */ handle(String line)160 boolean handle(String line) throws Exception; 161 } 162 getPath(String fileOrDir, String filename)163 public static String getPath(String fileOrDir, String filename) { 164 // Required for cases where a system property is read but not default is given. 165 // TODO: Fix callers to not fail silently if properties are missing. 166 if (fileOrDir == null) { 167 return null; 168 } 169 Path path = Paths.get(fileOrDir); 170 if (filename != null) { 171 path = path.resolve(filename); 172 } 173 if (DEBUG_MISSING_DIRECTORIES && !Files.exists(path)) { 174 System.err.println("Warning: directory doesn't exist: " + path); 175 } 176 return PathUtilities.getNormalizedPathString(path) + File.separatorChar; 177 } 178 getPath(String path)179 public static String getPath(String path) { 180 return getPath(path, null); 181 } 182 183 public static final String ANALYTICS = Chart.AnalyticsID.CLDR.getScript(); 184 185 public static final List<String> MINIMUM_LANGUAGES = 186 Arrays.asList( 187 new String[] { 188 "ar", "en", "de", "fr", "hi", "it", "es", "pt", "ru", "zh", "ja" 189 }); // plus language itself 190 public static final List<String> MINIMUM_TERRITORIES = 191 Arrays.asList( 192 new String[] {"US", "GB", "DE", "FR", "IT", "JP", "CN", "IN", "RU", "BR"}); 193 194 public interface LineComparer { 195 static final int LINES_DIFFERENT = -1, LINES_SAME = 0, SKIP_FIRST = 1, SKIP_SECOND = 2; 196 197 /** 198 * Returns LINES_DIFFERENT, LINES_SAME, or if one of the lines is ignorable, SKIP_FIRST or 199 * SKIP_SECOND 200 * 201 * @param line1 202 * @param line2 203 * @return 204 */ compare(String line1, String line2)205 int compare(String line1, String line2); 206 } 207 208 public static class SimpleLineComparator implements LineComparer { 209 public static final int TRIM = 1, SKIP_SPACES = 2, SKIP_EMPTY = 4, SKIP_CVS_TAGS = 8; 210 StringIterator si1 = new StringIterator(); 211 StringIterator si2 = new StringIterator(); 212 int flags; 213 SimpleLineComparator(int flags)214 public SimpleLineComparator(int flags) { 215 this.flags = flags; 216 } 217 218 @Override compare(String line1, String line2)219 public int compare(String line1, String line2) { 220 // first, see if we want to skip one or the other lines 221 int skipper = 0; 222 if (line1 == null) { 223 skipper = SKIP_FIRST; 224 } else { 225 if ((flags & TRIM) != 0) line1 = line1.trim(); 226 if ((flags & SKIP_EMPTY) != 0 && line1.length() == 0) skipper = SKIP_FIRST; 227 } 228 if (line2 == null) { 229 skipper = SKIP_SECOND; 230 } else { 231 if ((flags & TRIM) != 0) line2 = line2.trim(); 232 if ((flags & SKIP_EMPTY) != 0 && line2.length() == 0) skipper += SKIP_SECOND; 233 } 234 if (skipper != 0) { 235 if (skipper == SKIP_FIRST + SKIP_SECOND) return LINES_SAME; // ok, don't skip both 236 return skipper; 237 } 238 239 // check for null 240 if (line1 == null) { 241 if (line2 == null) return LINES_SAME; 242 return LINES_DIFFERENT; 243 } 244 if (line2 == null) { 245 return LINES_DIFFERENT; 246 } 247 248 // now check equality 249 if (line1.equals(line2)) return LINES_SAME; 250 251 // if not equal, see if we are skipping spaces 252 if ((flags & SKIP_CVS_TAGS) != 0) { 253 if (line1.indexOf('$') >= 0 && line2.indexOf('$') >= 0) { 254 line1 = stripTags(line1); 255 line2 = stripTags(line2); 256 if (line1.equals(line2)) return LINES_SAME; 257 } else if (line1.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/") 258 && line2.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/")) { 259 return LINES_SAME; 260 } 261 } 262 if ((flags & SKIP_SPACES) != 0 && si1.set(line1).matches(si2.set(line2))) 263 return LINES_SAME; 264 return LINES_DIFFERENT; 265 } 266 267 // private Matcher dtdMatcher = PatternCache.get( 268 // "\\Q<!DOCTYPE ldml SYSTEM 269 // \"http://www.unicode.org/cldr/dtd/\\E.*\\Q/ldml.dtd\">\\E").matcher(""); 270 271 private String[] CVS_TAGS = {"Revision", "Date"}; 272 stripTags(String line)273 private String stripTags(String line) { 274 // $ 275 // Revision: 8994 $ 276 // $ 277 // Date: 2013-07-03 21:31:17 +0200 (Wed, 03 Jul 2013) $ 278 int pos = line.indexOf('$'); 279 if (pos < 0) return line; 280 pos++; 281 int endpos = line.indexOf('$', pos); 282 if (endpos < 0) return line; 283 for (int i = 0; i < CVS_TAGS.length; ++i) { 284 if (!line.startsWith(CVS_TAGS[i], pos)) continue; 285 line = line.substring(0, pos + CVS_TAGS[i].length()) + line.substring(endpos); 286 } 287 return line; 288 } 289 } 290 291 /** 292 * @param file1 293 * @param file2 294 * @param failureLines on input, String[2], on output, failing lines 295 * @param lineComparer 296 * @return 297 * @throws IOException 298 */ areFileIdentical( String file1, String file2, String[] failureLines, LineComparer lineComparer)299 public static boolean areFileIdentical( 300 String file1, String file2, String[] failureLines, LineComparer lineComparer) 301 throws IOException { 302 try (BufferedReader br1 = new BufferedReader(new FileReader(file1), 32 * 1024); 303 BufferedReader br2 = new BufferedReader(new FileReader(file2), 32 * 1024); ) { 304 String line1 = ""; 305 String line2 = ""; 306 int skip = 0; 307 308 while (true) { 309 if ((skip & LineComparer.SKIP_FIRST) == 0) line1 = br1.readLine(); 310 if ((skip & LineComparer.SKIP_SECOND) == 0) line2 = br2.readLine(); 311 if (line1 == null && line2 == null) return true; 312 if (line1 == null || line2 == null) { 313 // System.out.println("debug"); 314 } 315 skip = lineComparer.compare(line1, line2); 316 if (skip == LineComparer.LINES_DIFFERENT) { 317 break; 318 } 319 } 320 failureLines[0] = line1 != null ? line1 : "<end of file>"; 321 failureLines[1] = line2 != null ? line2 : "<end of file>"; 322 return false; 323 } 324 } 325 326 /* 327 * static String getLineWithoutFluff(BufferedReader br1, boolean first, int flags) throws IOException { 328 * while (true) { 329 * String line1 = br1.readLine(); 330 * if (line1 == null) return line1; 331 * if ((flags & TRIM)!= 0) line1 = line1.trim(); 332 * if ((flags & SKIP_EMPTY)!= 0 && line1.length() == 0) continue; 333 * return line1; 334 * } 335 * } 336 */ 337 338 public static final class StringIterator { 339 String string; 340 int position = 0; 341 next()342 char next() { 343 while (true) { 344 if (position >= string.length()) return '\uFFFF'; 345 char ch = string.charAt(position++); 346 if (ch != ' ' && ch != '\t') return ch; 347 } 348 } 349 reset()350 StringIterator reset() { 351 position = 0; 352 return this; 353 } 354 set(String string)355 StringIterator set(String string) { 356 this.string = string; 357 position = 0; 358 return this; 359 } 360 matches(StringIterator other)361 boolean matches(StringIterator other) { 362 while (true) { 363 char c1 = next(); 364 char c2 = other.next(); 365 if (c1 != c2) return false; 366 if (c1 == '\uFFFF') return true; 367 } 368 } 369 370 /** 371 * @return Returns the position. 372 */ getPosition()373 public int getPosition() { 374 return position; 375 } 376 } 377 splitArray(String source, char separator)378 public static String[] splitArray(String source, char separator) { 379 return splitArray(source, separator, false); 380 } 381 splitArray(String source, char separator, boolean trim)382 public static String[] splitArray(String source, char separator, boolean trim) { 383 List<String> piecesList = splitList(source, separator, trim); 384 String[] pieces = new String[piecesList.size()]; 385 piecesList.toArray(pieces); 386 return pieces; 387 } 388 splitCommaSeparated(String line)389 public static String[] splitCommaSeparated(String line) { 390 // items are separated by ',' 391 // each item is of the form abc... 392 // or "..." (required if a comma or quote is contained) 393 // " in a field is represented by "" 394 List<String> result = new ArrayList<>(); 395 StringBuilder item = new StringBuilder(); 396 boolean inQuote = false; 397 for (int i = 0; i < line.length(); ++i) { 398 char ch = line.charAt(i); // don't worry about supplementaries 399 switch (ch) { 400 case '"': 401 inQuote = !inQuote; 402 // at start or end, that's enough 403 // if get a quote when we are not in a quote, and not at start, then add it and 404 // return to inQuote 405 if (inQuote && item.length() != 0) { 406 item.append('"'); 407 inQuote = true; 408 } 409 break; 410 case ',': 411 if (!inQuote) { 412 result.add(item.toString()); 413 item.setLength(0); 414 } else { 415 item.append(ch); 416 } 417 break; 418 default: 419 item.append(ch); 420 break; 421 } 422 } 423 result.add(item.toString()); 424 return result.toArray(new String[result.size()]); 425 } 426 splitList(String source, char separator)427 public static List<String> splitList(String source, char separator) { 428 return splitList(source, separator, false, null); 429 } 430 splitList(String source, char separator, boolean trim)431 public static List<String> splitList(String source, char separator, boolean trim) { 432 return splitList(source, separator, trim, null); 433 } 434 splitList( String source, char separator, boolean trim, List<String> output)435 public static List<String> splitList( 436 String source, char separator, boolean trim, List<String> output) { 437 return splitList(source, Character.toString(separator), trim, output); 438 } 439 splitList(String source, String separator)440 public static List<String> splitList(String source, String separator) { 441 return splitList(source, separator, false, null); 442 } 443 splitList(String source, String separator, boolean trim)444 public static List<String> splitList(String source, String separator, boolean trim) { 445 return splitList(source, separator, trim, null); 446 } 447 splitList( String source, String separator, boolean trim, List<String> output)448 public static List<String> splitList( 449 String source, String separator, boolean trim, List<String> output) { 450 if (output == null) output = new ArrayList<>(); 451 if (source.length() == 0) return output; 452 int pos = 0; 453 do { 454 int npos = source.indexOf(separator, pos); 455 if (npos < 0) npos = source.length(); 456 String piece = source.substring(pos, npos); 457 if (trim) piece = piece.trim(); 458 output.add(piece); 459 pos = npos + 1; 460 } while (pos < source.length()); 461 return output; 462 } 463 464 /** 465 * Protect a collection (as much as Java lets us!) from modification. Really, really ugly code, 466 * since Java doesn't let us do better. 467 */ 468 @SuppressWarnings({"rawtypes", "unchecked"}) protectCollection(T source)469 public static <T> T protectCollection(T source) { 470 // TODO - exclude UnmodifiableMap, Set, ... 471 if (source instanceof Map) { 472 Map<Object, Object> sourceMap = (Map) source; 473 ImmutableMap.Builder<Object, Object> builder = ImmutableMap.builder(); 474 for (Entry<Object, Object> entry : sourceMap.entrySet()) { 475 final Object key = entry.getKey(); 476 final Object value = entry.getValue(); 477 builder.put(protectCollection(key), protectCollection(value)); 478 } 479 return (T) builder.build(); 480 } else if (source instanceof Multimap) { 481 Multimap<Object, Object> sourceMap = (Multimap) source; 482 ImmutableMultimap.Builder<Object, Object> builder = ImmutableMultimap.builder(); 483 for (Entry<Object, Object> entry : sourceMap.entries()) { 484 builder.put(protectCollection(entry.getKey()), protectCollection(entry.getValue())); 485 } 486 return (T) builder.build(); 487 } else if (source instanceof Collection) { 488 // TODO use ImmutableSet, List, ... 489 Collection sourceCollection = (Collection) source; 490 Collection<Object> resultCollection = clone(sourceCollection); 491 if (resultCollection == null) return (T) sourceCollection; // failed 492 resultCollection.clear(); 493 494 for (Object item : sourceCollection) { 495 resultCollection.add(protectCollection(item)); 496 } 497 498 return sourceCollection instanceof List 499 ? (T) Collections.unmodifiableList((List) sourceCollection) 500 : sourceCollection instanceof SortedSet 501 ? (T) Collections.unmodifiableSortedSet((SortedSet) sourceCollection) 502 : sourceCollection instanceof Set 503 ? (T) Collections.unmodifiableSet((Set) sourceCollection) 504 : (T) Collections.unmodifiableCollection(sourceCollection); 505 } else if (source instanceof Freezable) { 506 Freezable freezableSource = (Freezable) source; 507 return (T) freezableSource.freeze(); 508 // if (freezableSource.isFrozen()) return source; 509 // return (T) ((Freezable) (freezableSource.cloneAsThawed())).freeze(); 510 } else { 511 return source; // can't protect 512 } 513 } 514 515 /** 516 * Protect a collections where we don't need to clone. 517 * 518 * @param source 519 * @return 520 */ 521 @SuppressWarnings({"rawtypes", "unchecked"}) protectCollectionX(T source)522 public static <T> T protectCollectionX(T source) { 523 // TODO - exclude UnmodifiableMap, Set, ... 524 if (isImmutable(source)) { 525 return source; 526 } 527 if (source instanceof Map) { 528 Map sourceMap = (Map) source; 529 // recurse 530 LinkedHashMap tempMap = new LinkedHashMap<>(sourceMap); // copy contents 531 sourceMap.clear(); 532 for (Object key : tempMap.keySet()) { 533 sourceMap.put(protectCollection(key), protectCollectionX(tempMap.get(key))); 534 } 535 return sourceMap instanceof SortedMap 536 ? (T) Collections.unmodifiableSortedMap((SortedMap) sourceMap) 537 : (T) Collections.unmodifiableMap(sourceMap); 538 } else if (source instanceof Collection) { 539 Collection sourceCollection = (Collection) source; 540 LinkedHashSet tempSet = new LinkedHashSet<>(sourceCollection); // copy contents 541 542 sourceCollection.clear(); 543 for (Object item : tempSet) { 544 sourceCollection.add(protectCollectionX(item)); 545 } 546 547 return sourceCollection instanceof List 548 ? (T) Collections.unmodifiableList((List) sourceCollection) 549 : sourceCollection instanceof SortedSet 550 ? (T) Collections.unmodifiableSortedSet((SortedSet) sourceCollection) 551 : sourceCollection instanceof Set 552 ? (T) Collections.unmodifiableSet((Set) sourceCollection) 553 : (T) Collections.unmodifiableCollection(sourceCollection); 554 } else if (source instanceof Freezable) { 555 Freezable freezableSource = (Freezable) source; 556 return (T) freezableSource.freeze(); 557 } else { 558 throw new IllegalArgumentException("Can’t protect: " + source.getClass().toString()); 559 } 560 } 561 562 private static final Set<Object> KNOWN_IMMUTABLES = new HashSet<>(Arrays.asList(String.class)); 563 isImmutable(Object source)564 public static boolean isImmutable(Object source) { 565 return source == null 566 || source instanceof Enum 567 || source instanceof Number 568 || KNOWN_IMMUTABLES.contains(source.getClass()); 569 } 570 571 /** 572 * Clones T if we can; otherwise returns null. 573 * 574 * @param <T> 575 * @param source 576 * @return 577 */ 578 @SuppressWarnings("unchecked") clone(T source)579 private static <T> T clone(T source) { 580 final Class<? extends Object> class1 = source.getClass(); 581 try { 582 final Method declaredMethod = class1.getDeclaredMethod("clone", (Class<?>) null); 583 return (T) declaredMethod.invoke(source, (Object) null); 584 } catch (Exception e) { 585 } 586 try { 587 final Constructor<? extends Object> declaredMethod = 588 class1.getConstructor((Class<?>) null); 589 return (T) declaredMethod.newInstance((Object) null); 590 } catch (Exception e) { 591 } 592 return null; // uncloneable 593 } 594 595 /** Appends two strings, inserting separator if either is empty */ joinWithSeparation(String a, String separator, String b)596 public static String joinWithSeparation(String a, String separator, String b) { 597 if (a.length() == 0) return b; 598 if (b.length() == 0) return a; 599 return a + separator + b; 600 } 601 602 /** Appends two strings, inserting separator if either is empty. Modifies first map */ joinWithSeparation( Map<String, String> a, String separator, Map<String, String> b)603 public static Map<String, String> joinWithSeparation( 604 Map<String, String> a, String separator, Map<String, String> b) { 605 for (Iterator<String> it = b.keySet().iterator(); it.hasNext(); ) { 606 String key = it.next(); 607 String bvalue = b.get(key); 608 String avalue = a.get(key); 609 if (avalue != null) { 610 if (avalue.trim().equals(bvalue.trim())) continue; 611 bvalue = joinWithSeparation(avalue, separator, bvalue); 612 } 613 a.put(key, bvalue); 614 } 615 return a; 616 } 617 join(Collection<T> c, String separator)618 public static <T> String join(Collection<T> c, String separator) { 619 return join(c, separator, null); 620 } 621 join(Object[] c, String separator)622 public static String join(Object[] c, String separator) { 623 return join(c, separator, null); 624 } 625 join( Collection<T> c, String separator, Transform<T, String> transform)626 public static <T> String join( 627 Collection<T> c, String separator, Transform<T, String> transform) { 628 StringBuffer output = new StringBuffer(); 629 boolean isFirst = true; 630 for (T item : c) { 631 if (isFirst) { 632 isFirst = false; 633 } else { 634 output.append(separator); 635 } 636 output.append(transform != null ? transform.transform(item) : item); 637 } 638 return output.toString(); 639 } 640 join(T[] c, String separator, Transform<T, String> transform)641 public static <T> String join(T[] c, String separator, Transform<T, String> transform) { 642 return join(Arrays.asList(c), separator, transform); 643 } 644 645 /** Utility like Arrays.asList() */ 646 @SuppressWarnings("unchecked") asMap(Object[][] source, Map<K, V> target, boolean reverse)647 public static <K, V> Map<K, V> asMap(Object[][] source, Map<K, V> target, boolean reverse) { 648 int from = 0, to = 1; 649 if (reverse) { 650 from = 1; 651 to = 0; 652 } 653 for (int i = 0; i < source.length; ++i) { 654 if (source[i].length != 2) { 655 throw new IllegalArgumentException( 656 "Source must be array of pairs of strings: " + Arrays.asList(source[i])); 657 } 658 target.put((K) source[i][from], (V) source[i][to]); 659 } 660 return target; 661 } 662 asMap(Object[][] source)663 public static <K, V> Map<K, V> asMap(Object[][] source) { 664 return asMap(source, new HashMap<K, V>(), false); 665 } 666 667 /** Returns the canonical name for a file. */ getCanonicalName(String file)668 public static String getCanonicalName(String file) { 669 try { 670 return PathUtilities.getNormalizedPathString(file); 671 } catch (Exception e) { 672 return file; 673 } 674 } 675 676 /** 677 * Convert a UnicodeSet into a string that can be embedded into a Regex. Handles strings that 678 * are in the UnicodeSet, Supplementary ranges, and escaping 679 * 680 * @param source The source set 681 * @return 682 */ toRegex(UnicodeSet source)683 public static String toRegex(UnicodeSet source) { 684 return toRegex(source, null, false); 685 } 686 687 private static final Transliterator DEFAULT_REGEX_ESCAPER = 688 Transliterator.createFromRules( 689 "foo", 690 "([ \\- \\\\ \\[ \\] ]) > '\\' $1 ;" 691 // + " ([:c:]) > &hex($1);" 692 + " ([[:control:][[:z:]&[:ascii:]]]) > &hex($1);", 693 Transliterator.FORWARD); 694 695 /** 696 * Convert a UnicodeSet into a string that can be embedded into a Regex. Handles strings that 697 * are in the UnicodeSet, Supplementary ranges, and escaping 698 * 699 * @param source The source set 700 * @param escaper A transliterator that is used to escape the characters according to the 701 * requirements of the regex. The default puts a \\ before [, -, \, and ], and converts 702 * controls and Ascii whitespace to hex. Alternatives can be supplied. Note that some Regex 703 * engines, including Java 1.5, don't really deal with escaped supplementaries well. 704 * @param onlyBmp Set to true if the Regex only accepts BMP characters. In that case, ranges of 705 * supplementary characters are converted to lists of ranges. For example, 706 * [\uFFF0-\U0010000F \U0010100F-\U0010300F] converts into: 707 * <pre> 708 * [\uD800][\uDC00-\uDFFF] 709 * [\uD801-\uDBBF][\uDC00-\uDFFF] 710 * [\uDBC0][\uDC00-\uDC0F] 711 * </pre> 712 * and 713 * <pre> 714 * [\uDBC4][\uDC0F-\uDFFF] 715 * [\uDBC5-\uDBCB][\uDC00-\uDFFF] 716 * [\uDBCC][\uDC00-\uDC0F] 717 * </pre> 718 * These are then coalesced into a list of alternatives by sharing parts where feasible. For 719 * example, the above turns into 3 pairs of ranges: 720 * <pre> 721 * [\uDBC0\uDBCC][\uDC00-\uDC0F]|\uDBC4[\uDC0F-\uDFFF]|[\uD800-\uDBBF\uDBC5-\uDBCB][\uDC00-\uDFFF] 722 * </pre> 723 * 724 * @return escaped string. Something like [a-z] or (?:[a-m]|{zh}) if there is a string zh in the 725 * set, or a more complicated case for supplementaries. <br> 726 * Special cases: [] returns "", single item returns a string (escaped), like [a] => "a", or 727 * [{abc}] => "abc"<br> 728 * Supplementaries are handled specially, as described under onlyBmp. 729 */ toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp)730 public static String toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp) { 731 if (escaper == null) { 732 escaper = DEFAULT_REGEX_ESCAPER; 733 } 734 UnicodeSetIterator it = new UnicodeSetIterator(source); 735 // if there is only one item, return it 736 if (source.size() == 0) { 737 return ""; 738 } 739 if (source.size() == 1) { 740 it.next(); 741 return escaper.transliterate(it.getString()); 742 } 743 // otherwise, we figure out what is in the set, and will return 744 StringBuilder base = new StringBuilder("["); 745 StringBuilder alternates = new StringBuilder(); 746 Map<UnicodeSet, UnicodeSet> lastToFirst = new TreeMap<>(new UnicodeSetComparator()); 747 int alternateCount = 0; 748 while (it.nextRange()) { 749 if (it.codepoint == UnicodeSetIterator.IS_STRING) { 750 ++alternateCount; 751 alternates.append('|').append(escaper.transliterate(it.string)); 752 } else if (!onlyBmp || it.codepointEnd <= 0xFFFF) { // BMP 753 addBmpRange(it.codepoint, it.codepointEnd, escaper, base); 754 } else { // supplementary 755 if (it.codepoint <= 0xFFFF) { 756 addBmpRange(it.codepoint, 0xFFFF, escaper, base); 757 it.codepoint = 0x10000; // reset the range 758 } 759 // this gets a bit ugly; we are trying to minimize the extra ranges for 760 // supplementaries 761 // we do this by breaking up X-Y based on the Lead and Trail values for X and Y 762 // Lx [Tx - Ty]) (if Lx == Ly) 763 // Lx [Tx - DFFF] | Ly [DC00-Ty] (if Lx == Ly - 1) 764 // Lx [Tx - DFFF] | [Lx+1 - Ly-1][DC00-DFFF] | Ly [DC00-Ty] (otherwise) 765 int leadX = UTF16.getLeadSurrogate(it.codepoint); 766 int trailX = UTF16.getTrailSurrogate(it.codepoint); 767 int leadY = UTF16.getLeadSurrogate(it.codepointEnd); 768 int trailY = UTF16.getTrailSurrogate(it.codepointEnd); 769 if (leadX == leadY) { 770 addSupplementalRange(leadX, leadX, trailX, trailY, escaper, lastToFirst); 771 } else { 772 addSupplementalRange(leadX, leadX, trailX, 0xDFFF, escaper, lastToFirst); 773 if (leadX != leadY - 1) { 774 addSupplementalRange( 775 leadX + 1, leadY - 1, 0xDC00, 0xDFFF, escaper, lastToFirst); 776 } 777 addSupplementalRange(leadY, leadY, 0xDC00, trailY, escaper, lastToFirst); 778 } 779 } 780 } 781 // add in the supplementary ranges 782 if (lastToFirst.size() != 0) { 783 for (UnicodeSet last : lastToFirst.keySet()) { 784 ++alternateCount; 785 alternates 786 .append('|') 787 .append(toRegex(lastToFirst.get(last), escaper, onlyBmp)) 788 .append(toRegex(last, escaper, onlyBmp)); 789 } 790 } 791 // Return the output. We separate cases in order to get the minimal extra apparatus 792 base.append("]"); 793 if (alternateCount == 0) { 794 return base.toString(); 795 } else if (base.length() > 2) { 796 return "(?:" + base + "|" + alternates.substring(1) + ")"; 797 } else if (alternateCount == 1) { 798 return alternates.substring(1); 799 } else { 800 return "(?:" + alternates.substring(1) + ")"; 801 } 802 } 803 addSupplementalRange( int leadX, int leadY, int trailX, int trailY, Transliterator escaper, Map<UnicodeSet, UnicodeSet> lastToFirst)804 private static void addSupplementalRange( 805 int leadX, 806 int leadY, 807 int trailX, 808 int trailY, 809 Transliterator escaper, 810 Map<UnicodeSet, UnicodeSet> lastToFirst) { 811 System.out.println( 812 "\tadding: " 813 + new UnicodeSet(leadX, leadY) 814 + "\t" 815 + new UnicodeSet(trailX, trailY)); 816 UnicodeSet last = new UnicodeSet(trailX, trailY); 817 UnicodeSet first = lastToFirst.get(last); 818 if (first == null) { 819 lastToFirst.put(last, first = new UnicodeSet()); 820 } 821 first.add(leadX, leadY); 822 } 823 addBmpRange( int start, int limit, Transliterator escaper, StringBuilder base)824 private static void addBmpRange( 825 int start, int limit, Transliterator escaper, StringBuilder base) { 826 base.append(escaper.transliterate(UTF16.valueOf(start))); 827 if (start != limit) { 828 base.append("-").append(escaper.transliterate(UTF16.valueOf(limit))); 829 } 830 } 831 832 public static class UnicodeSetComparator implements Comparator<UnicodeSet> { 833 @Override compare(UnicodeSet o1, UnicodeSet o2)834 public int compare(UnicodeSet o1, UnicodeSet o2) { 835 return o1.compareTo(o2); 836 } 837 } 838 839 public static class CollectionComparator<T extends Comparable<T>> 840 implements Comparator<Collection<T>> { 841 @Override compare(Collection<T> o1, Collection<T> o2)842 public int compare(Collection<T> o1, Collection<T> o2) { 843 return UnicodeSet.compare(o1, o2, UnicodeSet.ComparisonStyle.SHORTER_FIRST); 844 } 845 } 846 847 public static class ComparableComparator<T extends Comparable<T>> implements Comparator<T> { 848 @Override compare(T arg0, T arg1)849 public int compare(T arg0, T arg1) { 850 return Utility.checkCompare(arg0, arg1); 851 } 852 } 853 854 @SuppressWarnings({"rawtypes", "unchecked"}) addTreeMapChain(Map coverageData, Object... objects)855 public static void addTreeMapChain(Map coverageData, Object... objects) { 856 Map<Object, Object> base = coverageData; 857 for (int i = 0; i < objects.length - 2; ++i) { 858 Map<Object, Object> nextOne = (Map<Object, Object>) base.get(objects[i]); 859 if (nextOne == null) base.put(objects[i], nextOne = new TreeMap<>()); 860 base = nextOne; 861 } 862 base.put(objects[objects.length - 2], objects[objects.length - 1]); 863 } 864 865 public abstract static class CollectionTransform<S, T> implements Transform<S, T> { 866 @Override transform(S source)867 public abstract T transform(S source); 868 transform(Collection<S> input, Collection<T> output)869 public Collection<T> transform(Collection<S> input, Collection<T> output) { 870 return CldrUtility.transform(input, this, output); 871 } 872 transform(Collection<S> input)873 public Collection<T> transform(Collection<S> input) { 874 return transform(input, new ArrayList<T>()); 875 } 876 } 877 transform( SC source, Transform<S, T> transform, TC target)878 public static <S, T, SC extends Collection<S>, TC extends Collection<T>> TC transform( 879 SC source, Transform<S, T> transform, TC target) { 880 for (S sourceItem : source) { 881 T targetItem = transform.transform(sourceItem); 882 if (targetItem != null) { 883 target.add(targetItem); 884 } 885 } 886 return target; 887 } 888 transform( SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target)889 public static <SK, SV, TK, TV, SM extends Map<SK, SV>, TM extends Map<TK, TV>> TM transform( 890 SM source, 891 Transform<SK, TK> transformKey, 892 Transform<SV, TV> transformValue, 893 TM target) { 894 for (Entry<SK, SV> sourceEntry : source.entrySet()) { 895 TK targetKey = transformKey.transform(sourceEntry.getKey()); 896 TV targetValue = transformValue.transform(sourceEntry.getValue()); 897 if (targetKey != null && targetValue != null) { 898 target.put(targetKey, targetValue); 899 } 900 } 901 return target; 902 } 903 904 public abstract static class Apply<T> { apply(T item)905 public abstract void apply(T item); 906 applyTo(U collection)907 public <U extends Collection<T>> void applyTo(U collection) { 908 for (T item : collection) { 909 apply(item); 910 } 911 } 912 } 913 914 public abstract static class Filter<T> { 915 contains(T item)916 public abstract boolean contains(T item); 917 retainAll(U c)918 public <U extends Collection<T>> U retainAll(U c) { 919 for (Iterator<T> it = c.iterator(); it.hasNext(); ) { 920 if (!contains(it.next())) it.remove(); 921 } 922 return c; 923 } 924 extractMatches(U c, U target)925 public <U extends Collection<T>> U extractMatches(U c, U target) { 926 for (Iterator<T> it = c.iterator(); it.hasNext(); ) { 927 T item = it.next(); 928 if (contains(item)) { 929 target.add(item); 930 } 931 } 932 return target; 933 } 934 removeAll(U c)935 public <U extends Collection<T>> U removeAll(U c) { 936 for (Iterator<T> it = c.iterator(); it.hasNext(); ) { 937 if (contains(it.next())) it.remove(); 938 } 939 return c; 940 } 941 extractNonMatches(U c, U target)942 public <U extends Collection<T>> U extractNonMatches(U c, U target) { 943 for (Iterator<T> it = c.iterator(); it.hasNext(); ) { 944 T item = it.next(); 945 if (!contains(item)) { 946 target.add(item); 947 } 948 } 949 return target; 950 } 951 } 952 953 public static class MatcherFilter<T> extends Filter<T> { 954 private Matcher matcher; 955 MatcherFilter(String pattern)956 public MatcherFilter(String pattern) { 957 this.matcher = PatternCache.get(pattern).matcher(""); 958 } 959 MatcherFilter(Matcher matcher)960 public MatcherFilter(Matcher matcher) { 961 this.matcher = matcher; 962 } 963 set(Matcher matcher)964 public MatcherFilter<T> set(Matcher matcher) { 965 this.matcher = matcher; 966 return this; 967 } 968 set(String pattern)969 public MatcherFilter<T> set(String pattern) { 970 this.matcher = PatternCache.get(pattern).matcher(""); 971 return this; 972 } 973 974 @Override contains(T o)975 public boolean contains(T o) { 976 return matcher.reset(o.toString()).matches(); 977 } 978 } 979 980 // static final class HandlingTransform implements Transform<String, Handling> { 981 // @Override 982 // public Handling transform(String source) { 983 // return Handling.valueOf(source); 984 // } 985 // } 986 987 public static final class PairComparator<K extends Comparable<K>, V extends Comparable<V>> 988 implements java.util.Comparator<Pair<K, V>> { 989 990 private Comparator<K> comp1; 991 private Comparator<V> comp2; 992 PairComparator(Comparator<K> comp1, Comparator<V> comp2)993 public PairComparator(Comparator<K> comp1, Comparator<V> comp2) { 994 this.comp1 = comp1; 995 this.comp2 = comp2; 996 } 997 998 @Override compare(Pair<K, V> o1, Pair<K, V> o2)999 public int compare(Pair<K, V> o1, Pair<K, V> o2) { 1000 { 1001 K o1First = o1.getFirst(); 1002 K o2First = o2.getFirst(); 1003 int diff = 1004 o1First == null 1005 ? (o2First == null ? 0 : -1) 1006 : o2First == null 1007 ? 1 1008 : comp1 == null 1009 ? o1First.compareTo(o2First) 1010 : comp1.compare(o1First, o2First); 1011 if (diff != 0) { 1012 return diff; 1013 } 1014 } 1015 V o1Second = o1.getSecond(); 1016 V o2Second = o2.getSecond(); 1017 return o1Second == null 1018 ? (o2Second == null ? 0 : -1) 1019 : o2Second == null 1020 ? 1 1021 : comp2 == null 1022 ? o1Second.compareTo(o2Second) 1023 : comp2.compare(o1Second, o2Second); 1024 } 1025 } 1026 1027 /** 1028 * Fetch data from jar 1029 * 1030 * @param name a name residing in the org/unicode/cldr/util/data/ directory, or loading from a 1031 * jar will break. 1032 */ getUTF8Data(String name)1033 public static BufferedReader getUTF8Data(String name) { 1034 if (new File(name).isAbsolute()) { 1035 throw new IllegalArgumentException( 1036 "Path must be relative to org/unicode/cldr/util/data such as 'file.txt' or 'casing/file.txt', but got '" 1037 + name 1038 + "'."); 1039 } 1040 return FileReaders.openFile(CldrUtility.class, "data/" + name); 1041 } 1042 1043 /** License file */ 1044 public static final String LICENSE = "LICENSE"; 1045 1046 /** 1047 * Fetch data from jar 1048 * 1049 * @param name a name residing in the org/unicode/cldr/util/data/ directory, or loading from a 1050 * jar will break. 1051 */ getInputStream(String name)1052 public static InputStream getInputStream(String name) { 1053 if (new File(name).isAbsolute()) { 1054 throw new IllegalArgumentException( 1055 "Path must be relative to org/unicode/cldr/util/data such as 'file.txt' or 'casing/file.txt', but got '" 1056 + name 1057 + "'."); 1058 } 1059 return getInputStream(CldrUtility.class, "data/" + name); 1060 } 1061 getInputStream(Class<?> callingClass, String relativePath)1062 public static InputStream getInputStream(Class<?> callingClass, String relativePath) { 1063 InputStream is = callingClass.getResourceAsStream(relativePath); 1064 // add buffering 1065 return InputStreamFactory.buffer(is); 1066 } 1067 1068 /** 1069 * Takes a Map that goes from Object to Set, and fills in the transpose 1070 * 1071 * @param source_key_valueSet 1072 * @param output_value_key 1073 */ putAllTransposed( Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key)1074 public static void putAllTransposed( 1075 Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key) { 1076 for (Iterator<Object> it = source_key_valueSet.keySet().iterator(); it.hasNext(); ) { 1077 Object key = it.next(); 1078 Set<Object> values = source_key_valueSet.get(key); 1079 for (Iterator<Object> it2 = values.iterator(); it2.hasNext(); ) { 1080 Object value = it2.next(); 1081 output_value_key.put(value, key); 1082 } 1083 } 1084 } 1085 countInstances(String source, String substring)1086 public static int countInstances(String source, String substring) { 1087 int count = 0; 1088 int pos = 0; 1089 while (true) { 1090 pos = source.indexOf(substring, pos) + 1; 1091 if (pos <= 0) break; 1092 count++; 1093 } 1094 return count; 1095 } 1096 registerTransliteratorFromFile(String id, String dir, String filename)1097 public static void registerTransliteratorFromFile(String id, String dir, String filename) { 1098 registerTransliteratorFromFile(id, dir, filename, Transliterator.FORWARD, true); 1099 registerTransliteratorFromFile(id, dir, filename, Transliterator.REVERSE, true); 1100 } 1101 registerTransliteratorFromFile( String id, String dir, String filename, int direction, boolean reverseID)1102 public static void registerTransliteratorFromFile( 1103 String id, String dir, String filename, int direction, boolean reverseID) { 1104 if (filename == null) { 1105 filename = id.replace('-', '_'); 1106 filename = filename.replace('/', '_'); 1107 filename += ".txt"; 1108 } 1109 String rules = getText(dir, filename); 1110 Transliterator t; 1111 int pos = id.indexOf('-'); 1112 String rid; 1113 if (pos < 0) { 1114 rid = id + "-Any"; 1115 id = "Any-" + id; 1116 } else { 1117 rid = id.substring(pos + 1) + "-" + id.substring(0, pos); 1118 } 1119 if (!reverseID) rid = id; 1120 1121 if (direction == Transliterator.FORWARD) { 1122 Transliterator.unregister(id); 1123 t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD); 1124 Transliterator.registerInstance(t); 1125 System.out.println("Registered new Transliterator: " + id); 1126 } 1127 1128 /* 1129 * String test = "\u049A\u0430\u0437\u0430\u049B"; 1130 * System.out.println(t.transliterate(test)); 1131 * t = Transliterator.getInstance(id); 1132 * System.out.println(t.transliterate(test)); 1133 */ 1134 1135 if (direction == Transliterator.REVERSE) { 1136 Transliterator.unregister(rid); 1137 t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE); 1138 Transliterator.registerInstance(t); 1139 System.out.println("Registered new Transliterator: " + rid); 1140 } 1141 } 1142 getText(String dir, String filename)1143 public static String getText(String dir, String filename) { 1144 try { 1145 BufferedReader br = FileUtilities.openUTF8Reader(dir, filename); 1146 StringBuffer buffer = new StringBuffer(); 1147 while (true) { 1148 String line = br.readLine(); 1149 if (line == null) break; 1150 if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1); 1151 if (line.startsWith("//")) continue; 1152 buffer.append(line).append(CldrUtility.LINE_SEPARATOR); 1153 } 1154 br.close(); 1155 String rules = buffer.toString(); 1156 return rules; 1157 } catch (IOException e) { 1158 throw (IllegalArgumentException) 1159 new IllegalArgumentException("Can't open " + dir + ", " + filename) 1160 .initCause(e); 1161 } 1162 } 1163 callMethod(String methodNames, Class<?> cls)1164 public static void callMethod(String methodNames, Class<?> cls) { 1165 for (String methodName : methodNames.split(",")) { 1166 try { 1167 Method method; 1168 try { 1169 method = cls.getMethod(methodName, (Class[]) null); 1170 try { 1171 method.invoke(null, (Object[]) null); 1172 } catch (Exception e) { 1173 e.printStackTrace(); 1174 } 1175 } catch (Exception e) { 1176 System.out.println("No such method: " + methodName); 1177 showMethods(cls); 1178 } 1179 } catch (ClassNotFoundException e) { 1180 e.printStackTrace(); 1181 } 1182 } 1183 } 1184 showMethods(Class<?> cls)1185 public static void showMethods(Class<?> cls) throws ClassNotFoundException { 1186 System.out.println("Possible methods of " + cls.getCanonicalName() + " are: "); 1187 Method[] methods = cls.getMethods(); 1188 Set<String> names = new TreeSet<>(); 1189 for (int i = 0; i < methods.length; ++i) { 1190 if (methods[i].getGenericParameterTypes().length != 0) continue; 1191 // int mods = methods[i].getModifiers(); 1192 // if (!Modifier.isStatic(mods)) continue; 1193 String name = methods[i].getName(); 1194 names.add(name); 1195 } 1196 for (Iterator<String> it = names.iterator(); it.hasNext(); ) { 1197 System.out.println("\t" + it.next()); 1198 } 1199 } 1200 1201 /** 1202 * Breaks lines if they are too long, or if matcher.group(1) != last. Only breaks just before 1203 * matcher. 1204 * 1205 * @param input 1206 * @param separator 1207 * @param matcher must match each possible item. The first group is significant; if different, 1208 * will cause break 1209 * @return 1210 */ breakLines( CharSequence input, String separator, Matcher matcher, int width)1211 public static String breakLines( 1212 CharSequence input, String separator, Matcher matcher, int width) { 1213 StringBuffer output = new StringBuffer(); 1214 String lastPrefix = ""; 1215 int lastEnd = 0; 1216 int lastBreakPos = 0; 1217 matcher.reset(input); 1218 while (true) { 1219 boolean match = matcher.find(); 1220 if (!match) { 1221 output.append(input.subSequence(lastEnd, input.length())); 1222 break; 1223 } 1224 String prefix = matcher.group(1); 1225 if (!prefix.equalsIgnoreCase(lastPrefix) 1226 || matcher.end() - lastBreakPos > width) { // break before? 1227 output.append(separator); 1228 lastBreakPos = lastEnd; 1229 } else if (lastEnd != 0) { 1230 output.append(' '); 1231 } 1232 output.append(input.subSequence(lastEnd, matcher.end()).toString().trim()); 1233 lastEnd = matcher.end(); 1234 lastPrefix = prefix; 1235 } 1236 return output.toString(); 1237 } 1238 showOptions(String[] args)1239 public static void showOptions(String[] args) { 1240 // Properties props = System.getProperties(); 1241 System.out.println( 1242 "Arguments: " + join(args, " ")); // + (props == null ? "" : " " + props)); 1243 } 1244 roundToDecimals(double input, int places)1245 public static double roundToDecimals(double input, int places) { 1246 double log10 = Math.log10(input); // 15000 => 4.xxx 1247 double intLog10 = Math.floor(log10); 1248 double scale = Math.pow(10, intLog10 - places + 1); 1249 double factored = Math.round(input / scale) * scale; 1250 // System.out.println("###\t" +input + "\t" + factored); 1251 return factored; 1252 } 1253 1254 /** 1255 * Get a property value, returning the value if there is one (eg -Dkey=value), otherwise the 1256 * default value (for either empty or null). 1257 * 1258 * @param key 1259 * @param defaultValue 1260 * @return 1261 */ getProperty(String key, String defaultValue)1262 public static String getProperty(String key, String defaultValue) { 1263 return getProperty(key, defaultValue, defaultValue); 1264 } 1265 1266 /** Get a property value, returning the value if there is one, otherwise null. */ getProperty(String key)1267 public static String getProperty(String key) { 1268 return getProperty(key, null, null); 1269 } 1270 1271 /** 1272 * Get a property value, returning the value if there is one (eg -Dkey=value), the valueIfEmpty 1273 * if there is one with no value (eg -Dkey) and the valueIfNull if there is no property. 1274 * 1275 * @param key 1276 * @param valueIfNull 1277 * @param valueIfEmpty 1278 * @return 1279 */ getProperty(String key, String valueIfNull, String valueIfEmpty)1280 public static String getProperty(String key, String valueIfNull, String valueIfEmpty) { 1281 String result = CLDRConfig.getInstance().getProperty(key); 1282 if (result == null) { 1283 result = valueIfNull; 1284 } else if (result.length() == 0) { 1285 result = valueIfEmpty; 1286 } 1287 return result; 1288 } 1289 hex(byte[] bytes, int start, int end, String separator)1290 public static String hex(byte[] bytes, int start, int end, String separator) { 1291 StringBuilder result = new StringBuilder(); 1292 for (int i = 0; i < end; ++i) { 1293 if (result.length() != 0) { 1294 result.append(separator); 1295 } 1296 result.append(Utility.hex(bytes[i] & 0xFF, 2)); 1297 } 1298 return result.toString(); 1299 } 1300 getProperty(String string, boolean b)1301 public static boolean getProperty(String string, boolean b) { 1302 return getProperty(string, b ? "true" : "false", "true").matches("(?i)T|TRUE"); 1303 } 1304 checkValidDirectory(String sourceDirectory)1305 public static String checkValidDirectory(String sourceDirectory) { 1306 return checkValidFile(sourceDirectory, true, null); 1307 } 1308 checkValidDirectory(String sourceDirectory, String correction)1309 public static String checkValidDirectory(String sourceDirectory, String correction) { 1310 return checkValidFile(sourceDirectory, true, correction); 1311 } 1312 checkValidFile( String sourceDirectory, boolean checkForDirectory, String correction)1313 public static String checkValidFile( 1314 String sourceDirectory, boolean checkForDirectory, String correction) { 1315 File file = null; 1316 String normalizedPath = null; 1317 try { 1318 file = new File(sourceDirectory); 1319 normalizedPath = PathUtilities.getNormalizedPathString(file) + File.separatorChar; 1320 } catch (Exception e) { 1321 } 1322 if (file == null || normalizedPath == null || checkForDirectory && !file.isDirectory()) { 1323 throw new RuntimeException( 1324 "Directory not found: " 1325 + sourceDirectory 1326 + (normalizedPath == null ? "" : " => " + normalizedPath) 1327 + (correction == null ? "" : CldrUtility.LINE_SEPARATOR + correction)); 1328 } 1329 return normalizedPath; 1330 } 1331 1332 /** 1333 * Copy up to matching line (not included). If output is null, then just skip until. 1334 * 1335 * @param oldFile file to copy 1336 * @param readUntilPattern pattern to search for. If null, goes to end of file. 1337 * @param output into to copy into. If null, just skips in the input. 1338 * @param includeMatchingLine inclde the matching line when copying. 1339 * @throws IOException 1340 */ copyUpTo( BufferedReader oldFile, final Pattern readUntilPattern, final PrintWriter output, boolean includeMatchingLine)1341 public static void copyUpTo( 1342 BufferedReader oldFile, 1343 final Pattern readUntilPattern, 1344 final PrintWriter output, 1345 boolean includeMatchingLine) 1346 throws IOException { 1347 Matcher readUntil = readUntilPattern == null ? null : readUntilPattern.matcher(""); 1348 while (true) { 1349 String line = oldFile.readLine(); 1350 if (line == null) { 1351 break; 1352 } 1353 if (line.startsWith("\uFEFF")) { 1354 line = line.substring(1); 1355 } 1356 if (readUntil != null && readUntil.reset(line).matches()) { 1357 if (includeMatchingLine && output != null) { 1358 output.println(line); 1359 } 1360 break; 1361 } 1362 if (output != null) { 1363 output.println(line); 1364 } 1365 } 1366 } 1367 1368 private static DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss 'GMT'"); 1369 private static DateFormat DATE_ONLY = new SimpleDateFormat("yyyy-MM-dd"); 1370 1371 static { 1372 df.setTimeZone(TimeZone.getTimeZone("GMT")); 1373 DATE_ONLY.setTimeZone(TimeZone.getTimeZone("GMT")); 1374 } 1375 isoFormat(Date date)1376 public static String isoFormat(Date date) { 1377 synchronized (df) { 1378 return df.format(date); 1379 } 1380 } 1381 isoFormatDateOnly(Date date)1382 public static String isoFormatDateOnly(Date date) { 1383 synchronized (DATE_ONLY) { 1384 return DATE_ONLY.format(date); 1385 } 1386 } 1387 newConcurrentHashMap()1388 public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap() { 1389 // http://ria101.wordpress.com/2011/12/12/concurrenthashmap-avoid-a-common-misuse/ 1390 return new ConcurrentHashMap<>(4, 0.9f, 1); 1391 } 1392 newConcurrentHashMap(Map<K, V> source)1393 public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap(Map<K, V> source) { 1394 ConcurrentHashMap<K, V> result = newConcurrentHashMap(); 1395 result.putAll(source); 1396 return result; 1397 } 1398 equals(Object a, Object b)1399 public static boolean equals(Object a, Object b) { 1400 return a == b ? true : a == null || b == null ? false : a.equals(b); 1401 } 1402 getDoubleLink(String code)1403 public static String getDoubleLink(String code) { 1404 final String anchorSafe = 1405 TransliteratorUtilities.toHTML.transliterate(code).replace(" ", "_"); 1406 return "<a name='" + anchorSafe + "' href='#" + anchorSafe + "'>"; 1407 } 1408 getDoubleLinkedText(String anchor, String anchorText)1409 public static String getDoubleLinkedText(String anchor, String anchorText) { 1410 return getDoubleLink(anchor) 1411 + TransliteratorUtilities.toHTML.transliterate(anchorText).replace("_", " ") 1412 + "</a>"; 1413 } 1414 getDoubleLinkedText(String anchor)1415 public static String getDoubleLinkedText(String anchor) { 1416 return getDoubleLinkedText(anchor, anchor); 1417 } 1418 getDoubleLinkMsg()1419 public static String getDoubleLinkMsg() { 1420 return "<a name=''{0}'' href=''#{0}''>{0}</a>"; 1421 } 1422 getDoubleLinkMsg2()1423 public static String getDoubleLinkMsg2() { 1424 return "<a name=''{0}{1}'' href=''#{0}{1}''>{0}</a>"; 1425 } 1426 getCopyrightString()1427 public static String getCopyrightString() { 1428 return getCopyrightString(""); 1429 } 1430 1431 private static final class CopyrightHelper { 1432 public static final CopyrightHelper INSTANCE = new CopyrightHelper(); 1433 public final String COPYRIGHT_SHORT = 1434 "Copyright \u00A9 1991-" 1435 + Calendar.getInstance().get(Calendar.YEAR) 1436 + " Unicode, Inc."; 1437 } 1438 getCopyrightString(String linePrefix)1439 public static String getCopyrightString(String linePrefix) { 1440 // now do the rest 1441 return linePrefix 1442 + getCopyrightShort() 1443 + CldrUtility.LINE_SEPARATOR 1444 + linePrefix 1445 + "For terms of use, see http://www.unicode.org/copyright.html" 1446 + CldrUtility.LINE_SEPARATOR 1447 + linePrefix 1448 + CLDRURLS.UNICODE_SPDX_HEADER 1449 + CldrUtility.LINE_SEPARATOR 1450 + linePrefix 1451 + "CLDR data files are interpreted according to the LDML specification " 1452 + "(http://unicode.org/reports/tr35/)"; 1453 } 1454 1455 /** Returns the '## License' section in markdown. */ getCopyrightMarkdown()1456 public static String getCopyrightMarkdown() { 1457 return "## License\n" 1458 + "\n" 1459 + getCopyrightShort() 1460 + "\n" 1461 + "[Terms of Use](http://www.unicode.org/copyright.html)\n\n" 1462 + CLDRURLS.UNICODE_SPDX_HEADER 1463 + "\n"; 1464 } 1465 1466 /** Get the short copyright string, "Copyright © YYYY-YYYY Unicode, Inc." */ getCopyrightShort()1467 public static String getCopyrightShort() { 1468 return CopyrightHelper.INSTANCE.COPYRIGHT_SHORT; 1469 } 1470 1471 // TODO Move to collection utilities 1472 /** 1473 * Type-safe get 1474 * 1475 * @param map 1476 * @param key 1477 * @return value 1478 */ get(M map, K key)1479 public static <K, V, M extends Map<K, V>> V get(M map, K key) { 1480 return map.get(key); 1481 } 1482 1483 /** 1484 * Type-safe contains 1485 * 1486 * @param collection 1487 * @param key 1488 * @return value 1489 */ contains(C collection, K key)1490 public static <K, C extends Collection<K>> boolean contains(C collection, K key) { 1491 return collection.contains(key); 1492 } 1493 toEnumSet( Class<E> classValue, Collection<String> stringValues)1494 public static <E extends Enum<E>> EnumSet<E> toEnumSet( 1495 Class<E> classValue, Collection<String> stringValues) { 1496 EnumSet<E> result = EnumSet.noneOf(classValue); 1497 for (String s : stringValues) { 1498 result.add(Enum.valueOf(classValue, s)); 1499 } 1500 return result; 1501 } 1502 putNew(M map, K key, V value)1503 public static <K, V, M extends Map<K, V>> M putNew(M map, K key, V value) { 1504 if (!map.containsKey(key)) { 1505 map.put(key, value); 1506 } 1507 return map; 1508 } 1509 cleanSemiFields(String line)1510 public static String[] cleanSemiFields(String line) { 1511 line = cleanLine(line); 1512 return line.isEmpty() ? null : SEMI_SPLIT.split(line); 1513 } 1514 cleanLine(String line)1515 private static String cleanLine(String line) { 1516 int comment = line.indexOf("#"); 1517 if (comment >= 0) { 1518 line = line.substring(0, comment); 1519 } 1520 if (line.startsWith("\uFEFF")) { 1521 line = line.substring(1); 1522 } 1523 return line.trim(); 1524 } 1525 handleFile(String filename, LineHandler handler)1526 public static void handleFile(String filename, LineHandler handler) throws IOException { 1527 try (BufferedReader in = getUTF8Data(filename); ) { 1528 String line = null; 1529 while ((line = in.readLine()) != null) { 1530 // String line = in.readLine(); 1531 // if (line == null) { 1532 // break; 1533 // } 1534 try { 1535 if (!handler.handle(line)) { 1536 if (HANDLEFILE_SHOW_SKIP) { 1537 System.out.println("Skipping line: " + line); 1538 } 1539 } 1540 } catch (Exception e) { 1541 throw (RuntimeException) 1542 new IllegalArgumentException("Problem with line: " + line).initCause(e); 1543 } 1544 } 1545 } 1546 // in.close(); 1547 } 1548 ifNull(T x, T y)1549 public static <T> T ifNull(T x, T y) { 1550 return x == null ? y : x; 1551 } 1552 ifSame(T source, T replaceIfSame, T replacement)1553 public static <T> T ifSame(T source, T replaceIfSame, T replacement) { 1554 return source == replaceIfSame ? replacement : source; 1555 } 1556 ifEqual(T source, T replaceIfSame, T replacement)1557 public static <T> T ifEqual(T source, T replaceIfSame, T replacement) { 1558 return Objects.equals(source, replaceIfSame) ? replacement : source; 1559 } 1560 intersect(Set<T> a, Collection<T> b)1561 public static <T> Set<T> intersect(Set<T> a, Collection<T> b) { 1562 Set<T> result = new LinkedHashSet<>(a); 1563 result.retainAll(b); 1564 return result; 1565 } 1566 subtract(Set<T> a, Collection<T> b)1567 public static <T> Set<T> subtract(Set<T> a, Collection<T> b) { 1568 Set<T> result = new LinkedHashSet<>(a); 1569 result.removeAll(b); 1570 return result; 1571 } 1572 deepEquals(Object... pairs)1573 public static boolean deepEquals(Object... pairs) { 1574 for (int item = 0; item < pairs.length; ) { 1575 if (!Objects.deepEquals(pairs[item++], pairs[item++])) { 1576 return false; 1577 } 1578 } 1579 return true; 1580 } 1581 array(Splitter splitter, String source)1582 public static String[] array(Splitter splitter, String source) { 1583 List<String> list = splitter.splitToList(source); 1584 return list.toArray(new String[list.size()]); 1585 } 1586 toHex(String in, boolean javaStyle)1587 public static String toHex(String in, boolean javaStyle) { 1588 StringBuilder result = new StringBuilder(); 1589 for (int i = 0; i < in.length(); ++i) { 1590 result.append(toHex(in.charAt(i), javaStyle)); 1591 } 1592 return result.toString(); 1593 } 1594 toHex(int j, boolean javaStyle)1595 public static String toHex(int j, boolean javaStyle) { 1596 if (j == '\"') { 1597 return "\\\""; 1598 } else if (j == '\\') { 1599 return "\\\\"; 1600 } else if (0x20 < j && j < 0x7F) { 1601 return String.valueOf((char) j); 1602 } 1603 final String hexString = Integer.toHexString(j).toUpperCase(); 1604 int gap = 4 - hexString.length(); 1605 if (gap < 0) { 1606 gap = 0; 1607 } 1608 String prefix = javaStyle ? "\\u" : "U+"; 1609 return prefix + "000".substring(0, gap) + hexString; 1610 } 1611 1612 /** 1613 * get string format for debugging, since Java has a useless display for many items 1614 * 1615 * @param item 1616 * @return 1617 */ toString(Object item)1618 public static String toString(Object item) { 1619 if (item instanceof Object[]) { 1620 return toString(Arrays.asList((Object[]) item)); 1621 } else if (item instanceof Entry) { 1622 return toString(((Entry) item).getKey()) + "≔" + toString(((Entry) item).getValue()); 1623 } else if (item instanceof Map) { 1624 return "{" + toString(((Map) item).entrySet()) + "}"; 1625 } else if (item instanceof Collection) { 1626 List<String> result = new ArrayList<>(); 1627 for (Object subitem : (Collection) item) { 1628 result.add(toString(subitem)); 1629 } 1630 return result.toString(); 1631 } 1632 return item.toString(); 1633 } 1634 1635 /** 1636 * Return the git hash for the CLDR base directory. 1637 * 1638 * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3" 1639 */ getCldrBaseDirHash()1640 public static String getCldrBaseDirHash() { 1641 final File baseDir = CLDRConfig.getInstance().getCldrBaseDirectory(); 1642 return getGitHashForDir(baseDir.toString()); 1643 } 1644 1645 /** 1646 * Return the git hash for a directory. 1647 * 1648 * @param dir the directory name 1649 * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3" 1650 */ getGitHashForDir(String dir)1651 public static final String getGitHashForDir(String dir) { 1652 // Try #1 1653 String hash = getGitHashDirectlyForDir(dir); 1654 if (hash == null) { 1655 // Try #2 1656 hash = getGitHashByRevParseForDir(dir); 1657 } 1658 if (hash == null) { 1659 // return 'unknown' 1660 hash = CLDRURLS.UNKNOWN_REVISION; 1661 } 1662 return hash; 1663 } 1664 1665 /** 1666 * Attempt to retrieve git hash by digging through .git/HEAD and related files 1667 * 1668 * @param dir 1669 * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3" 1670 */ getGitHashDirectlyForDir(String dir)1671 private static String getGitHashDirectlyForDir(String dir) { 1672 // First, try just reading .git/HEAD 1673 final File gitDir = new File(dir, ".git"); 1674 final File headfile = new File(gitDir, "HEAD"); 1675 if (headfile.canRead()) { 1676 // Try this first, fallback to git commands 1677 try { 1678 String s = Files.readString(headfile.toPath()); 1679 if (s != null && !s.isBlank()) { 1680 s = s.trim(); 1681 if (s.startsWith("ref: ")) { 1682 s = s.substring(5); // refs/heads/main 1683 final Path refPath = gitDir.toPath().resolve(s); 1684 if (refPath.startsWith(gitDir.toPath())) { 1685 s = Files.readString(refPath); 1686 if (s != null && !s.isBlank()) { 1687 return s.trim(); 1688 } 1689 } else { // ignore something like refs: ../../../yourfiles 1690 System.err.println("Ignoring strange git refPath " + refPath); 1691 } 1692 } // else, maybe detached head 1693 return s.trim(); 1694 } 1695 } catch (IOException e) { 1696 System.err.println(e + ": readString failed for " + headfile); 1697 e.printStackTrace(); 1698 } 1699 } 1700 return null; // not found; 1701 } 1702 1703 /** 1704 * Attempt to retrieve git hash by calling 'git rev-parse HEAD' 1705 * 1706 * @param dir 1707 * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3" 1708 */ getGitHashByRevParseForDir(String dir)1709 private static String getGitHashByRevParseForDir(String dir) { 1710 final String GIT_HASH_COMMANDS[] = {"git", "rev-parse", "HEAD"}; 1711 try { 1712 if (dir == null) { 1713 return null; // no dir 1714 } 1715 File f = new File(dir); 1716 if (!f.isDirectory()) { 1717 return null; // does not exist 1718 } 1719 Process p = Runtime.getRuntime().exec(GIT_HASH_COMMANDS, null, f); 1720 if (!p.waitFor(15, TimeUnit.SECONDS)) { 1721 System.err.println( 1722 "Git query " + String.join(" ", GIT_HASH_COMMANDS) + " timed out"); 1723 p.destroyForcibly(); 1724 return null; 1725 } 1726 if (p.exitValue() != 0) { 1727 System.err.println( 1728 "Error return : " 1729 + p.exitValue() 1730 + " from " 1731 + String.join(" ", GIT_HASH_COMMANDS)); 1732 try (BufferedReader is = 1733 new BufferedReader(new InputStreamReader(p.getErrorStream()))) { 1734 String str = is.readLine(); 1735 if (str.length() == 0) { 1736 throw new Exception("git returned empty"); 1737 } 1738 System.err.println("git: " + str); 1739 } 1740 return null; 1741 } 1742 try (BufferedReader is = 1743 new BufferedReader(new InputStreamReader(p.getInputStream()))) { 1744 String str = is.readLine(); 1745 if (str == null || str.length() == 0) { 1746 throw new Exception("git returned empty"); 1747 } 1748 return str; 1749 } 1750 } catch (Throwable t) { 1751 // We do not expect this to be called frequently. 1752 System.err.println( 1753 "While trying to get 'git' hash for " + dir + " : " + t.getMessage()); 1754 t.printStackTrace(); 1755 return null; 1756 } 1757 } 1758 1759 /** 1760 * For each string S in the UnicodeSet U, remove S if it U "doesn't need it" for testing 1761 * containsAll. That is, U.containsAll matches the same set of strings with or without S. For 1762 * example [ad{ad}{bcd}{bc}] flattens to [ad{bc}] 1763 * 1764 * @param value, which is modified if it is not freezable 1765 * @return resulting value 1766 */ flatten(UnicodeSet value)1767 public static UnicodeSet flatten(UnicodeSet value) { 1768 Set<String> strings = ImmutableSet.copyOf(value.strings()); 1769 HashSet<String> toAdd = new HashSet<>(); 1770 if (value.isFrozen()) { 1771 value = new UnicodeSet(value); 1772 } 1773 for (String s : strings) { 1774 value.remove(s); 1775 if (!value.containsAll(s)) { 1776 toAdd.add(s); 1777 } 1778 value.add(s); 1779 } 1780 value.removeAll(strings); 1781 value.addAll(toAdd); 1782 return value; 1783 } 1784 removeAll(C fromCollection, Set<String> toRemove)1785 public static <T, C extends Collection<T>> C removeAll(C fromCollection, Set<String> toRemove) { 1786 for (Iterator<T> it = fromCollection.iterator(); it.hasNext(); ) { 1787 T item = it.next(); 1788 if (toRemove.contains(item)) { 1789 it.remove(); 1790 } 1791 } 1792 return fromCollection; 1793 } 1794 } 1795