xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/CldrUtility.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2013, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.util;
10 
11 import com.google.common.base.Splitter;
12 import com.google.common.collect.ImmutableMap;
13 import com.google.common.collect.ImmutableMultimap;
14 import com.google.common.collect.ImmutableSet;
15 import com.google.common.collect.Multimap;
16 import com.ibm.icu.impl.Utility;
17 import com.ibm.icu.text.DateFormat;
18 import com.ibm.icu.text.SimpleDateFormat;
19 import com.ibm.icu.text.Transform;
20 import com.ibm.icu.text.Transliterator;
21 import com.ibm.icu.text.UTF16;
22 import com.ibm.icu.text.UnicodeSet;
23 import com.ibm.icu.text.UnicodeSetIterator;
24 import com.ibm.icu.util.Freezable;
25 import com.ibm.icu.util.TimeZone;
26 import java.io.BufferedReader;
27 import java.io.File;
28 import java.io.FileReader;
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.io.InputStreamReader;
32 import java.io.PrintWriter;
33 import java.lang.reflect.Constructor;
34 import java.lang.reflect.Method;
35 import java.nio.file.Files;
36 import java.nio.file.Path;
37 import java.nio.file.Paths;
38 import java.util.ArrayList;
39 import java.util.Arrays;
40 import java.util.Calendar;
41 import java.util.Collection;
42 import java.util.Collections;
43 import java.util.Comparator;
44 import java.util.Date;
45 import java.util.EnumSet;
46 import java.util.HashMap;
47 import java.util.HashSet;
48 import java.util.Iterator;
49 import java.util.LinkedHashMap;
50 import java.util.LinkedHashSet;
51 import java.util.List;
52 import java.util.Map;
53 import java.util.Map.Entry;
54 import java.util.Objects;
55 import java.util.Set;
56 import java.util.SortedMap;
57 import java.util.SortedSet;
58 import java.util.TreeMap;
59 import java.util.TreeSet;
60 import java.util.concurrent.ConcurrentHashMap;
61 import java.util.concurrent.TimeUnit;
62 import java.util.regex.Matcher;
63 import java.util.regex.Pattern;
64 import org.unicode.cldr.draft.FileUtilities;
65 import org.unicode.cldr.tool.Chart;
66 
67 public class CldrUtility {
68     /**
69      * These need to be consistent with "CLDR-Code-Git-Commit" in tools/cldr-code/pom.xml
70      *
71      * <p>If and when "CLDR-Apps-Git-Commit" in tools/cldr-apps/pom.xml becomes usable for the
72      * cldr-apps war file, we may add APPS_SLUG = "CLDR-Apps" here, and in some contexts use
73      * APPS_SLUG in addition to, or instead of, CODE_SLUG
74      */
75     public static final String CODE_SLUG = "CLDR-Code";
76 
77     public static final String GIT_COMMIT_SUFFIX = "-Git-Commit";
78 
79     public static final String HOME_KEY = "CLDRHOME";
80     public static final String DIR_KEY = "CLDR_DIR";
81     public static final String MAIN_KEY = "CLDR_MAIN";
82 
83     public static final boolean DEBUG_MISSING_DIRECTORIES = false;
84 
85     public static final boolean BETA = false;
86 
87     public static final String LINE_SEPARATOR = "\n";
88     public static final Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*");
89 
90     private static final boolean HANDLEFILE_SHOW_SKIP = false;
91     /**
92      * Constant for "∅∅∅". Indicates that a child locale has no value for a path even though a
93      * parent does.
94      */
95     public static final String NO_INHERITANCE_MARKER =
96             new String(new char[] {0x2205, 0x2205, 0x2205});
97 
98     /**
99      * Define the constant INHERITANCE_MARKER for "↑↑↑", used by Survey Tool to indicate a
100      * "passthru" vote to the parent locale. If CLDRFile ever finds this value in a data field,
101      * writing of the field should be suppressed.
102      */
103     public static final String INHERITANCE_MARKER = new String(new char[] {0x2191, 0x2191, 0x2191});
104 
105     public static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
106 
107     /**
108      * Very simple class, used to replace variables in a string. For example
109      *
110      * <p>
111      *
112      * <pre>
113      * static VariableReplacer langTag = new VariableReplacer()
114      * 			.add("$alpha", "[a-zA-Z]")
115      * 			.add("$digit", "[0-9]")
116      * 			.add("$alphanum", "[a-zA-Z0-9]")
117      * 			.add("$x", "[xX]");
118      * 			...
119      * 			String langTagPattern = langTag.replace(...);
120      * </pre>
121      */
122     public static class VariableReplacer {
123         // simple implementation for now
124         private Map<String, String> m = new TreeMap<>(Collections.reverseOrder());
125 
add(String variable, String value)126         public VariableReplacer add(String variable, String value) {
127             m.put(variable, value);
128             return this;
129         }
130 
replace(String source)131         public String replace(String source) {
132             String oldSource;
133             do {
134                 oldSource = source;
135                 for (Iterator<String> it = m.keySet().iterator(); it.hasNext(); ) {
136                     String variable = it.next();
137                     String value = m.get(variable);
138                     source = replaceAll(source, variable, value);
139                 }
140             } while (!source.equals(oldSource));
141             return source;
142         }
143 
replaceAll(String source, String key, String value)144         public String replaceAll(String source, String key, String value) {
145             while (true) {
146                 int pos = source.indexOf(key);
147                 if (pos < 0) return source;
148                 source = source.substring(0, pos) + value + source.substring(pos + key.length());
149             }
150         }
151     }
152 
153     public interface LineHandler {
154         /**
155          * Return false if line was skipped
156          *
157          * @param line
158          * @return
159          */
handle(String line)160         boolean handle(String line) throws Exception;
161     }
162 
getPath(String fileOrDir, String filename)163     public static String getPath(String fileOrDir, String filename) {
164         // Required for cases where a system property is read but not default is given.
165         // TODO: Fix callers to not fail silently if properties are missing.
166         if (fileOrDir == null) {
167             return null;
168         }
169         Path path = Paths.get(fileOrDir);
170         if (filename != null) {
171             path = path.resolve(filename);
172         }
173         if (DEBUG_MISSING_DIRECTORIES && !Files.exists(path)) {
174             System.err.println("Warning: directory doesn't exist: " + path);
175         }
176         return PathUtilities.getNormalizedPathString(path) + File.separatorChar;
177     }
178 
getPath(String path)179     public static String getPath(String path) {
180         return getPath(path, null);
181     }
182 
183     public static final String ANALYTICS = Chart.AnalyticsID.CLDR.getScript();
184 
185     public static final List<String> MINIMUM_LANGUAGES =
186             Arrays.asList(
187                     new String[] {
188                         "ar", "en", "de", "fr", "hi", "it", "es", "pt", "ru", "zh", "ja"
189                     }); // plus language itself
190     public static final List<String> MINIMUM_TERRITORIES =
191             Arrays.asList(
192                     new String[] {"US", "GB", "DE", "FR", "IT", "JP", "CN", "IN", "RU", "BR"});
193 
194     public interface LineComparer {
195         static final int LINES_DIFFERENT = -1, LINES_SAME = 0, SKIP_FIRST = 1, SKIP_SECOND = 2;
196 
197         /**
198          * Returns LINES_DIFFERENT, LINES_SAME, or if one of the lines is ignorable, SKIP_FIRST or
199          * SKIP_SECOND
200          *
201          * @param line1
202          * @param line2
203          * @return
204          */
compare(String line1, String line2)205         int compare(String line1, String line2);
206     }
207 
208     public static class SimpleLineComparator implements LineComparer {
209         public static final int TRIM = 1, SKIP_SPACES = 2, SKIP_EMPTY = 4, SKIP_CVS_TAGS = 8;
210         StringIterator si1 = new StringIterator();
211         StringIterator si2 = new StringIterator();
212         int flags;
213 
SimpleLineComparator(int flags)214         public SimpleLineComparator(int flags) {
215             this.flags = flags;
216         }
217 
218         @Override
compare(String line1, String line2)219         public int compare(String line1, String line2) {
220             // first, see if we want to skip one or the other lines
221             int skipper = 0;
222             if (line1 == null) {
223                 skipper = SKIP_FIRST;
224             } else {
225                 if ((flags & TRIM) != 0) line1 = line1.trim();
226                 if ((flags & SKIP_EMPTY) != 0 && line1.length() == 0) skipper = SKIP_FIRST;
227             }
228             if (line2 == null) {
229                 skipper = SKIP_SECOND;
230             } else {
231                 if ((flags & TRIM) != 0) line2 = line2.trim();
232                 if ((flags & SKIP_EMPTY) != 0 && line2.length() == 0) skipper += SKIP_SECOND;
233             }
234             if (skipper != 0) {
235                 if (skipper == SKIP_FIRST + SKIP_SECOND) return LINES_SAME; // ok, don't skip both
236                 return skipper;
237             }
238 
239             // check for null
240             if (line1 == null) {
241                 if (line2 == null) return LINES_SAME;
242                 return LINES_DIFFERENT;
243             }
244             if (line2 == null) {
245                 return LINES_DIFFERENT;
246             }
247 
248             // now check equality
249             if (line1.equals(line2)) return LINES_SAME;
250 
251             // if not equal, see if we are skipping spaces
252             if ((flags & SKIP_CVS_TAGS) != 0) {
253                 if (line1.indexOf('$') >= 0 && line2.indexOf('$') >= 0) {
254                     line1 = stripTags(line1);
255                     line2 = stripTags(line2);
256                     if (line1.equals(line2)) return LINES_SAME;
257                 } else if (line1.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/")
258                         && line2.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/")) {
259                     return LINES_SAME;
260                 }
261             }
262             if ((flags & SKIP_SPACES) != 0 && si1.set(line1).matches(si2.set(line2)))
263                 return LINES_SAME;
264             return LINES_DIFFERENT;
265         }
266 
267         // private Matcher dtdMatcher = PatternCache.get(
268         // "\\Q<!DOCTYPE ldml SYSTEM
269         // \"http://www.unicode.org/cldr/dtd/\\E.*\\Q/ldml.dtd\">\\E").matcher("");
270 
271         private String[] CVS_TAGS = {"Revision", "Date"};
272 
stripTags(String line)273         private String stripTags(String line) {
274             // $
275             // Revision: 8994 $
276             // $
277             // Date: 2013-07-03 21:31:17 +0200 (Wed, 03 Jul 2013) $
278             int pos = line.indexOf('$');
279             if (pos < 0) return line;
280             pos++;
281             int endpos = line.indexOf('$', pos);
282             if (endpos < 0) return line;
283             for (int i = 0; i < CVS_TAGS.length; ++i) {
284                 if (!line.startsWith(CVS_TAGS[i], pos)) continue;
285                 line = line.substring(0, pos + CVS_TAGS[i].length()) + line.substring(endpos);
286             }
287             return line;
288         }
289     }
290 
291     /**
292      * @param file1
293      * @param file2
294      * @param failureLines on input, String[2], on output, failing lines
295      * @param lineComparer
296      * @return
297      * @throws IOException
298      */
areFileIdentical( String file1, String file2, String[] failureLines, LineComparer lineComparer)299     public static boolean areFileIdentical(
300             String file1, String file2, String[] failureLines, LineComparer lineComparer)
301             throws IOException {
302         try (BufferedReader br1 = new BufferedReader(new FileReader(file1), 32 * 1024);
303                 BufferedReader br2 = new BufferedReader(new FileReader(file2), 32 * 1024); ) {
304             String line1 = "";
305             String line2 = "";
306             int skip = 0;
307 
308             while (true) {
309                 if ((skip & LineComparer.SKIP_FIRST) == 0) line1 = br1.readLine();
310                 if ((skip & LineComparer.SKIP_SECOND) == 0) line2 = br2.readLine();
311                 if (line1 == null && line2 == null) return true;
312                 if (line1 == null || line2 == null) {
313                     // System.out.println("debug");
314                 }
315                 skip = lineComparer.compare(line1, line2);
316                 if (skip == LineComparer.LINES_DIFFERENT) {
317                     break;
318                 }
319             }
320             failureLines[0] = line1 != null ? line1 : "<end of file>";
321             failureLines[1] = line2 != null ? line2 : "<end of file>";
322             return false;
323         }
324     }
325 
326     /*
327      * static String getLineWithoutFluff(BufferedReader br1, boolean first, int flags) throws IOException {
328      * while (true) {
329      * String line1 = br1.readLine();
330      * if (line1 == null) return line1;
331      * if ((flags & TRIM)!= 0) line1 = line1.trim();
332      * if ((flags & SKIP_EMPTY)!= 0 && line1.length() == 0) continue;
333      * return line1;
334      * }
335      * }
336      */
337 
338     public static final class StringIterator {
339         String string;
340         int position = 0;
341 
next()342         char next() {
343             while (true) {
344                 if (position >= string.length()) return '\uFFFF';
345                 char ch = string.charAt(position++);
346                 if (ch != ' ' && ch != '\t') return ch;
347             }
348         }
349 
reset()350         StringIterator reset() {
351             position = 0;
352             return this;
353         }
354 
set(String string)355         StringIterator set(String string) {
356             this.string = string;
357             position = 0;
358             return this;
359         }
360 
matches(StringIterator other)361         boolean matches(StringIterator other) {
362             while (true) {
363                 char c1 = next();
364                 char c2 = other.next();
365                 if (c1 != c2) return false;
366                 if (c1 == '\uFFFF') return true;
367             }
368         }
369 
370         /**
371          * @return Returns the position.
372          */
getPosition()373         public int getPosition() {
374             return position;
375         }
376     }
377 
splitArray(String source, char separator)378     public static String[] splitArray(String source, char separator) {
379         return splitArray(source, separator, false);
380     }
381 
splitArray(String source, char separator, boolean trim)382     public static String[] splitArray(String source, char separator, boolean trim) {
383         List<String> piecesList = splitList(source, separator, trim);
384         String[] pieces = new String[piecesList.size()];
385         piecesList.toArray(pieces);
386         return pieces;
387     }
388 
splitCommaSeparated(String line)389     public static String[] splitCommaSeparated(String line) {
390         // items are separated by ','
391         // each item is of the form abc...
392         // or "..." (required if a comma or quote is contained)
393         // " in a field is represented by ""
394         List<String> result = new ArrayList<>();
395         StringBuilder item = new StringBuilder();
396         boolean inQuote = false;
397         for (int i = 0; i < line.length(); ++i) {
398             char ch = line.charAt(i); // don't worry about supplementaries
399             switch (ch) {
400                 case '"':
401                     inQuote = !inQuote;
402                     // at start or end, that's enough
403                     // if get a quote when we are not in a quote, and not at start, then add it and
404                     // return to inQuote
405                     if (inQuote && item.length() != 0) {
406                         item.append('"');
407                         inQuote = true;
408                     }
409                     break;
410                 case ',':
411                     if (!inQuote) {
412                         result.add(item.toString());
413                         item.setLength(0);
414                     } else {
415                         item.append(ch);
416                     }
417                     break;
418                 default:
419                     item.append(ch);
420                     break;
421             }
422         }
423         result.add(item.toString());
424         return result.toArray(new String[result.size()]);
425     }
426 
splitList(String source, char separator)427     public static List<String> splitList(String source, char separator) {
428         return splitList(source, separator, false, null);
429     }
430 
splitList(String source, char separator, boolean trim)431     public static List<String> splitList(String source, char separator, boolean trim) {
432         return splitList(source, separator, trim, null);
433     }
434 
splitList( String source, char separator, boolean trim, List<String> output)435     public static List<String> splitList(
436             String source, char separator, boolean trim, List<String> output) {
437         return splitList(source, Character.toString(separator), trim, output);
438     }
439 
splitList(String source, String separator)440     public static List<String> splitList(String source, String separator) {
441         return splitList(source, separator, false, null);
442     }
443 
splitList(String source, String separator, boolean trim)444     public static List<String> splitList(String source, String separator, boolean trim) {
445         return splitList(source, separator, trim, null);
446     }
447 
splitList( String source, String separator, boolean trim, List<String> output)448     public static List<String> splitList(
449             String source, String separator, boolean trim, List<String> output) {
450         if (output == null) output = new ArrayList<>();
451         if (source.length() == 0) return output;
452         int pos = 0;
453         do {
454             int npos = source.indexOf(separator, pos);
455             if (npos < 0) npos = source.length();
456             String piece = source.substring(pos, npos);
457             if (trim) piece = piece.trim();
458             output.add(piece);
459             pos = npos + 1;
460         } while (pos < source.length());
461         return output;
462     }
463 
464     /**
465      * Protect a collection (as much as Java lets us!) from modification. Really, really ugly code,
466      * since Java doesn't let us do better.
467      */
468     @SuppressWarnings({"rawtypes", "unchecked"})
protectCollection(T source)469     public static <T> T protectCollection(T source) {
470         // TODO - exclude UnmodifiableMap, Set, ...
471         if (source instanceof Map) {
472             Map<Object, Object> sourceMap = (Map) source;
473             ImmutableMap.Builder<Object, Object> builder = ImmutableMap.builder();
474             for (Entry<Object, Object> entry : sourceMap.entrySet()) {
475                 final Object key = entry.getKey();
476                 final Object value = entry.getValue();
477                 builder.put(protectCollection(key), protectCollection(value));
478             }
479             return (T) builder.build();
480         } else if (source instanceof Multimap) {
481             Multimap<Object, Object> sourceMap = (Multimap) source;
482             ImmutableMultimap.Builder<Object, Object> builder = ImmutableMultimap.builder();
483             for (Entry<Object, Object> entry : sourceMap.entries()) {
484                 builder.put(protectCollection(entry.getKey()), protectCollection(entry.getValue()));
485             }
486             return (T) builder.build();
487         } else if (source instanceof Collection) {
488             // TODO use ImmutableSet, List, ...
489             Collection sourceCollection = (Collection) source;
490             Collection<Object> resultCollection = clone(sourceCollection);
491             if (resultCollection == null) return (T) sourceCollection; // failed
492             resultCollection.clear();
493 
494             for (Object item : sourceCollection) {
495                 resultCollection.add(protectCollection(item));
496             }
497 
498             return sourceCollection instanceof List
499                     ? (T) Collections.unmodifiableList((List) sourceCollection)
500                     : sourceCollection instanceof SortedSet
501                             ? (T) Collections.unmodifiableSortedSet((SortedSet) sourceCollection)
502                             : sourceCollection instanceof Set
503                                     ? (T) Collections.unmodifiableSet((Set) sourceCollection)
504                                     : (T) Collections.unmodifiableCollection(sourceCollection);
505         } else if (source instanceof Freezable) {
506             Freezable freezableSource = (Freezable) source;
507             return (T) freezableSource.freeze();
508             //            if (freezableSource.isFrozen()) return source;
509             //            return (T) ((Freezable) (freezableSource.cloneAsThawed())).freeze();
510         } else {
511             return source; // can't protect
512         }
513     }
514 
515     /**
516      * Protect a collections where we don't need to clone.
517      *
518      * @param source
519      * @return
520      */
521     @SuppressWarnings({"rawtypes", "unchecked"})
protectCollectionX(T source)522     public static <T> T protectCollectionX(T source) {
523         // TODO - exclude UnmodifiableMap, Set, ...
524         if (isImmutable(source)) {
525             return source;
526         }
527         if (source instanceof Map) {
528             Map sourceMap = (Map) source;
529             // recurse
530             LinkedHashMap tempMap = new LinkedHashMap<>(sourceMap); // copy contents
531             sourceMap.clear();
532             for (Object key : tempMap.keySet()) {
533                 sourceMap.put(protectCollection(key), protectCollectionX(tempMap.get(key)));
534             }
535             return sourceMap instanceof SortedMap
536                     ? (T) Collections.unmodifiableSortedMap((SortedMap) sourceMap)
537                     : (T) Collections.unmodifiableMap(sourceMap);
538         } else if (source instanceof Collection) {
539             Collection sourceCollection = (Collection) source;
540             LinkedHashSet tempSet = new LinkedHashSet<>(sourceCollection); // copy contents
541 
542             sourceCollection.clear();
543             for (Object item : tempSet) {
544                 sourceCollection.add(protectCollectionX(item));
545             }
546 
547             return sourceCollection instanceof List
548                     ? (T) Collections.unmodifiableList((List) sourceCollection)
549                     : sourceCollection instanceof SortedSet
550                             ? (T) Collections.unmodifiableSortedSet((SortedSet) sourceCollection)
551                             : sourceCollection instanceof Set
552                                     ? (T) Collections.unmodifiableSet((Set) sourceCollection)
553                                     : (T) Collections.unmodifiableCollection(sourceCollection);
554         } else if (source instanceof Freezable) {
555             Freezable freezableSource = (Freezable) source;
556             return (T) freezableSource.freeze();
557         } else {
558             throw new IllegalArgumentException("Can’t protect: " + source.getClass().toString());
559         }
560     }
561 
562     private static final Set<Object> KNOWN_IMMUTABLES = new HashSet<>(Arrays.asList(String.class));
563 
isImmutable(Object source)564     public static boolean isImmutable(Object source) {
565         return source == null
566                 || source instanceof Enum
567                 || source instanceof Number
568                 || KNOWN_IMMUTABLES.contains(source.getClass());
569     }
570 
571     /**
572      * Clones T if we can; otherwise returns null.
573      *
574      * @param <T>
575      * @param source
576      * @return
577      */
578     @SuppressWarnings("unchecked")
clone(T source)579     private static <T> T clone(T source) {
580         final Class<? extends Object> class1 = source.getClass();
581         try {
582             final Method declaredMethod = class1.getDeclaredMethod("clone", (Class<?>) null);
583             return (T) declaredMethod.invoke(source, (Object) null);
584         } catch (Exception e) {
585         }
586         try {
587             final Constructor<? extends Object> declaredMethod =
588                     class1.getConstructor((Class<?>) null);
589             return (T) declaredMethod.newInstance((Object) null);
590         } catch (Exception e) {
591         }
592         return null; // uncloneable
593     }
594 
595     /** Appends two strings, inserting separator if either is empty */
joinWithSeparation(String a, String separator, String b)596     public static String joinWithSeparation(String a, String separator, String b) {
597         if (a.length() == 0) return b;
598         if (b.length() == 0) return a;
599         return a + separator + b;
600     }
601 
602     /** Appends two strings, inserting separator if either is empty. Modifies first map */
joinWithSeparation( Map<String, String> a, String separator, Map<String, String> b)603     public static Map<String, String> joinWithSeparation(
604             Map<String, String> a, String separator, Map<String, String> b) {
605         for (Iterator<String> it = b.keySet().iterator(); it.hasNext(); ) {
606             String key = it.next();
607             String bvalue = b.get(key);
608             String avalue = a.get(key);
609             if (avalue != null) {
610                 if (avalue.trim().equals(bvalue.trim())) continue;
611                 bvalue = joinWithSeparation(avalue, separator, bvalue);
612             }
613             a.put(key, bvalue);
614         }
615         return a;
616     }
617 
join(Collection<T> c, String separator)618     public static <T> String join(Collection<T> c, String separator) {
619         return join(c, separator, null);
620     }
621 
join(Object[] c, String separator)622     public static String join(Object[] c, String separator) {
623         return join(c, separator, null);
624     }
625 
join( Collection<T> c, String separator, Transform<T, String> transform)626     public static <T> String join(
627             Collection<T> c, String separator, Transform<T, String> transform) {
628         StringBuffer output = new StringBuffer();
629         boolean isFirst = true;
630         for (T item : c) {
631             if (isFirst) {
632                 isFirst = false;
633             } else {
634                 output.append(separator);
635             }
636             output.append(transform != null ? transform.transform(item) : item);
637         }
638         return output.toString();
639     }
640 
join(T[] c, String separator, Transform<T, String> transform)641     public static <T> String join(T[] c, String separator, Transform<T, String> transform) {
642         return join(Arrays.asList(c), separator, transform);
643     }
644 
645     /** Utility like Arrays.asList() */
646     @SuppressWarnings("unchecked")
asMap(Object[][] source, Map<K, V> target, boolean reverse)647     public static <K, V> Map<K, V> asMap(Object[][] source, Map<K, V> target, boolean reverse) {
648         int from = 0, to = 1;
649         if (reverse) {
650             from = 1;
651             to = 0;
652         }
653         for (int i = 0; i < source.length; ++i) {
654             if (source[i].length != 2) {
655                 throw new IllegalArgumentException(
656                         "Source must be array of pairs of strings: " + Arrays.asList(source[i]));
657             }
658             target.put((K) source[i][from], (V) source[i][to]);
659         }
660         return target;
661     }
662 
asMap(Object[][] source)663     public static <K, V> Map<K, V> asMap(Object[][] source) {
664         return asMap(source, new HashMap<K, V>(), false);
665     }
666 
667     /** Returns the canonical name for a file. */
getCanonicalName(String file)668     public static String getCanonicalName(String file) {
669         try {
670             return PathUtilities.getNormalizedPathString(file);
671         } catch (Exception e) {
672             return file;
673         }
674     }
675 
676     /**
677      * Convert a UnicodeSet into a string that can be embedded into a Regex. Handles strings that
678      * are in the UnicodeSet, Supplementary ranges, and escaping
679      *
680      * @param source The source set
681      * @return
682      */
toRegex(UnicodeSet source)683     public static String toRegex(UnicodeSet source) {
684         return toRegex(source, null, false);
685     }
686 
687     private static final Transliterator DEFAULT_REGEX_ESCAPER =
688             Transliterator.createFromRules(
689                     "foo",
690                     "([ \\- \\\\ \\[ \\] ]) > '\\' $1 ;"
691                             // + " ([:c:]) > &hex($1);"
692                             + " ([[:control:][[:z:]&[:ascii:]]]) > &hex($1);",
693                     Transliterator.FORWARD);
694 
695     /**
696      * Convert a UnicodeSet into a string that can be embedded into a Regex. Handles strings that
697      * are in the UnicodeSet, Supplementary ranges, and escaping
698      *
699      * @param source The source set
700      * @param escaper A transliterator that is used to escape the characters according to the
701      *     requirements of the regex. The default puts a \\ before [, -, \, and ], and converts
702      *     controls and Ascii whitespace to hex. Alternatives can be supplied. Note that some Regex
703      *     engines, including Java 1.5, don't really deal with escaped supplementaries well.
704      * @param onlyBmp Set to true if the Regex only accepts BMP characters. In that case, ranges of
705      *     supplementary characters are converted to lists of ranges. For example,
706      *     [\uFFF0-\U0010000F \U0010100F-\U0010300F] converts into:
707      *     <pre>
708      *          [\uD800][\uDC00-\uDFFF]
709      *          [\uD801-\uDBBF][\uDC00-\uDFFF]
710      *          [\uDBC0][\uDC00-\uDC0F]
711      * </pre>
712      *     and
713      *     <pre>
714      *          [\uDBC4][\uDC0F-\uDFFF]
715      *          [\uDBC5-\uDBCB][\uDC00-\uDFFF]
716      *          [\uDBCC][\uDC00-\uDC0F]
717      * </pre>
718      *     These are then coalesced into a list of alternatives by sharing parts where feasible. For
719      *     example, the above turns into 3 pairs of ranges:
720      *     <pre>
721      *          [\uDBC0\uDBCC][\uDC00-\uDC0F]|\uDBC4[\uDC0F-\uDFFF]|[\uD800-\uDBBF\uDBC5-\uDBCB][\uDC00-\uDFFF]
722      * </pre>
723      *
724      * @return escaped string. Something like [a-z] or (?:[a-m]|{zh}) if there is a string zh in the
725      *     set, or a more complicated case for supplementaries. <br>
726      *     Special cases: [] returns "", single item returns a string (escaped), like [a] => "a", or
727      *     [{abc}] => "abc"<br>
728      *     Supplementaries are handled specially, as described under onlyBmp.
729      */
toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp)730     public static String toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp) {
731         if (escaper == null) {
732             escaper = DEFAULT_REGEX_ESCAPER;
733         }
734         UnicodeSetIterator it = new UnicodeSetIterator(source);
735         // if there is only one item, return it
736         if (source.size() == 0) {
737             return "";
738         }
739         if (source.size() == 1) {
740             it.next();
741             return escaper.transliterate(it.getString());
742         }
743         // otherwise, we figure out what is in the set, and will return
744         StringBuilder base = new StringBuilder("[");
745         StringBuilder alternates = new StringBuilder();
746         Map<UnicodeSet, UnicodeSet> lastToFirst = new TreeMap<>(new UnicodeSetComparator());
747         int alternateCount = 0;
748         while (it.nextRange()) {
749             if (it.codepoint == UnicodeSetIterator.IS_STRING) {
750                 ++alternateCount;
751                 alternates.append('|').append(escaper.transliterate(it.string));
752             } else if (!onlyBmp || it.codepointEnd <= 0xFFFF) { // BMP
753                 addBmpRange(it.codepoint, it.codepointEnd, escaper, base);
754             } else { // supplementary
755                 if (it.codepoint <= 0xFFFF) {
756                     addBmpRange(it.codepoint, 0xFFFF, escaper, base);
757                     it.codepoint = 0x10000; // reset the range
758                 }
759                 // this gets a bit ugly; we are trying to minimize the extra ranges for
760                 // supplementaries
761                 // we do this by breaking up X-Y based on the Lead and Trail values for X and Y
762                 // Lx [Tx - Ty]) (if Lx == Ly)
763                 // Lx [Tx - DFFF] | Ly [DC00-Ty] (if Lx == Ly - 1)
764                 // Lx [Tx - DFFF] | [Lx+1 - Ly-1][DC00-DFFF] | Ly [DC00-Ty] (otherwise)
765                 int leadX = UTF16.getLeadSurrogate(it.codepoint);
766                 int trailX = UTF16.getTrailSurrogate(it.codepoint);
767                 int leadY = UTF16.getLeadSurrogate(it.codepointEnd);
768                 int trailY = UTF16.getTrailSurrogate(it.codepointEnd);
769                 if (leadX == leadY) {
770                     addSupplementalRange(leadX, leadX, trailX, trailY, escaper, lastToFirst);
771                 } else {
772                     addSupplementalRange(leadX, leadX, trailX, 0xDFFF, escaper, lastToFirst);
773                     if (leadX != leadY - 1) {
774                         addSupplementalRange(
775                                 leadX + 1, leadY - 1, 0xDC00, 0xDFFF, escaper, lastToFirst);
776                     }
777                     addSupplementalRange(leadY, leadY, 0xDC00, trailY, escaper, lastToFirst);
778                 }
779             }
780         }
781         // add in the supplementary ranges
782         if (lastToFirst.size() != 0) {
783             for (UnicodeSet last : lastToFirst.keySet()) {
784                 ++alternateCount;
785                 alternates
786                         .append('|')
787                         .append(toRegex(lastToFirst.get(last), escaper, onlyBmp))
788                         .append(toRegex(last, escaper, onlyBmp));
789             }
790         }
791         // Return the output. We separate cases in order to get the minimal extra apparatus
792         base.append("]");
793         if (alternateCount == 0) {
794             return base.toString();
795         } else if (base.length() > 2) {
796             return "(?:" + base + "|" + alternates.substring(1) + ")";
797         } else if (alternateCount == 1) {
798             return alternates.substring(1);
799         } else {
800             return "(?:" + alternates.substring(1) + ")";
801         }
802     }
803 
addSupplementalRange( int leadX, int leadY, int trailX, int trailY, Transliterator escaper, Map<UnicodeSet, UnicodeSet> lastToFirst)804     private static void addSupplementalRange(
805             int leadX,
806             int leadY,
807             int trailX,
808             int trailY,
809             Transliterator escaper,
810             Map<UnicodeSet, UnicodeSet> lastToFirst) {
811         System.out.println(
812                 "\tadding: "
813                         + new UnicodeSet(leadX, leadY)
814                         + "\t"
815                         + new UnicodeSet(trailX, trailY));
816         UnicodeSet last = new UnicodeSet(trailX, trailY);
817         UnicodeSet first = lastToFirst.get(last);
818         if (first == null) {
819             lastToFirst.put(last, first = new UnicodeSet());
820         }
821         first.add(leadX, leadY);
822     }
823 
addBmpRange( int start, int limit, Transliterator escaper, StringBuilder base)824     private static void addBmpRange(
825             int start, int limit, Transliterator escaper, StringBuilder base) {
826         base.append(escaper.transliterate(UTF16.valueOf(start)));
827         if (start != limit) {
828             base.append("-").append(escaper.transliterate(UTF16.valueOf(limit)));
829         }
830     }
831 
832     public static class UnicodeSetComparator implements Comparator<UnicodeSet> {
833         @Override
compare(UnicodeSet o1, UnicodeSet o2)834         public int compare(UnicodeSet o1, UnicodeSet o2) {
835             return o1.compareTo(o2);
836         }
837     }
838 
839     public static class CollectionComparator<T extends Comparable<T>>
840             implements Comparator<Collection<T>> {
841         @Override
compare(Collection<T> o1, Collection<T> o2)842         public int compare(Collection<T> o1, Collection<T> o2) {
843             return UnicodeSet.compare(o1, o2, UnicodeSet.ComparisonStyle.SHORTER_FIRST);
844         }
845     }
846 
847     public static class ComparableComparator<T extends Comparable<T>> implements Comparator<T> {
848         @Override
compare(T arg0, T arg1)849         public int compare(T arg0, T arg1) {
850             return Utility.checkCompare(arg0, arg1);
851         }
852     }
853 
854     @SuppressWarnings({"rawtypes", "unchecked"})
addTreeMapChain(Map coverageData, Object... objects)855     public static void addTreeMapChain(Map coverageData, Object... objects) {
856         Map<Object, Object> base = coverageData;
857         for (int i = 0; i < objects.length - 2; ++i) {
858             Map<Object, Object> nextOne = (Map<Object, Object>) base.get(objects[i]);
859             if (nextOne == null) base.put(objects[i], nextOne = new TreeMap<>());
860             base = nextOne;
861         }
862         base.put(objects[objects.length - 2], objects[objects.length - 1]);
863     }
864 
865     public abstract static class CollectionTransform<S, T> implements Transform<S, T> {
866         @Override
transform(S source)867         public abstract T transform(S source);
868 
transform(Collection<S> input, Collection<T> output)869         public Collection<T> transform(Collection<S> input, Collection<T> output) {
870             return CldrUtility.transform(input, this, output);
871         }
872 
transform(Collection<S> input)873         public Collection<T> transform(Collection<S> input) {
874             return transform(input, new ArrayList<T>());
875         }
876     }
877 
transform( SC source, Transform<S, T> transform, TC target)878     public static <S, T, SC extends Collection<S>, TC extends Collection<T>> TC transform(
879             SC source, Transform<S, T> transform, TC target) {
880         for (S sourceItem : source) {
881             T targetItem = transform.transform(sourceItem);
882             if (targetItem != null) {
883                 target.add(targetItem);
884             }
885         }
886         return target;
887     }
888 
transform( SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target)889     public static <SK, SV, TK, TV, SM extends Map<SK, SV>, TM extends Map<TK, TV>> TM transform(
890             SM source,
891             Transform<SK, TK> transformKey,
892             Transform<SV, TV> transformValue,
893             TM target) {
894         for (Entry<SK, SV> sourceEntry : source.entrySet()) {
895             TK targetKey = transformKey.transform(sourceEntry.getKey());
896             TV targetValue = transformValue.transform(sourceEntry.getValue());
897             if (targetKey != null && targetValue != null) {
898                 target.put(targetKey, targetValue);
899             }
900         }
901         return target;
902     }
903 
904     public abstract static class Apply<T> {
apply(T item)905         public abstract void apply(T item);
906 
applyTo(U collection)907         public <U extends Collection<T>> void applyTo(U collection) {
908             for (T item : collection) {
909                 apply(item);
910             }
911         }
912     }
913 
914     public abstract static class Filter<T> {
915 
contains(T item)916         public abstract boolean contains(T item);
917 
retainAll(U c)918         public <U extends Collection<T>> U retainAll(U c) {
919             for (Iterator<T> it = c.iterator(); it.hasNext(); ) {
920                 if (!contains(it.next())) it.remove();
921             }
922             return c;
923         }
924 
extractMatches(U c, U target)925         public <U extends Collection<T>> U extractMatches(U c, U target) {
926             for (Iterator<T> it = c.iterator(); it.hasNext(); ) {
927                 T item = it.next();
928                 if (contains(item)) {
929                     target.add(item);
930                 }
931             }
932             return target;
933         }
934 
removeAll(U c)935         public <U extends Collection<T>> U removeAll(U c) {
936             for (Iterator<T> it = c.iterator(); it.hasNext(); ) {
937                 if (contains(it.next())) it.remove();
938             }
939             return c;
940         }
941 
extractNonMatches(U c, U target)942         public <U extends Collection<T>> U extractNonMatches(U c, U target) {
943             for (Iterator<T> it = c.iterator(); it.hasNext(); ) {
944                 T item = it.next();
945                 if (!contains(item)) {
946                     target.add(item);
947                 }
948             }
949             return target;
950         }
951     }
952 
953     public static class MatcherFilter<T> extends Filter<T> {
954         private Matcher matcher;
955 
MatcherFilter(String pattern)956         public MatcherFilter(String pattern) {
957             this.matcher = PatternCache.get(pattern).matcher("");
958         }
959 
MatcherFilter(Matcher matcher)960         public MatcherFilter(Matcher matcher) {
961             this.matcher = matcher;
962         }
963 
set(Matcher matcher)964         public MatcherFilter<T> set(Matcher matcher) {
965             this.matcher = matcher;
966             return this;
967         }
968 
set(String pattern)969         public MatcherFilter<T> set(String pattern) {
970             this.matcher = PatternCache.get(pattern).matcher("");
971             return this;
972         }
973 
974         @Override
contains(T o)975         public boolean contains(T o) {
976             return matcher.reset(o.toString()).matches();
977         }
978     }
979 
980     // static final class HandlingTransform implements Transform<String, Handling> {
981     // @Override
982     // public Handling transform(String source) {
983     // return Handling.valueOf(source);
984     // }
985     // }
986 
987     public static final class PairComparator<K extends Comparable<K>, V extends Comparable<V>>
988             implements java.util.Comparator<Pair<K, V>> {
989 
990         private Comparator<K> comp1;
991         private Comparator<V> comp2;
992 
PairComparator(Comparator<K> comp1, Comparator<V> comp2)993         public PairComparator(Comparator<K> comp1, Comparator<V> comp2) {
994             this.comp1 = comp1;
995             this.comp2 = comp2;
996         }
997 
998         @Override
compare(Pair<K, V> o1, Pair<K, V> o2)999         public int compare(Pair<K, V> o1, Pair<K, V> o2) {
1000             {
1001                 K o1First = o1.getFirst();
1002                 K o2First = o2.getFirst();
1003                 int diff =
1004                         o1First == null
1005                                 ? (o2First == null ? 0 : -1)
1006                                 : o2First == null
1007                                         ? 1
1008                                         : comp1 == null
1009                                                 ? o1First.compareTo(o2First)
1010                                                 : comp1.compare(o1First, o2First);
1011                 if (diff != 0) {
1012                     return diff;
1013                 }
1014             }
1015             V o1Second = o1.getSecond();
1016             V o2Second = o2.getSecond();
1017             return o1Second == null
1018                     ? (o2Second == null ? 0 : -1)
1019                     : o2Second == null
1020                             ? 1
1021                             : comp2 == null
1022                                     ? o1Second.compareTo(o2Second)
1023                                     : comp2.compare(o1Second, o2Second);
1024         }
1025     }
1026 
1027     /**
1028      * Fetch data from jar
1029      *
1030      * @param name a name residing in the org/unicode/cldr/util/data/ directory, or loading from a
1031      *     jar will break.
1032      */
getUTF8Data(String name)1033     public static BufferedReader getUTF8Data(String name) {
1034         if (new File(name).isAbsolute()) {
1035             throw new IllegalArgumentException(
1036                     "Path must be relative to org/unicode/cldr/util/data  such as 'file.txt' or 'casing/file.txt', but got '"
1037                             + name
1038                             + "'.");
1039         }
1040         return FileReaders.openFile(CldrUtility.class, "data/" + name);
1041     }
1042 
    /** Name of the license file: {@code "LICENSE"}. */
    public static final String LICENSE = "LICENSE";
1045 
1046     /**
1047      * Fetch data from jar
1048      *
1049      * @param name a name residing in the org/unicode/cldr/util/data/ directory, or loading from a
1050      *     jar will break.
1051      */
getInputStream(String name)1052     public static InputStream getInputStream(String name) {
1053         if (new File(name).isAbsolute()) {
1054             throw new IllegalArgumentException(
1055                     "Path must be relative to org/unicode/cldr/util/data  such as 'file.txt' or 'casing/file.txt', but got '"
1056                             + name
1057                             + "'.");
1058         }
1059         return getInputStream(CldrUtility.class, "data/" + name);
1060     }
1061 
getInputStream(Class<?> callingClass, String relativePath)1062     public static InputStream getInputStream(Class<?> callingClass, String relativePath) {
1063         InputStream is = callingClass.getResourceAsStream(relativePath);
1064         // add buffering
1065         return InputStreamFactory.buffer(is);
1066     }
1067 
1068     /**
1069      * Takes a Map that goes from Object to Set, and fills in the transpose
1070      *
1071      * @param source_key_valueSet
1072      * @param output_value_key
1073      */
putAllTransposed( Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key)1074     public static void putAllTransposed(
1075             Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key) {
1076         for (Iterator<Object> it = source_key_valueSet.keySet().iterator(); it.hasNext(); ) {
1077             Object key = it.next();
1078             Set<Object> values = source_key_valueSet.get(key);
1079             for (Iterator<Object> it2 = values.iterator(); it2.hasNext(); ) {
1080                 Object value = it2.next();
1081                 output_value_key.put(value, key);
1082             }
1083         }
1084     }
1085 
countInstances(String source, String substring)1086     public static int countInstances(String source, String substring) {
1087         int count = 0;
1088         int pos = 0;
1089         while (true) {
1090             pos = source.indexOf(substring, pos) + 1;
1091             if (pos <= 0) break;
1092             count++;
1093         }
1094         return count;
1095     }
1096 
registerTransliteratorFromFile(String id, String dir, String filename)1097     public static void registerTransliteratorFromFile(String id, String dir, String filename) {
1098         registerTransliteratorFromFile(id, dir, filename, Transliterator.FORWARD, true);
1099         registerTransliteratorFromFile(id, dir, filename, Transliterator.REVERSE, true);
1100     }
1101 
registerTransliteratorFromFile( String id, String dir, String filename, int direction, boolean reverseID)1102     public static void registerTransliteratorFromFile(
1103             String id, String dir, String filename, int direction, boolean reverseID) {
1104         if (filename == null) {
1105             filename = id.replace('-', '_');
1106             filename = filename.replace('/', '_');
1107             filename += ".txt";
1108         }
1109         String rules = getText(dir, filename);
1110         Transliterator t;
1111         int pos = id.indexOf('-');
1112         String rid;
1113         if (pos < 0) {
1114             rid = id + "-Any";
1115             id = "Any-" + id;
1116         } else {
1117             rid = id.substring(pos + 1) + "-" + id.substring(0, pos);
1118         }
1119         if (!reverseID) rid = id;
1120 
1121         if (direction == Transliterator.FORWARD) {
1122             Transliterator.unregister(id);
1123             t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
1124             Transliterator.registerInstance(t);
1125             System.out.println("Registered new Transliterator: " + id);
1126         }
1127 
1128         /*
1129          * String test = "\u049A\u0430\u0437\u0430\u049B";
1130          * System.out.println(t.transliterate(test));
1131          * t = Transliterator.getInstance(id);
1132          * System.out.println(t.transliterate(test));
1133          */
1134 
1135         if (direction == Transliterator.REVERSE) {
1136             Transliterator.unregister(rid);
1137             t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE);
1138             Transliterator.registerInstance(t);
1139             System.out.println("Registered new Transliterator: " + rid);
1140         }
1141     }
1142 
getText(String dir, String filename)1143     public static String getText(String dir, String filename) {
1144         try {
1145             BufferedReader br = FileUtilities.openUTF8Reader(dir, filename);
1146             StringBuffer buffer = new StringBuffer();
1147             while (true) {
1148                 String line = br.readLine();
1149                 if (line == null) break;
1150                 if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1);
1151                 if (line.startsWith("//")) continue;
1152                 buffer.append(line).append(CldrUtility.LINE_SEPARATOR);
1153             }
1154             br.close();
1155             String rules = buffer.toString();
1156             return rules;
1157         } catch (IOException e) {
1158             throw (IllegalArgumentException)
1159                     new IllegalArgumentException("Can't open " + dir + ", " + filename)
1160                             .initCause(e);
1161         }
1162     }
1163 
callMethod(String methodNames, Class<?> cls)1164     public static void callMethod(String methodNames, Class<?> cls) {
1165         for (String methodName : methodNames.split(",")) {
1166             try {
1167                 Method method;
1168                 try {
1169                     method = cls.getMethod(methodName, (Class[]) null);
1170                     try {
1171                         method.invoke(null, (Object[]) null);
1172                     } catch (Exception e) {
1173                         e.printStackTrace();
1174                     }
1175                 } catch (Exception e) {
1176                     System.out.println("No such method: " + methodName);
1177                     showMethods(cls);
1178                 }
1179             } catch (ClassNotFoundException e) {
1180                 e.printStackTrace();
1181             }
1182         }
1183     }
1184 
showMethods(Class<?> cls)1185     public static void showMethods(Class<?> cls) throws ClassNotFoundException {
1186         System.out.println("Possible methods of " + cls.getCanonicalName() + " are: ");
1187         Method[] methods = cls.getMethods();
1188         Set<String> names = new TreeSet<>();
1189         for (int i = 0; i < methods.length; ++i) {
1190             if (methods[i].getGenericParameterTypes().length != 0) continue;
1191             // int mods = methods[i].getModifiers();
1192             // if (!Modifier.isStatic(mods)) continue;
1193             String name = methods[i].getName();
1194             names.add(name);
1195         }
1196         for (Iterator<String> it = names.iterator(); it.hasNext(); ) {
1197             System.out.println("\t" + it.next());
1198         }
1199     }
1200 
1201     /**
1202      * Breaks lines if they are too long, or if matcher.group(1) != last. Only breaks just before
1203      * matcher.
1204      *
1205      * @param input
1206      * @param separator
1207      * @param matcher must match each possible item. The first group is significant; if different,
1208      *     will cause break
1209      * @return
1210      */
breakLines( CharSequence input, String separator, Matcher matcher, int width)1211     public static String breakLines(
1212             CharSequence input, String separator, Matcher matcher, int width) {
1213         StringBuffer output = new StringBuffer();
1214         String lastPrefix = "";
1215         int lastEnd = 0;
1216         int lastBreakPos = 0;
1217         matcher.reset(input);
1218         while (true) {
1219             boolean match = matcher.find();
1220             if (!match) {
1221                 output.append(input.subSequence(lastEnd, input.length()));
1222                 break;
1223             }
1224             String prefix = matcher.group(1);
1225             if (!prefix.equalsIgnoreCase(lastPrefix)
1226                     || matcher.end() - lastBreakPos > width) { // break before?
1227                 output.append(separator);
1228                 lastBreakPos = lastEnd;
1229             } else if (lastEnd != 0) {
1230                 output.append(' ');
1231             }
1232             output.append(input.subSequence(lastEnd, matcher.end()).toString().trim());
1233             lastEnd = matcher.end();
1234             lastPrefix = prefix;
1235         }
1236         return output.toString();
1237     }
1238 
showOptions(String[] args)1239     public static void showOptions(String[] args) {
1240         // Properties props = System.getProperties();
1241         System.out.println(
1242                 "Arguments: " + join(args, " ")); // + (props == null ? "" : " " + props));
1243     }
1244 
roundToDecimals(double input, int places)1245     public static double roundToDecimals(double input, int places) {
1246         double log10 = Math.log10(input); // 15000 => 4.xxx
1247         double intLog10 = Math.floor(log10);
1248         double scale = Math.pow(10, intLog10 - places + 1);
1249         double factored = Math.round(input / scale) * scale;
1250         // System.out.println("###\t" +input + "\t" + factored);
1251         return factored;
1252     }
1253 
1254     /**
1255      * Get a property value, returning the value if there is one (eg -Dkey=value), otherwise the
1256      * default value (for either empty or null).
1257      *
1258      * @param key
1259      * @param defaultValue
1260      * @return
1261      */
getProperty(String key, String defaultValue)1262     public static String getProperty(String key, String defaultValue) {
1263         return getProperty(key, defaultValue, defaultValue);
1264     }
1265 
1266     /** Get a property value, returning the value if there is one, otherwise null. */
getProperty(String key)1267     public static String getProperty(String key) {
1268         return getProperty(key, null, null);
1269     }
1270 
1271     /**
1272      * Get a property value, returning the value if there is one (eg -Dkey=value), the valueIfEmpty
1273      * if there is one with no value (eg -Dkey) and the valueIfNull if there is no property.
1274      *
1275      * @param key
1276      * @param valueIfNull
1277      * @param valueIfEmpty
1278      * @return
1279      */
getProperty(String key, String valueIfNull, String valueIfEmpty)1280     public static String getProperty(String key, String valueIfNull, String valueIfEmpty) {
1281         String result = CLDRConfig.getInstance().getProperty(key);
1282         if (result == null) {
1283             result = valueIfNull;
1284         } else if (result.length() == 0) {
1285             result = valueIfEmpty;
1286         }
1287         return result;
1288     }
1289 
hex(byte[] bytes, int start, int end, String separator)1290     public static String hex(byte[] bytes, int start, int end, String separator) {
1291         StringBuilder result = new StringBuilder();
1292         for (int i = 0; i < end; ++i) {
1293             if (result.length() != 0) {
1294                 result.append(separator);
1295             }
1296             result.append(Utility.hex(bytes[i] & 0xFF, 2));
1297         }
1298         return result.toString();
1299     }
1300 
getProperty(String string, boolean b)1301     public static boolean getProperty(String string, boolean b) {
1302         return getProperty(string, b ? "true" : "false", "true").matches("(?i)T|TRUE");
1303     }
1304 
checkValidDirectory(String sourceDirectory)1305     public static String checkValidDirectory(String sourceDirectory) {
1306         return checkValidFile(sourceDirectory, true, null);
1307     }
1308 
checkValidDirectory(String sourceDirectory, String correction)1309     public static String checkValidDirectory(String sourceDirectory, String correction) {
1310         return checkValidFile(sourceDirectory, true, correction);
1311     }
1312 
checkValidFile( String sourceDirectory, boolean checkForDirectory, String correction)1313     public static String checkValidFile(
1314             String sourceDirectory, boolean checkForDirectory, String correction) {
1315         File file = null;
1316         String normalizedPath = null;
1317         try {
1318             file = new File(sourceDirectory);
1319             normalizedPath = PathUtilities.getNormalizedPathString(file) + File.separatorChar;
1320         } catch (Exception e) {
1321         }
1322         if (file == null || normalizedPath == null || checkForDirectory && !file.isDirectory()) {
1323             throw new RuntimeException(
1324                     "Directory not found: "
1325                             + sourceDirectory
1326                             + (normalizedPath == null ? "" : " => " + normalizedPath)
1327                             + (correction == null ? "" : CldrUtility.LINE_SEPARATOR + correction));
1328         }
1329         return normalizedPath;
1330     }
1331 
1332     /**
1333      * Copy up to matching line (not included). If output is null, then just skip until.
1334      *
1335      * @param oldFile file to copy
1336      * @param readUntilPattern pattern to search for. If null, goes to end of file.
1337      * @param output into to copy into. If null, just skips in the input.
1338      * @param includeMatchingLine inclde the matching line when copying.
1339      * @throws IOException
1340      */
copyUpTo( BufferedReader oldFile, final Pattern readUntilPattern, final PrintWriter output, boolean includeMatchingLine)1341     public static void copyUpTo(
1342             BufferedReader oldFile,
1343             final Pattern readUntilPattern,
1344             final PrintWriter output,
1345             boolean includeMatchingLine)
1346             throws IOException {
1347         Matcher readUntil = readUntilPattern == null ? null : readUntilPattern.matcher("");
1348         while (true) {
1349             String line = oldFile.readLine();
1350             if (line == null) {
1351                 break;
1352             }
1353             if (line.startsWith("\uFEFF")) {
1354                 line = line.substring(1);
1355             }
1356             if (readUntil != null && readUntil.reset(line).matches()) {
1357                 if (includeMatchingLine && output != null) {
1358                     output.println(line);
1359                 }
1360                 break;
1361             }
1362             if (output != null) {
1363                 output.println(line);
1364             }
1365         }
1366     }
1367 
    // Shared formatters used by isoFormat() and isoFormatDateOnly(); both of those methods
    // synchronize on the formatter instance before calling format().
    // Date and time, with a literal "GMT" suffix in the pattern.
    private static DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss 'GMT'");
    // Date only.
    private static DateFormat DATE_ONLY = new SimpleDateFormat("yyyy-MM-dd");

    static {
        // Both formatters render in GMT rather than the default time zone.
        df.setTimeZone(TimeZone.getTimeZone("GMT"));
        DATE_ONLY.setTimeZone(TimeZone.getTimeZone("GMT"));
    }
1375 
isoFormat(Date date)1376     public static String isoFormat(Date date) {
1377         synchronized (df) {
1378             return df.format(date);
1379         }
1380     }
1381 
isoFormatDateOnly(Date date)1382     public static String isoFormatDateOnly(Date date) {
1383         synchronized (DATE_ONLY) {
1384             return DATE_ONLY.format(date);
1385         }
1386     }
1387 
newConcurrentHashMap()1388     public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap() {
1389         // http://ria101.wordpress.com/2011/12/12/concurrenthashmap-avoid-a-common-misuse/
1390         return new ConcurrentHashMap<>(4, 0.9f, 1);
1391     }
1392 
newConcurrentHashMap(Map<K, V> source)1393     public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap(Map<K, V> source) {
1394         ConcurrentHashMap<K, V> result = newConcurrentHashMap();
1395         result.putAll(source);
1396         return result;
1397     }
1398 
equals(Object a, Object b)1399     public static boolean equals(Object a, Object b) {
1400         return a == b ? true : a == null || b == null ? false : a.equals(b);
1401     }
1402 
getDoubleLink(String code)1403     public static String getDoubleLink(String code) {
1404         final String anchorSafe =
1405                 TransliteratorUtilities.toHTML.transliterate(code).replace(" ", "_");
1406         return "<a name='" + anchorSafe + "' href='#" + anchorSafe + "'>";
1407     }
1408 
getDoubleLinkedText(String anchor, String anchorText)1409     public static String getDoubleLinkedText(String anchor, String anchorText) {
1410         return getDoubleLink(anchor)
1411                 + TransliteratorUtilities.toHTML.transliterate(anchorText).replace("_", " ")
1412                 + "</a>";
1413     }
1414 
getDoubleLinkedText(String anchor)1415     public static String getDoubleLinkedText(String anchor) {
1416         return getDoubleLinkedText(anchor, anchor);
1417     }
1418 
getDoubleLinkMsg()1419     public static String getDoubleLinkMsg() {
1420         return "<a name=''{0}'' href=''#{0}''>{0}</a>";
1421     }
1422 
getDoubleLinkMsg2()1423     public static String getDoubleLinkMsg2() {
1424         return "<a name=''{0}{1}'' href=''#{0}{1}''>{0}</a>";
1425     }
1426 
getCopyrightString()1427     public static String getCopyrightString() {
1428         return getCopyrightString("");
1429     }
1430 
1431     private static final class CopyrightHelper {
1432         public static final CopyrightHelper INSTANCE = new CopyrightHelper();
1433         public final String COPYRIGHT_SHORT =
1434                 "Copyright \u00A9 1991-"
1435                         + Calendar.getInstance().get(Calendar.YEAR)
1436                         + " Unicode, Inc.";
1437     }
1438 
getCopyrightString(String linePrefix)1439     public static String getCopyrightString(String linePrefix) {
1440         // now do the rest
1441         return linePrefix
1442                 + getCopyrightShort()
1443                 + CldrUtility.LINE_SEPARATOR
1444                 + linePrefix
1445                 + "For terms of use, see http://www.unicode.org/copyright.html"
1446                 + CldrUtility.LINE_SEPARATOR
1447                 + linePrefix
1448                 + CLDRURLS.UNICODE_SPDX_HEADER
1449                 + CldrUtility.LINE_SEPARATOR
1450                 + linePrefix
1451                 + "CLDR data files are interpreted according to the LDML specification "
1452                 + "(http://unicode.org/reports/tr35/)";
1453     }
1454 
1455     /** Returns the '## License' section in markdown. */
getCopyrightMarkdown()1456     public static String getCopyrightMarkdown() {
1457         return "## License\n"
1458                 + "\n"
1459                 + getCopyrightShort()
1460                 + "\n"
1461                 + "[Terms of Use](http://www.unicode.org/copyright.html)\n\n"
1462                 + CLDRURLS.UNICODE_SPDX_HEADER
1463                 + "\n";
1464     }
1465 
1466     /** Get the short copyright string, "Copyright © YYYY-YYYY Unicode, Inc." */
getCopyrightShort()1467     public static String getCopyrightShort() {
1468         return CopyrightHelper.INSTANCE.COPYRIGHT_SHORT;
1469     }
1470 
1471     // TODO Move to collection utilities
1472     /**
1473      * Type-safe get
1474      *
1475      * @param map
1476      * @param key
1477      * @return value
1478      */
get(M map, K key)1479     public static <K, V, M extends Map<K, V>> V get(M map, K key) {
1480         return map.get(key);
1481     }
1482 
1483     /**
1484      * Type-safe contains
1485      *
1486      * @param collection
1487      * @param key
1488      * @return value
1489      */
contains(C collection, K key)1490     public static <K, C extends Collection<K>> boolean contains(C collection, K key) {
1491         return collection.contains(key);
1492     }
1493 
toEnumSet( Class<E> classValue, Collection<String> stringValues)1494     public static <E extends Enum<E>> EnumSet<E> toEnumSet(
1495             Class<E> classValue, Collection<String> stringValues) {
1496         EnumSet<E> result = EnumSet.noneOf(classValue);
1497         for (String s : stringValues) {
1498             result.add(Enum.valueOf(classValue, s));
1499         }
1500         return result;
1501     }
1502 
putNew(M map, K key, V value)1503     public static <K, V, M extends Map<K, V>> M putNew(M map, K key, V value) {
1504         if (!map.containsKey(key)) {
1505             map.put(key, value);
1506         }
1507         return map;
1508     }
1509 
cleanSemiFields(String line)1510     public static String[] cleanSemiFields(String line) {
1511         line = cleanLine(line);
1512         return line.isEmpty() ? null : SEMI_SPLIT.split(line);
1513     }
1514 
cleanLine(String line)1515     private static String cleanLine(String line) {
1516         int comment = line.indexOf("#");
1517         if (comment >= 0) {
1518             line = line.substring(0, comment);
1519         }
1520         if (line.startsWith("\uFEFF")) {
1521             line = line.substring(1);
1522         }
1523         return line.trim();
1524     }
1525 
handleFile(String filename, LineHandler handler)1526     public static void handleFile(String filename, LineHandler handler) throws IOException {
1527         try (BufferedReader in = getUTF8Data(filename); ) {
1528             String line = null;
1529             while ((line = in.readLine()) != null) {
1530                 //                String line = in.readLine();
1531                 //                if (line == null) {
1532                 //                    break;
1533                 //                }
1534                 try {
1535                     if (!handler.handle(line)) {
1536                         if (HANDLEFILE_SHOW_SKIP) {
1537                             System.out.println("Skipping line: " + line);
1538                         }
1539                     }
1540                 } catch (Exception e) {
1541                     throw (RuntimeException)
1542                             new IllegalArgumentException("Problem with line: " + line).initCause(e);
1543                 }
1544             }
1545         }
1546         //        in.close();
1547     }
1548 
ifNull(T x, T y)1549     public static <T> T ifNull(T x, T y) {
1550         return x == null ? y : x;
1551     }
1552 
ifSame(T source, T replaceIfSame, T replacement)1553     public static <T> T ifSame(T source, T replaceIfSame, T replacement) {
1554         return source == replaceIfSame ? replacement : source;
1555     }
1556 
ifEqual(T source, T replaceIfSame, T replacement)1557     public static <T> T ifEqual(T source, T replaceIfSame, T replacement) {
1558         return Objects.equals(source, replaceIfSame) ? replacement : source;
1559     }
1560 
intersect(Set<T> a, Collection<T> b)1561     public static <T> Set<T> intersect(Set<T> a, Collection<T> b) {
1562         Set<T> result = new LinkedHashSet<>(a);
1563         result.retainAll(b);
1564         return result;
1565     }
1566 
subtract(Set<T> a, Collection<T> b)1567     public static <T> Set<T> subtract(Set<T> a, Collection<T> b) {
1568         Set<T> result = new LinkedHashSet<>(a);
1569         result.removeAll(b);
1570         return result;
1571     }
1572 
deepEquals(Object... pairs)1573     public static boolean deepEquals(Object... pairs) {
1574         for (int item = 0; item < pairs.length; ) {
1575             if (!Objects.deepEquals(pairs[item++], pairs[item++])) {
1576                 return false;
1577             }
1578         }
1579         return true;
1580     }
1581 
array(Splitter splitter, String source)1582     public static String[] array(Splitter splitter, String source) {
1583         List<String> list = splitter.splitToList(source);
1584         return list.toArray(new String[list.size()]);
1585     }
1586 
toHex(String in, boolean javaStyle)1587     public static String toHex(String in, boolean javaStyle) {
1588         StringBuilder result = new StringBuilder();
1589         for (int i = 0; i < in.length(); ++i) {
1590             result.append(toHex(in.charAt(i), javaStyle));
1591         }
1592         return result.toString();
1593     }
1594 
toHex(int j, boolean javaStyle)1595     public static String toHex(int j, boolean javaStyle) {
1596         if (j == '\"') {
1597             return "\\\"";
1598         } else if (j == '\\') {
1599             return "\\\\";
1600         } else if (0x20 < j && j < 0x7F) {
1601             return String.valueOf((char) j);
1602         }
1603         final String hexString = Integer.toHexString(j).toUpperCase();
1604         int gap = 4 - hexString.length();
1605         if (gap < 0) {
1606             gap = 0;
1607         }
1608         String prefix = javaStyle ? "\\u" : "U+";
1609         return prefix + "000".substring(0, gap) + hexString;
1610     }
1611 
1612     /**
1613      * get string format for debugging, since Java has a useless display for many items
1614      *
1615      * @param item
1616      * @return
1617      */
toString(Object item)1618     public static String toString(Object item) {
1619         if (item instanceof Object[]) {
1620             return toString(Arrays.asList((Object[]) item));
1621         } else if (item instanceof Entry) {
1622             return toString(((Entry) item).getKey()) + "≔" + toString(((Entry) item).getValue());
1623         } else if (item instanceof Map) {
1624             return "{" + toString(((Map) item).entrySet()) + "}";
1625         } else if (item instanceof Collection) {
1626             List<String> result = new ArrayList<>();
1627             for (Object subitem : (Collection) item) {
1628                 result.add(toString(subitem));
1629             }
1630             return result.toString();
1631         }
1632         return item.toString();
1633     }
1634 
1635     /**
1636      * Return the git hash for the CLDR base directory.
1637      *
1638      * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3"
1639      */
getCldrBaseDirHash()1640     public static String getCldrBaseDirHash() {
1641         final File baseDir = CLDRConfig.getInstance().getCldrBaseDirectory();
1642         return getGitHashForDir(baseDir.toString());
1643     }
1644 
1645     /**
1646      * Return the git hash for a directory.
1647      *
1648      * @param dir the directory name
1649      * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3"
1650      */
getGitHashForDir(String dir)1651     public static final String getGitHashForDir(String dir) {
1652         // Try #1
1653         String hash = getGitHashDirectlyForDir(dir);
1654         if (hash == null) {
1655             // Try #2
1656             hash = getGitHashByRevParseForDir(dir);
1657         }
1658         if (hash == null) {
1659             // return 'unknown'
1660             hash = CLDRURLS.UNKNOWN_REVISION;
1661         }
1662         return hash;
1663     }
1664 
1665     /**
1666      * Attempt to retrieve git hash by digging through .git/HEAD and related files
1667      *
1668      * @param dir
1669      * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3"
1670      */
getGitHashDirectlyForDir(String dir)1671     private static String getGitHashDirectlyForDir(String dir) {
1672         // First, try just reading .git/HEAD
1673         final File gitDir = new File(dir, ".git");
1674         final File headfile = new File(gitDir, "HEAD");
1675         if (headfile.canRead()) {
1676             // Try this first, fallback to git commands
1677             try {
1678                 String s = Files.readString(headfile.toPath());
1679                 if (s != null && !s.isBlank()) {
1680                     s = s.trim();
1681                     if (s.startsWith("ref: ")) {
1682                         s = s.substring(5); // refs/heads/main
1683                         final Path refPath = gitDir.toPath().resolve(s);
1684                         if (refPath.startsWith(gitDir.toPath())) {
1685                             s = Files.readString(refPath);
1686                             if (s != null && !s.isBlank()) {
1687                                 return s.trim();
1688                             }
1689                         } else { // ignore something like refs: ../../../yourfiles
1690                             System.err.println("Ignoring strange git refPath " + refPath);
1691                         }
1692                     } // else, maybe detached head
1693                     return s.trim();
1694                 }
1695             } catch (IOException e) {
1696                 System.err.println(e + ": readString failed for " + headfile);
1697                 e.printStackTrace();
1698             }
1699         }
1700         return null; // not found;
1701     }
1702 
1703     /**
1704      * Attempt to retrieve git hash by calling 'git rev-parse HEAD'
1705      *
1706      * @param dir
1707      * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3"
1708      */
getGitHashByRevParseForDir(String dir)1709     private static String getGitHashByRevParseForDir(String dir) {
1710         final String GIT_HASH_COMMANDS[] = {"git", "rev-parse", "HEAD"};
1711         try {
1712             if (dir == null) {
1713                 return null; // no dir
1714             }
1715             File f = new File(dir);
1716             if (!f.isDirectory()) {
1717                 return null; // does not exist
1718             }
1719             Process p = Runtime.getRuntime().exec(GIT_HASH_COMMANDS, null, f);
1720             if (!p.waitFor(15, TimeUnit.SECONDS)) {
1721                 System.err.println(
1722                         "Git query " + String.join(" ", GIT_HASH_COMMANDS) + " timed out");
1723                 p.destroyForcibly();
1724                 return null;
1725             }
1726             if (p.exitValue() != 0) {
1727                 System.err.println(
1728                         "Error return : "
1729                                 + p.exitValue()
1730                                 + " from "
1731                                 + String.join(" ", GIT_HASH_COMMANDS));
1732                 try (BufferedReader is =
1733                         new BufferedReader(new InputStreamReader(p.getErrorStream()))) {
1734                     String str = is.readLine();
1735                     if (str.length() == 0) {
1736                         throw new Exception("git returned empty");
1737                     }
1738                     System.err.println("git: " + str);
1739                 }
1740                 return null;
1741             }
1742             try (BufferedReader is =
1743                     new BufferedReader(new InputStreamReader(p.getInputStream()))) {
1744                 String str = is.readLine();
1745                 if (str == null || str.length() == 0) {
1746                     throw new Exception("git returned empty");
1747                 }
1748                 return str;
1749             }
1750         } catch (Throwable t) {
1751             // We do not expect this to be called frequently.
1752             System.err.println(
1753                     "While trying to get 'git' hash for " + dir + " : " + t.getMessage());
1754             t.printStackTrace();
1755             return null;
1756         }
1757     }
1758 
1759     /**
1760      * For each string S in the UnicodeSet U, remove S if it U "doesn't need it" for testing
1761      * containsAll. That is, U.containsAll matches the same set of strings with or without S. For
1762      * example [ad{ad}{bcd}{bc}] flattens to [ad{bc}]
1763      *
1764      * @param value, which is modified if it is not freezable
1765      * @return resulting value
1766      */
flatten(UnicodeSet value)1767     public static UnicodeSet flatten(UnicodeSet value) {
1768         Set<String> strings = ImmutableSet.copyOf(value.strings());
1769         HashSet<String> toAdd = new HashSet<>();
1770         if (value.isFrozen()) {
1771             value = new UnicodeSet(value);
1772         }
1773         for (String s : strings) {
1774             value.remove(s);
1775             if (!value.containsAll(s)) {
1776                 toAdd.add(s);
1777             }
1778             value.add(s);
1779         }
1780         value.removeAll(strings);
1781         value.addAll(toAdd);
1782         return value;
1783     }
1784 
removeAll(C fromCollection, Set<String> toRemove)1785     public static <T, C extends Collection<T>> C removeAll(C fromCollection, Set<String> toRemove) {
1786         for (Iterator<T> it = fromCollection.iterator(); it.hasNext(); ) {
1787             T item = it.next();
1788             if (toRemove.contains(item)) {
1789                 it.remove();
1790             }
1791         }
1792         return fromCollection;
1793     }
1794 }
1795