xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/draft/UnicodeMapBuilder.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.draft;
2 
3 import com.ibm.icu.dev.util.UnicodeMap;
4 import com.ibm.icu.text.UnicodeSet;
5 import com.ibm.icu.util.ICUUncheckedIOException;
6 import java.io.BufferedReader;
7 import java.io.IOException;
8 import java.util.regex.Pattern;
9 import org.unicode.cldr.util.PatternCache;
10 
11 public class UnicodeMapBuilder<T> {
12     public enum Leniency {
13         allowChars,
14         allowUnicodeSet
15     }
16 
17     UnicodeMap<T> result;
18     Parser<T, String> parser;
19     Leniency leniency;
20     Pattern semi = PatternCache.get("\\s+;\\s+");
21 
22     // Matcher semi = PatternCache.get("\\s+;\\s+").matcher("");
23 
UnicodeMapBuilder()24     public UnicodeMapBuilder() {}
25 
setParser(Parser<T, String> parser)26     public UnicodeMapBuilder<T> setParser(Parser<T, String> parser) {
27         this.parser = parser;
28         return this;
29     }
30 
getParser()31     public Parser<T, String> getParser() {
32         return parser;
33     }
34 
getLeniency()35     public Leniency getLeniency() {
36         return leniency;
37     }
38 
setLeniency(Leniency leniency)39     public UnicodeMapBuilder<T> setLeniency(Leniency leniency) {
40         this.leniency = leniency;
41         return this;
42     }
43 
get()44     public UnicodeMap<T> get() {
45         return result;
46     }
47 
getFrozen()48     public UnicodeMap<T> getFrozen() {
49         UnicodeMap<T> myResult = result.freeze();
50         result = null;
51         return myResult;
52     }
53 
putFromLines(BufferedReader br)54     public UnicodeMapBuilder<T> putFromLines(BufferedReader br) {
55         if (result == null) {
56             result = new UnicodeMap<>();
57         }
58         UnicodeSet sources = new UnicodeSet();
59         String line = null;
60         try {
61             while (true) {
62                 line = readDataLine(br, null);
63                 if (line == null) {
64                     break;
65                 }
66                 if (line.length() == 0) {
67                     continue;
68                 }
69                 sources.clear();
70                 final String[] pieces = semi.split(line);
71                 if (pieces.length < 2) {
72                     throw new IllegalArgumentException("Line must be of form code ; value");
73                 }
74                 final String codelist = pieces[0].trim();
75                 final String valueString = pieces[1].trim();
76                 if (UnicodeSet.resemblesPattern(pieces[0], 0)) {
77                     sources = new UnicodeSet(codelist);
78                 } else if (codelist.length() < 4) {
79                     sources.add(codelist);
80                 } else {
81                     final String[] codes = codelist.split("\\s+");
82                     for (int i = 0; i < codes.length; ++i) {
83                         final String[] range = codes[i].split("\\.\\.");
84                         final int start = getCodePoint(range[0]);
85                         int end = start;
86                         if (range.length > 1) {
87                             if (range.length > 2) {
88                                 throw new IllegalArgumentException("Too many ..");
89                             }
90                             end = getCodePoint(range[1]);
91                             if (start >= end) {
92                                 throw new IllegalArgumentException("Range out of order");
93                             }
94                         }
95                         sources.add(start, end);
96                     }
97                 }
98                 T value = parser == null ? (T) valueString : parser.parseObject(valueString);
99                 result.putAll(sources, value);
100             }
101             br.close();
102         } catch (final Exception e) {
103             throw (RuntimeException) new RuntimeException("Failure on line " + line).initCause(e);
104         }
105         return this;
106     }
107 
getCodePoint(String source)108     private int getCodePoint(String source) {
109         if (source.startsWith("U+") || source.startsWith("\\u") || source.startsWith("\\U")) {
110             source = source.substring(2);
111         }
112         return Integer.parseInt(source, 16);
113     }
114 
readDataLine(BufferedReader br, int[] count)115     public static String readDataLine(BufferedReader br, int[] count) throws IOException {
116         String originalLine = "";
117         String line = "";
118 
119         try {
120             line = originalLine = br.readLine();
121             if (line == null) {
122                 return null;
123             }
124             if (count != null) {
125                 ++count[0];
126             }
127             if (line.length() > 0 && line.charAt(0) == 0xFEFF) {
128                 line = line.substring(1);
129             }
130             final int commentPos = line.indexOf('#');
131             if (commentPos >= 0) {
132                 line = line.substring(0, commentPos);
133             }
134             line = line.trim();
135         } catch (final Exception e) {
136             throw new ICUUncheckedIOException(
137                     "Line \"{" + originalLine + "}\",  \"{" + line + "}\"", e);
138         }
139         return line;
140     }
141 }
142