package org.unicode.cldr.draft;

import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ICUUncheckedIOException;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.regex.Pattern;
import org.unicode.cldr.util.PatternCache;

/**
 * Builds a {@link UnicodeMap} from text lines of the form {@code code ; value}.
 *
 * <p>The code part of each line may be a UnicodeSet pattern, a short literal string (fewer than
 * four chars), or a whitespace-separated list of hex code points and {@code start..end} ranges.
 * The value part is converted with the configured {@link Parser}, or used as-is when no parser
 * has been set.
 *
 * @param <T> type of the values stored in the resulting map
 */
public class UnicodeMapBuilder<T> {
    /**
     * Leniency setting carried by the builder. It is stored and returned by the accessors, but no
     * method in this class consults it.
     */
    public enum Leniency {
        allowChars,
        allowUnicodeSet
    }

    /** Map under construction; {@code null} until the first call to {@link #putFromLines}. */
    UnicodeMap<T> result;

    /** Converts the value text of each line; when {@code null} the raw text is used as the value. */
    Parser<T, String> parser;

    Leniency leniency;

    /** Field separator: a semicolon with at least one whitespace character on each side. */
    Pattern semi = PatternCache.get("\\s+;\\s+");

    public UnicodeMapBuilder() {}

    /**
     * Sets the parser used to convert value strings.
     *
     * @param parser the parser, or {@code null} to store the raw value text
     * @return this builder, for chaining
     */
    public UnicodeMapBuilder<T> setParser(Parser<T, String> parser) {
        this.parser = parser;
        return this;
    }

    /** Returns the parser currently in use, or {@code null} if none has been set. */
    public Parser<T, String> getParser() {
        return parser;
    }

    /** Returns the leniency setting, or {@code null} if none has been set. */
    public Leniency getLeniency() {
        return leniency;
    }

    /**
     * Sets the leniency setting.
     *
     * @param leniency the new setting
     * @return this builder, for chaining
     */
    public UnicodeMapBuilder<T> setLeniency(Leniency leniency) {
        this.leniency = leniency;
        return this;
    }

    /**
     * Returns the map built so far, without freezing it.
     *
     * @return the live map, or {@code null} if {@link #putFromLines} has not been called since
     *     construction or since the last {@link #getFrozen()}
     */
    public UnicodeMap<T> get() {
        return result;
    }

    /**
     * Freezes the map built so far, hands it over, and resets this builder so that the next {@link
     * #putFromLines} starts a fresh map.
     *
     * @return the frozen map
     * @throws NullPointerException if nothing has been built yet
     */
    public UnicodeMap<T> getFrozen() {
        final UnicodeMap<T> frozen = result.freeze();
        result = null;
        return frozen;
    }

    /**
     * Reads every data line from {@code br} and adds its mappings to the map under construction.
     *
     * <p>Blank lines and lines that contain only a comment are skipped. Fields beyond the second
     * are ignored. The reader is closed when this method returns, whether it succeeds or fails.
     *
     * @param br source of the lines; closed by this method
     * @return this builder, for chaining
     * @throws RuntimeException wrapping the underlying cause if a line cannot be read or parsed;
     *     the message names the offending line
     */
    public UnicodeMapBuilder<T> putFromLines(BufferedReader br) {
        if (result == null) {
            result = new UnicodeMap<>();
        }
        String line = null;
        // try-with-resources closes the reader on the failure path as well as on success.
        try (BufferedReader reader = br) {
            while ((line = readDataLine(reader, null)) != null) {
                if (line.isEmpty()) {
                    continue;
                }
                final String[] pieces = semi.split(line);
                if (pieces.length < 2) {
                    throw new IllegalArgumentException("Line must be of form code ; value");
                }
                final String codelist = pieces[0].trim();
                final String valueString = pieces[1].trim();
                final UnicodeSet sources = parseSources(codelist);
                // Without a parser the caller is asserting that T is String.
                @SuppressWarnings("unchecked")
                final T value =
                        parser == null ? (T) valueString : parser.parseObject(valueString);
                result.putAll(sources, value);
            }
        } catch (final Exception e) {
            throw new RuntimeException("Failure on line " + line, e);
        }
        return this;
    }

    /**
     * Converts the code field of a line into the set of sources it denotes.
     *
     * @param codelist trimmed text before the first separator
     * @return a new set holding the parsed sources
     * @throws IllegalArgumentException if a range has more than one {@code ..} or its end is not
     *     greater than its start
     */
    private UnicodeSet parseSources(String codelist) {
        if (UnicodeSet.resemblesPattern(codelist, 0)) {
            return new UnicodeSet(codelist);
        }
        final UnicodeSet sources = new UnicodeSet();
        if (codelist.length() < 4) {
            // Too short to be a hex code point: take the text literally.
            sources.add(codelist);
            return sources;
        }
        for (final String code : codelist.split("\\s+")) {
            final String[] range = code.split("\\.\\.");
            if (range.length > 2) {
                throw new IllegalArgumentException("Too many ..");
            }
            final int start = getCodePoint(range[0]);
            int end = start;
            if (range.length == 2) {
                end = getCodePoint(range[1]);
                if (start >= end) {
                    throw new IllegalArgumentException("Range out of order");
                }
            }
            sources.add(start, end);
        }
        return sources;
    }

    /**
     * Parses a hexadecimal code point, ignoring an optional {@code U+}, {@code \}{@code u} or
     * {@code \}{@code U} prefix.
     *
     * @param source hex digits, optionally prefixed
     * @return the parsed value
     * @throws NumberFormatException if the remaining text is not valid hexadecimal
     */
    private int getCodePoint(String source) {
        if (source.startsWith("U+") || source.startsWith("\\u") || source.startsWith("\\U")) {
            source = source.substring(2);
        }
        return Integer.parseInt(source, 16);
    }

    /**
     * Reads the next line and reduces it to its data content: a leading U+FEFF byte-order mark is
     * dropped, everything from the first {@code #} onward is removed, and the remainder is
     * trimmed.
     *
     * @param br reader to pull the line from
     * @param count if non-null, {@code count[0]} is incremented for each line read
     * @return the cleaned line (possibly empty), or {@code null} at end of input
     * @throws ICUUncheckedIOException wrapping any exception raised while reading or cleaning the
     *     line, including an {@link IOException} from the reader
     * @throws IOException declared for callers; this implementation wraps it rather than letting
     *     it propagate
     */
    public static String readDataLine(BufferedReader br, int[] count) throws IOException {
        String originalLine = "";
        String line = "";

        try {
            line = originalLine = br.readLine();
            if (line == null) {
                return null;
            }
            if (count != null) {
                ++count[0];
            }
            if (line.length() > 0 && line.charAt(0) == 0xFEFF) {
                line = line.substring(1);
            }
            final int commentPos = line.indexOf('#');
            if (commentPos >= 0) {
                line = line.substring(0, commentPos);
            }
            line = line.trim();
        } catch (final Exception e) {
            throw new ICUUncheckedIOException(
                    "Line \"{" + originalLine + "}\", \"{" + line + "}\"", e);
        }
        return line;
    }
}