1 /* 2 * ModifyCase.java 3 * 4 * Created on November 29, 2006, 12:53 PM 5 * 6 * To change this template, choose Tools | Template Manager 7 * and open the template in the editor. 8 */ 9 10 package org.unicode.cldr.tool; 11 12 import com.ibm.icu.dev.tool.shared.UOption; 13 import com.ibm.icu.lang.UCharacter; 14 import java.io.BufferedWriter; 15 import java.io.FileWriter; 16 import java.io.IOException; 17 import org.unicode.cldr.util.LDMLUtilities; 18 import org.w3c.dom.Document; 19 import org.w3c.dom.NamedNodeMap; 20 import org.w3c.dom.Node; 21 22 /** 23 * @author pn153353 24 * <p>class will lower case data specified by an xpath and output the modified data only to a 25 * destination folder then use CLDRModify to merge this output with the originasl data, thereby 26 * lower casing the CLDR source 27 * <p>TODO : handling of multiple xpaths not fully working - where elements have same parents - 28 * too amny parent elements get written 29 */ 30 public class ModifyCase { 31 static final int INDENT = 8; 32 static BufferedWriter m_out; 33 34 static String[] m_locales; // = {"bg", "cs", "da", "et", "el", "is", "lt", "ro", "sl", "uk"}; 35 static String[] m_xpaths; // = {"//ldml/localeDisplayNames/languages/language"}; 36 // String xpath = "//ldml/localeDisplayNames/languages/language[@type='to']"; 37 static String m_sourceDir; // = "/home/pn153353/pakua/CVS_unicode_latest/cldr/common/main"; 38 static String m_destDir; // = "/home/pn153353/CLDR/BUGS/casing_1177/src"; 39 40 /** Creates a new instance of ModifyCase */ ModifyCase()41 public ModifyCase() {} 42 43 private static final int HELP1 = 0, 44 HELP2 = 1, 45 DESTDIR = 2, 46 LOCALES = 3, 47 SOURCEDIR = 4, 48 XPATHS = 5; 49 50 private static final UOption[] options = { 51 UOption.HELP_H(), 52 UOption.HELP_QUESTION_MARK(), 53 UOption.create("dest", 'd', UOption.REQUIRES_ARG), 54 UOption.create("locales", 'l', UOption.REQUIRES_ARG), 55 UOption.create("source", 's', UOption.REQUIRES_ARG), 56 UOption.create("xpaths", 'x', UOption.REQUIRES_ARG), 57 }; 58 main(String[] args)59 public static void main(String[] args) { 60 UOption.parseArgs(args, options); 61 if (processArgs() == false) return; 62 63 for (int i = 0; i < m_locales.length; i++) { 64 System.err.println("Locale : " + m_locales[i]); 65 String srcfile = m_sourceDir + "/" + m_locales[i] + ".xml"; 66 String destfile = m_destDir + "/" + m_locales[i] + ".xml"; 67 Document doc = LDMLUtilities.parse(srcfile, false); 68 if (doc == null) continue; 69 try { 70 m_out = new BufferedWriter(new FileWriter(destfile)); 71 openLDML(m_locales[i], doc); 72 73 for (int j = 0; j < m_xpaths.length; j++) { 74 makeLowerCase(doc, m_xpaths[j]); 75 } 76 closeLDML(); 77 } catch (IOException e) { 78 } 79 } 80 } 81 usage()82 private static void usage() { 83 System.err.println( 84 "org.unicode.cldr.tool.ModifyCase allows the casing of the first letter to be changed"); 85 System.err.println( 86 "The output is just the data category which has changed. Run CLDRModify to merge with source"); 87 System.err.println( 88 "-d : specify dest dir (must exist) where resulting modified data is written"); 89 System.err.println("-l : specify comma separated list of LDML locales to be changed"); 90 System.err.println("-s : specify src dir of LDML data to be modified"); 91 System.err.println("-x : specify comma separated list of xpaths to data to be modified"); 92 System.err.println( 93 "Example : ModifyCase -d /dest -s /cldr/comon/main -l bg,en,it,fr -x //ldml/localeDisplayNames/languages/language"); 94 } 95 processArgs()96 private static boolean processArgs() { 97 if (options[HELP1].doesOccur || options[HELP2].doesOccur) { 98 usage(); 99 return false; 100 } 101 if (options[DESTDIR].value == null 102 || options[LOCALES].value == null 103 || options[SOURCEDIR].value == null 104 || options[XPATHS].value == null) { 105 usage(); 106 return false; 107 } 108 109 m_destDir = options[DESTDIR].value; 110 m_locales = options[LOCALES].value.split(","); 111 m_sourceDir = options[SOURCEDIR].value; 112 m_xpaths = options[XPATHS].value.split(","); 113 return true; 114 } 115 openLDML(String locale, Document doc)116 public static void openLDML(String locale, Document doc) { 117 try { 118 m_out.write("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"); 119 m_out.write( 120 "<!DOCTYPE ldml SYSTEM \"http://www.unicode.org/cldr/dtd/1.5/ldml.dtd\">\n"); 121 m_out.write("<ldml>\n"); 122 indent(INDENT); 123 m_out.write("<identity>\n"); 124 Node n = LDMLUtilities.getNode(doc, "//ldml/identity/version/@number"); 125 indent(INDENT * 2); 126 m_out.write("<version number=\"" + LDMLUtilities.getNodeValue(n) + "\"/>\n"); 127 n = LDMLUtilities.getNode(doc, "//ldml/identity/generation/@date"); 128 indent(INDENT * 2); 129 m_out.write("<generation date=\"" + LDMLUtilities.getNodeValue(n) + "\"/>\n"); 130 String parts[] = locale.split("_"); 131 indent(INDENT * 2); 132 m_out.write("<language type=\"" + parts[0] + "\"/>\n"); 133 if (parts.length > 1) { 134 indent(INDENT * 2); 135 m_out.write("<territory type=\"" + parts[1] + "\"/>\n"); 136 } 137 indent(INDENT); 138 m_out.write("</identity>\n"); 139 } catch (IOException e) { 140 } 141 } 142 makeLowerCase(Document doc, String xpath)143 public static void makeLowerCase(Document doc, String xpath) { 144 // parse the xpath to write the LDML 145 try { 146 // remove //ldml prefix and split 147 String path = xpath.substring(xpath.indexOf("//ldml") + 7); 148 String parts[] = path.split("/"); 149 for (int i = 0; i < parts.length - 1; i++) { 150 indent(INDENT * (i + 1)); 151 if (addCasingAttribute(parts[i])) 152 m_out.write("<" + parts[i] + " casing=\"lowercase-words\">\n"); 153 else m_out.write("<" + parts[i] + ">\n"); 154 } 155 156 Node n[] = LDMLUtilities.getNodeListAsArray(doc, xpath); 157 if (n == null) // just changing a single element 158 { // not tested, this may not work ! 159 n = new Node[1]; 160 n[0] = LDMLUtilities.getNode(doc, xpath); 161 } 162 163 for (int j = 0; j < n.length; j++) { 164 if (n[j] != null) { 165 String value = LDMLUtilities.getNodeValue(n[j]); 166 boolean bUpperFound = false; 167 for (int k = 1; k < value.length(); k++) // skip first char 168 { 169 int c = value.codePointAt(k); 170 if (UCharacter.isUUppercase(c)) { 171 bUpperFound = true; 172 break; 173 } 174 } 175 if (bUpperFound 176 == true) // don't convert where an upper case is found mid sentence 177 { 178 NamedNodeMap map = n[j].getAttributes(); 179 Node langnode = map.getNamedItem("type"); 180 String lang = langnode.getNodeValue(); 181 System.err.println("Skipping conversion of : " + lang + " " + value); 182 } 183 184 if (bUpperFound 185 == false) // don't convert where an upper case is found mid sentence 186 value = value.toLowerCase(); 187 188 indent(INDENT * parts.length); 189 m_out.write("<" + parts[parts.length - 1]); 190 191 NamedNodeMap map = n[j].getAttributes(); 192 for (int k = 0; k < map.getLength(); k++) { 193 Node node = map.item(k); 194 m_out.write(" " + node.getNodeName() + "=\"" + node.getNodeValue() + "\""); 195 } 196 m_out.write(">" + value + "</" + parts[parts.length - 1] + ">\n"); 197 } 198 } 199 200 for (int i = parts.length - 2; i >= 0; i--) { 201 indent(INDENT * (i + 1)); 202 m_out.write("</" + parts[i] + ">\n"); 203 } 204 } catch (IOException e) { 205 } 206 207 // Factory cldrFactory = Factory.make(sourceDir, ".*"); 208 // boolean makeResolved = false; 209 // CLDRFile file = (CLDRFile) cldrFactory.make(locale, makeResolved).cloneAsThawed(); 210 // System.err.println ("res = " + file.getStringValue 211 // ("//ldml/localeDisplayNames/languages/language[@type=\"en\"]")); 212 213 } 214 closeLDML()215 public static void closeLDML() { 216 try { 217 m_out.write("</ldml>\n"); 218 m_out.close(); 219 } catch (IOException e) { 220 } 221 } 222 indent(int n)223 private static void indent(int n) { 224 try { 225 String spaces = ""; 226 for (int i = 0; i < n; i++) spaces += " "; 227 m_out.write(spaces); 228 } catch (IOException e) { 229 } 230 } 231 232 /* checks if the element can have a casing attribute */ addCasingAttribute(String element)233 private static boolean addCasingAttribute(String element) { 234 String[] elements_with_casing_attribute = { 235 "languages", 236 "scripts", 237 "territories", 238 "variants", 239 "keys", 240 "types", 241 "measurementSystemNames", 242 "monthWidth", 243 "dayWidth", 244 "quarterWidth", 245 "long" /* tz */, 246 "fields", 247 "currency" 248 }; 249 250 for (int i = 0; i < elements_with_casing_attribute.length; i++) { 251 if (element.compareTo(elements_with_casing_attribute[i]) == 0) return true; 252 } 253 return false; 254 } 255 } 256