xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ModifyCase.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 /*
2  * ModifyCase.java
3  *
4  * Created on November 29, 2006, 12:53 PM
5  *
6  * To change this template, choose Tools | Template Manager
7  * and open the template in the editor.
8  */
9 
10 package org.unicode.cldr.tool;
11 
import com.ibm.icu.dev.tool.shared.UOption;
import com.ibm.icu.lang.UCharacter;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import org.unicode.cldr.util.LDMLUtilities;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
21 
22 /**
23  * @author pn153353
24  *     <p>class will lower case data specified by an xpath and output the modified data only to a
25  *     destination folder then use CLDRModify to merge this output with the originasl data, thereby
26  *     lower casing the CLDR source
27  *     <p>TODO : handling of multiple xpaths not fully working - where elements have same parents -
28  *     too amny parent elements get written
29  */
30 public class ModifyCase {
31     static final int INDENT = 8;
32     static BufferedWriter m_out;
33 
34     static String[] m_locales; // = {"bg", "cs", "da", "et", "el", "is", "lt", "ro", "sl", "uk"};
35     static String[] m_xpaths; // = {"//ldml/localeDisplayNames/languages/language"};
36     // String xpath = "//ldml/localeDisplayNames/languages/language[@type='to']";
37     static String m_sourceDir; // = "/home/pn153353/pakua/CVS_unicode_latest/cldr/common/main";
38     static String m_destDir; // = "/home/pn153353/CLDR/BUGS/casing_1177/src";
39 
40     /** Creates a new instance of ModifyCase */
ModifyCase()41     public ModifyCase() {}
42 
43     private static final int HELP1 = 0,
44             HELP2 = 1,
45             DESTDIR = 2,
46             LOCALES = 3,
47             SOURCEDIR = 4,
48             XPATHS = 5;
49 
50     private static final UOption[] options = {
51         UOption.HELP_H(),
52         UOption.HELP_QUESTION_MARK(),
53         UOption.create("dest", 'd', UOption.REQUIRES_ARG),
54         UOption.create("locales", 'l', UOption.REQUIRES_ARG),
55         UOption.create("source", 's', UOption.REQUIRES_ARG),
56         UOption.create("xpaths", 'x', UOption.REQUIRES_ARG),
57     };
58 
main(String[] args)59     public static void main(String[] args) {
60         UOption.parseArgs(args, options);
61         if (processArgs() == false) return;
62 
63         for (int i = 0; i < m_locales.length; i++) {
64             System.err.println("Locale : " + m_locales[i]);
65             String srcfile = m_sourceDir + "/" + m_locales[i] + ".xml";
66             String destfile = m_destDir + "/" + m_locales[i] + ".xml";
67             Document doc = LDMLUtilities.parse(srcfile, false);
68             if (doc == null) continue;
69             try {
70                 m_out = new BufferedWriter(new FileWriter(destfile));
71                 openLDML(m_locales[i], doc);
72 
73                 for (int j = 0; j < m_xpaths.length; j++) {
74                     makeLowerCase(doc, m_xpaths[j]);
75                 }
76                 closeLDML();
77             } catch (IOException e) {
78             }
79         }
80     }
81 
usage()82     private static void usage() {
83         System.err.println(
84                 "org.unicode.cldr.tool.ModifyCase allows the casing of the first letter to be changed");
85         System.err.println(
86                 "The output is just the data category which has changed. Run CLDRModify to merge with source");
87         System.err.println(
88                 "-d : specify dest dir (must exist) where resulting modified data is written");
89         System.err.println("-l : specify comma separated list of LDML locales to be changed");
90         System.err.println("-s : specify src dir of LDML data to be modified");
91         System.err.println("-x : specify comma separated list of xpaths to data to be modified");
92         System.err.println(
93                 "Example : ModifyCase -d /dest -s /cldr/comon/main -l bg,en,it,fr -x //ldml/localeDisplayNames/languages/language");
94     }
95 
processArgs()96     private static boolean processArgs() {
97         if (options[HELP1].doesOccur || options[HELP2].doesOccur) {
98             usage();
99             return false;
100         }
101         if (options[DESTDIR].value == null
102                 || options[LOCALES].value == null
103                 || options[SOURCEDIR].value == null
104                 || options[XPATHS].value == null) {
105             usage();
106             return false;
107         }
108 
109         m_destDir = options[DESTDIR].value;
110         m_locales = options[LOCALES].value.split(",");
111         m_sourceDir = options[SOURCEDIR].value;
112         m_xpaths = options[XPATHS].value.split(",");
113         return true;
114     }
115 
openLDML(String locale, Document doc)116     public static void openLDML(String locale, Document doc) {
117         try {
118             m_out.write("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n");
119             m_out.write(
120                     "<!DOCTYPE ldml SYSTEM \"http://www.unicode.org/cldr/dtd/1.5/ldml.dtd\">\n");
121             m_out.write("<ldml>\n");
122             indent(INDENT);
123             m_out.write("<identity>\n");
124             Node n = LDMLUtilities.getNode(doc, "//ldml/identity/version/@number");
125             indent(INDENT * 2);
126             m_out.write("<version number=\"" + LDMLUtilities.getNodeValue(n) + "\"/>\n");
127             n = LDMLUtilities.getNode(doc, "//ldml/identity/generation/@date");
128             indent(INDENT * 2);
129             m_out.write("<generation date=\"" + LDMLUtilities.getNodeValue(n) + "\"/>\n");
130             String parts[] = locale.split("_");
131             indent(INDENT * 2);
132             m_out.write("<language type=\"" + parts[0] + "\"/>\n");
133             if (parts.length > 1) {
134                 indent(INDENT * 2);
135                 m_out.write("<territory type=\"" + parts[1] + "\"/>\n");
136             }
137             indent(INDENT);
138             m_out.write("</identity>\n");
139         } catch (IOException e) {
140         }
141     }
142 
makeLowerCase(Document doc, String xpath)143     public static void makeLowerCase(Document doc, String xpath) {
144         // parse the xpath to write the LDML
145         try {
146             // remove //ldml prefix and split
147             String path = xpath.substring(xpath.indexOf("//ldml") + 7);
148             String parts[] = path.split("/");
149             for (int i = 0; i < parts.length - 1; i++) {
150                 indent(INDENT * (i + 1));
151                 if (addCasingAttribute(parts[i]))
152                     m_out.write("<" + parts[i] + " casing=\"lowercase-words\">\n");
153                 else m_out.write("<" + parts[i] + ">\n");
154             }
155 
156             Node n[] = LDMLUtilities.getNodeListAsArray(doc, xpath);
157             if (n == null) // just changing a single element
158             { // not tested, this may not work !
159                 n = new Node[1];
160                 n[0] = LDMLUtilities.getNode(doc, xpath);
161             }
162 
163             for (int j = 0; j < n.length; j++) {
164                 if (n[j] != null) {
165                     String value = LDMLUtilities.getNodeValue(n[j]);
166                     boolean bUpperFound = false;
167                     for (int k = 1; k < value.length(); k++) // skip first char
168                     {
169                         int c = value.codePointAt(k);
170                         if (UCharacter.isUUppercase(c)) {
171                             bUpperFound = true;
172                             break;
173                         }
174                     }
175                     if (bUpperFound
176                             == true) // don't convert where an upper case is found mid sentence
177                     {
178                         NamedNodeMap map = n[j].getAttributes();
179                         Node langnode = map.getNamedItem("type");
180                         String lang = langnode.getNodeValue();
181                         System.err.println("Skipping conversion of : " + lang + "  " + value);
182                     }
183 
184                     if (bUpperFound
185                             == false) // don't convert where an upper case is found mid sentence
186                     value = value.toLowerCase();
187 
188                     indent(INDENT * parts.length);
189                     m_out.write("<" + parts[parts.length - 1]);
190 
191                     NamedNodeMap map = n[j].getAttributes();
192                     for (int k = 0; k < map.getLength(); k++) {
193                         Node node = map.item(k);
194                         m_out.write(" " + node.getNodeName() + "=\"" + node.getNodeValue() + "\"");
195                     }
196                     m_out.write(">" + value + "</" + parts[parts.length - 1] + ">\n");
197                 }
198             }
199 
200             for (int i = parts.length - 2; i >= 0; i--) {
201                 indent(INDENT * (i + 1));
202                 m_out.write("</" + parts[i] + ">\n");
203             }
204         } catch (IOException e) {
205         }
206 
207         // Factory cldrFactory = Factory.make(sourceDir, ".*");
208         // boolean makeResolved = false;
209         // CLDRFile file = (CLDRFile) cldrFactory.make(locale, makeResolved).cloneAsThawed();
210         // System.err.println ("res = " + file.getStringValue
211         // ("//ldml/localeDisplayNames/languages/language[@type=\"en\"]"));
212 
213     }
214 
closeLDML()215     public static void closeLDML() {
216         try {
217             m_out.write("</ldml>\n");
218             m_out.close();
219         } catch (IOException e) {
220         }
221     }
222 
indent(int n)223     private static void indent(int n) {
224         try {
225             String spaces = "";
226             for (int i = 0; i < n; i++) spaces += " ";
227             m_out.write(spaces);
228         } catch (IOException e) {
229         }
230     }
231 
232     /* checks if the element can have a casing attribute */
addCasingAttribute(String element)233     private static boolean addCasingAttribute(String element) {
234         String[] elements_with_casing_attribute = {
235             "languages",
236             "scripts",
237             "territories",
238             "variants",
239             "keys",
240             "types",
241             "measurementSystemNames",
242             "monthWidth",
243             "dayWidth",
244             "quarterWidth",
245             "long" /* tz */,
246             "fields",
247             "currency"
248         };
249 
250         for (int i = 0; i < elements_with_casing_attribute.length; i++) {
251             if (element.compareTo(elements_with_casing_attribute[i]) == 0) return true;
252         }
253         return false;
254     }
255 }
256