xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/XMLFileReader.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2004, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.util;
10 
11 import com.google.common.base.Function;
12 import com.ibm.icu.util.ICUException;
13 import com.ibm.icu.util.ICUUncheckedIOException;
14 import java.io.File;
15 import java.io.FileInputStream;
16 import java.io.IOException;
17 import java.io.InputStream;
18 import java.io.Reader;
19 import java.util.ArrayList;
20 import java.util.List;
21 import java.util.Stack;
22 import org.xml.sax.Attributes;
23 import org.xml.sax.ContentHandler;
24 import org.xml.sax.ErrorHandler;
25 import org.xml.sax.InputSource;
26 import org.xml.sax.Locator;
27 import org.xml.sax.SAXException;
28 import org.xml.sax.SAXNotRecognizedException;
29 import org.xml.sax.SAXNotSupportedException;
30 import org.xml.sax.SAXParseException;
31 import org.xml.sax.XMLReader;
32 import org.xml.sax.ext.DeclHandler;
33 import org.xml.sax.ext.LexicalHandler;
34 import org.xml.sax.helpers.XMLReaderFactory;
35 
36 /**
37  * Convenience class to make reading XML data files easier. The main method is read(); This is meant
38  * for XML data files, so the contents of elements must either be all other elements, or just text.
39  * It is thus not suitable for XML files with MIXED content; all text content in a mixed element is
40  * discarded.
41  *
42  * @author davis
43  */
44 public class XMLFileReader {
45     static final boolean SHOW_ALL = false;
46     /** Handlers to use in read() */
47     public static int CONTENT_HANDLER = 1,
48             ERROR_HANDLER = 2,
49             LEXICAL_HANDLER = 4,
50             DECLARATION_HANDLER = 8;
51 
52     private MyContentHandler DEFAULT_DECLHANDLER = new MyContentHandler();
53     // TODO Add way to skip gathering value contents
54     // private ElementOnlyContentHandler ELEMENT_ONLY_DECLHANDLER = new ElementOnlyContentHandler();
55     private SimpleHandler simpleHandler;
56 
57     public static class SimpleHandler {
58         /**
59          * called when every new element is encountered, with the full path to the element
60          * (including attributes). Called on leaf and non-leaf elements.
61          *
62          * @param path
63          */
handleElement(CharSequence path)64         public void handleElement(CharSequence path) {}
65 
66         /** Called with an "xpath" of each leaf element */
handlePathValue(String path, String value)67         public void handlePathValue(String path, String value) {}
68 
handleComment(String path, String comment)69         public void handleComment(String path, String comment) {}
70 
handleElementDecl(String name, String model)71         public void handleElementDecl(String name, String model) {}
72 
handleAttributeDecl( String eName, String aName, String type, String mode, String value)73         public void handleAttributeDecl(
74                 String eName, String aName, String type, String mode, String value) {}
75 
handleEndDtd()76         public void handleEndDtd() {}
77 
handleStartDtd(String name, String publicId, String systemId)78         public void handleStartDtd(String name, String publicId, String systemId) {}
79     }
80 
setHandler(SimpleHandler simpleHandler)81     public XMLFileReader setHandler(SimpleHandler simpleHandler) {
82         this.simpleHandler = simpleHandler;
83         return this;
84     }
85 
86     /**
87      * Read an XML file. The order of the elements matches what was in the file.
88      *
89      * @param fileName file to open
90      * @param handlers a set of values for the handlers to use, eg CONTENT_HANDLER | ERROR_HANDLER
91      * @param validating if a validating parse is requested
92      * @return list of alternating values.
93      */
read(String fileName, int handlers, boolean validating)94     public XMLFileReader read(String fileName, int handlers, boolean validating) {
95         try (InputStream fis = new FileInputStream(fileName); ) {
96             return read(fileName, new InputSource(fis), handlers, validating);
97         } catch (IOException e) {
98             File full = new File(fileName);
99             String fullName = fileName;
100             try {
101                 fullName = full.getCanonicalPath();
102             } catch (Exception IOException) {
103             }
104             throw (IllegalArgumentException)
105                     new IllegalArgumentException("Can't read " + fullName).initCause(e);
106         }
107     }
108 
109     /**
110      * read from a CLDR resource
111      *
112      * @param fileName
113      * @param handlers
114      * @param validating
115      * @param fis
116      * @see CldrUtility#getInputStream(String)
117      * @return
118      */
readCLDRResource(String resName, int handlers, boolean validating)119     public XMLFileReader readCLDRResource(String resName, int handlers, boolean validating) {
120         try (InputStream inputStream = CldrUtility.getInputStream(resName)) {
121             return read(resName, new InputSource(inputStream), handlers, validating);
122         } catch (IOException e) {
123             throw new ICUUncheckedIOException(e);
124         }
125     }
126 
127     /**
128      * read from an arbitrary
129      *
130      * @param fileName
131      * @param handlers
132      * @param validating
133      * @param fis
134      * @see CldrUtility#getInputStream(String)
135      * @return
136      */
read( String resName, Class<?> callingClass, int handlers, boolean validating)137     public XMLFileReader read(
138             String resName, Class<?> callingClass, int handlers, boolean validating) {
139         try (InputStream inputStream = CldrUtility.getInputStream(callingClass, resName)) {
140             return read(resName, new InputSource(inputStream), handlers, validating);
141         } catch (IOException e) {
142             throw new ICUUncheckedIOException(e);
143         }
144     }
145 
read(String systemID, Reader reader, int handlers, boolean validating)146     public XMLFileReader read(String systemID, Reader reader, int handlers, boolean validating) {
147         read(systemID, reader, handlers, validating, DEFAULT_DECLHANDLER.reset());
148         return this;
149     }
150 
read( String systemID, InputSource insrc, int handlers, boolean validating)151     public XMLFileReader read(
152             String systemID, InputSource insrc, int handlers, boolean validating) {
153         read(systemID, insrc, handlers, validating, DEFAULT_DECLHANDLER.reset());
154         return this;
155     }
156 
read( String systemID, InputStream instr, int handlers, boolean validating, AllHandler allHandler)157     public static void read(
158             String systemID,
159             InputStream instr,
160             int handlers,
161             boolean validating,
162             AllHandler allHandler) {
163         InputSource is = new InputSource(instr);
164         read(systemID, is, handlers, validating, allHandler);
165     }
166 
read( String systemID, Reader reader, int handlers, boolean validating, AllHandler allHandler)167     public static void read(
168             String systemID,
169             Reader reader,
170             int handlers,
171             boolean validating,
172             AllHandler allHandler) {
173         InputSource is = new InputSource(reader);
174         read(systemID, is, handlers, validating, allHandler);
175     }
176 
read( String systemID, InputSource is, int handlers, boolean validating, AllHandler allHandler)177     public static void read(
178             String systemID,
179             InputSource is,
180             int handlers,
181             boolean validating,
182             AllHandler allHandler) {
183         try {
184             XMLReader xmlReader = createXMLReader(handlers, validating, allHandler);
185             // wrap the reader to insert a character stream
186             DoctypeXmlStreamWrapper.wrap(is);
187             is.setSystemId(systemID);
188             try {
189                 xmlReader.parse(is);
190             } catch (AbortException e) {
191             } // ok
192         } catch (SAXParseException e) {
193             throw (IllegalArgumentException)
194                     new IllegalArgumentException(
195                                     "Can't read " + systemID + "\tline:\t" + e.getLineNumber())
196                             .initCause(e);
197         } catch (SAXException | IOException e) {
198             throw (IllegalArgumentException)
199                     new IllegalArgumentException("Can't read " + systemID).initCause(e);
200         }
201     }
202 
createXMLReader( int handlers, boolean validating, AllHandler allHandler)203     public static final XMLReader createXMLReader(
204             int handlers, boolean validating, AllHandler allHandler)
205             throws SAXNotRecognizedException, SAXNotSupportedException {
206         XMLReader xmlReader = createXMLReader(validating);
207         if ((handlers & CONTENT_HANDLER) != 0) {
208             xmlReader.setContentHandler(allHandler);
209         }
210         if ((handlers & ERROR_HANDLER) != 0) {
211             xmlReader.setErrorHandler(allHandler);
212         }
213         if ((handlers & LEXICAL_HANDLER) != 0) {
214             xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", allHandler);
215         }
216         if ((handlers & DECLARATION_HANDLER) != 0) {
217             xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", allHandler);
218         }
219         return xmlReader;
220     }
221 
222     public interface AllHandler extends ContentHandler, LexicalHandler, DeclHandler, ErrorHandler {}
223 
224     /** Basis for handlers that provides for logging, with no actions on methods */
225     public static class LoggingHandler implements AllHandler {
226         @Override
startDocument()227         public void startDocument() throws SAXException {
228             if (SHOW_ALL) Log.logln("startDocument");
229         }
230 
231         @Override
characters(char[] ch, int start, int length)232         public void characters(char[] ch, int start, int length) throws SAXException {
233             if (SHOW_ALL) Log.logln("characters");
234         }
235 
236         @Override
startElement( String namespaceURI, String localName, String qName, Attributes atts)237         public void startElement(
238                 String namespaceURI, String localName, String qName, Attributes atts)
239                 throws SAXException {
240             if (SHOW_ALL) Log.logln("startElement");
241         }
242 
243         @Override
endElement(String namespaceURI, String localName, String qName)244         public void endElement(String namespaceURI, String localName, String qName)
245                 throws SAXException {
246             if (SHOW_ALL) Log.logln("endElement");
247         }
248 
249         @Override
startDTD(String name, String publicId, String systemId)250         public void startDTD(String name, String publicId, String systemId) throws SAXException {
251             if (SHOW_ALL) Log.logln("startDTD");
252         }
253 
254         @Override
endDTD()255         public void endDTD() throws SAXException {
256             if (SHOW_ALL) Log.logln("endDTD");
257         }
258 
259         @Override
comment(char[] ch, int start, int length)260         public void comment(char[] ch, int start, int length) throws SAXException {
261             if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length));
262         }
263 
264         @Override
elementDecl(String name, String model)265         public void elementDecl(String name, String model) throws SAXException {
266             if (SHOW_ALL) Log.logln("elementDecl");
267         }
268 
269         @Override
attributeDecl( String eName, String aName, String type, String mode, String value)270         public void attributeDecl(
271                 String eName, String aName, String type, String mode, String value)
272                 throws SAXException {
273             if (SHOW_ALL) Log.logln("attributeDecl");
274         }
275 
276         @Override
ignorableWhitespace(char[] ch, int start, int length)277         public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
278             if (SHOW_ALL) Log.logln("ignorableWhitespace length: " + length);
279         }
280 
281         @Override
endDocument()282         public void endDocument() throws SAXException {
283             if (SHOW_ALL) Log.logln("endDocument");
284         }
285 
286         @Override
internalEntityDecl(String name, String value)287         public void internalEntityDecl(String name, String value) throws SAXException {
288             if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + value);
289         }
290 
291         @Override
externalEntityDecl(String name, String publicId, String systemId)292         public void externalEntityDecl(String name, String publicId, String systemId)
293                 throws SAXException {
294             if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + publicId + "\t" + systemId);
295         }
296 
notationDecl(String name, String publicId, String systemId)297         public void notationDecl(String name, String publicId, String systemId) {
298             if (SHOW_ALL) Log.logln("notationDecl: " + name + ", " + publicId + ", " + systemId);
299         }
300 
301         @Override
processingInstruction(String target, String data)302         public void processingInstruction(String target, String data) throws SAXException {
303             if (SHOW_ALL) Log.logln("processingInstruction: " + target + ", " + data);
304         }
305 
306         @Override
skippedEntity(String name)307         public void skippedEntity(String name) throws SAXException {
308             if (SHOW_ALL) Log.logln("skippedEntity: " + name);
309         }
310 
unparsedEntityDecl( String name, String publicId, String systemId, String notationName)311         public void unparsedEntityDecl(
312                 String name, String publicId, String systemId, String notationName) {
313             if (SHOW_ALL)
314                 Log.logln(
315                         "unparsedEntityDecl: "
316                                 + name
317                                 + ", "
318                                 + publicId
319                                 + ", "
320                                 + systemId
321                                 + ", "
322                                 + notationName);
323         }
324 
325         @Override
setDocumentLocator(Locator locator)326         public void setDocumentLocator(Locator locator) {
327             if (SHOW_ALL) Log.logln("setDocumentLocator Locator " + locator);
328         }
329 
330         @Override
startPrefixMapping(String prefix, String uri)331         public void startPrefixMapping(String prefix, String uri) throws SAXException {
332             if (SHOW_ALL) Log.logln("startPrefixMapping prefix: " + prefix + ", uri: " + uri);
333         }
334 
335         @Override
endPrefixMapping(String prefix)336         public void endPrefixMapping(String prefix) throws SAXException {
337             if (SHOW_ALL) Log.logln("endPrefixMapping prefix: " + prefix);
338         }
339 
340         @Override
startEntity(String name)341         public void startEntity(String name) throws SAXException {
342             if (SHOW_ALL) Log.logln("startEntity name: " + name);
343         }
344 
345         @Override
endEntity(String name)346         public void endEntity(String name) throws SAXException {
347             if (SHOW_ALL) Log.logln("endEntity name: " + name);
348         }
349 
350         @Override
startCDATA()351         public void startCDATA() throws SAXException {
352             if (SHOW_ALL) Log.logln("startCDATA");
353         }
354 
355         @Override
endCDATA()356         public void endCDATA() throws SAXException {
357             if (SHOW_ALL) Log.logln("endCDATA");
358         }
359 
360         /*
361          * (non-Javadoc)
362          *
363          * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
364          */
365         @Override
error(SAXParseException exception)366         public void error(SAXParseException exception) throws SAXException {
367             if (SHOW_ALL) Log.logln("error: " + showSAX(exception));
368             throw exception;
369         }
370 
371         /*
372          * (non-Javadoc)
373          *
374          * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
375          */
376         @Override
fatalError(SAXParseException exception)377         public void fatalError(SAXParseException exception) throws SAXException {
378             if (SHOW_ALL) Log.logln("fatalError: " + showSAX(exception));
379             throw exception;
380         }
381 
382         /*
383          * (non-Javadoc)
384          *
385          * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
386          */
387         @Override
warning(SAXParseException exception)388         public void warning(SAXParseException exception) throws SAXException {
389             if (SHOW_ALL) Log.logln("warning: " + showSAX(exception));
390             throw exception;
391         }
392     }
393 
394     public class MyContentHandler extends LoggingHandler {
395         StringBuffer chars = new StringBuffer();
396         StringBuffer commentChars = new StringBuffer();
397         Stack<String> startElements = new Stack<>();
398         StringBuffer tempPath = new StringBuffer();
399         boolean lastIsStart = false;
400 
reset()401         public MyContentHandler reset() {
402             chars.setLength(0);
403             tempPath = new StringBuffer("/");
404             startElements.clear();
405             startElements.push("/");
406             return this;
407         }
408 
409         @Override
characters(char[] ch, int start, int length)410         public void characters(char[] ch, int start, int length) throws SAXException {
411             if (lastIsStart) chars.append(ch, start, length);
412         }
413 
414         @Override
startElement( String namespaceURI, String localName, String qName, Attributes atts)415         public void startElement(
416                 String namespaceURI, String localName, String qName, Attributes atts)
417                 throws SAXException {
418             tempPath.setLength(0);
419             tempPath.append(startElements.peek()).append('/').append(qName);
420             for (int i = 0; i < atts.getLength(); ++i) {
421                 tempPath.append("[@")
422                         .append(atts.getQName(i))
423                         .append("=\"")
424                         .append(atts.getValue(i).replace('"', '\''))
425                         .append("\"]");
426             }
427             startElements.push(tempPath.toString());
428             chars.setLength(0); // clear garbage
429             lastIsStart = true;
430             simpleHandler.handleElement(tempPath);
431         }
432 
433         @Override
endElement(String namespaceURI, String localName, String qName)434         public void endElement(String namespaceURI, String localName, String qName)
435                 throws SAXException {
436             String startElement = startElements.pop();
437             if (lastIsStart) {
438                 // System.out.println(startElement + ":" + chars);
439                 simpleHandler.handlePathValue(startElement, chars.toString());
440             }
441             chars.setLength(0);
442             lastIsStart = false;
443         }
444 
445         @Override
startDTD(String name, String publicId, String systemId)446         public void startDTD(String name, String publicId, String systemId) throws SAXException {
447             if (SHOW_ALL)
448                 Log.logln(
449                         "startDTD name: "
450                                 + name
451                                 + ", publicId: "
452                                 + publicId
453                                 + ", systemId: "
454                                 + systemId);
455             simpleHandler.handleStartDtd(name, publicId, systemId);
456         }
457 
458         @Override
endDTD()459         public void endDTD() throws SAXException {
460             if (SHOW_ALL) Log.logln("endDTD");
461             simpleHandler.handleEndDtd();
462         }
463 
464         @Override
comment(char[] ch, int start, int length)465         public void comment(char[] ch, int start, int length) throws SAXException {
466             if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length));
467             commentChars.append(ch, start, length);
468             simpleHandler.handleComment(startElements.peek(), commentChars.toString());
469             commentChars.setLength(0);
470         }
471 
472         @Override
elementDecl(String name, String model)473         public void elementDecl(String name, String model) throws SAXException {
474             simpleHandler.handleElementDecl(name, model);
475         }
476 
477         @Override
attributeDecl( String eName, String aName, String type, String mode, String value)478         public void attributeDecl(
479                 String eName, String aName, String type, String mode, String value)
480                 throws SAXException {
481             simpleHandler.handleAttributeDecl(eName, aName, type, mode, value);
482         }
483     }
484 
485     static final class AbortException extends RuntimeException {
486         private static final long serialVersionUID = 1L;
487     }
488 
489     /** Show a SAX exception in a readable form. */
showSAX(SAXParseException exception)490     public static String showSAX(SAXParseException exception) {
491         return exception.getMessage()
492                 + ";\t SystemID: "
493                 + exception.getSystemId()
494                 + ";\t PublicID: "
495                 + exception.getPublicId()
496                 + ";\t LineNumber: "
497                 + exception.getLineNumber()
498                 + ";\t ColumnNumber: "
499                 + exception.getColumnNumber();
500     }
501 
createXMLReader(boolean validating)502     public static XMLReader createXMLReader(boolean validating) {
503         // weiv 07/20/2007: The laundry list below is somewhat obsolete
504         // I have moved the system's default parser (instantiated when "" is
505         // passed) to the top, so that we will always use that. I have also
506         // removed "org.apache.crimson.parser.XMLReaderImpl" as this one gets
507         // confused regarding UTF-8 encoding name.
508         String[] testList = {
509             System.getProperty("CLDR_DEFAULT_SAX_PARSER", ""), // defaults to "", system default.
510             "org.apache.xerces.parsers.SAXParser",
511             "gnu.xml.aelfred2.XmlReader",
512             "com.bluecast.xml.Piccolo",
513             "oracle.xml.parser.v2.SAXParser"
514         };
515         XMLReader result = null;
516         for (int i = 0; i < testList.length; ++i) {
517             try {
518                 result =
519                         (testList[i].length() != 0)
520                                 ? XMLReaderFactory.createXMLReader(testList[i])
521                                 : XMLReaderFactory.createXMLReader();
522                 result.setFeature("http://xml.org/sax/features/validation", validating);
523                 break;
524             } catch (SAXException e1) {
525             }
526         }
527         if (result == null)
528             throw new NoClassDefFoundError(
529                     "No SAX parser is available, or unable to set validation correctly");
530         return result;
531     }
532 
533     static final class DebuggingInputStream extends InputStream {
534         InputStream contents;
535 
536         @Override
close()537         public void close() throws IOException {
538             contents.close();
539         }
540 
DebuggingInputStream(InputStream fis)541         public DebuggingInputStream(InputStream fis) {
542             contents = fis;
543         }
544 
545         @Override
read()546         public int read() throws IOException {
547             int x = contents.read();
548             System.out.println(Integer.toHexString(x) + ",");
549             return x;
550         }
551     }
552 
loadPathValues( String filename, List<Pair<String, String>> data, boolean validating)553     public static List<Pair<String, String>> loadPathValues(
554             String filename, List<Pair<String, String>> data, boolean validating) {
555         return loadPathValues(filename, data, validating, false);
556     }
557 
loadPathValues( String filename, List<Pair<String, String>> data, boolean validating, boolean full)558     public static List<Pair<String, String>> loadPathValues(
559             String filename, List<Pair<String, String>> data, boolean validating, boolean full) {
560         return loadPathValues(filename, data, validating, full, null);
561     }
562 
loadPathValues( String filename, List<Pair<String, String>> data, boolean validating, boolean full, Function<String, String> valueFilter)563     public static List<Pair<String, String>> loadPathValues(
564             String filename,
565             List<Pair<String, String>> data,
566             boolean validating,
567             boolean full,
568             Function<String, String> valueFilter) {
569         try {
570             new XMLFileReader()
571                     .setHandler(new PathValueListHandler(data, full, valueFilter))
572                     .read(filename, -1, validating);
573             return data;
574         } catch (Exception e) {
575             throw new ICUException(filename, e);
576         }
577     }
578 
processPathValues( String filename, boolean validating, SimpleHandler simpleHandler)579     public static void processPathValues(
580             String filename, boolean validating, SimpleHandler simpleHandler) {
581         try {
582             new XMLFileReader().setHandler(simpleHandler).read(filename, -1, validating);
583         } catch (Exception e) {
584             throw new ICUException(filename, e);
585         }
586     }
587 
588     static final class PathValueListHandler extends SimpleHandler {
589         List<Pair<String, String>> data;
590         boolean full;
591         private Function<String, String> valueFilter;
592 
PathValueListHandler( List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter)593         public PathValueListHandler(
594                 List<Pair<String, String>> data,
595                 boolean full,
596                 Function<String, String> valueFilter) {
597             super();
598             this.data = data != null ? data : new ArrayList<>();
599             this.full = full;
600             this.valueFilter = valueFilter;
601         }
602 
603         @Override
handlePathValue(String path, String value)604         public void handlePathValue(String path, String value) {
605             if (valueFilter == null) {
606                 data.add(Pair.of(path, value));
607             } else {
608                 String filteredValue = valueFilter.apply(value);
609                 if (filteredValue != null) {
610                     data.add(Pair.of(path, filteredValue));
611                 }
612             }
613         }
614 
615         @Override
handleComment(String path, String comment)616         public void handleComment(String path, String comment) {
617             if (!full || path.equals("/")) {
618                 return;
619             }
620             data.add(Pair.of("!", comment));
621         }
622     }
623 }
624