1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2004, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Mark Davis 7 ********************************************************************** 8 */ 9 package org.unicode.cldr.util; 10 11 import com.google.common.base.Function; 12 import com.ibm.icu.util.ICUException; 13 import com.ibm.icu.util.ICUUncheckedIOException; 14 import java.io.File; 15 import java.io.FileInputStream; 16 import java.io.IOException; 17 import java.io.InputStream; 18 import java.io.Reader; 19 import java.util.ArrayList; 20 import java.util.List; 21 import java.util.Stack; 22 import org.xml.sax.Attributes; 23 import org.xml.sax.ContentHandler; 24 import org.xml.sax.ErrorHandler; 25 import org.xml.sax.InputSource; 26 import org.xml.sax.Locator; 27 import org.xml.sax.SAXException; 28 import org.xml.sax.SAXNotRecognizedException; 29 import org.xml.sax.SAXNotSupportedException; 30 import org.xml.sax.SAXParseException; 31 import org.xml.sax.XMLReader; 32 import org.xml.sax.ext.DeclHandler; 33 import org.xml.sax.ext.LexicalHandler; 34 import org.xml.sax.helpers.XMLReaderFactory; 35 36 /** 37 * Convenience class to make reading XML data files easier. The main method is read(); This is meant 38 * for XML data files, so the contents of elements must either be all other elements, or just text. 39 * It is thus not suitable for XML files with MIXED content; all text content in a mixed element is 40 * discarded. 41 * 42 * @author davis 43 */ 44 public class XMLFileReader { 45 static final boolean SHOW_ALL = false; 46 /** Handlers to use in read() */ 47 public static int CONTENT_HANDLER = 1, 48 ERROR_HANDLER = 2, 49 LEXICAL_HANDLER = 4, 50 DECLARATION_HANDLER = 8; 51 52 private MyContentHandler DEFAULT_DECLHANDLER = new MyContentHandler(); 53 // TODO Add way to skip gathering value contents 54 // private ElementOnlyContentHandler ELEMENT_ONLY_DECLHANDLER = new ElementOnlyContentHandler(); 55 private SimpleHandler simpleHandler; 56 57 public static class SimpleHandler { 58 /** 59 * called when every new element is encountered, with the full path to the element 60 * (including attributes). Called on leaf and non-leaf elements. 61 * 62 * @param path 63 */ handleElement(CharSequence path)64 public void handleElement(CharSequence path) {} 65 66 /** Called with an "xpath" of each leaf element */ handlePathValue(String path, String value)67 public void handlePathValue(String path, String value) {} 68 handleComment(String path, String comment)69 public void handleComment(String path, String comment) {} 70 handleElementDecl(String name, String model)71 public void handleElementDecl(String name, String model) {} 72 handleAttributeDecl( String eName, String aName, String type, String mode, String value)73 public void handleAttributeDecl( 74 String eName, String aName, String type, String mode, String value) {} 75 handleEndDtd()76 public void handleEndDtd() {} 77 handleStartDtd(String name, String publicId, String systemId)78 public void handleStartDtd(String name, String publicId, String systemId) {} 79 } 80 setHandler(SimpleHandler simpleHandler)81 public XMLFileReader setHandler(SimpleHandler simpleHandler) { 82 this.simpleHandler = simpleHandler; 83 return this; 84 } 85 86 /** 87 * Read an XML file. The order of the elements matches what was in the file. 88 * 89 * @param fileName file to open 90 * @param handlers a set of values for the handlers to use, eg CONTENT_HANDLER | ERROR_HANDLER 91 * @param validating if a validating parse is requested 92 * @return list of alternating values. 93 */ read(String fileName, int handlers, boolean validating)94 public XMLFileReader read(String fileName, int handlers, boolean validating) { 95 try (InputStream fis = new FileInputStream(fileName); ) { 96 return read(fileName, new InputSource(fis), handlers, validating); 97 } catch (IOException e) { 98 File full = new File(fileName); 99 String fullName = fileName; 100 try { 101 fullName = full.getCanonicalPath(); 102 } catch (Exception IOException) { 103 } 104 throw (IllegalArgumentException) 105 new IllegalArgumentException("Can't read " + fullName).initCause(e); 106 } 107 } 108 109 /** 110 * read from a CLDR resource 111 * 112 * @param fileName 113 * @param handlers 114 * @param validating 115 * @param fis 116 * @see CldrUtility#getInputStream(String) 117 * @return 118 */ readCLDRResource(String resName, int handlers, boolean validating)119 public XMLFileReader readCLDRResource(String resName, int handlers, boolean validating) { 120 try (InputStream inputStream = CldrUtility.getInputStream(resName)) { 121 return read(resName, new InputSource(inputStream), handlers, validating); 122 } catch (IOException e) { 123 throw new ICUUncheckedIOException(e); 124 } 125 } 126 127 /** 128 * read from an arbitrary 129 * 130 * @param fileName 131 * @param handlers 132 * @param validating 133 * @param fis 134 * @see CldrUtility#getInputStream(String) 135 * @return 136 */ read( String resName, Class<?> callingClass, int handlers, boolean validating)137 public XMLFileReader read( 138 String resName, Class<?> callingClass, int handlers, boolean validating) { 139 try (InputStream inputStream = CldrUtility.getInputStream(callingClass, resName)) { 140 return read(resName, new InputSource(inputStream), handlers, validating); 141 } catch (IOException e) { 142 throw new ICUUncheckedIOException(e); 143 } 144 } 145 read(String systemID, Reader reader, int handlers, boolean validating)146 public XMLFileReader read(String systemID, Reader reader, int handlers, boolean validating) { 147 read(systemID, reader, handlers, validating, DEFAULT_DECLHANDLER.reset()); 148 return this; 149 } 150 read( String systemID, InputSource insrc, int handlers, boolean validating)151 public XMLFileReader read( 152 String systemID, InputSource insrc, int handlers, boolean validating) { 153 read(systemID, insrc, handlers, validating, DEFAULT_DECLHANDLER.reset()); 154 return this; 155 } 156 read( String systemID, InputStream instr, int handlers, boolean validating, AllHandler allHandler)157 public static void read( 158 String systemID, 159 InputStream instr, 160 int handlers, 161 boolean validating, 162 AllHandler allHandler) { 163 InputSource is = new InputSource(instr); 164 read(systemID, is, handlers, validating, allHandler); 165 } 166 read( String systemID, Reader reader, int handlers, boolean validating, AllHandler allHandler)167 public static void read( 168 String systemID, 169 Reader reader, 170 int handlers, 171 boolean validating, 172 AllHandler allHandler) { 173 InputSource is = new InputSource(reader); 174 read(systemID, is, handlers, validating, allHandler); 175 } 176 read( String systemID, InputSource is, int handlers, boolean validating, AllHandler allHandler)177 public static void read( 178 String systemID, 179 InputSource is, 180 int handlers, 181 boolean validating, 182 AllHandler allHandler) { 183 try { 184 XMLReader xmlReader = createXMLReader(handlers, validating, allHandler); 185 // wrap the reader to insert a character stream 186 DoctypeXmlStreamWrapper.wrap(is); 187 is.setSystemId(systemID); 188 try { 189 xmlReader.parse(is); 190 } catch (AbortException e) { 191 } // ok 192 } catch (SAXParseException e) { 193 throw (IllegalArgumentException) 194 new IllegalArgumentException( 195 "Can't read " + systemID + "\tline:\t" + e.getLineNumber()) 196 .initCause(e); 197 } catch (SAXException | IOException e) { 198 throw (IllegalArgumentException) 199 new IllegalArgumentException("Can't read " + systemID).initCause(e); 200 } 201 } 202 createXMLReader( int handlers, boolean validating, AllHandler allHandler)203 public static final XMLReader createXMLReader( 204 int handlers, boolean validating, AllHandler allHandler) 205 throws SAXNotRecognizedException, SAXNotSupportedException { 206 XMLReader xmlReader = createXMLReader(validating); 207 if ((handlers & CONTENT_HANDLER) != 0) { 208 xmlReader.setContentHandler(allHandler); 209 } 210 if ((handlers & ERROR_HANDLER) != 0) { 211 xmlReader.setErrorHandler(allHandler); 212 } 213 if ((handlers & LEXICAL_HANDLER) != 0) { 214 xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", allHandler); 215 } 216 if ((handlers & DECLARATION_HANDLER) != 0) { 217 xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", allHandler); 218 } 219 return xmlReader; 220 } 221 222 public interface AllHandler extends ContentHandler, LexicalHandler, DeclHandler, ErrorHandler {} 223 224 /** Basis for handlers that provides for logging, with no actions on methods */ 225 public static class LoggingHandler implements AllHandler { 226 @Override startDocument()227 public void startDocument() throws SAXException { 228 if (SHOW_ALL) Log.logln("startDocument"); 229 } 230 231 @Override characters(char[] ch, int start, int length)232 public void characters(char[] ch, int start, int length) throws SAXException { 233 if (SHOW_ALL) Log.logln("characters"); 234 } 235 236 @Override startElement( String namespaceURI, String localName, String qName, Attributes atts)237 public void startElement( 238 String namespaceURI, String localName, String qName, Attributes atts) 239 throws SAXException { 240 if (SHOW_ALL) Log.logln("startElement"); 241 } 242 243 @Override endElement(String namespaceURI, String localName, String qName)244 public void endElement(String namespaceURI, String localName, String qName) 245 throws SAXException { 246 if (SHOW_ALL) Log.logln("endElement"); 247 } 248 249 @Override startDTD(String name, String publicId, String systemId)250 public void startDTD(String name, String publicId, String systemId) throws SAXException { 251 if (SHOW_ALL) Log.logln("startDTD"); 252 } 253 254 @Override endDTD()255 public void endDTD() throws SAXException { 256 if (SHOW_ALL) Log.logln("endDTD"); 257 } 258 259 @Override comment(char[] ch, int start, int length)260 public void comment(char[] ch, int start, int length) throws SAXException { 261 if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length)); 262 } 263 264 @Override elementDecl(String name, String model)265 public void elementDecl(String name, String model) throws SAXException { 266 if (SHOW_ALL) Log.logln("elementDecl"); 267 } 268 269 @Override attributeDecl( String eName, String aName, String type, String mode, String value)270 public void attributeDecl( 271 String eName, String aName, String type, String mode, String value) 272 throws SAXException { 273 if (SHOW_ALL) Log.logln("attributeDecl"); 274 } 275 276 @Override ignorableWhitespace(char[] ch, int start, int length)277 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { 278 if (SHOW_ALL) Log.logln("ignorableWhitespace length: " + length); 279 } 280 281 @Override endDocument()282 public void endDocument() throws SAXException { 283 if (SHOW_ALL) Log.logln("endDocument"); 284 } 285 286 @Override internalEntityDecl(String name, String value)287 public void internalEntityDecl(String name, String value) throws SAXException { 288 if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + value); 289 } 290 291 @Override externalEntityDecl(String name, String publicId, String systemId)292 public void externalEntityDecl(String name, String publicId, String systemId) 293 throws SAXException { 294 if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + publicId + "\t" + systemId); 295 } 296 notationDecl(String name, String publicId, String systemId)297 public void notationDecl(String name, String publicId, String systemId) { 298 if (SHOW_ALL) Log.logln("notationDecl: " + name + ", " + publicId + ", " + systemId); 299 } 300 301 @Override processingInstruction(String target, String data)302 public void processingInstruction(String target, String data) throws SAXException { 303 if (SHOW_ALL) Log.logln("processingInstruction: " + target + ", " + data); 304 } 305 306 @Override skippedEntity(String name)307 public void skippedEntity(String name) throws SAXException { 308 if (SHOW_ALL) Log.logln("skippedEntity: " + name); 309 } 310 unparsedEntityDecl( String name, String publicId, String systemId, String notationName)311 public void unparsedEntityDecl( 312 String name, String publicId, String systemId, String notationName) { 313 if (SHOW_ALL) 314 Log.logln( 315 "unparsedEntityDecl: " 316 + name 317 + ", " 318 + publicId 319 + ", " 320 + systemId 321 + ", " 322 + notationName); 323 } 324 325 @Override setDocumentLocator(Locator locator)326 public void setDocumentLocator(Locator locator) { 327 if (SHOW_ALL) Log.logln("setDocumentLocator Locator " + locator); 328 } 329 330 @Override startPrefixMapping(String prefix, String uri)331 public void startPrefixMapping(String prefix, String uri) throws SAXException { 332 if (SHOW_ALL) Log.logln("startPrefixMapping prefix: " + prefix + ", uri: " + uri); 333 } 334 335 @Override endPrefixMapping(String prefix)336 public void endPrefixMapping(String prefix) throws SAXException { 337 if (SHOW_ALL) Log.logln("endPrefixMapping prefix: " + prefix); 338 } 339 340 @Override startEntity(String name)341 public void startEntity(String name) throws SAXException { 342 if (SHOW_ALL) Log.logln("startEntity name: " + name); 343 } 344 345 @Override endEntity(String name)346 public void endEntity(String name) throws SAXException { 347 if (SHOW_ALL) Log.logln("endEntity name: " + name); 348 } 349 350 @Override startCDATA()351 public void startCDATA() throws SAXException { 352 if (SHOW_ALL) Log.logln("startCDATA"); 353 } 354 355 @Override endCDATA()356 public void endCDATA() throws SAXException { 357 if (SHOW_ALL) Log.logln("endCDATA"); 358 } 359 360 /* 361 * (non-Javadoc) 362 * 363 * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException) 364 */ 365 @Override error(SAXParseException exception)366 public void error(SAXParseException exception) throws SAXException { 367 if (SHOW_ALL) Log.logln("error: " + showSAX(exception)); 368 throw exception; 369 } 370 371 /* 372 * (non-Javadoc) 373 * 374 * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException) 375 */ 376 @Override fatalError(SAXParseException exception)377 public void fatalError(SAXParseException exception) throws SAXException { 378 if (SHOW_ALL) Log.logln("fatalError: " + showSAX(exception)); 379 throw exception; 380 } 381 382 /* 383 * (non-Javadoc) 384 * 385 * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException) 386 */ 387 @Override warning(SAXParseException exception)388 public void warning(SAXParseException exception) throws SAXException { 389 if (SHOW_ALL) Log.logln("warning: " + showSAX(exception)); 390 throw exception; 391 } 392 } 393 394 public class MyContentHandler extends LoggingHandler { 395 StringBuffer chars = new StringBuffer(); 396 StringBuffer commentChars = new StringBuffer(); 397 Stack<String> startElements = new Stack<>(); 398 StringBuffer tempPath = new StringBuffer(); 399 boolean lastIsStart = false; 400 reset()401 public MyContentHandler reset() { 402 chars.setLength(0); 403 tempPath = new StringBuffer("/"); 404 startElements.clear(); 405 startElements.push("/"); 406 return this; 407 } 408 409 @Override characters(char[] ch, int start, int length)410 public void characters(char[] ch, int start, int length) throws SAXException { 411 if (lastIsStart) chars.append(ch, start, length); 412 } 413 414 @Override startElement( String namespaceURI, String localName, String qName, Attributes atts)415 public void startElement( 416 String namespaceURI, String localName, String qName, Attributes atts) 417 throws SAXException { 418 tempPath.setLength(0); 419 tempPath.append(startElements.peek()).append('/').append(qName); 420 for (int i = 0; i < atts.getLength(); ++i) { 421 tempPath.append("[@") 422 .append(atts.getQName(i)) 423 .append("=\"") 424 .append(atts.getValue(i).replace('"', '\'')) 425 .append("\"]"); 426 } 427 startElements.push(tempPath.toString()); 428 chars.setLength(0); // clear garbage 429 lastIsStart = true; 430 simpleHandler.handleElement(tempPath); 431 } 432 433 @Override endElement(String namespaceURI, String localName, String qName)434 public void endElement(String namespaceURI, String localName, String qName) 435 throws SAXException { 436 String startElement = startElements.pop(); 437 if (lastIsStart) { 438 // System.out.println(startElement + ":" + chars); 439 simpleHandler.handlePathValue(startElement, chars.toString()); 440 } 441 chars.setLength(0); 442 lastIsStart = false; 443 } 444 445 @Override startDTD(String name, String publicId, String systemId)446 public void startDTD(String name, String publicId, String systemId) throws SAXException { 447 if (SHOW_ALL) 448 Log.logln( 449 "startDTD name: " 450 + name 451 + ", publicId: " 452 + publicId 453 + ", systemId: " 454 + systemId); 455 simpleHandler.handleStartDtd(name, publicId, systemId); 456 } 457 458 @Override endDTD()459 public void endDTD() throws SAXException { 460 if (SHOW_ALL) Log.logln("endDTD"); 461 simpleHandler.handleEndDtd(); 462 } 463 464 @Override comment(char[] ch, int start, int length)465 public void comment(char[] ch, int start, int length) throws SAXException { 466 if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length)); 467 commentChars.append(ch, start, length); 468 simpleHandler.handleComment(startElements.peek(), commentChars.toString()); 469 commentChars.setLength(0); 470 } 471 472 @Override elementDecl(String name, String model)473 public void elementDecl(String name, String model) throws SAXException { 474 simpleHandler.handleElementDecl(name, model); 475 } 476 477 @Override attributeDecl( String eName, String aName, String type, String mode, String value)478 public void attributeDecl( 479 String eName, String aName, String type, String mode, String value) 480 throws SAXException { 481 simpleHandler.handleAttributeDecl(eName, aName, type, mode, value); 482 } 483 } 484 485 static final class AbortException extends RuntimeException { 486 private static final long serialVersionUID = 1L; 487 } 488 489 /** Show a SAX exception in a readable form. */ showSAX(SAXParseException exception)490 public static String showSAX(SAXParseException exception) { 491 return exception.getMessage() 492 + ";\t SystemID: " 493 + exception.getSystemId() 494 + ";\t PublicID: " 495 + exception.getPublicId() 496 + ";\t LineNumber: " 497 + exception.getLineNumber() 498 + ";\t ColumnNumber: " 499 + exception.getColumnNumber(); 500 } 501 createXMLReader(boolean validating)502 public static XMLReader createXMLReader(boolean validating) { 503 // weiv 07/20/2007: The laundry list below is somewhat obsolete 504 // I have moved the system's default parser (instantiated when "" is 505 // passed) to the top, so that we will always use that. I have also 506 // removed "org.apache.crimson.parser.XMLReaderImpl" as this one gets 507 // confused regarding UTF-8 encoding name. 508 String[] testList = { 509 System.getProperty("CLDR_DEFAULT_SAX_PARSER", ""), // defaults to "", system default. 510 "org.apache.xerces.parsers.SAXParser", 511 "gnu.xml.aelfred2.XmlReader", 512 "com.bluecast.xml.Piccolo", 513 "oracle.xml.parser.v2.SAXParser" 514 }; 515 XMLReader result = null; 516 for (int i = 0; i < testList.length; ++i) { 517 try { 518 result = 519 (testList[i].length() != 0) 520 ? XMLReaderFactory.createXMLReader(testList[i]) 521 : XMLReaderFactory.createXMLReader(); 522 result.setFeature("http://xml.org/sax/features/validation", validating); 523 break; 524 } catch (SAXException e1) { 525 } 526 } 527 if (result == null) 528 throw new NoClassDefFoundError( 529 "No SAX parser is available, or unable to set validation correctly"); 530 return result; 531 } 532 533 static final class DebuggingInputStream extends InputStream { 534 InputStream contents; 535 536 @Override close()537 public void close() throws IOException { 538 contents.close(); 539 } 540 DebuggingInputStream(InputStream fis)541 public DebuggingInputStream(InputStream fis) { 542 contents = fis; 543 } 544 545 @Override read()546 public int read() throws IOException { 547 int x = contents.read(); 548 System.out.println(Integer.toHexString(x) + ","); 549 return x; 550 } 551 } 552 loadPathValues( String filename, List<Pair<String, String>> data, boolean validating)553 public static List<Pair<String, String>> loadPathValues( 554 String filename, List<Pair<String, String>> data, boolean validating) { 555 return loadPathValues(filename, data, validating, false); 556 } 557 loadPathValues( String filename, List<Pair<String, String>> data, boolean validating, boolean full)558 public static List<Pair<String, String>> loadPathValues( 559 String filename, List<Pair<String, String>> data, boolean validating, boolean full) { 560 return loadPathValues(filename, data, validating, full, null); 561 } 562 loadPathValues( String filename, List<Pair<String, String>> data, boolean validating, boolean full, Function<String, String> valueFilter)563 public static List<Pair<String, String>> loadPathValues( 564 String filename, 565 List<Pair<String, String>> data, 566 boolean validating, 567 boolean full, 568 Function<String, String> valueFilter) { 569 try { 570 new XMLFileReader() 571 .setHandler(new PathValueListHandler(data, full, valueFilter)) 572 .read(filename, -1, validating); 573 return data; 574 } catch (Exception e) { 575 throw new ICUException(filename, e); 576 } 577 } 578 processPathValues( String filename, boolean validating, SimpleHandler simpleHandler)579 public static void processPathValues( 580 String filename, boolean validating, SimpleHandler simpleHandler) { 581 try { 582 new XMLFileReader().setHandler(simpleHandler).read(filename, -1, validating); 583 } catch (Exception e) { 584 throw new ICUException(filename, e); 585 } 586 } 587 588 static final class PathValueListHandler extends SimpleHandler { 589 List<Pair<String, String>> data; 590 boolean full; 591 private Function<String, String> valueFilter; 592 PathValueListHandler( List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter)593 public PathValueListHandler( 594 List<Pair<String, String>> data, 595 boolean full, 596 Function<String, String> valueFilter) { 597 super(); 598 this.data = data != null ? data : new ArrayList<>(); 599 this.full = full; 600 this.valueFilter = valueFilter; 601 } 602 603 @Override handlePathValue(String path, String value)604 public void handlePathValue(String path, String value) { 605 if (valueFilter == null) { 606 data.add(Pair.of(path, value)); 607 } else { 608 String filteredValue = valueFilter.apply(value); 609 if (filteredValue != null) { 610 data.add(Pair.of(path, filteredValue)); 611 } 612 } 613 } 614 615 @Override handleComment(String path, String comment)616 public void handleComment(String path, String comment) { 617 if (!full || path.equals("/")) { 618 return; 619 } 620 data.add(Pair.of("!", comment)); 621 } 622 } 623 } 624