1 package org.unicode.cldr.util; 2 3 import com.google.common.base.Splitter; 4 import com.ibm.icu.text.Transform; 5 import com.ibm.icu.util.Output; 6 import java.util.ArrayList; 7 import java.util.Collections; 8 import java.util.Comparator; 9 import java.util.HashMap; 10 import java.util.Iterator; 11 import java.util.LinkedHashMap; 12 import java.util.List; 13 import java.util.Map; 14 import java.util.Map.Entry; 15 import java.util.Set; 16 import java.util.TreeSet; 17 import java.util.regex.Matcher; 18 import java.util.regex.Pattern; 19 import org.unicode.cldr.util.CldrUtility.VariableReplacer; 20 import org.unicode.cldr.util.RegexFileParser.RegexLineParser; 21 import org.unicode.cldr.util.RegexFileParser.VariableProcessor; 22 import org.unicode.cldr.util.RegexLookup.Finder; 23 import org.unicode.cldr.util.RegexLookup.Finder.Info; 24 25 /** 26 * Lookup items according to a set of regex patterns. Returns the value according to the first 27 * pattern that matches. Not thread-safe. 28 * 29 * @param <T> 30 */ 31 public class RegexLookup<T> implements Iterable<Map.Entry<Finder, T>> { 32 protected static final String SEPARATOR = "; "; 33 private VariableReplacer variables = new VariableReplacer(); 34 private StorageInterfaceBase<T> storage; 35 private Map<Finder, T> MEntries; 36 private Transform<String, ? extends Finder> patternTransform = RegexFinderTransform; 37 private Transform<String, ? extends T> valueTransform; 38 private Merger<T> valueMerger; 39 private final boolean allowNull = false; 40 private static PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*"); 41 42 public enum LookupType { 43 STAR_PATTERN_LOOKUP, 44 OPTIMIZED_DIRECTORY_PATTERN_LOOKUP, 45 STANDARD 46 } 47 48 private LookupType _lookupType; 49 50 /* 51 * STAR_PATTERN_LOOKUP 52 * 53 * When true, RegexLookup can match regex's even faster than the OPTIMIZED_DIRECTORY_PATTERN_LOOKUP below. 54 * It requires some additional structure, such that the only regular expression constructs such as (a|b) occur 55 * in attributes, so that the star pattern for a given path can match the star pattern of a given regular expression, 56 * thus greatly reducing the number of actual regex matches that need to occur. 57 */ 58 59 /* 60 * OPTIMIZED_DIRECTORY_PATTERN_LOOKUP 61 * 62 * When true, RegexLookup can match regex's in O(log(n)) time, where n is the number of regex's stored. 63 * This is provided that all regex patterns follow a directory based format and all directories are separated by a forward slash '/' 64 * for example: ^//ldml/dates/calendars/calendar[@type="([^"']++)"]/alias[@source="locale"][@path="../calendar[@type='([^"']++)']"] 65 * 66 * When false, RegexLookup will match regex's in O(n) time, where n is the number of regex's stored. 67 * However regex's no longer need to follow any specific format (Slower but more versatile). 68 */ 69 RegexLookup(LookupType type)70 public RegexLookup(LookupType type) { 71 _lookupType = type; 72 switch (type) { 73 case STAR_PATTERN_LOOKUP: 74 // SPEntries = new StarPatternMap<T>(); 75 storage = new StarPatternMap<>(); 76 break; 77 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 78 // RTEntries = new RegexTree<T>(); 79 storage = new RegexTree<>(); 80 break; 81 default: 82 MEntries = new LinkedHashMap<>(); 83 break; 84 } 85 } 86 RegexLookup()87 public RegexLookup() { 88 this(LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP); 89 // _lookupType = RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP; 90 // RTEntries = new RegexTree<T>(); 91 } 92 93 public abstract static class Finder { 94 public static class Info { 95 public String[] value; 96 } 97 98 // abstract public String[] getInfo(); 99 100 // abstract public boolean find(String item, Object context); 101 find(String item, Object context, Info info)102 public abstract boolean find(String item, Object context, Info info); 103 matches(String item, Object context, Info info)104 public abstract boolean matches(String item, Object context, Info info); 105 getFailPoint(String source)106 public int getFailPoint(String source) { 107 return -1; 108 } 109 // must also define toString 110 } 111 112 public static class RegexFinder extends Finder { 113 /** The matcher used by this RegexFinder */ 114 private final Matcher matcher; 115 116 /** The Pattern used by this RegexFinder */ 117 protected final Pattern pattern; 118 RegexFinder(String pattern)119 public RegexFinder(String pattern) { 120 this.pattern = Pattern.compile(pattern, Pattern.COMMENTS); 121 matcher = this.pattern.matcher(""); 122 } 123 124 /** 125 * Call Matches on the pattern, returning additional information in the Info field, if it is 126 * non null 127 */ 128 @Override matches(String item, Object context, Info info)129 public boolean matches(String item, Object context, Info info) { 130 synchronized (matcher) { 131 try { 132 boolean result = matcher.reset(item).matches(); 133 extractInfo(info, result); 134 return result; 135 } catch (StringIndexOutOfBoundsException e) { 136 // We don't know what causes this error (cldrbug 5051) so 137 // make the exception message more detailed. 138 throw new IllegalArgumentException( 139 "Matching error caused by pattern: [" 140 + matcher.toString() 141 + "] on text: [" 142 + item 143 + "]", 144 e); 145 } 146 } 147 } 148 149 /** 150 * Extract match related information into the info field, if result is true, and info is not 151 * null. 152 * 153 * @param info 154 * @param result 155 */ extractInfo(Info info, boolean result)156 private void extractInfo(Info info, boolean result) { 157 if (result && info != null) { 158 int limit = matcher.groupCount() + 1; 159 String[] value = new String[limit]; 160 for (int i = 0; i < limit; ++i) { 161 value[i] = matcher.group(i); 162 } 163 info.value = value; 164 } 165 } 166 167 /** 168 * Call find() on the pattern, returning additional information in the info field, if it is 169 * non-null 170 */ 171 @Override find(String item, Object context, Info info)172 public boolean find(String item, Object context, Info info) { 173 synchronized (matcher) { 174 try { 175 boolean result = matcher.reset(item).find(); 176 extractInfo(info, result); 177 return result; 178 } catch (StringIndexOutOfBoundsException e) { 179 // We don't know what causes this error (cldrbug 5051) so 180 // make the exception message more detailed. 181 throw new IllegalArgumentException( 182 "Matching error caused by pattern: [" 183 + matcher.toString() 184 + "] on text: [" 185 + item 186 + "]", 187 e); 188 } 189 } 190 } 191 192 @Override toString()193 public String toString() { 194 // Use pattern here, to avoid having to synchronize on matcher 195 return pattern.pattern(); 196 } 197 198 @Override equals(Object obj)199 public boolean equals(Object obj) { 200 if (obj == null) { 201 return false; 202 } else { 203 return toString().equals(obj.toString()); 204 } 205 } 206 207 @Override hashCode()208 public int hashCode() { 209 return toString().hashCode(); 210 } 211 212 @Override getFailPoint(String source)213 public int getFailPoint(String source) { 214 synchronized (matcher) { 215 return RegexUtilities.findMismatch(matcher, source); 216 } 217 } 218 } 219 220 private static interface StorageInterfaceBase<T> { entrySet()221 Set<Entry<Finder, T>> entrySet(); 222 get(Finder finder)223 T get(Finder finder); 224 get( String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)225 T get( 226 String pattern, 227 Object context, 228 Output<String[]> arguments, 229 Output<Finder> matcherFound); 230 getAll( String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)231 List<T> getAll( 232 String pattern, 233 Object context, 234 List<Finder> matcherList, 235 Output<String[]> firstInfo); 236 put(Finder pattern, T value)237 void put(Finder pattern, T value); 238 size()239 int size(); 240 } 241 242 // private static class FinderWithInfo { 243 // Finder _finder; 244 // Info _info; 245 // public FinderWithInfo(Finder finder,Info info) { 246 // _info=info; 247 // _finder=finder; 248 // } 249 // } 250 private static class RegexTree<T> implements StorageInterfaceBase<T> { 251 private RTNode root; 252 private int _size; 253 private RTNodeRankComparator rankComparator = new RTNodeRankComparator(); 254 RegexTree()255 public RegexTree() { 256 root = new RTNode("", null); 257 _size = 0; 258 } 259 260 @Override size()261 public int size() { 262 return _size; 263 } 264 265 @Override put(Finder pattern, T value)266 public void put(Finder pattern, T value) { 267 root.put(new RTNode(pattern, value, _size)); 268 _size++; 269 } 270 271 @Override get(Finder finder)272 public T get(Finder finder) { 273 return root.get(finder); 274 } 275 276 @Override getAll( String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)277 public List<T> getAll( 278 String pattern, 279 Object context, 280 List<Finder> matcherList, 281 Output<String[]> firstInfo) { 282 List<RTNode> list = new ArrayList<>(); 283 List<T> retList = new ArrayList<>(); 284 285 root.addToList(pattern, context, list); 286 Collections.sort(list, rankComparator); 287 288 boolean isFirst = true; 289 if (firstInfo != null && !list.isEmpty()) { 290 RTNode firstNode = list.get(0); 291 if (firstNode._info != null) { 292 firstInfo.value = firstNode._info.value; 293 } 294 } 295 296 for (RTNode n : list) { 297 if (isFirst) { 298 firstInfo.value = n._info.value; 299 isFirst = false; 300 } 301 } 302 303 for (RTNode n : list) { 304 retList.add(n._val); 305 if (matcherList != null) { 306 matcherList.add(n._finder); 307 } 308 } 309 310 return retList; 311 } 312 313 @Override get( String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)314 public T get( 315 String pattern, 316 Object context, 317 Output<String[]> arguments, 318 Output<Finder> matcherFound) { 319 List<Finder> matcherList = new ArrayList<>(); 320 Output<String[]> firstInfo = new Output<>(); 321 List<T> matches = 322 getAll( 323 pattern, 324 context, 325 matcherList, 326 firstInfo); // need to get whole list because we want value that was 327 // entered first 328 if (arguments != null) { 329 // arguments.value = (matcherList.size() > 0) ? 330 // matcherList.get(0).getInfo() : null; 331 arguments.value = firstInfo.value; 332 // arguments.value = (matcherList.size() > 0) ? 333 // matcherList.get(0).getInfo() : null; 334 arguments.value = firstInfo.value; 335 } 336 if (matcherFound != null) { 337 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null; 338 } 339 return (matches.size() > 0) ? matches.get(0) : null; 340 } 341 342 @Override entrySet()343 public Set<Entry<Finder, T>> entrySet() { 344 LinkedHashMap<Finder, T> ret = new LinkedHashMap<>(); 345 TreeSet<RTNode> s = new TreeSet<>(rankComparator); 346 root.addToEntrySet(s); 347 348 for (RTNode n : s) { 349 ret.put(n._finder, n._val); 350 } 351 352 return ret.entrySet(); 353 } 354 355 public class RTNode extends NodeBase<T> { 356 // Finder _finder; 357 // T _val; 358 List<RTNode> _children = new ArrayList<>(); 359 int _rank = -1; // rank -1 means the node was not inserted, but only used for structural 360 // purposes 361 362 // constructor for regular nodes with a Finder RTNode(Finder finder, T val, int rank)363 public RTNode(Finder finder, T val, int rank) { 364 super(finder, val); 365 // _finder = finder; 366 // _val = val; 367 _rank = rank; 368 } 369 370 // constructors for nodes without a Finder RTNode(String key, T val)371 public RTNode(String key, T val) { 372 super(new RegexFinder(key), val); 373 // _finder = new RegexFinder(key); 374 // _val = val; 375 // _rank = -1; 376 _info = new Info(); 377 } 378 put(RTNode node)379 public void put(RTNode node) { 380 if (_children.size() == 0) { 381 _children.add(node); 382 } else { 383 String maxSimilarChars = 384 ""; // most similar characters up to the last similar directory 385 int insertIndex = 0; 386 for (int i = 0; i < _children.size(); i++) { 387 RTNode child = _children.get(i); 388 String childFinderPattern = child._finder.toString(); 389 if (childFinderPattern.length() > 0 390 && childFinderPattern.charAt(childFinderPattern.length() - 1) 391 == '$') { 392 childFinderPattern = 393 childFinderPattern.substring( 394 0, 395 childFinderPattern.length() 396 - 1); // takes into account the added "$" 397 } else if (child._rank == -1) { 398 childFinderPattern = 399 childFinderPattern.substring( 400 0, 401 childFinderPattern.length() 402 - 2); // takes into account the added ".*" 403 } 404 405 // check if child has the same Finder as node to insert, then replace it 406 if (node._finder.equals(child._finder)) { 407 child._finder = node._finder; 408 child._val = node._val; 409 if (child._rank == -1) { 410 child._rank = node._rank; 411 } else { 412 _size--; 413 } 414 return; 415 } 416 417 // check if child is the parent of node 418 if (child._rank == -1 419 && node._finder.toString().startsWith(childFinderPattern)) { 420 child.put(node); 421 return; 422 } 423 424 // if not parent then check if candidate for most similar RTNode 425 String gcp = 426 greatestCommonPrefix(childFinderPattern, node._finder.toString()); 427 gcp = removeExtraChars(gcp); 428 if (gcp.length() > maxSimilarChars.length()) { 429 maxSimilarChars = gcp; 430 insertIndex = i; 431 } 432 } 433 434 String finderPattern = this._finder.toString(); 435 if (finderPattern.length() > 0 436 && finderPattern.charAt(finderPattern.length() - 1) == '$') { 437 finderPattern = 438 finderPattern.substring( 439 0, 440 finderPattern.length() 441 - 1); // takes into account the added "$" 442 } else if (!(finderPattern.equals("")) && this._rank == -1) { 443 finderPattern = 444 finderPattern.substring( 445 0, 446 finderPattern.length() 447 - 2); // takes into account the added ".*" 448 } 449 450 if ((maxSimilarChars) 451 .equals(finderPattern)) { // add under this if no similar children 452 _children.add(node); 453 } else { 454 // create the common parent of the chosen candidate above and node, then add 455 // to the insert index 456 RTNode newParent = new RTNode(maxSimilarChars + ".*", null); 457 newParent._children.add(this._children.get(insertIndex)); 458 newParent._children.add(node); 459 this._children.remove(insertIndex); 460 this._children.add(insertIndex, newParent); 461 } 462 } 463 } 464 465 // takes a string in a directory regex form and removes all characters up to the last 466 // full directory removeExtraChars(String input)467 private String removeExtraChars(String input) { 468 String ret = input.substring(0, Math.max(0, input.lastIndexOf('/'))); 469 while ((ret.lastIndexOf('(') != -1 && ret.lastIndexOf('(') > ret.lastIndexOf(')')) 470 || (ret.lastIndexOf('[') != -1 471 && ret.lastIndexOf('[') > ret.lastIndexOf(']')) 472 || (ret.lastIndexOf('{') != -1 473 && ret.lastIndexOf('{') > ret.lastIndexOf('}'))) { 474 ret = ret.substring(0, Math.max(0, ret.lastIndexOf('/'))); 475 } 476 return ret; 477 } 478 479 // traverse tree to get value get(Finder finder)480 public T get(Finder finder) { 481 T ret = null; // return value 482 483 if (_children.size() == 0) { 484 return null; 485 } else { 486 for (RTNode child : _children) { 487 488 // check if child is the node 489 if (child._rank != -1 && finder.equals(child._finder)) { 490 return child._val; 491 } 492 493 String childFinderPattern = child._finder.toString(); 494 495 if (childFinderPattern.length() > 0 496 && childFinderPattern.charAt(childFinderPattern.length() - 1) 497 == '$') { 498 childFinderPattern = 499 childFinderPattern.substring( 500 0, 501 childFinderPattern.length() 502 - 1); // takes into account the added "$" 503 } else if (child._rank == -1) { 504 childFinderPattern = 505 childFinderPattern.substring( 506 0, 507 childFinderPattern.length() 508 - 2); // takes into account the added ".*" 509 } 510 511 // check if child is the parent of node 512 if (finder.toString().startsWith(childFinderPattern)) { 513 ret = child.get(finder); 514 if (ret != null) { 515 break; 516 } 517 } 518 } 519 520 return ret; 521 } 522 } 523 524 // traverse tree to get an entry set addToEntrySet(TreeSet<RTNode> s)525 public void addToEntrySet(TreeSet<RTNode> s) { 526 if (_children.size() == 0) { 527 return; 528 } else { 529 for (RTNode child : _children) { 530 if (child._rank != -1) { 531 s.add(child); 532 } 533 child.addToEntrySet(s); 534 } 535 } 536 } 537 538 // traverse tree to get list of all values who's key matcher matches pattern addToList(String pattern, Object context, List<RTNode> list)539 public void addToList(String pattern, Object context, List<RTNode> list) { 540 if (_children.size() == 0) { 541 return; 542 } else { 543 Info firstInfo = new Info(); 544 for (RTNode child : _children) { 545 boolean found; 546 synchronized (child._finder) { 547 found = child._finder.find(pattern, context, firstInfo); 548 } 549 550 // check if child matches pattern 551 if (found) { 552 if (child._rank != -1) { 553 list.add(child); 554 } 555 // if this node's info value is unset, set it to the result of the 556 // lookup 557 // if (child._info!=null && 558 // child._info.value==null) { 559 if (child._info != null) { 560 // set the value to the result of the last find 561 child._info.value = firstInfo.value; 562 } else { 563 // for some reason, child._info is null, so simply initialize it. 564 child._info = new Info(); 565 // set the value to the result of the last find 566 child._info.value = firstInfo.value; 567 } 568 // check if child is the parent of node then enter that node 569 child.addToList(pattern, context, list); 570 } 571 } 572 } 573 } 574 575 @Override toString()576 public String toString() { 577 return toString("", new StringBuilder()).toString(); 578 } 579 toString(String prefix, StringBuilder result)580 private StringBuilder toString(String prefix, StringBuilder result) { 581 result.append(prefix).append(this._finder.toString()).append("\n"); 582 for (RegexTree<T>.RTNode child : _children) { 583 child.toString(prefix + "\t", result); 584 } 585 return result; 586 } 587 588 // greatest common prefix between two strings greatestCommonPrefix(String a, String b)589 public String greatestCommonPrefix(String a, String b) { 590 int minLength = Math.min(a.length(), b.length()); 591 for (int i = 0; i < minLength; i++) { 592 if (a.charAt(i) != b.charAt(i)) { 593 return a.substring(0, i); 594 } 595 } 596 return a.substring(0, minLength); 597 } 598 } 599 600 class RTNodeRankComparator implements Comparator<RTNode> { 601 @Override compare(RTNode a, RTNode b)602 public int compare(RTNode a, RTNode b) { 603 if (a == b) { 604 return 0; 605 } else if (a == null) { 606 return -1; 607 } else if (b == null) { 608 return 1; 609 } else if (a._rank == b._rank) { 610 return 0; 611 } else if (a._rank > b._rank) { 612 return 1; 613 } else { 614 return -1; 615 } 616 } 617 } 618 619 @Override toString()620 public String toString() { 621 return root.toString(); 622 } 623 } 624 625 private static class StarPatternMap<T> implements StorageInterfaceBase<T> { 626 private Map<String, List<SPNode>> _spmap; 627 private int _size = 0; 628 StarPatternMap()629 public StarPatternMap() { 630 _spmap = new HashMap<>(); 631 // _size = 0; 632 } 633 634 @Override size()635 public int size() { 636 return _size; 637 } 638 639 @Override put(Finder pattern, T value)640 public void put(Finder pattern, T value) { 641 // System.out.println("pattern.toString() is => "+pattern.toString()); 642 String starPattern = 643 pathStarrer.transform2( 644 pattern.toString().replaceAll("\\(\\[\\^\"\\]\\*\\)", "*")); 645 // System.out.println("Putting => "+starPattern); 646 List<SPNode> candidates = _spmap.get(starPattern); 647 if (candidates == null) { 648 candidates = new ArrayList<>(); 649 } 650 SPNode newNode = new SPNode(pattern, value); 651 candidates.add(newNode); 652 _spmap.put(starPattern, candidates); 653 _size++; 654 } 655 656 @Override get(Finder finder)657 public T get(Finder finder) { 658 String starPattern = pathStarrer.transform2(finder.toString()); 659 List<SPNode> candidates = _spmap.get(starPattern); 660 if (candidates == null) { 661 return null; 662 } 663 for (SPNode cand : candidates) { 664 if (cand._finder.equals(finder)) { 665 return cand._val; 666 } 667 } 668 return null; 669 } 670 671 @Override getAll( String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)672 public List<T> getAll( 673 String pattern, 674 Object context, 675 List<Finder> matcherList, 676 Output<String[]> firstInfo) { 677 List<SPNode> list = new ArrayList<>(); 678 List<T> retList = new ArrayList<>(); 679 680 String starPattern = pathStarrer.transform2(pattern); 681 List<SPNode> candidates = _spmap.get(starPattern); 682 if (candidates == null) { 683 return retList; 684 } 685 for (SPNode cand : candidates) { 686 Info info = new Info(); 687 if (cand._finder.find(pattern, context, info)) { 688 list.add(cand); 689 if (firstInfo != null) { 690 firstInfo.value = info.value; 691 } 692 } 693 } 694 695 for (SPNode n : list) { 696 retList.add(n._val); 697 if (matcherList != null) { 698 matcherList.add(n._finder); 699 } 700 } 701 702 return retList; 703 } 704 705 @Override get( String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)706 public T get( 707 String pattern, 708 Object context, 709 Output<String[]> arguments, 710 Output<Finder> matcherFound) { 711 List<Finder> matcherList = new ArrayList<>(); 712 Output<String[]> firstInfo = new Output<>(); 713 List<T> matches = 714 getAll( 715 pattern, 716 context, 717 matcherList, 718 firstInfo); // need to get whole list because we want value that was 719 // entered first 720 if (arguments != null && firstInfo.value != null) { 721 // arguments.value = (matcherList.size() > 0) ? 722 // matcherList.get(0).getInfo() : null; 723 arguments.value = matcherList.isEmpty() ? null : firstInfo.value; 724 } 725 if (matcherFound != null) { 726 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null; 727 } 728 return (matches.size() > 0) ? matches.get(0) : null; 729 } 730 731 @Override entrySet()732 public Set<Entry<Finder, T>> entrySet() { 733 LinkedHashMap<Finder, T> ret = new LinkedHashMap<>(); 734 735 for (Entry<String, List<SPNode>> entry : _spmap.entrySet()) { 736 List<SPNode> candidates = entry.getValue(); 737 for (SPNode node : candidates) { 738 ret.put(node._finder, node._val); 739 } 740 } 741 return ret.entrySet(); 742 } 743 744 /** 745 * A Node of a StarPatternMap 746 * 747 * @author ribnitz 748 */ 749 public class SPNode extends NodeBase<T> { 750 // Finder _finder; 751 // T _val; 752 SPNode(Finder finder, T val)753 public SPNode(Finder finder, T val) { 754 // _finder = finder; 755 // _val = val; 756 super(finder, val); 757 } 758 759 @Override toString()760 public String toString() { 761 return this._finder.toString(); 762 } 763 } 764 } 765 766 /** 767 * The basic class of an information node, featuring a Finder, a value and an Info 768 * 769 * @author ribnitz 770 * @param <T> 771 */ 772 private static class NodeBase<T> { 773 Finder _finder; 774 T _val; 775 Info _info = new Info(); 776 NodeBase(Finder finder, T value)777 public NodeBase(Finder finder, T value) { 778 this._finder = finder; 779 this._val = value; 780 } 781 } 782 783 public static Transform<String, RegexFinder> RegexFinderTransform = 784 new Transform<String, RegexFinder>() { 785 @Override 786 public RegexFinder transform(String source) { 787 return new RegexFinder(source); 788 } 789 }; 790 791 /** 792 * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before 793 * //. To work better with XPaths. 794 */ 795 public static Transform<String, RegexFinder> RegexFinderTransformPath = 796 new Transform<String, RegexFinder>() { 797 @Override 798 public RegexFinder transform(String source) { 799 final String newSource = source.replace("[@", "\\[@"); 800 return new RegexFinder( 801 newSource.startsWith("//") ? "^" + newSource : newSource); 802 } 803 }; 804 805 /** 806 * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before 807 * //, and ' is changed to ". To work better with XPaths. 808 */ 809 public static Transform<String, RegexFinder> RegexFinderTransformPath2 = 810 new Transform<String, RegexFinder>() { 811 @Override 812 public RegexFinder transform(String source) { 813 final String newSource = source.replace("[@", "\\[@").replace('\'', '"'); 814 return new RegexFinder( 815 newSource.startsWith("//") ? "^" + newSource : newSource); 816 } 817 }; 818 819 /** 820 * Allows for merging items of the same type. 821 * 822 * @param <T> 823 */ 824 public interface Merger<T> { merge(T a, T into)825 T merge(T a, T into); 826 } 827 828 /** 829 * Returns the result of a regex lookup. 830 * 831 * @param source 832 * @return 833 */ get(String source)834 public final T get(String source) { 835 return get(source, null, null, null, null); 836 } 837 838 /** 839 * Returns the result of a regex lookup, with the group arguments that matched. 840 * 841 * @param source 842 * @param context TODO 843 * @return 844 */ get(String source, Object context, Output<String[]> arguments)845 public T get(String source, Object context, Output<String[]> arguments) { 846 return get(source, context, arguments, null, null); 847 } 848 849 /** 850 * Returns the result of a regex lookup, with the group arguments that matched. Supplies failure 851 * cases for debugging. 852 * 853 * @param source 854 * @param context 855 * @return 856 */ get( String source, Object context, Output<String[]> arguments, Output<Finder> matcherFound, List<String> failures)857 public T get( 858 String source, 859 Object context, 860 Output<String[]> arguments, 861 Output<Finder> matcherFound, 862 List<String> failures) { 863 864 if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) { 865 // T ret = SPEntries.get(source, context, arguments, matcherFound); 866 T ret = storage.get(source, context, arguments, matcherFound); 867 if (ret != null) { 868 return ret; 869 } 870 871 if (failures != null) { 872 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 873 // for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) { 874 Finder matcher = entry.getKey(); 875 synchronized (matcher) { 876 int failPoint = matcher.getFailPoint(source); 877 String show = 878 source.substring(0, failPoint) 879 + "☹" 880 + source.substring(failPoint) 881 + "\t" 882 + matcher.toString(); 883 failures.add(show); 884 } 885 } 886 } 887 } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) { 888 // T ret = RTEntries.get(source, context, arguments, matcherFound); 889 T ret = storage.get(source, context, arguments, matcherFound); 890 if (ret != null) { 891 return ret; 892 } 893 894 if (failures != null) { 895 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 896 // for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) { 897 Finder matcher = entry.getKey(); 898 synchronized (matcher) { 899 int failPoint = matcher.getFailPoint(source); 900 String show = 901 source.substring(0, failPoint) 902 + "☹" 903 + source.substring(failPoint) 904 + "\t" 905 + matcher.toString(); 906 failures.add(show); 907 } 908 } 909 } 910 } else { 911 // slow but versatile implementation 912 for (Map.Entry<Finder, T> entry : MEntries.entrySet()) { 913 Finder matcher = entry.getKey(); 914 synchronized (matcher) { 915 Info firstInfo = new Info(); 916 if (matcher.find(source, context, firstInfo)) { 917 if (arguments != null) { 918 // arguments.value = matcher.getInfo(); 919 arguments.value = firstInfo.value; 920 } 921 if (matcherFound != null) { 922 matcherFound.value = matcher; 923 } 924 return entry.getValue(); 925 } else if (failures != null) { 926 int failPoint = matcher.getFailPoint(source); 927 String show = 928 source.substring(0, failPoint) 929 + "☹" 930 + source.substring(failPoint) 931 + "\t" 932 + matcher.toString(); 933 failures.add(show); 934 } 935 } 936 } 937 } 938 939 // not really necessary, but makes debugging easier. 940 if (arguments != null) { 941 arguments.value = null; 942 } 943 if (matcherFound != null) { 944 matcherFound.value = null; 945 } 946 return null; 947 } 948 949 /** 950 * Returns all results of a regex lookup, with the group arguments that matched. Supplies 951 * failure cases for debugging. 952 * 953 * @param source 954 * @param context TODO 955 * @return 956 */ getAll( String source, Object context, List<Finder> matcherList, List<String> failures)957 public List<T> getAll( 958 String source, Object context, List<Finder> matcherList, List<String> failures) { 959 if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) { 960 Output<String[]> firstInfo = new Output<>(); 961 // List<T> matches = SPEntries.getAll(source, context, 962 // matcherList,firstInfo); 963 List<T> matches = storage.getAll(source, context, matcherList, firstInfo); 964 if (matches != null) { 965 return matches; 966 } 967 968 if (failures != null) { 969 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 970 // for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) { 971 Finder matcher = entry.getKey(); 972 synchronized (matcher) { 973 int failPoint = matcher.getFailPoint(source); 974 String show = 975 source.substring(0, failPoint) 976 + "☹" 977 + source.substring(failPoint) 978 + "\t" 979 + matcher.toString(); 980 failures.add(show); 981 } 982 } 983 } 984 return null; 985 } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) { 986 Output<String[]> info = new Output<>(); 987 // List<T> matches = RTEntries.getAll(source, context, matcherList,info); 988 List<T> matches = storage.getAll(source, context, matcherList, info); 989 if (matches != null) { 990 return matches; 991 } 992 993 if (failures != null) { 994 for (Map.Entry<Finder, T> entry : storage.entrySet()) { 995 // for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) { 996 Finder matcher = entry.getKey(); 997 synchronized (matcher) { 998 int failPoint = matcher.getFailPoint(source); 999 String show = 1000 source.substring(0, failPoint) 1001 + "☹" 1002 + source.substring(failPoint) 1003 + "\t" 1004 + matcher.toString(); 1005 failures.add(show); 1006 } 1007 } 1008 } 1009 return null; 1010 } else { 1011 // slow but versatile implementation 1012 List<T> matches = new ArrayList<>(); 1013 for (Map.Entry<Finder, T> entry : MEntries.entrySet()) { 1014 Finder matcher = entry.getKey(); 1015 Info firstInfo = new Info(); 1016 if (matcher.find(source, context, firstInfo)) { 1017 if (matcherList != null) { 1018 matcherList.add(matcher); 1019 } 1020 matches.add(entry.getValue()); 1021 } else if (failures != null) { 1022 int failPoint = matcher.getFailPoint(source); 1023 String show = 1024 source.substring(0, failPoint) 1025 + "☹" 1026 + source.substring(failPoint) 1027 + "\t" 1028 + matcher.toString(); 1029 failures.add(show); 1030 } 1031 } 1032 return matches; 1033 } 1034 } 1035 1036 /** 1037 * Find the patterns that haven't been matched. Requires the caller to collect the patterns that 1038 * have, using matcherFound. 1039 * 1040 * @return outputUnmatched 1041 */ getUnmatchedPatterns( Set<String> matched, Map<String, T> outputUnmatched)1042 public Map<String, T> getUnmatchedPatterns( 1043 Set<String> matched, Map<String, T> outputUnmatched) { 1044 outputUnmatched.clear(); 1045 1046 Set<Map.Entry<Finder, T>> entrySet; 1047 switch (_lookupType) { 1048 case STAR_PATTERN_LOOKUP: 1049 // entrySet = SPEntries.entrySet(); 1050 entrySet = storage.entrySet(); 1051 break; 1052 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1053 // entrySet = RTEntries.entrySet(); 1054 entrySet = storage.entrySet(); 1055 break; 1056 default: 1057 entrySet = MEntries.entrySet(); 1058 break; 1059 } 1060 1061 for (Map.Entry<Finder, T> entry : entrySet) { 1062 String pattern = entry.getKey().toString(); 1063 if (!matched.contains(pattern)) { 1064 outputUnmatched.put(pattern, entry.getValue()); 1065 } 1066 } 1067 return outputUnmatched; 1068 } 1069 1070 /** 1071 * Create a RegexLookup. It will take a list of key/value pairs, where the key is a regex 1072 * pattern and the value is what gets returned. 1073 * 1074 * @param patternTransform Used to transform string patterns into a Pattern. Can be used to 1075 * process replacements (like variables). 1076 * @param valueTransform Used to transform string values into another form. 1077 * @param valueMerger Used to merge values with the same key. 1078 */ of( Transform<String, Finder> patternTransform, Transform<String, T> valueTransform, Merger<T> valueMerger)1079 public static <T, U> RegexLookup<T> of( 1080 Transform<String, Finder> patternTransform, 1081 Transform<String, T> valueTransform, 1082 Merger<T> valueMerger) { 1083 return new RegexLookup<T>() 1084 .setPatternTransform(patternTransform) 1085 .setValueTransform(valueTransform) 1086 .setValueMerger(valueMerger); 1087 } 1088 of(Transform<String, T> valueTransform)1089 public static <T> RegexLookup<T> of(Transform<String, T> valueTransform) { 1090 return new RegexLookup<T>() 1091 .setValueTransform(valueTransform) 1092 .setPatternTransform(RegexFinderTransform); 1093 } 1094 of()1095 public static <T> RegexLookup<T> of() { 1096 return new RegexLookup<T>().setPatternTransform(RegexFinderTransform); 1097 } 1098 1099 /** 1100 * @deprecated Use {@link #of(LookupType,Transform)} instead 1101 */ 1102 @Deprecated of(LookupType lookupType)1103 public static <T> RegexLookup<T> of(LookupType lookupType) { 1104 return of(lookupType, RegexFinderTransform); 1105 } 1106 of( LookupType lookupType, Transform<String, RegexFinder> transform)1107 public static <T> RegexLookup<T> of( 1108 LookupType lookupType, Transform<String, RegexFinder> transform) { 1109 return new RegexLookup<T>(lookupType).setPatternTransform(transform); 1110 } 1111 setValueTransform(Transform<String, ? extends T> valueTransform)1112 public RegexLookup<T> setValueTransform(Transform<String, ? extends T> valueTransform) { 1113 this.valueTransform = valueTransform; 1114 return this; 1115 } 1116 setPatternTransform( Transform<String, ? extends Finder> patternTransform)1117 public RegexLookup<T> setPatternTransform( 1118 Transform<String, ? extends Finder> patternTransform) { 1119 this.patternTransform = patternTransform != null ? patternTransform : RegexFinderTransform; 1120 return this; 1121 } 1122 setValueMerger(Merger<T> valueMerger)1123 public RegexLookup<T> setValueMerger(Merger<T> valueMerger) { 1124 this.valueMerger = valueMerger; 1125 return this; 1126 } 1127 1128 /** 1129 * Load a RegexLookup from a file. Opens a file relative to the class, and adds lines separated 1130 * by "; ". Lines starting with # are comments. 1131 */ loadFromFile(Class<?> baseClass, String filename)1132 public RegexLookup<T> loadFromFile(Class<?> baseClass, String filename) { 1133 RegexFileParser parser = setupRegexFileParser(); 1134 parser.parse(baseClass, filename); 1135 return this; 1136 } 1137 1138 /** Load a RegexLookup from a list of strings, as if they came from a file. See loadFromFile */ loadFromString(String source)1139 public RegexLookup<T> loadFromString(String source) { 1140 RegexFileParser parser = setupRegexFileParser(); 1141 parser.parseStrings("string", Splitter.on('\n').split(source)); 1142 return this; 1143 } 1144 setupRegexFileParser()1145 private RegexFileParser setupRegexFileParser() { 1146 RegexFileParser parser = new RegexFileParser(); 1147 parser.setLineParser( 1148 new RegexLineParser() { 1149 @Override 1150 public void parse(String line) { 1151 int pos = line.indexOf(RegexLookup.SEPARATOR); 1152 if (pos < 0) { 1153 throw new IllegalArgumentException( 1154 "Illegal line, doesn't contain semicolon: " + line); 1155 } 1156 String source = line.substring(0, pos).trim(); 1157 String target = line.substring(pos + 2).trim(); 1158 try { 1159 @SuppressWarnings("unchecked") 1160 T result = 1161 valueTransform == null 1162 ? (T) target 1163 : valueTransform.transform(target); 1164 add(source, result); 1165 } catch (Exception e) { 1166 throw new IllegalArgumentException( 1167 "Failed to add <" + source + "> => <" + target + ">", e); 1168 } 1169 } 1170 }); 1171 parser.setVariableProcessor( 1172 new VariableProcessor() { 1173 @Override 1174 public void add(String variable, String variableName) { 1175 addVariable(variable, variableName); 1176 } 1177 1178 @Override 1179 public String replace(String str) { 1180 return variables.replace(str); 1181 } 1182 }); 1183 return parser; 1184 } 1185 addVariable(String variable, String variableValue)1186 public RegexLookup<T> addVariable(String variable, String variableValue) { 1187 if (!variable.startsWith("%")) { 1188 throw new IllegalArgumentException("Variables must start with %"); 1189 } 1190 variables.add(variable.trim(), variableValue.trim()); 1191 return this; 1192 } 1193 1194 /** 1195 * Add a pattern/value pair. 1196 * 1197 * @param stringPattern regex to match 1198 * @param target return type on match 1199 * @return this, for chaining 1200 */ add(String stringPattern, T target)1201 public RegexLookup<T> add(String stringPattern, T target) { 1202 if (stringPattern.contains("%")) { 1203 stringPattern = variables.replace(stringPattern); 1204 } 1205 Finder pattern0 = patternTransform.transform(stringPattern); 1206 return add(pattern0, target); 1207 } 1208 1209 /** 1210 * Add a pattern/value pair. 1211 * 1212 * @param pattern 1213 * @param target return type on match 1214 * @return this, for chaining 1215 */ add(Finder pattern, T target)1216 public RegexLookup<T> add(Finder pattern, T target) { 1217 if (!allowNull && target == null) { 1218 throw new NullPointerException("null disallowed, unless allowNull(true) is called."); 1219 } 1220 1221 T old; 1222 switch (_lookupType) { 1223 case STAR_PATTERN_LOOKUP: // fallthrough 1224 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1225 old = storage.get(pattern); 1226 // old = SPEntries.get(pattern); 1227 break; 1228 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1229 // old = storage.get(pattern); 1230 // old = RTEntries.get(pattern); 1231 // break; 1232 default: 1233 old = MEntries.get(pattern); 1234 break; 1235 } 1236 1237 if (old == null) { 1238 switch (_lookupType) { 1239 case STAR_PATTERN_LOOKUP: // fallthrough 1240 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1241 storage.put(pattern, target); 1242 // SPEntries.put(pattern, target); 1243 break; 1244 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1245 // storage.put(pattern, target); 1246 // RTEntries.put(pattern, target); 1247 // break; 1248 default: 1249 MEntries.put(pattern, target); 1250 break; 1251 } 1252 } else if (valueMerger != null) { 1253 valueMerger.merge(target, old); 1254 } else { 1255 throw new IllegalArgumentException( 1256 "Duplicate matcher without Merger defined " 1257 + pattern 1258 + "; old: " 1259 + old 1260 + "; new: " 1261 + target); 1262 } 1263 return this; 1264 } 1265 1266 @Override iterator()1267 public Iterator<Map.Entry<Finder, T>> iterator() { 1268 switch (_lookupType) { 1269 case STAR_PATTERN_LOOKUP: // fall through 1270 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1271 // return 1272 // Collections.unmodifiableCollection(SPEntries.entrySet()).iterator(); 1273 return Collections.unmodifiableCollection(storage.entrySet()).iterator(); 1274 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1275 // return 1276 // Collections.unmodifiableCollection(RTEntries.entrySet()).iterator(); 1277 // return 1278 // Collections.unmodifiableCollection(storage.entrySet()).iterator(); 1279 default: 1280 return Collections.unmodifiableCollection(MEntries.entrySet()).iterator(); 1281 } 1282 } 1283 replace(String lookup, String... arguments)1284 public static String replace(String lookup, String... arguments) { 1285 StringBuilder result = new StringBuilder(); 1286 int last = 0; 1287 while (true) { 1288 int pos = lookup.indexOf("$", last); 1289 if (pos < 0) { 1290 result.append(lookup.substring(last, lookup.length())); 1291 break; 1292 } 1293 result.append(lookup.substring(last, pos)); 1294 final int arg = lookup.charAt(pos + 1) - '0'; 1295 try { 1296 result.append(arguments[arg]); 1297 } catch (Exception e) { 1298 throw new IllegalArgumentException( 1299 "Replacing $" 1300 + arg 1301 + " in <" 1302 + lookup 1303 + ">, but too few arguments supplied."); 1304 } 1305 last = pos + 2; 1306 } 1307 return result.toString(); 1308 } 1309 1310 /** 1311 * @return the number of entries 1312 */ size()1313 public int size() { 1314 switch (_lookupType) { 1315 case STAR_PATTERN_LOOKUP: // fall through 1316 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1317 // return SPEntries.size(); 1318 return storage.size(); 1319 // case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP: 1320 // return storage.size(); 1321 // return RTEntries.size(); 1322 default: 1323 return MEntries.size(); 1324 } 1325 } 1326 1327 @Override toString()1328 public String toString() { 1329 return storage.toString(); 1330 } 1331 } 1332