xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/RegexLookup.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.util;
2 
3 import com.google.common.base.Splitter;
4 import com.ibm.icu.text.Transform;
5 import com.ibm.icu.util.Output;
6 import java.util.ArrayList;
7 import java.util.Collections;
8 import java.util.Comparator;
9 import java.util.HashMap;
10 import java.util.Iterator;
11 import java.util.LinkedHashMap;
12 import java.util.List;
13 import java.util.Map;
14 import java.util.Map.Entry;
15 import java.util.Set;
16 import java.util.TreeSet;
17 import java.util.regex.Matcher;
18 import java.util.regex.Pattern;
19 import org.unicode.cldr.util.CldrUtility.VariableReplacer;
20 import org.unicode.cldr.util.RegexFileParser.RegexLineParser;
21 import org.unicode.cldr.util.RegexFileParser.VariableProcessor;
22 import org.unicode.cldr.util.RegexLookup.Finder;
23 import org.unicode.cldr.util.RegexLookup.Finder.Info;
24 
25 /**
26  * Lookup items according to a set of regex patterns. Returns the value according to the first
27  * pattern that matches. Not thread-safe.
28  *
29  * @param <T>
30  */
31 public class RegexLookup<T> implements Iterable<Map.Entry<Finder, T>> {
32     protected static final String SEPARATOR = "; ";
33     private VariableReplacer variables = new VariableReplacer();
34     private StorageInterfaceBase<T> storage;
35     private Map<Finder, T> MEntries;
36     private Transform<String, ? extends Finder> patternTransform = RegexFinderTransform;
37     private Transform<String, ? extends T> valueTransform;
38     private Merger<T> valueMerger;
39     private final boolean allowNull = false;
40     private static PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*");
41 
42     public enum LookupType {
43         STAR_PATTERN_LOOKUP,
44         OPTIMIZED_DIRECTORY_PATTERN_LOOKUP,
45         STANDARD
46     }
47 
48     private LookupType _lookupType;
49 
50     /*
51      * STAR_PATTERN_LOOKUP
52      *
53      * When true, RegexLookup can match regex's even faster than the OPTIMIZED_DIRECTORY_PATTERN_LOOKUP below.
54      * It requires some additional structure, such that the only regular expression constructs such as (a|b) occur
55      * in attributes, so that the star pattern for a given path can match the star pattern of a given regular expression,
56      * thus greatly reducing the number of actual regex matches that need to occur.
57      */
58 
59     /*
60      * OPTIMIZED_DIRECTORY_PATTERN_LOOKUP
61      *
62      * When true, RegexLookup can match regex's in O(log(n)) time, where n is the number of regex's stored.
63      * This is provided that all regex patterns follow a directory based format and all directories are separated by a forward slash '/'
64      * for example: ^//ldml/dates/calendars/calendar[@type="([^"']++)"]/alias[@source="locale"][@path="../calendar[@type='([^"']++)']"]
65      *
66      * When false, RegexLookup will match regex's in O(n) time, where n is the number of regex's stored.
67      * However regex's no longer need to follow any specific format (Slower but more versatile).
68      */
69 
RegexLookup(LookupType type)70     public RegexLookup(LookupType type) {
71         _lookupType = type;
72         switch (type) {
73             case STAR_PATTERN_LOOKUP:
74                 //   SPEntries = new StarPatternMap<T>();
75                 storage = new StarPatternMap<>();
76                 break;
77             case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
78                 //   RTEntries = new RegexTree<T>();
79                 storage = new RegexTree<>();
80                 break;
81             default:
82                 MEntries = new LinkedHashMap<>();
83                 break;
84         }
85     }
86 
RegexLookup()87     public RegexLookup() {
88         this(LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP);
89         //        _lookupType = RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP;
90         //        RTEntries = new RegexTree<T>();
91     }
92 
93     public abstract static class Finder {
94         public static class Info {
95             public String[] value;
96         }
97 
98         //   abstract public String[] getInfo();
99 
100         // abstract public boolean find(String item, Object context);
101 
find(String item, Object context, Info info)102         public abstract boolean find(String item, Object context, Info info);
103 
matches(String item, Object context, Info info)104         public abstract boolean matches(String item, Object context, Info info);
105 
getFailPoint(String source)106         public int getFailPoint(String source) {
107             return -1;
108         }
109         // must also define toString
110     }
111 
112     public static class RegexFinder extends Finder {
113         /** The matcher used by this RegexFinder */
114         private final Matcher matcher;
115 
116         /** The Pattern used by this RegexFinder */
117         protected final Pattern pattern;
118 
RegexFinder(String pattern)119         public RegexFinder(String pattern) {
120             this.pattern = Pattern.compile(pattern, Pattern.COMMENTS);
121             matcher = this.pattern.matcher("");
122         }
123 
124         /**
125          * Call Matches on the pattern, returning additional information in the Info field, if it is
126          * non null
127          */
128         @Override
matches(String item, Object context, Info info)129         public boolean matches(String item, Object context, Info info) {
130             synchronized (matcher) {
131                 try {
132                     boolean result = matcher.reset(item).matches();
133                     extractInfo(info, result);
134                     return result;
135                 } catch (StringIndexOutOfBoundsException e) {
136                     // We don't know what causes this error (cldrbug 5051) so
137                     // make the exception message more detailed.
138                     throw new IllegalArgumentException(
139                             "Matching error caused by pattern: ["
140                                     + matcher.toString()
141                                     + "] on text: ["
142                                     + item
143                                     + "]",
144                             e);
145                 }
146             }
147         }
148 
149         /**
150          * Extract match related information into the info field, if result is true, and info is not
151          * null.
152          *
153          * @param info
154          * @param result
155          */
extractInfo(Info info, boolean result)156         private void extractInfo(Info info, boolean result) {
157             if (result && info != null) {
158                 int limit = matcher.groupCount() + 1;
159                 String[] value = new String[limit];
160                 for (int i = 0; i < limit; ++i) {
161                     value[i] = matcher.group(i);
162                 }
163                 info.value = value;
164             }
165         }
166 
167         /**
168          * Call find() on the pattern, returning additional information in the info field, if it is
169          * non-null
170          */
171         @Override
find(String item, Object context, Info info)172         public boolean find(String item, Object context, Info info) {
173             synchronized (matcher) {
174                 try {
175                     boolean result = matcher.reset(item).find();
176                     extractInfo(info, result);
177                     return result;
178                 } catch (StringIndexOutOfBoundsException e) {
179                     // We don't know what causes this error (cldrbug 5051) so
180                     // make the exception message more detailed.
181                     throw new IllegalArgumentException(
182                             "Matching error caused by pattern: ["
183                                     + matcher.toString()
184                                     + "] on text: ["
185                                     + item
186                                     + "]",
187                             e);
188                 }
189             }
190         }
191 
192         @Override
toString()193         public String toString() {
194             // Use pattern here, to avoid having to synchronize on matcher
195             return pattern.pattern();
196         }
197 
198         @Override
equals(Object obj)199         public boolean equals(Object obj) {
200             if (obj == null) {
201                 return false;
202             } else {
203                 return toString().equals(obj.toString());
204             }
205         }
206 
207         @Override
hashCode()208         public int hashCode() {
209             return toString().hashCode();
210         }
211 
212         @Override
getFailPoint(String source)213         public int getFailPoint(String source) {
214             synchronized (matcher) {
215                 return RegexUtilities.findMismatch(matcher, source);
216             }
217         }
218     }
219 
220     private static interface StorageInterfaceBase<T> {
entrySet()221         Set<Entry<Finder, T>> entrySet();
222 
get(Finder finder)223         T get(Finder finder);
224 
get( String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)225         T get(
226                 String pattern,
227                 Object context,
228                 Output<String[]> arguments,
229                 Output<Finder> matcherFound);
230 
getAll( String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)231         List<T> getAll(
232                 String pattern,
233                 Object context,
234                 List<Finder> matcherList,
235                 Output<String[]> firstInfo);
236 
put(Finder pattern, T value)237         void put(Finder pattern, T value);
238 
size()239         int size();
240     }
241 
242     //    private static class FinderWithInfo {
243     //        Finder _finder;
244     //        Info _info;
245     //        public FinderWithInfo(Finder finder,Info info) {
246     //            _info=info;
247     //            _finder=finder;
248     //        }
249     //    }
250     private static class RegexTree<T> implements StorageInterfaceBase<T> {
251         private RTNode root;
252         private int _size;
253         private RTNodeRankComparator rankComparator = new RTNodeRankComparator();
254 
RegexTree()255         public RegexTree() {
256             root = new RTNode("", null);
257             _size = 0;
258         }
259 
260         @Override
size()261         public int size() {
262             return _size;
263         }
264 
265         @Override
put(Finder pattern, T value)266         public void put(Finder pattern, T value) {
267             root.put(new RTNode(pattern, value, _size));
268             _size++;
269         }
270 
271         @Override
get(Finder finder)272         public T get(Finder finder) {
273             return root.get(finder);
274         }
275 
276         @Override
getAll( String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)277         public List<T> getAll(
278                 String pattern,
279                 Object context,
280                 List<Finder> matcherList,
281                 Output<String[]> firstInfo) {
282             List<RTNode> list = new ArrayList<>();
283             List<T> retList = new ArrayList<>();
284 
285             root.addToList(pattern, context, list);
286             Collections.sort(list, rankComparator);
287 
288             boolean isFirst = true;
289             if (firstInfo != null && !list.isEmpty()) {
290                 RTNode firstNode = list.get(0);
291                 if (firstNode._info != null) {
292                     firstInfo.value = firstNode._info.value;
293                 }
294             }
295 
296             for (RTNode n : list) {
297                 if (isFirst) {
298                     firstInfo.value = n._info.value;
299                     isFirst = false;
300                 }
301             }
302 
303             for (RTNode n : list) {
304                 retList.add(n._val);
305                 if (matcherList != null) {
306                     matcherList.add(n._finder);
307                 }
308             }
309 
310             return retList;
311         }
312 
313         @Override
get( String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)314         public T get(
315                 String pattern,
316                 Object context,
317                 Output<String[]> arguments,
318                 Output<Finder> matcherFound) {
319             List<Finder> matcherList = new ArrayList<>();
320             Output<String[]> firstInfo = new Output<>();
321             List<T> matches =
322                     getAll(
323                             pattern,
324                             context,
325                             matcherList,
326                             firstInfo); // need to get whole list because we want value that was
327             // entered first
328             if (arguments != null) {
329                 //               arguments.value = (matcherList.size() > 0) ?
330                 // matcherList.get(0).getInfo() : null;
331                 arguments.value = firstInfo.value;
332                 //               arguments.value = (matcherList.size() > 0) ?
333                 // matcherList.get(0).getInfo() : null;
334                 arguments.value = firstInfo.value;
335             }
336             if (matcherFound != null) {
337                 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null;
338             }
339             return (matches.size() > 0) ? matches.get(0) : null;
340         }
341 
342         @Override
entrySet()343         public Set<Entry<Finder, T>> entrySet() {
344             LinkedHashMap<Finder, T> ret = new LinkedHashMap<>();
345             TreeSet<RTNode> s = new TreeSet<>(rankComparator);
346             root.addToEntrySet(s);
347 
348             for (RTNode n : s) {
349                 ret.put(n._finder, n._val);
350             }
351 
352             return ret.entrySet();
353         }
354 
355         public class RTNode extends NodeBase<T> {
356             //            Finder _finder;
357             //            T _val;
358             List<RTNode> _children = new ArrayList<>();
359             int _rank = -1; // rank -1 means the node was not inserted, but only used for structural
360             // purposes
361 
362             // constructor for regular nodes with a Finder
RTNode(Finder finder, T val, int rank)363             public RTNode(Finder finder, T val, int rank) {
364                 super(finder, val);
365                 //                _finder = finder;
366                 //                _val = val;
367                 _rank = rank;
368             }
369 
370             // constructors for nodes without a Finder
RTNode(String key, T val)371             public RTNode(String key, T val) {
372                 super(new RegexFinder(key), val);
373                 //                _finder = new RegexFinder(key);
374                 //                _val = val;
375                 //                _rank = -1;
376                 _info = new Info();
377             }
378 
put(RTNode node)379             public void put(RTNode node) {
380                 if (_children.size() == 0) {
381                     _children.add(node);
382                 } else {
383                     String maxSimilarChars =
384                             ""; // most similar characters up to the last similar directory
385                     int insertIndex = 0;
386                     for (int i = 0; i < _children.size(); i++) {
387                         RTNode child = _children.get(i);
388                         String childFinderPattern = child._finder.toString();
389                         if (childFinderPattern.length() > 0
390                                 && childFinderPattern.charAt(childFinderPattern.length() - 1)
391                                         == '$') {
392                             childFinderPattern =
393                                     childFinderPattern.substring(
394                                             0,
395                                             childFinderPattern.length()
396                                                     - 1); // takes into account the added "$"
397                         } else if (child._rank == -1) {
398                             childFinderPattern =
399                                     childFinderPattern.substring(
400                                             0,
401                                             childFinderPattern.length()
402                                                     - 2); // takes into account the added ".*"
403                         }
404 
405                         // check if child has the same Finder as node to insert, then replace it
406                         if (node._finder.equals(child._finder)) {
407                             child._finder = node._finder;
408                             child._val = node._val;
409                             if (child._rank == -1) {
410                                 child._rank = node._rank;
411                             } else {
412                                 _size--;
413                             }
414                             return;
415                         }
416 
417                         // check if child is the parent of node
418                         if (child._rank == -1
419                                 && node._finder.toString().startsWith(childFinderPattern)) {
420                             child.put(node);
421                             return;
422                         }
423 
424                         // if not parent then check if candidate for most similar RTNode
425                         String gcp =
426                                 greatestCommonPrefix(childFinderPattern, node._finder.toString());
427                         gcp = removeExtraChars(gcp);
428                         if (gcp.length() > maxSimilarChars.length()) {
429                             maxSimilarChars = gcp;
430                             insertIndex = i;
431                         }
432                     }
433 
434                     String finderPattern = this._finder.toString();
435                     if (finderPattern.length() > 0
436                             && finderPattern.charAt(finderPattern.length() - 1) == '$') {
437                         finderPattern =
438                                 finderPattern.substring(
439                                         0,
440                                         finderPattern.length()
441                                                 - 1); // takes into account the added "$"
442                     } else if (!(finderPattern.equals("")) && this._rank == -1) {
443                         finderPattern =
444                                 finderPattern.substring(
445                                         0,
446                                         finderPattern.length()
447                                                 - 2); // takes into account the added ".*"
448                     }
449 
450                     if ((maxSimilarChars)
451                             .equals(finderPattern)) { // add under this if no similar children
452                         _children.add(node);
453                     } else {
454                         // create the common parent of the chosen candidate above and node, then add
455                         // to the insert index
456                         RTNode newParent = new RTNode(maxSimilarChars + ".*", null);
457                         newParent._children.add(this._children.get(insertIndex));
458                         newParent._children.add(node);
459                         this._children.remove(insertIndex);
460                         this._children.add(insertIndex, newParent);
461                     }
462                 }
463             }
464 
465             // takes a string in a directory regex form and removes all characters up to the last
466             // full directory
removeExtraChars(String input)467             private String removeExtraChars(String input) {
468                 String ret = input.substring(0, Math.max(0, input.lastIndexOf('/')));
469                 while ((ret.lastIndexOf('(') != -1 && ret.lastIndexOf('(') > ret.lastIndexOf(')'))
470                         || (ret.lastIndexOf('[') != -1
471                                 && ret.lastIndexOf('[') > ret.lastIndexOf(']'))
472                         || (ret.lastIndexOf('{') != -1
473                                 && ret.lastIndexOf('{') > ret.lastIndexOf('}'))) {
474                     ret = ret.substring(0, Math.max(0, ret.lastIndexOf('/')));
475                 }
476                 return ret;
477             }
478 
479             // traverse tree to get value
get(Finder finder)480             public T get(Finder finder) {
481                 T ret = null; // return value
482 
483                 if (_children.size() == 0) {
484                     return null;
485                 } else {
486                     for (RTNode child : _children) {
487 
488                         // check if child is the node
489                         if (child._rank != -1 && finder.equals(child._finder)) {
490                             return child._val;
491                         }
492 
493                         String childFinderPattern = child._finder.toString();
494 
495                         if (childFinderPattern.length() > 0
496                                 && childFinderPattern.charAt(childFinderPattern.length() - 1)
497                                         == '$') {
498                             childFinderPattern =
499                                     childFinderPattern.substring(
500                                             0,
501                                             childFinderPattern.length()
502                                                     - 1); // takes into account the added "$"
503                         } else if (child._rank == -1) {
504                             childFinderPattern =
505                                     childFinderPattern.substring(
506                                             0,
507                                             childFinderPattern.length()
508                                                     - 2); // takes into account the added ".*"
509                         }
510 
511                         // check if child is the parent of node
512                         if (finder.toString().startsWith(childFinderPattern)) {
513                             ret = child.get(finder);
514                             if (ret != null) {
515                                 break;
516                             }
517                         }
518                     }
519 
520                     return ret;
521                 }
522             }
523 
524             // traverse tree to get an entry set
addToEntrySet(TreeSet<RTNode> s)525             public void addToEntrySet(TreeSet<RTNode> s) {
526                 if (_children.size() == 0) {
527                     return;
528                 } else {
529                     for (RTNode child : _children) {
530                         if (child._rank != -1) {
531                             s.add(child);
532                         }
533                         child.addToEntrySet(s);
534                     }
535                 }
536             }
537 
538             // traverse tree to get list of all values who's key matcher matches pattern
addToList(String pattern, Object context, List<RTNode> list)539             public void addToList(String pattern, Object context, List<RTNode> list) {
540                 if (_children.size() == 0) {
541                     return;
542                 } else {
543                     Info firstInfo = new Info();
544                     for (RTNode child : _children) {
545                         boolean found;
546                         synchronized (child._finder) {
547                             found = child._finder.find(pattern, context, firstInfo);
548                         }
549 
550                         // check if child matches pattern
551                         if (found) {
552                             if (child._rank != -1) {
553                                 list.add(child);
554                             }
555                             // if this node's info value is unset, set it to the result of the
556                             // lookup
557                             //                            if (child._info!=null &&
558                             // child._info.value==null) {
559                             if (child._info != null) {
560                                 // set the value to the result of the last find
561                                 child._info.value = firstInfo.value;
562                             } else {
563                                 // for some reason, child._info is null, so simply initialize it.
564                                 child._info = new Info();
565                                 // set the value to the result of the last find
566                                 child._info.value = firstInfo.value;
567                             }
568                             // check if child is the parent of node then enter that node
569                             child.addToList(pattern, context, list);
570                         }
571                     }
572                 }
573             }
574 
575             @Override
toString()576             public String toString() {
577                 return toString("", new StringBuilder()).toString();
578             }
579 
toString(String prefix, StringBuilder result)580             private StringBuilder toString(String prefix, StringBuilder result) {
581                 result.append(prefix).append(this._finder.toString()).append("\n");
582                 for (RegexTree<T>.RTNode child : _children) {
583                     child.toString(prefix + "\t", result);
584                 }
585                 return result;
586             }
587 
588             // greatest common prefix between two strings
greatestCommonPrefix(String a, String b)589             public String greatestCommonPrefix(String a, String b) {
590                 int minLength = Math.min(a.length(), b.length());
591                 for (int i = 0; i < minLength; i++) {
592                     if (a.charAt(i) != b.charAt(i)) {
593                         return a.substring(0, i);
594                     }
595                 }
596                 return a.substring(0, minLength);
597             }
598         }
599 
600         class RTNodeRankComparator implements Comparator<RTNode> {
601             @Override
compare(RTNode a, RTNode b)602             public int compare(RTNode a, RTNode b) {
603                 if (a == b) {
604                     return 0;
605                 } else if (a == null) {
606                     return -1;
607                 } else if (b == null) {
608                     return 1;
609                 } else if (a._rank == b._rank) {
610                     return 0;
611                 } else if (a._rank > b._rank) {
612                     return 1;
613                 } else {
614                     return -1;
615                 }
616             }
617         }
618 
619         @Override
toString()620         public String toString() {
621             return root.toString();
622         }
623     }
624 
625     private static class StarPatternMap<T> implements StorageInterfaceBase<T> {
626         private Map<String, List<SPNode>> _spmap;
627         private int _size = 0;
628 
StarPatternMap()629         public StarPatternMap() {
630             _spmap = new HashMap<>();
631             //            _size = 0;
632         }
633 
634         @Override
size()635         public int size() {
636             return _size;
637         }
638 
639         @Override
put(Finder pattern, T value)640         public void put(Finder pattern, T value) {
641             // System.out.println("pattern.toString() is => "+pattern.toString());
642             String starPattern =
643                     pathStarrer.transform2(
644                             pattern.toString().replaceAll("\\(\\[\\^\"\\]\\*\\)", "*"));
645             // System.out.println("Putting => "+starPattern);
646             List<SPNode> candidates = _spmap.get(starPattern);
647             if (candidates == null) {
648                 candidates = new ArrayList<>();
649             }
650             SPNode newNode = new SPNode(pattern, value);
651             candidates.add(newNode);
652             _spmap.put(starPattern, candidates);
653             _size++;
654         }
655 
656         @Override
get(Finder finder)657         public T get(Finder finder) {
658             String starPattern = pathStarrer.transform2(finder.toString());
659             List<SPNode> candidates = _spmap.get(starPattern);
660             if (candidates == null) {
661                 return null;
662             }
663             for (SPNode cand : candidates) {
664                 if (cand._finder.equals(finder)) {
665                     return cand._val;
666                 }
667             }
668             return null;
669         }
670 
671         @Override
getAll( String pattern, Object context, List<Finder> matcherList, Output<String[]> firstInfo)672         public List<T> getAll(
673                 String pattern,
674                 Object context,
675                 List<Finder> matcherList,
676                 Output<String[]> firstInfo) {
677             List<SPNode> list = new ArrayList<>();
678             List<T> retList = new ArrayList<>();
679 
680             String starPattern = pathStarrer.transform2(pattern);
681             List<SPNode> candidates = _spmap.get(starPattern);
682             if (candidates == null) {
683                 return retList;
684             }
685             for (SPNode cand : candidates) {
686                 Info info = new Info();
687                 if (cand._finder.find(pattern, context, info)) {
688                     list.add(cand);
689                     if (firstInfo != null) {
690                         firstInfo.value = info.value;
691                     }
692                 }
693             }
694 
695             for (SPNode n : list) {
696                 retList.add(n._val);
697                 if (matcherList != null) {
698                     matcherList.add(n._finder);
699                 }
700             }
701 
702             return retList;
703         }
704 
705         @Override
get( String pattern, Object context, Output<String[]> arguments, Output<Finder> matcherFound)706         public T get(
707                 String pattern,
708                 Object context,
709                 Output<String[]> arguments,
710                 Output<Finder> matcherFound) {
711             List<Finder> matcherList = new ArrayList<>();
712             Output<String[]> firstInfo = new Output<>();
713             List<T> matches =
714                     getAll(
715                             pattern,
716                             context,
717                             matcherList,
718                             firstInfo); // need to get whole list because we want value that was
719             // entered first
720             if (arguments != null && firstInfo.value != null) {
721                 //                arguments.value = (matcherList.size() > 0) ?
722                 // matcherList.get(0).getInfo() : null;
723                 arguments.value = matcherList.isEmpty() ? null : firstInfo.value;
724             }
725             if (matcherFound != null) {
726                 matcherFound.value = (matcherList.size() > 0) ? matcherList.get(0) : null;
727             }
728             return (matches.size() > 0) ? matches.get(0) : null;
729         }
730 
731         @Override
entrySet()732         public Set<Entry<Finder, T>> entrySet() {
733             LinkedHashMap<Finder, T> ret = new LinkedHashMap<>();
734 
735             for (Entry<String, List<SPNode>> entry : _spmap.entrySet()) {
736                 List<SPNode> candidates = entry.getValue();
737                 for (SPNode node : candidates) {
738                     ret.put(node._finder, node._val);
739                 }
740             }
741             return ret.entrySet();
742         }
743 
744         /**
745          * A Node of a StarPatternMap
746          *
747          * @author ribnitz
748          */
749         public class SPNode extends NodeBase<T> {
750             //            Finder _finder;
751             //            T _val;
752 
SPNode(Finder finder, T val)753             public SPNode(Finder finder, T val) {
754                 //                _finder = finder;
755                 //                _val = val;
756                 super(finder, val);
757             }
758 
759             @Override
toString()760             public String toString() {
761                 return this._finder.toString();
762             }
763         }
764     }
765 
766     /**
767      * The basic class of an information node, featuring a Finder, a value and an Info
768      *
769      * @author ribnitz
770      * @param <T>
771      */
772     private static class NodeBase<T> {
773         Finder _finder;
774         T _val;
775         Info _info = new Info();
776 
NodeBase(Finder finder, T value)777         public NodeBase(Finder finder, T value) {
778             this._finder = finder;
779             this._val = value;
780         }
781     }
782 
783     public static Transform<String, RegexFinder> RegexFinderTransform =
784             new Transform<String, RegexFinder>() {
785                 @Override
786                 public RegexFinder transform(String source) {
787                     return new RegexFinder(source);
788                 }
789             };
790 
791     /**
792      * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before
793      * //. To work better with XPaths.
794      */
795     public static Transform<String, RegexFinder> RegexFinderTransformPath =
796             new Transform<String, RegexFinder>() {
797                 @Override
798                 public RegexFinder transform(String source) {
799                     final String newSource = source.replace("[@", "\\[@");
800                     return new RegexFinder(
801                             newSource.startsWith("//") ? "^" + newSource : newSource);
802                 }
803             };
804 
805     /**
806      * The same as a RegexFinderTransform, except that [@ is changed to \[@, and ^ is added before
807      * //, and ' is changed to ". To work better with XPaths.
808      */
809     public static Transform<String, RegexFinder> RegexFinderTransformPath2 =
810             new Transform<String, RegexFinder>() {
811                 @Override
812                 public RegexFinder transform(String source) {
813                     final String newSource = source.replace("[@", "\\[@").replace('\'', '"');
814                     return new RegexFinder(
815                             newSource.startsWith("//") ? "^" + newSource : newSource);
816                 }
817             };
818 
819     /**
820      * Allows for merging items of the same type.
821      *
822      * @param <T>
823      */
824     public interface Merger<T> {
merge(T a, T into)825         T merge(T a, T into);
826     }
827 
828     /**
829      * Returns the result of a regex lookup.
830      *
831      * @param source
832      * @return
833      */
get(String source)834     public final T get(String source) {
835         return get(source, null, null, null, null);
836     }
837 
838     /**
839      * Returns the result of a regex lookup, with the group arguments that matched.
840      *
841      * @param source
842      * @param context TODO
843      * @return
844      */
get(String source, Object context, Output<String[]> arguments)845     public T get(String source, Object context, Output<String[]> arguments) {
846         return get(source, context, arguments, null, null);
847     }
848 
849     /**
850      * Returns the result of a regex lookup, with the group arguments that matched. Supplies failure
851      * cases for debugging.
852      *
853      * @param source
854      * @param context
855      * @return
856      */
get( String source, Object context, Output<String[]> arguments, Output<Finder> matcherFound, List<String> failures)857     public T get(
858             String source,
859             Object context,
860             Output<String[]> arguments,
861             Output<Finder> matcherFound,
862             List<String> failures) {
863 
864         if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) {
865             //   T ret = SPEntries.get(source, context, arguments, matcherFound);
866             T ret = storage.get(source, context, arguments, matcherFound);
867             if (ret != null) {
868                 return ret;
869             }
870 
871             if (failures != null) {
872                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
873                     //                for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) {
874                     Finder matcher = entry.getKey();
875                     synchronized (matcher) {
876                         int failPoint = matcher.getFailPoint(source);
877                         String show =
878                                 source.substring(0, failPoint)
879                                         + "☹"
880                                         + source.substring(failPoint)
881                                         + "\t"
882                                         + matcher.toString();
883                         failures.add(show);
884                     }
885                 }
886             }
887         } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) {
888             //      T ret = RTEntries.get(source, context, arguments, matcherFound);
889             T ret = storage.get(source, context, arguments, matcherFound);
890             if (ret != null) {
891                 return ret;
892             }
893 
894             if (failures != null) {
895                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
896                     //                for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) {
897                     Finder matcher = entry.getKey();
898                     synchronized (matcher) {
899                         int failPoint = matcher.getFailPoint(source);
900                         String show =
901                                 source.substring(0, failPoint)
902                                         + "☹"
903                                         + source.substring(failPoint)
904                                         + "\t"
905                                         + matcher.toString();
906                         failures.add(show);
907                     }
908                 }
909             }
910         } else {
911             // slow but versatile implementation
912             for (Map.Entry<Finder, T> entry : MEntries.entrySet()) {
913                 Finder matcher = entry.getKey();
914                 synchronized (matcher) {
915                     Info firstInfo = new Info();
916                     if (matcher.find(source, context, firstInfo)) {
917                         if (arguments != null) {
918                             //                            arguments.value = matcher.getInfo();
919                             arguments.value = firstInfo.value;
920                         }
921                         if (matcherFound != null) {
922                             matcherFound.value = matcher;
923                         }
924                         return entry.getValue();
925                     } else if (failures != null) {
926                         int failPoint = matcher.getFailPoint(source);
927                         String show =
928                                 source.substring(0, failPoint)
929                                         + "☹"
930                                         + source.substring(failPoint)
931                                         + "\t"
932                                         + matcher.toString();
933                         failures.add(show);
934                     }
935                 }
936             }
937         }
938 
939         // not really necessary, but makes debugging easier.
940         if (arguments != null) {
941             arguments.value = null;
942         }
943         if (matcherFound != null) {
944             matcherFound.value = null;
945         }
946         return null;
947     }
948 
949     /**
950      * Returns all results of a regex lookup, with the group arguments that matched. Supplies
951      * failure cases for debugging.
952      *
953      * @param source
954      * @param context TODO
955      * @return
956      */
getAll( String source, Object context, List<Finder> matcherList, List<String> failures)957     public List<T> getAll(
958             String source, Object context, List<Finder> matcherList, List<String> failures) {
959         if (_lookupType == RegexLookup.LookupType.STAR_PATTERN_LOOKUP) {
960             Output<String[]> firstInfo = new Output<>();
961             //            List<T> matches = SPEntries.getAll(source, context,
962             // matcherList,firstInfo);
963             List<T> matches = storage.getAll(source, context, matcherList, firstInfo);
964             if (matches != null) {
965                 return matches;
966             }
967 
968             if (failures != null) {
969                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
970                     //                for (Map.Entry<Finder, T> entry : SPEntries.entrySet()) {
971                     Finder matcher = entry.getKey();
972                     synchronized (matcher) {
973                         int failPoint = matcher.getFailPoint(source);
974                         String show =
975                                 source.substring(0, failPoint)
976                                         + "☹"
977                                         + source.substring(failPoint)
978                                         + "\t"
979                                         + matcher.toString();
980                         failures.add(show);
981                     }
982                 }
983             }
984             return null;
985         } else if (_lookupType == RegexLookup.LookupType.OPTIMIZED_DIRECTORY_PATTERN_LOOKUP) {
986             Output<String[]> info = new Output<>();
987             //            List<T> matches = RTEntries.getAll(source, context, matcherList,info);
988             List<T> matches = storage.getAll(source, context, matcherList, info);
989             if (matches != null) {
990                 return matches;
991             }
992 
993             if (failures != null) {
994                 for (Map.Entry<Finder, T> entry : storage.entrySet()) {
995                     //                for (Map.Entry<Finder, T> entry : RTEntries.entrySet()) {
996                     Finder matcher = entry.getKey();
997                     synchronized (matcher) {
998                         int failPoint = matcher.getFailPoint(source);
999                         String show =
1000                                 source.substring(0, failPoint)
1001                                         + "☹"
1002                                         + source.substring(failPoint)
1003                                         + "\t"
1004                                         + matcher.toString();
1005                         failures.add(show);
1006                     }
1007                 }
1008             }
1009             return null;
1010         } else {
1011             // slow but versatile implementation
1012             List<T> matches = new ArrayList<>();
1013             for (Map.Entry<Finder, T> entry : MEntries.entrySet()) {
1014                 Finder matcher = entry.getKey();
1015                 Info firstInfo = new Info();
1016                 if (matcher.find(source, context, firstInfo)) {
1017                     if (matcherList != null) {
1018                         matcherList.add(matcher);
1019                     }
1020                     matches.add(entry.getValue());
1021                 } else if (failures != null) {
1022                     int failPoint = matcher.getFailPoint(source);
1023                     String show =
1024                             source.substring(0, failPoint)
1025                                     + "☹"
1026                                     + source.substring(failPoint)
1027                                     + "\t"
1028                                     + matcher.toString();
1029                     failures.add(show);
1030                 }
1031             }
1032             return matches;
1033         }
1034     }
1035 
1036     /**
1037      * Find the patterns that haven't been matched. Requires the caller to collect the patterns that
1038      * have, using matcherFound.
1039      *
1040      * @return outputUnmatched
1041      */
getUnmatchedPatterns( Set<String> matched, Map<String, T> outputUnmatched)1042     public Map<String, T> getUnmatchedPatterns(
1043             Set<String> matched, Map<String, T> outputUnmatched) {
1044         outputUnmatched.clear();
1045 
1046         Set<Map.Entry<Finder, T>> entrySet;
1047         switch (_lookupType) {
1048             case STAR_PATTERN_LOOKUP:
1049                 //            entrySet = SPEntries.entrySet();
1050                 entrySet = storage.entrySet();
1051                 break;
1052             case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1053                 //            entrySet = RTEntries.entrySet();
1054                 entrySet = storage.entrySet();
1055                 break;
1056             default:
1057                 entrySet = MEntries.entrySet();
1058                 break;
1059         }
1060 
1061         for (Map.Entry<Finder, T> entry : entrySet) {
1062             String pattern = entry.getKey().toString();
1063             if (!matched.contains(pattern)) {
1064                 outputUnmatched.put(pattern, entry.getValue());
1065             }
1066         }
1067         return outputUnmatched;
1068     }
1069 
1070     /**
1071      * Create a RegexLookup. It will take a list of key/value pairs, where the key is a regex
1072      * pattern and the value is what gets returned.
1073      *
1074      * @param patternTransform Used to transform string patterns into a Pattern. Can be used to
1075      *     process replacements (like variables).
1076      * @param valueTransform Used to transform string values into another form.
1077      * @param valueMerger Used to merge values with the same key.
1078      */
of( Transform<String, Finder> patternTransform, Transform<String, T> valueTransform, Merger<T> valueMerger)1079     public static <T, U> RegexLookup<T> of(
1080             Transform<String, Finder> patternTransform,
1081             Transform<String, T> valueTransform,
1082             Merger<T> valueMerger) {
1083         return new RegexLookup<T>()
1084                 .setPatternTransform(patternTransform)
1085                 .setValueTransform(valueTransform)
1086                 .setValueMerger(valueMerger);
1087     }
1088 
of(Transform<String, T> valueTransform)1089     public static <T> RegexLookup<T> of(Transform<String, T> valueTransform) {
1090         return new RegexLookup<T>()
1091                 .setValueTransform(valueTransform)
1092                 .setPatternTransform(RegexFinderTransform);
1093     }
1094 
of()1095     public static <T> RegexLookup<T> of() {
1096         return new RegexLookup<T>().setPatternTransform(RegexFinderTransform);
1097     }
1098 
1099     /**
1100      * @deprecated Use {@link #of(LookupType,Transform)} instead
1101      */
1102     @Deprecated
of(LookupType lookupType)1103     public static <T> RegexLookup<T> of(LookupType lookupType) {
1104         return of(lookupType, RegexFinderTransform);
1105     }
1106 
of( LookupType lookupType, Transform<String, RegexFinder> transform)1107     public static <T> RegexLookup<T> of(
1108             LookupType lookupType, Transform<String, RegexFinder> transform) {
1109         return new RegexLookup<T>(lookupType).setPatternTransform(transform);
1110     }
1111 
setValueTransform(Transform<String, ? extends T> valueTransform)1112     public RegexLookup<T> setValueTransform(Transform<String, ? extends T> valueTransform) {
1113         this.valueTransform = valueTransform;
1114         return this;
1115     }
1116 
setPatternTransform( Transform<String, ? extends Finder> patternTransform)1117     public RegexLookup<T> setPatternTransform(
1118             Transform<String, ? extends Finder> patternTransform) {
1119         this.patternTransform = patternTransform != null ? patternTransform : RegexFinderTransform;
1120         return this;
1121     }
1122 
setValueMerger(Merger<T> valueMerger)1123     public RegexLookup<T> setValueMerger(Merger<T> valueMerger) {
1124         this.valueMerger = valueMerger;
1125         return this;
1126     }
1127 
1128     /**
1129      * Load a RegexLookup from a file. Opens a file relative to the class, and adds lines separated
1130      * by "; ". Lines starting with # are comments.
1131      */
loadFromFile(Class<?> baseClass, String filename)1132     public RegexLookup<T> loadFromFile(Class<?> baseClass, String filename) {
1133         RegexFileParser parser = setupRegexFileParser();
1134         parser.parse(baseClass, filename);
1135         return this;
1136     }
1137 
1138     /** Load a RegexLookup from a list of strings, as if they came from a file. See loadFromFile */
loadFromString(String source)1139     public RegexLookup<T> loadFromString(String source) {
1140         RegexFileParser parser = setupRegexFileParser();
1141         parser.parseStrings("string", Splitter.on('\n').split(source));
1142         return this;
1143     }
1144 
setupRegexFileParser()1145     private RegexFileParser setupRegexFileParser() {
1146         RegexFileParser parser = new RegexFileParser();
1147         parser.setLineParser(
1148                 new RegexLineParser() {
1149                     @Override
1150                     public void parse(String line) {
1151                         int pos = line.indexOf(RegexLookup.SEPARATOR);
1152                         if (pos < 0) {
1153                             throw new IllegalArgumentException(
1154                                     "Illegal line, doesn't contain semicolon: " + line);
1155                         }
1156                         String source = line.substring(0, pos).trim();
1157                         String target = line.substring(pos + 2).trim();
1158                         try {
1159                             @SuppressWarnings("unchecked")
1160                             T result =
1161                                     valueTransform == null
1162                                             ? (T) target
1163                                             : valueTransform.transform(target);
1164                             add(source, result);
1165                         } catch (Exception e) {
1166                             throw new IllegalArgumentException(
1167                                     "Failed to add <" + source + "> => <" + target + ">", e);
1168                         }
1169                     }
1170                 });
1171         parser.setVariableProcessor(
1172                 new VariableProcessor() {
1173                     @Override
1174                     public void add(String variable, String variableName) {
1175                         addVariable(variable, variableName);
1176                     }
1177 
1178                     @Override
1179                     public String replace(String str) {
1180                         return variables.replace(str);
1181                     }
1182                 });
1183         return parser;
1184     }
1185 
addVariable(String variable, String variableValue)1186     public RegexLookup<T> addVariable(String variable, String variableValue) {
1187         if (!variable.startsWith("%")) {
1188             throw new IllegalArgumentException("Variables must start with %");
1189         }
1190         variables.add(variable.trim(), variableValue.trim());
1191         return this;
1192     }
1193 
1194     /**
1195      * Add a pattern/value pair.
1196      *
1197      * @param stringPattern regex to match
1198      * @param target return type on match
1199      * @return this, for chaining
1200      */
add(String stringPattern, T target)1201     public RegexLookup<T> add(String stringPattern, T target) {
1202         if (stringPattern.contains("%")) {
1203             stringPattern = variables.replace(stringPattern);
1204         }
1205         Finder pattern0 = patternTransform.transform(stringPattern);
1206         return add(pattern0, target);
1207     }
1208 
1209     /**
1210      * Add a pattern/value pair.
1211      *
1212      * @param pattern
1213      * @param target return type on match
1214      * @return this, for chaining
1215      */
add(Finder pattern, T target)1216     public RegexLookup<T> add(Finder pattern, T target) {
1217         if (!allowNull && target == null) {
1218             throw new NullPointerException("null disallowed, unless allowNull(true) is called.");
1219         }
1220 
1221         T old;
1222         switch (_lookupType) {
1223             case STAR_PATTERN_LOOKUP: // fallthrough
1224             case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1225                 old = storage.get(pattern);
1226                 //            old = SPEntries.get(pattern);
1227                 break;
1228                 //        case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1229                 //            old = storage.get(pattern);
1230                 //            old = RTEntries.get(pattern);
1231                 //            break;
1232             default:
1233                 old = MEntries.get(pattern);
1234                 break;
1235         }
1236 
1237         if (old == null) {
1238             switch (_lookupType) {
1239                 case STAR_PATTERN_LOOKUP: // fallthrough
1240                 case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1241                     storage.put(pattern, target);
1242                     //                SPEntries.put(pattern, target);
1243                     break;
1244                     //            case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1245                     //                storage.put(pattern, target);
1246                     //                RTEntries.put(pattern, target);
1247                     //                break;
1248                 default:
1249                     MEntries.put(pattern, target);
1250                     break;
1251             }
1252         } else if (valueMerger != null) {
1253             valueMerger.merge(target, old);
1254         } else {
1255             throw new IllegalArgumentException(
1256                     "Duplicate matcher without Merger defined "
1257                             + pattern
1258                             + "; old: "
1259                             + old
1260                             + "; new: "
1261                             + target);
1262         }
1263         return this;
1264     }
1265 
1266     @Override
iterator()1267     public Iterator<Map.Entry<Finder, T>> iterator() {
1268         switch (_lookupType) {
1269             case STAR_PATTERN_LOOKUP: // fall through
1270             case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1271                 //            return
1272                 // Collections.unmodifiableCollection(SPEntries.entrySet()).iterator();
1273                 return Collections.unmodifiableCollection(storage.entrySet()).iterator();
1274                 //        case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1275                 //            return
1276                 // Collections.unmodifiableCollection(RTEntries.entrySet()).iterator();
1277                 //            return
1278                 // Collections.unmodifiableCollection(storage.entrySet()).iterator();
1279             default:
1280                 return Collections.unmodifiableCollection(MEntries.entrySet()).iterator();
1281         }
1282     }
1283 
replace(String lookup, String... arguments)1284     public static String replace(String lookup, String... arguments) {
1285         StringBuilder result = new StringBuilder();
1286         int last = 0;
1287         while (true) {
1288             int pos = lookup.indexOf("$", last);
1289             if (pos < 0) {
1290                 result.append(lookup.substring(last, lookup.length()));
1291                 break;
1292             }
1293             result.append(lookup.substring(last, pos));
1294             final int arg = lookup.charAt(pos + 1) - '0';
1295             try {
1296                 result.append(arguments[arg]);
1297             } catch (Exception e) {
1298                 throw new IllegalArgumentException(
1299                         "Replacing $"
1300                                 + arg
1301                                 + " in <"
1302                                 + lookup
1303                                 + ">, but too few arguments supplied.");
1304             }
1305             last = pos + 2;
1306         }
1307         return result.toString();
1308     }
1309 
1310     /**
1311      * @return the number of entries
1312      */
size()1313     public int size() {
1314         switch (_lookupType) {
1315             case STAR_PATTERN_LOOKUP: // fall through
1316             case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1317                 //            return SPEntries.size();
1318                 return storage.size();
1319                 //        case OPTIMIZED_DIRECTORY_PATTERN_LOOKUP:
1320                 //            return storage.size();
1321                 //            return RTEntries.size();
1322             default:
1323                 return MEntries.size();
1324         }
1325     }
1326 
1327     @Override
toString()1328     public String toString() {
1329         return storage.toString();
1330     }
1331 }
1332