xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/MatchStrings.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.ibm.icu.impl.Relation;
4 import java.io.BufferedReader;
5 import java.io.IOException;
6 import java.util.ArrayList;
7 import java.util.Collection;
8 import java.util.LinkedHashSet;
9 import java.util.List;
10 import java.util.Set;
11 import java.util.TreeMap;
12 import org.unicode.cldr.draft.FileUtilities;
13 
14 /** Take mappings to IPA and interleave them. */
15 public class MatchStrings {
16 
17     static String cldrDataDir =
18             "C:\\cvsdata\\unicode\\cldr\\tools\\java\\org\\unicode\\cldr\\util\\data\\transforms\\";
19 
20     static class Info {
21         String english;
22 
23         String ipa;
24 
25         String fixedIpa;
26 
Info(String english, String ipa, String fixedIpa)27         public Info(String english, String ipa, String fixedIpa) {
28             this.english = english;
29             this.ipa = ipa;
30             this.fixedIpa = fixedIpa.equals(ipa) ? ipa : fixedIpa; // make ==
31         }
32 
33         @Override
toString()34         public String toString() {
35             return "{" + english + "/" + ipa + (fixedIpa == ipa ? "" : "/" + fixedIpa) + "}";
36         }
37     }
38 
39     Relation<String, Info> letter_correspondances =
40             Relation.of(new TreeMap<String, Set<Info>>(), LinkedHashSet.class);
41 
MatchStrings()42     MatchStrings() throws IOException {
43         BufferedReader in = FileUtilities.openUTF8Reader(cldrDataDir, "internal_matchIpaRules.txt");
44         while (true) {
45             String line = in.readLine();
46             if (line == null) break;
47             if (line.length() == 0) continue;
48             String[] parts = line.split("\\s+");
49             String ipa = parts.length > 1 ? parts[1] : "";
50             add(parts[0], ipa, parts.length > 2 ? parts[2] : ipa);
51         }
52         in.close();
53     }
54 
add(String english, String ipa, String fixedIpa)55     void add(String english, String ipa, String fixedIpa) {
56         String key = english.length() == 0 ? "" : english.substring(0, 1);
57         letter_correspondances.put(key, new Info(english, ipa, fixedIpa));
58     }
59 
60     /**
61      * Insert the IPA in after the string, such as baitt + /bet/ => b{b}ai{e}t{t}
62      *
63      * @param english
64      * @param ipa
65      * @return
66      */
interleaveIPA(String english, String ipa, List<Info> output)67     int interleaveIPA(String english, String ipa, List<Info> output) {
68         highWater = 0;
69         longestEnglish = 0;
70         longestIpa = 0;
71         highWaterList.clear();
72         this.english = english;
73         this.ipa = ipa;
74         this.output = output;
75         output.clear();
76         return interleave2(0, 0);
77     }
78 
79     String english;
80     String ipa;
81     List<Info> output;
82     int highWater = 0;
83     List<Info> highWaterList = new ArrayList<>();
84     private int longestEnglish;
85     private int longestIpa;
86 
87     /**
88      * Recursively match the string. Right now, we just take the matches in order; later we could
89      * try a weighted fit
90      *
91      * @param english
92      * @param englishPosition
93      * @param ipa
94      * @param ipaPosition
95      * @param path2values
96      * @return
97      */
interleave2(int englishPosition, int ipaPosition)98     private int interleave2(int englishPosition, int ipaPosition) {
99 
100         if (highWater < ipaPosition) {
101             highWaterList.clear();
102             highWaterList.addAll(output);
103             highWater = output.size();
104             longestEnglish = englishPosition;
105             longestIpa = ipaPosition;
106         }
107         if (englishPosition == english.length()) {
108             if (ipaPosition == ipa.length()) {
109                 return 1;
110             }
111             return 0;
112         }
113         String firstLetter = english.substring(englishPosition, englishPosition + 1);
114         Set<Info> possibilities = letter_correspondances.getAll(firstLetter);
115         if (possibilities != null) {
116             int result = checkPossibilities(possibilities, englishPosition, ipaPosition);
117             if (result != 0) {
118                 return result;
119             }
120         }
121 
122         // we failed, try the empty string
123         possibilities = letter_correspondances.getAll("");
124         if (possibilities != null) {
125             int result = checkPossibilities(possibilities, englishPosition, ipaPosition);
126             if (result != 0) {
127                 return result;
128             }
129         }
130 
131         // failed,
132 
133         // we failed to find a pair. Make last check to see if we just
134         // delete one English letter
135         Info last = output.size() == 0 ? null : output.get(output.size() - 1);
136         if (last == null || last.ipa.length() != 0) {
137             output.add(new Info(firstLetter, "", ""));
138             int result = interleave2(englishPosition + 1, ipaPosition);
139             if (result == 1) {
140                 return 1;
141             }
142             // if we fail, then remove the pair, and continue
143             output.remove(output.size() - 1);
144         }
145 
146         // if we get this far, we've exhausted the possibilities, so fail
147         return 0;
148     }
149 
checkPossibilities(Collection<Info> possibilities, int englishPosition, int ipaPosition)150     int checkPossibilities(Collection<Info> possibilities, int englishPosition, int ipaPosition) {
151         for (Info englishIpa : possibilities) {
152             // skip if we don't match
153             String englishPart = englishIpa.english;
154             String ipaPart = englishIpa.ipa;
155             if (!english.regionMatches(englishPosition, englishPart, 0, englishPart.length())) {
156                 continue;
157             }
158             // boolean ipaMatches = ipa.regionMatches(ipaPosition, ipaPart, 0, ipaPart.length());
159             // boolean ipa2Matches = matchAtIgnoring(ipaPosition, ipaPart);
160             // if (ipaMatches != ipa2Matches) {
161             // System.out.println("Fails " + ipa.substring(ipaPosition) + ", " + ipaPart);
162             // }
163             int matchesUpTo = matchAtIgnoring(ipaPosition, ipaPart);
164             if (matchesUpTo < 0) {
165                 continue;
166             }
167             // we match, so recurse
168             output.add(englishIpa);
169             int result = interleave2(englishPosition + englishPart.length(), matchesUpTo);
170             if (result == 1) {
171                 return 1;
172             }
173             // if we fail, then remove the pair, and continue
174             output.remove(output.size() - 1);
175         }
176         return 0;
177     }
178 
179     /**
180      * Does ipaPart match ipa at the position, ignoring stress marks in ipa? Returns how far it got.
181      *
182      * @param ipaPosition
183      * @param ipaPart
184      * @return
185      */
matchAtIgnoring(int ipaPosition, String ipaPart)186     private int matchAtIgnoring(int ipaPosition, String ipaPart) {
187         if (ipaPart.length() == 0) return ipaPosition;
188         int j = 0;
189         for (int i = ipaPosition; i < ipa.length(); ++i) {
190             char ch = ipa.charAt(i);
191             if (ch == 'ˈ' || ch == 'ˌ') continue;
192             char ch2 = ipaPart.charAt(j++);
193             if (ch != ch2) return -1;
194             if (j >= ipaPart.length()) return i + 1;
195         }
196         return -1;
197     }
198 
199     List<Info> current = new ArrayList<>();
200 
201     /**
202      * Fix the IPA in a string
203      *
204      * @param english
205      * @param ipa
206      * @return
207      */
fixIPA(String english, String ipa)208     String fixIPA(String english, String ipa) {
209         int result = interleaveIPA(english, ipa, current);
210         if (result == 0) return null;
211         StringBuilder buffer = new StringBuilder();
212         for (Info englishIpa : current) {
213             buffer.append(englishIpa.fixedIpa);
214         }
215         return buffer.toString();
216     }
217 
getTrace()218     String getTrace() {
219         return highWaterList.toString()
220                 + "\t\t"
221                 + english.substring(longestEnglish)
222                 + "\t≠\t"
223                 + ipa.substring(longestIpa);
224     }
225 }
226