xref: /aosp_15_r20/external/icu/icu4j/demos/src/main/java/com/ibm/icu/dev/demo/translit/AnyTransliterator.java (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /**
4  *******************************************************************************
5  * Copyright (C) 2001-2010, International Business Machines Corporation and    *
6  * others. All Rights Reserved.                                                *
7  *******************************************************************************
8  */
9 package com.ibm.icu.dev.demo.translit;
10 import java.util.Enumeration;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.Iterator;
14 import java.util.Set;
15 import java.util.TreeSet;
16 
17 import com.ibm.icu.lang.UScript;
18 import com.ibm.icu.text.Replaceable;
19 import com.ibm.icu.text.Transliterator;
20 import com.ibm.icu.text.UTF16;
21 import com.ibm.icu.text.UnicodeFilter;
22 
23 public class AnyTransliterator extends Transliterator {
24 
25     static final boolean DEBUG = false;
26     private String targetName;
27     private RunIterator it;
28     private Position run;
29 
30 
AnyTransliterator(String targetName, UnicodeFilter filter, RunIterator it)31     public AnyTransliterator(String targetName, UnicodeFilter filter, RunIterator it){
32         super("Any-" + targetName, filter);
33         this.targetName = targetName;
34         this.it = it;
35         run = new Position();
36     }
37 
AnyTransliterator(String targetName, UnicodeFilter filter)38     public AnyTransliterator(String targetName, UnicodeFilter filter){
39         this(targetName, filter, new ScriptRunIterator());
40     }
41 
42     static private Transliterator hex = Transliterator.getInstance("[^\\u0020-\\u007E] hex");
43 
handleTransliterate(Replaceable text, Position offsets, boolean isIncremental)44     protected void handleTransliterate(Replaceable text,
45                                        Position offsets, boolean isIncremental) {
46         if (DEBUG) {
47             System.out.println("- handleTransliterate " + hex.transliterate(text.toString())
48                 + ", " + toString(offsets));
49         }
50         it.reset(text, offsets);
51 
52         while (it.next(run)) {
53             if (targetName.equalsIgnoreCase(it.getName())) {
54                 if (DEBUG) System.out.println("Skipping identical: " + targetName);
55                 run.start = run.limit; // show we processed
56                 continue; // skip if same
57             }
58 
59             Transliterator t;
60             String id = it.getName() + '-' + targetName;
61             try {
62                 t = Transliterator.getInstance(id);
63             } catch (IllegalArgumentException ex) {
64                 if (DEBUG) System.out.println("Couldn't find: " + id + ", Trying Latin as Pivot");
65                 id = it.getName() + "-Latin; Latin-" + targetName;
66                 try {
67                     t = Transliterator.getInstance(id);
68                 } catch (IllegalArgumentException ex2) {
69                     if (DEBUG) System.out.println("Couldn't find: " + id);
70                     continue;
71                 }
72             }
73             // TODO catch error later!!
74 
75             if (DEBUG) {
76                 System.out.println(t.getID());
77                 System.out.println("input: " + hex.transliterate(text.toString())
78                  + ", " + toString(run));
79             }
80 
81             if (isIncremental && it.atEnd()) {
82                 t.transliterate(text, run);
83             } else {
84                 t.finishTransliteration(text, run);
85             }
86             // adjust the offsets in line with the changes
87             it.adjust(run.limit);
88 
89             if (DEBUG) {
90                 System.out.println("output: " + hex.transliterate(text.toString())
91                  + ", " + toString(run));
92             }
93         }
94 
95         // show how far we got!
96         it.getExpanse(offsets);
97         if (run.start == run.limit) offsets.start = offsets.limit;
98         else offsets.start = run.start;
99         if (DEBUG) {
100             System.out.println("+ handleTransliterate: " + ", " + toString(offsets));
101             System.out.println();
102         }
103     }
104 
105     // should be method on Position
toString(Position offsets)106     public static String toString(Position offsets) {
107         return "[cs: " + offsets.contextStart
108                 + ", s: " + offsets.start
109                 + ", l: " + offsets.limit
110                 + ", cl: " + offsets.contextLimit
111                 + "]";
112     }
113 
114     public interface RunIterator {
reset(Replaceable text, Position expanse)115         public void reset(Replaceable text, Position expanse);
getExpanse(Position run)116         public void getExpanse(Position run);
reset()117         public void reset();
next(Position run)118         public boolean next(Position run);
getCurrent(Position run)119         public void getCurrent(Position run);
getName()120         public String getName();
adjust(int newCurrentLimit)121         public void adjust(int newCurrentLimit);
atEnd()122         public boolean atEnd();
123     }
124 
125     /**
126      * Returns a series of ranges corresponding to scripts. They will be of the form:
127      * ccccSScSSccccTTcTcccc    - where c is common, S is the first script and T is the second
128      *|            |            - first run
129      *         |            |    - second run
130      * That is, the runs will overlap. The reason for this is so that a transliterator can
131      * consider common characters both before and after the scripts.
132      * The only time that contextStart != start is for the first run
133      *    (the context is the start context of the entire expanse)
134      * The only time that contextLimit != limit is for the last run
135      *    (the context is the end context of the entire expanse)
136      */
137     public static class ScriptRunIterator implements RunIterator {
138         private Replaceable text;
139         private Position expanse = new Position();
140         private Position current = new Position();
141         private int script;
142         private boolean done = true;
143 
144 
reset(Replaceable repText, Position expansePos)145         public void reset(Replaceable repText, Position expansePos) {
146             set(this.expanse, expansePos);
147             this.text = repText;
148             reset();
149         }
150 
reset()151         public void reset() {
152             done = false;
153             //this.expanse = expanse;
154             script = UScript.INVALID_CODE;
155             // set up first range to be empty, at beginning
156             current.contextStart = expanse.contextStart;
157             current.start = current.limit = current.contextLimit = expanse.start;
158         }
159 
next(Position run)160         public boolean next(Position run) {
161             if (done) return false;
162             if (DEBUG) {
163                 System.out.println("+cs: " + current.contextStart
164                     + ", s: " + current.start
165                     + ", l: " + current.limit
166                     + ", cl: " + current.contextLimit);
167             }
168             // reset start context run to the last end
169             current.start = current.limit;
170 
171             // Phase 1. Backup the START value through COMMON until we get to expanse.start or a real script.
172             int i, cp;
173             int limit = expanse.start;
174             for (i = current.start; i > limit; i -= UTF16.getCharCount(cp)) {
175                 cp = text.char32At(i);
176                 int scrpt = UScript.getScript(cp);
177                 if (scrpt != UScript.COMMON && scrpt != UScript.INHERITED) break;
178             }
179             current.start = i;
180             current.contextStart = (i == limit) ? expanse.contextStart : i; // extend at start
181 
182             // PHASE 2. Move up the LIMIT value through COMMON or single script until we get to expanse.limit
183             int lastScript = UScript.COMMON;
184             //int veryLastScript = UScript.COMMON;
185             limit = expanse.limit;
186             for (i = current.limit; i < limit; i += UTF16.getCharCount(cp)) {
187                 cp = text.char32At(i);
188                 int scrpt = UScript.getScript(cp);
189                 if (scrpt == UScript.INHERITED) scrpt = UScript.COMMON;
190                 if (scrpt != UScript.COMMON) {
191                     // if we find a real script:
192                     //   if we already had a script, bail
193                     //   otherwise set our script
194                     if (lastScript == UScript.COMMON) lastScript = scrpt;
195                     else if (lastScript != scrpt) break;
196                 }
197             }
198             current.limit = i;
199             current.contextLimit = (i == limit) ? expanse.contextLimit : i; // extend at end
200             done = (i == limit);
201             script = lastScript;
202 
203             if (DEBUG) {
204                 System.out.println("-cs: " + current.contextStart
205                     + ", s: " + current.start
206                     + ", l: " + current.limit
207                     + ", cl: " + current.contextLimit);
208             }
209 
210             set(run, current);
211             return true;
212         }
213 
214         // SHOULD BE METHOD ON POSITION
set(Position run, Position current)215         public static void set(Position run, Position current) {
216             run.contextStart = current.contextStart;
217             run.start = current.start;
218             run.limit = current.limit;
219             run.contextLimit = current.contextLimit;
220         }
221 
atEnd()222         public boolean atEnd() {
223             return current.limit == expanse.limit;
224         }
225 
getCurrent(Position run)226         public void getCurrent(Position run) {
227             set(run, current);
228         }
229 
getExpanse(Position run)230         public void getExpanse(Position run) {
231             set(run, expanse);
232         }
233 
getName()234         public String getName() {
235             return UScript.getName(script);
236         }
237 
adjust(int newCurrentLimit)238         public void adjust(int newCurrentLimit) {
239             if (expanse == null) {
240                 throw new IllegalArgumentException("Must reset() before calling");
241             }
242             int delta = newCurrentLimit - current.limit;
243             current.limit += delta;
244             current.contextLimit += delta;
245             expanse.limit += delta;
246             expanse.contextLimit += delta;
247         }
248 
249         // register Any-Script for every script.
250 
251         private static Set scriptList = new HashSet();
252 
registerAnyToScript()253         public static void registerAnyToScript() {
254             synchronized (scriptList) {
255                 Enumeration sources = Transliterator.getAvailableSources();
256                 while(sources.hasMoreElements()) {
257                     String source = (String) sources.nextElement();
258                     if (source.equals("Any")) continue; // to keep from looping
259 
260                     Enumeration targets = Transliterator.getAvailableTargets(source);
261                     while(targets.hasMoreElements()) {
262                         String target = (String) targets.nextElement();
263                         if (UScript.getCode(target) == null) continue; // SKIP unless we have a script (or locale)
264                         if (scriptList.contains(target)) continue; // already encountered
265                         scriptList.add(target); // otherwise add for later testing
266 
267                         Set variantSet = add(new TreeSet(), Transliterator.getAvailableVariants(source, target));
268                         if (variantSet.size() < 2) {
269                             AnyTransliterator at = new AnyTransliterator(target, null);
270                             DummyFactory.add(at.getID(), at);
271                         } else {
272                             Iterator variants = variantSet.iterator();
273                             while(variants.hasNext()) {
274                                 String variant = (String) variants.next();
275                                 AnyTransliterator at = new AnyTransliterator(
276                                     (variant.length() > 0) ? target + "/" + variant : target, null);
277                                 DummyFactory.add(at.getID(), at);
278                             }
279                         }
280                     }
281                 }
282             }
283         }
284 
285         static class DummyFactory implements Transliterator.Factory {
286             static DummyFactory singleton = new DummyFactory();
287             static HashMap m = new HashMap();
288 
289             // Since Transliterators are immutable, we don't have to clone on set & get
add(String ID, Transliterator t)290             static void add(String ID, Transliterator t) {
291                 m.put(ID, t);
292                 System.out.println("Registering: " + ID + ", " + t.toRules(true));
293                 Transliterator.registerFactory(ID, singleton);
294             }
getInstance(String ID)295             public Transliterator getInstance(String ID) {
296                 return (Transliterator) m.get(ID);
297             }
298         }
299 
300         // Nice little Utility for converting Enumeration to collection
add(Set s, Enumeration enumeration)301         static Set add(Set s, Enumeration enumeration) {
302             while(enumeration.hasMoreElements()) {
303                 s.add(enumeration.nextElement());
304             }
305             return s;
306         }
307 
308 
309     }
310 }
311