xref: /aosp_15_r20/external/icu/icu4j/perf-tests/src/main/java/com/ibm/icu/dev/test/perf/BreakIteratorPerformanceTest.java (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  **********************************************************************
5  * Copyright (c) 2002-2008, International Business Machines           *
6  * Corporation and others.  All Rights Reserved.                      *
7  **********************************************************************
8  */
9 package com.ibm.icu.dev.test.perf;
10 
11 import java.io.FileInputStream;
12 import java.util.ArrayList;
13 
14 public class BreakIteratorPerformanceTest extends PerfTest {
15 
16     String fileContents;
17 
18     com.ibm.icu.text.BreakIterator iSentenceIter;
19     com.ibm.icu.text.BreakIterator iWordIter;
20     com.ibm.icu.text.BreakIterator iLineIter;
21     com.ibm.icu.text.BreakIterator iCharacterIter;
22     java.text.BreakIterator jSentenceIter;
23     java.text.BreakIterator jWordIter;
24     java.text.BreakIterator jLineIter;
25     java.text.BreakIterator jCharacterIter;
26     String[] iSentences;
27     String[] iWords;
28     String[] iLines;
29     String[] iCharacters;
30     String[] jSentences;
31     String[] jWords;
32     String[] jLines;
33     String[] jCharacters;
34 
main(String[] args)35     public static void main(String[] args) throws Exception {
36         new BreakIteratorPerformanceTest().run(args);
37     }
38 
setup(String[] args)39     protected void setup(String[] args) {
40         try {
41             // read in the input file, being careful with a possible BOM
42             FileInputStream in = new FileInputStream(fileName);
43             BOMFreeReader reader = new BOMFreeReader(in, encoding);
44             fileContents = new String(readToEOS(reader));
45 
46             // // get rid of any characters that may cause differences between ICU4J and Java BreakIterator
47             // // fileContents = fileContents.replaceAll("[\t\f\r\n\\-/ ]+", " ");
48             // String res = "";
49             // StringTokenizer tokenizer = new StringTokenizer(fileContents, "\t\f\r\n-/ ");
50             // while (tokenizer.hasMoreTokens())
51             // res += tokenizer.nextToken() + " ";
52             // fileContents = res.trim();
53 
54             // create the break iterators with respect to locale
55             if (locale == null) {
56                 iSentenceIter = com.ibm.icu.text.BreakIterator.getSentenceInstance();
57                 iWordIter = com.ibm.icu.text.BreakIterator.getWordInstance();
58                 iLineIter = com.ibm.icu.text.BreakIterator.getLineInstance();
59                 iCharacterIter = com.ibm.icu.text.BreakIterator.getCharacterInstance();
60 
61                 jSentenceIter = java.text.BreakIterator.getSentenceInstance();
62                 jWordIter = java.text.BreakIterator.getWordInstance();
63                 jLineIter = java.text.BreakIterator.getLineInstance();
64                 jCharacterIter = java.text.BreakIterator.getCharacterInstance();
65             } else {
66                 iSentenceIter = com.ibm.icu.text.BreakIterator.getSentenceInstance(locale);
67                 iWordIter = com.ibm.icu.text.BreakIterator.getWordInstance(locale);
68                 iLineIter = com.ibm.icu.text.BreakIterator.getLineInstance(locale);
69                 iCharacterIter = com.ibm.icu.text.BreakIterator.getCharacterInstance(locale);
70 
71                 jSentenceIter = java.text.BreakIterator.getSentenceInstance(locale);
72                 jWordIter = java.text.BreakIterator.getWordInstance(locale);
73                 jLineIter = java.text.BreakIterator.getLineInstance(locale);
74                 jCharacterIter = java.text.BreakIterator.getCharacterInstance(locale);
75             }
76 
77             iSentences = init(iSentenceIter);
78             iWords = init(iWordIter);
79             iLines = init(iLineIter);
80             iCharacters = init(iCharacterIter);
81             jSentences = init(jSentenceIter);
82             jWords = init(jWordIter);
83             jLines = init(jLineIter);
84             jCharacters = init(jCharacterIter);
85 
86         } catch (Exception ex) {
87             ex.printStackTrace();
88             throw new RuntimeException(ex.getMessage());
89         }
90 
91         // we created some heavy objects, so lets try to clean up a little before running the tests
92         gc();
93     }
94 
init(com.ibm.icu.text.BreakIterator iter)95     private String[] init(com.ibm.icu.text.BreakIterator iter) {
96         // set the string to iterate on
97         iter.setText(fileContents);
98 
99         // produce a token list
100         ArrayList tokenList = new ArrayList();
101         int start = iter.first();
102         for (int end = iter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iter.next())
103             tokenList.add(fileContents.substring(start, end));
104 
105         // return the token list as a string array
106         return (String[]) tokenList.toArray(new String[0]);
107     }
108 
init(java.text.BreakIterator iter)109     private String[] init(java.text.BreakIterator iter) {
110         // set the string to iterate on
111         iter.setText(fileContents);
112 
113         // produce a token list
114         ArrayList tokenList = new ArrayList();
115         int start = iter.first();
116         for (int end = iter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iter.next())
117             tokenList.add(fileContents.substring(start, end));
118 
119         // return the token list as a string array
120         return (String[]) tokenList.toArray(new String[0]);
121     }
122 
createTestICU(final com.ibm.icu.text.BreakIterator iIter, final String[] correct, final String breakType)123     PerfTest.Function createTestICU(final com.ibm.icu.text.BreakIterator iIter, final String[] correct,
124             final String breakType) {
125         return new PerfTest.Function() {
126             public void call() {
127                 int k = 0;
128                 int start = iIter.first();
129                 for (int end = iIter.next(); end != com.ibm.icu.text.BreakIterator.DONE; start = end, end = iIter
130                         .next())
131                     if (!correct[k++].equals(fileContents.substring(start, end)))
132                         throw new RuntimeException("ICU4J BreakIterator gave the wrong answer for " + breakType + " "
133                                 + (k - 1) + " during the performance test. Cannot continue the performance test.");
134                 if (k != correct.length)
135                     throw new RuntimeException("ICU4J BreakIterator gave the wrong number of " + breakType
136                             + "s during the performance test. Cannot continue the performance test.");
137             }
138 
139             public long getOperationsPerIteration() {
140                 return fileContents.length();
141             }
142         };
143     }
144 
145     PerfTest.Function createTestJava(final java.text.BreakIterator jIter, final String[] correct, final String breakType) {
146         return new PerfTest.Function() {
147             public void call() {
148                 int k = 0;
149                 int start = jIter.first();
150                 for (int end = jIter.next(); end != java.text.BreakIterator.DONE; start = end, end = jIter.next())
151                     if (!correct[k++].equals(fileContents.substring(start, end)))
152                         throw new RuntimeException("Java BreakIterator gave the wrong answer for " + breakType + " "
153                                 + (k - 1) + " during the performance test. Cannot continue the performance test.");
154                 if (k != correct.length)
155                     throw new RuntimeException("Java BreakIterator gave the wrong number of " + breakType
156                             + "s during the performance test. Cannot continue the performance test.");
157             }
158 
159             public long getOperationsPerIteration() {
160                 return fileContents.length();
161             }
162         };
163     }
164 
165     PerfTest.Function TestICUSentences() {
166         return createTestICU(iSentenceIter, iSentences, "sentence");
167     }
168 
169     PerfTest.Function TestICUWords() {
170         return createTestICU(iWordIter, iWords, "word");
171     }
172 
173     PerfTest.Function TestICULines() {
174         return createTestICU(iLineIter, iLines, "line");
175     }
176 
177     PerfTest.Function TestICUCharacters() {
178         return createTestICU(iCharacterIter, iCharacters, "character");
179     }
180 
181     PerfTest.Function TestJavaSentences() {
182         return createTestJava(jSentenceIter, jSentences, "sentence");
183     }
184 
185     PerfTest.Function TestJavaWords() {
186         return createTestJava(jWordIter, jWords, "word");
187     }
188 
189     PerfTest.Function TestJavaLines() {
190         return createTestJava(jLineIter, jLines, "line");
191     }
192 
193     PerfTest.Function TestJavaCharacters() {
194         return createTestJava(jCharacterIter, jCharacters, "character");
195     }
196 }
197