xref: /aosp_15_r20/external/cronet/third_party/icu/source/test/intltest/rbbiapts.cpp (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * Copyright (c) 1999-2016, International Business Machines
5  * Corporation and others. All Rights Reserved.
6  ********************************************************************
7  *   Date        Name        Description
8  *   12/14/99    Madhu        Creation.
9  *   01/12/2000  Madhu        updated for changed API
10  ********************************************************************/
11 
12 #include "unicode/utypes.h"
13 
14 #if !UCONFIG_NO_BREAK_ITERATION
15 
16 #include "unicode/uchar.h"
17 #include "intltest.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
20 #include "rbbiapts.h"
21 #include "rbbidata.h"
22 #include "cstring.h"
23 #include "ubrkimpl.h"
24 #include "unicode/locid.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utext.h"
27 #include "cmemory.h"
28 #if !UCONFIG_NO_BREAK_ITERATION
29 #include "unicode/filteredbrk.h"
30 #include <stdio.h> // for snprintf
31 #endif
32 /**
33  * API Test the RuleBasedBreakIterator class
34  */
35 
36 
// Reports a data error, tagged with the current file and line, when `status`
// holds an ICU failure code. Does nothing on success.
#define TEST_ASSERT_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", \
                  __FILE__, __LINE__, u_errorName(status)); \
    } \
} UPRV_BLOCK_MACRO_END

// Reports a test error, quoting the stringized expression together with the
// current file and line, when `expr` evaluates to false.
#define TEST_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
    if (!(expr)) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", \
              __FILE__, __LINE__, #expr); \
    } \
} UPRV_BLOCK_MACRO_END
48 
TestCloneEquals()49 void RBBIAPITest::TestCloneEquals()
50 {
51 
52     UErrorCode status=U_ZERO_ERROR;
53     RuleBasedBreakIterator* bi1     = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
54     RuleBasedBreakIterator* biequal = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
55     RuleBasedBreakIterator* bi3     = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
56     RuleBasedBreakIterator* bi2     = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
57     if(U_FAILURE(status)){
58         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
59         return;
60     }
61 
62 
63     UnicodeString testString="Testing word break iterators's clone() and equals()";
64     bi1->setText(testString);
65     bi2->setText(testString);
66     biequal->setText(testString);
67 
68     bi3->setText("hello");
69 
70     logln((UnicodeString)"Testing equals()");
71 
72     logln((UnicodeString)"Testing == and !=");
73     bool b = (*bi1 != *biequal);
74     b |= *bi1 == *bi2;
75     b |= *bi1 == *bi3;
76     if (b) {
77         errln("%s:%d ERROR:1 RBBI's == and != operator failed.", __FILE__, __LINE__);
78     }
79 
80     if(*bi2 == *biequal || *bi2 == *bi1  || *biequal == *bi3)
81         errln("%s:%d ERROR:2 RBBI's == and != operator  failed.", __FILE__, __LINE__);
82 
83 
84     // Quick test of RulesBasedBreakIterator assignment -
85     // Check that
86     //    two different iterators are !=
87     //    they are == after assignment
88     //    source and dest iterator produce the same next() after assignment.
89     //    deleting one doesn't disable the other.
90     logln("Testing assignment");
91     RuleBasedBreakIterator *bix = dynamic_cast<RuleBasedBreakIterator *>(BreakIterator::createLineInstance(Locale::getDefault(), status));
92     if(U_FAILURE(status)){
93         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
94         return;
95     }
96 
97     RuleBasedBreakIterator biDefault, biDefault2;
98     if(U_FAILURE(status)){
99         errln("%s:%d FAIL : in construction of default iterator", __FILE__, __LINE__);
100         return;
101     }
102     if (biDefault == *bix) {
103         errln("%s:%d ERROR: iterators should not compare ==", __FILE__, __LINE__);
104         return;
105     }
106     if (biDefault != biDefault2) {
107         errln("%s:%d ERROR: iterators should compare ==", __FILE__, __LINE__);
108         return;
109     }
110 
111 
112     UnicodeString   HelloString("Hello Kitty");
113     bix->setText(HelloString);
114     if (*bix == *bi2) {
115         errln("%s:%d ERROR: strings should not be equal before assignment.", __FILE__, __LINE__);
116     }
117     *bix = *bi2;
118     if (*bix != *bi2) {
119         errln("%s:%d ERROR: strings should be equal before assignment.", __FILE__, __LINE__);
120     }
121 
122     int bixnext = bix->next();
123     int bi2next = bi2->next();
124     if (! (bixnext == bi2next && bixnext == 7)) {
125         errln("%s:%d ERROR: iterators behaved differently after assignment.", __FILE__, __LINE__);
126     }
127     delete bix;
128     if (bi2->next() != 8) {
129         errln("%s:%d ERROR: iterator.next() failed after deleting copy.", __FILE__, __LINE__);
130     }
131 
132 
133 
134     logln((UnicodeString)"Testing clone()");
135     RuleBasedBreakIterator* bi1clone = bi1->clone();
136     RuleBasedBreakIterator* bi2clone = bi2->clone();
137 
138     if(*bi1clone != *bi1 || *bi1clone  != *biequal  ||
139       *bi1clone == *bi3 || *bi1clone == *bi2)
140         errln("%s:%d ERROR:1 RBBI's clone() method failed", __FILE__, __LINE__);
141 
142     if(*bi2clone == *bi1 || *bi2clone == *biequal ||
143        *bi2clone == *bi3 || *bi2clone != *bi2)
144         errln("%s:%d ERROR:2 RBBI's clone() method failed", __FILE__, __LINE__);
145 
146     if(bi1->getText() != bi1clone->getText()   ||
147        bi2clone->getText() != bi2->getText()   ||
148        *bi2clone == *bi1clone )
149         errln("%s:%d ERROR: RBBI's clone() method failed", __FILE__, __LINE__);
150 
151     delete bi1clone;
152     delete bi2clone;
153     delete bi1;
154     delete bi3;
155     delete bi2;
156     delete biequal;
157 }
158 
TestBoilerPlate()159 void RBBIAPITest::TestBoilerPlate()
160 {
161     UErrorCode status = U_ZERO_ERROR;
162     BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
163     BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
164     if (U_FAILURE(status)) {
165         errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
166         return;
167     }
168     if(*a!=*b){
169         errln("Failed: boilerplate method operator!= does not return correct results");
170     }
171     // Japanese word break iterators are identical to root with
172     // a dictionary-based break iterator
173     BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
174     BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
175     if(c && d){
176         if(*c!=*d){
177             errln("Failed: boilerplate method operator== does not return correct results");
178         }
179     }else{
180         errln("creation of break iterator failed");
181     }
182     delete a;
183     delete b;
184     delete c;
185     delete d;
186 }
187 
TestgetRules()188 void RBBIAPITest::TestgetRules()
189 {
190     UErrorCode status=U_ZERO_ERROR;
191 
192     LocalPointer<RuleBasedBreakIterator> bi1(
193             dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)), status);
194     LocalPointer<RuleBasedBreakIterator> bi2(
195             dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status)), status);
196     if(U_FAILURE(status)){
197         errcheckln(status, "%s:%d, FAIL: in construction - %s", __FILE__, __LINE__, u_errorName(status));
198         return;
199     }
200 
201     logln((UnicodeString)"Testing getRules()");
202 
203     UnicodeString text(u"Hello there");
204     bi1->setText(text);
205 
206     LocalPointer <RuleBasedBreakIterator> bi3(bi1->clone());
207 
208     UnicodeString temp=bi1->getRules();
209     UnicodeString temp2=bi2->getRules();
210     UnicodeString temp3=bi3->getRules();
211     if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
212         errln("%s:%d ERROR: error in getRules() method", __FILE__, __LINE__);
213 
214     RuleBasedBreakIterator bi4;   // Default RuleBasedBreakIterator constructor gives empty shell with empty rules.
215     if (!bi4.getRules().isEmpty()) {
216         errln("%s:%d Empty string expected.", __FILE__, __LINE__);
217     }
218 }
219 
TestHashCode()220 void RBBIAPITest::TestHashCode()
221 {
222     UErrorCode status=U_ZERO_ERROR;
223     RuleBasedBreakIterator* bi1     = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
224     RuleBasedBreakIterator* bi3     = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
225     RuleBasedBreakIterator* bi2     = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
226     if(U_FAILURE(status)){
227         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
228         delete bi1;
229         delete bi2;
230         delete bi3;
231         return;
232     }
233 
234 
235     logln((UnicodeString)"Testing hashCode()");
236 
237     bi1->setText((UnicodeString)"Hash code");
238     bi2->setText((UnicodeString)"Hash code");
239     bi3->setText((UnicodeString)"Hash code");
240 
241     RuleBasedBreakIterator* bi1clone= bi1->clone();
242     RuleBasedBreakIterator* bi2clone= bi2->clone();
243 
244     if(bi1->hashCode() != bi1clone->hashCode() ||  bi1->hashCode() != bi3->hashCode() ||
245         bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
246         errln((UnicodeString)"ERROR: identical objects have different hashcodes");
247 
248     if(bi1->hashCode() == bi2->hashCode() ||  bi2->hashCode() == bi3->hashCode() ||
249         bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
250         errln((UnicodeString)"ERROR: different objects have same hashcodes");
251 
252     delete bi1clone;
253     delete bi2clone;
254     delete bi1;
255     delete bi2;
256     delete bi3;
257 
258 }
TestGetSetAdoptText()259 void RBBIAPITest::TestGetSetAdoptText()
260 {
261     logln((UnicodeString)"Testing getText setText ");
262     IcuTestErrorCode status(*this, "TestGetSetAdoptText");
263     UnicodeString str1="first string.";
264     UnicodeString str2="Second string.";
265     LocalPointer<RuleBasedBreakIterator> charIter1(dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)));
266     LocalPointer<RuleBasedBreakIterator> wordIter1(dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status)));
267     if(status.isFailure()){
268         errcheckln(status, "Fail : in construction - %s", status.errorName());
269             return;
270     }
271 
272 
273     CharacterIterator* text1= new StringCharacterIterator(str1);
274     CharacterIterator* text1Clone = text1->clone();
275     CharacterIterator* text2= new StringCharacterIterator(str2);
276     CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); //  "ond str"
277 
278     wordIter1->setText(str1);
279     CharacterIterator *tci = &wordIter1->getText();
280     UnicodeString      tstr;
281     tci->getText(tstr);
282     TEST_ASSERT(tstr == str1);
283     if(wordIter1->current() != 0)
284         errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
285 
286     wordIter1->next(2);
287 
288     wordIter1->setText(str2);
289     if(wordIter1->current() != 0)
290         errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
291 
292 
293     charIter1->adoptText(text1Clone);
294     TEST_ASSERT(wordIter1->getText() != charIter1->getText());
295     tci = &wordIter1->getText();
296     tci->getText(tstr);
297     TEST_ASSERT(tstr == str2);
298     tci = &charIter1->getText();
299     tci->getText(tstr);
300     TEST_ASSERT(tstr == str1);
301 
302 
303     LocalPointer<RuleBasedBreakIterator> rb(wordIter1->clone());
304     rb->adoptText(text1);
305     if(rb->getText() != *text1)
306         errln((UnicodeString)"ERROR:1 error in adoptText ");
307     rb->adoptText(text2);
308     if(rb->getText() != *text2)
309         errln((UnicodeString)"ERROR:2 error in adoptText ");
310 
311     // Adopt where iterator range is less than the entire original source string.
312     //   (With the change of the break engine to working with UText internally,
313     //    CharacterIterators starting at positions other than zero are not supported)
314     rb->adoptText(text3);
315     TEST_ASSERT(rb->preceding(2) == 0);
316     TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
317     //if(rb->preceding(2) != 3) {
318     //    errln((UnicodeString)"ERROR:3 error in adoptText ");
319     //}
320     //if(rb->following(11) != BreakIterator::DONE) {
321     //    errln((UnicodeString)"ERROR:4 error in adoptText ");
322     //}
323 
324     // UText API
325     //
326     //   Quick test to see if UText is working at all.
327     //
328     const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
329     const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
330     //                012345678901
331 
332     status.reset();
333     LocalUTextPointer ut(utext_openUTF8(nullptr, s1, -1, status));
334     wordIter1->setText(ut.getAlias(), status);
335     TEST_ASSERT_SUCCESS(status);
336 
337     int32_t pos;
338     pos = wordIter1->first();
339     TEST_ASSERT(pos==0);
340     pos = wordIter1->next();
341     TEST_ASSERT(pos==5);
342     pos = wordIter1->next();
343     TEST_ASSERT(pos==6);
344     pos = wordIter1->next();
345     TEST_ASSERT(pos==11);
346     pos = wordIter1->next();
347     TEST_ASSERT(pos==UBRK_DONE);
348 
349     status.reset();
350     LocalUTextPointer ut2(utext_openUTF8(nullptr, s2, -1, status));
351     TEST_ASSERT_SUCCESS(status);
352     wordIter1->setText(ut2.getAlias(), status);
353     TEST_ASSERT_SUCCESS(status);
354 
355     pos = wordIter1->first();
356     TEST_ASSERT(pos==0);
357     pos = wordIter1->next();
358     TEST_ASSERT(pos==3);
359     pos = wordIter1->next();
360     TEST_ASSERT(pos==4);
361 
362     pos = wordIter1->last();
363     TEST_ASSERT(pos==6);
364     pos = wordIter1->previous();
365     TEST_ASSERT(pos==4);
366     pos = wordIter1->previous();
367     TEST_ASSERT(pos==3);
368     pos = wordIter1->previous();
369     TEST_ASSERT(pos==0);
370     pos = wordIter1->previous();
371     TEST_ASSERT(pos==UBRK_DONE);
372 
373     status.reset();
374     UnicodeString sEmpty;
375     LocalUTextPointer gut2(utext_openUnicodeString(nullptr, &sEmpty, status));
376     wordIter1->getUText(gut2.getAlias(), status);
377     TEST_ASSERT_SUCCESS(status);
378     status.reset();
379 }
380 
381 
TestIteration()382 void RBBIAPITest::TestIteration()
383 {
384     // This test just verifies that the API is present.
385     // Testing for correct operation of the break rules happens elsewhere.
386 
387     UErrorCode status=U_ZERO_ERROR;
388     RuleBasedBreakIterator* bi  = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
389     if (U_FAILURE(status) || bi == nullptr)  {
390         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
391     }
392     delete bi;
393 
394     status=U_ZERO_ERROR;
395     bi  = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
396     if (U_FAILURE(status) || bi == nullptr)  {
397         errcheckln(status, "Failure creating Word break iterator.  Status = %s", u_errorName(status));
398     }
399     delete bi;
400 
401     status=U_ZERO_ERROR;
402     bi  = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status));
403     if (U_FAILURE(status) || bi == nullptr)  {
404         errcheckln(status, "Failure creating Line break iterator.  Status = %s", u_errorName(status));
405     }
406     delete bi;
407 
408     status=U_ZERO_ERROR;
409     bi  = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status));
410     if (U_FAILURE(status) || bi == nullptr)  {
411         errcheckln(status, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status));
412     }
413     delete bi;
414 
415     status=U_ZERO_ERROR;
416     bi  = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status));
417     if (U_FAILURE(status) || bi == nullptr)  {
418         errcheckln(status, "Failure creating Title break iterator.  Status = %s", u_errorName(status));
419     }
420     delete bi;
421 
422     status=U_ZERO_ERROR;
423     bi  = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
424     if (U_FAILURE(status) || bi == nullptr)  {
425         errcheckln(status, "Failure creating character break iterator.  Status = %s", u_errorName(status));
426         return;   // Skip the rest of these tests.
427     }
428 
429 
430     UnicodeString testString="0123456789";
431     bi->setText(testString);
432 
433     int32_t i;
434     i = bi->first();
435     if (i != 0) {
436         errln("%s:%d Incorrect value from bi->first().  Expected 0, got %d.", __FILE__, __LINE__, i);
437     }
438 
439     i = bi->last();
440     if (i != 10) {
441         errln("%s:%d Incorrect value from bi->last().  Expected 10, got %d", __FILE__, __LINE__, i);
442     }
443 
444     //
445     // Previous
446     //
447     bi->last();
448     i = bi->previous();
449     if (i != 9) {
450         errln("%s:%d Incorrect value from bi->last().  Expected 9, got %d", __FILE__, __LINE__, i);
451     }
452 
453 
454     bi->first();
455     i = bi->previous();
456     if (i != BreakIterator::DONE) {
457         errln("%s:%d Incorrect value from bi->previous().  Expected DONE, got %d", __FILE__, __LINE__, i);
458     }
459 
460     //
461     // next()
462     //
463     bi->first();
464     i = bi->next();
465     if (i != 1) {
466         errln("%s:%d Incorrect value from bi->next().  Expected 1, got %d", __FILE__, __LINE__, i);
467     }
468 
469     bi->last();
470     i = bi->next();
471     if (i != BreakIterator::DONE) {
472         errln("%s:%d Incorrect value from bi->next().  Expected DONE, got %d", __FILE__, __LINE__, i);
473     }
474 
475 
476     //
477     //  current()
478     //
479     bi->first();
480     i = bi->current();
481     if (i != 0) {
482         errln("%s:%d Incorrect value from bi->current().  Expected 0, got %d", __FILE__, __LINE__, i);
483     }
484 
485     bi->next();
486     i = bi->current();
487     if (i != 1) {
488         errln("%s:%d Incorrect value from bi->current().  Expected 1, got %d", __FILE__, __LINE__, i);
489     }
490 
491     bi->last();
492     bi->next();
493     i = bi->current();
494     if (i != 10) {
495         errln("%s:%d Incorrect value from bi->current().  Expected 10, got %d", __FILE__, __LINE__, i);
496     }
497 
498     bi->first();
499     bi->previous();
500     i = bi->current();
501     if (i != 0) {
502         errln("%s:%d Incorrect value from bi->current().  Expected 0, got %d", __FILE__, __LINE__, i);
503     }
504 
505 
506     //
507     // Following()
508     //
509     i = bi->following(4);
510     if (i != 5) {
511         errln("%s:%d Incorrect value from bi->following().  Expected 5, got %d", __FILE__, __LINE__, i);
512     }
513 
514     i = bi->following(9);
515     if (i != 10) {
516         errln("%s:%d Incorrect value from bi->following().  Expected 10, got %d", __FILE__, __LINE__, i);
517     }
518 
519     i = bi->following(10);
520     if (i != BreakIterator::DONE) {
521         errln("%s:%d Incorrect value from bi->following().  Expected DONE, got %d", __FILE__, __LINE__, i);
522     }
523 
524 
525     //
526     // Preceding
527     //
528     i = bi->preceding(4);
529     if (i != 3) {
530         errln("%s:%d Incorrect value from bi->preceding().  Expected 3, got %d", __FILE__, __LINE__, i);
531     }
532 
533     i = bi->preceding(10);
534     if (i != 9) {
535         errln("%s:%d Incorrect value from bi->preceding().  Expected 9, got %d", __FILE__, __LINE__, i);
536     }
537 
538     i = bi->preceding(1);
539     if (i != 0) {
540         errln("%s:%d Incorrect value from bi->preceding().  Expected 0, got %d", __FILE__, __LINE__, i);
541     }
542 
543     i = bi->preceding(0);
544     if (i != BreakIterator::DONE) {
545         errln("%s:%d Incorrect value from bi->preceding().  Expected DONE, got %d", __FILE__, __LINE__, i);
546     }
547 
548 
549     //
550     // isBoundary()
551     //
552     bi->first();
553     if (bi->isBoundary(3) != true) {
554         errln("%s:%d Incorrect value from bi->isBoundary().  Expected true, got false", __FILE__, __LINE__, i);
555     }
556     i = bi->current();
557     if (i != 3) {
558         errln("%s:%d Incorrect value from bi->current().  Expected 3, got %d", __FILE__, __LINE__, i);
559     }
560 
561 
562     if (bi->isBoundary(11) != false) {
563         errln("%s:%d Incorrect value from bi->isBoundary().  Expected false, got true", __FILE__, __LINE__, i);
564     }
565     i = bi->current();
566     if (i != 10) {
567         errln("%s:%d Incorrect value from bi->current().  Expected 10, got %d", __FILE__, __LINE__, i);
568     }
569 
570     //
571     // next(n)
572     //
573     bi->first();
574     i = bi->next(4);
575     if (i != 4) {
576         errln("%s:%d Incorrect value from bi->next().  Expected 4, got %d", __FILE__, __LINE__, i);
577     }
578 
579     i = bi->next(6);
580     if (i != 10) {
581         errln("%s:%d Incorrect value from bi->next().  Expected 10, got %d", __FILE__, __LINE__, i);
582     }
583 
584     bi->first();
585     i = bi->next(11);
586     if (i != BreakIterator::DONE) {
587         errln("%s:%d Incorrect value from bi->next().  Expected BreakIterator::DONE, got %d", __FILE__, __LINE__, i);
588     }
589 
590     delete bi;
591 
592 }
593 
594 
595 
596 
597 
598 
TestBuilder()599 void RBBIAPITest::TestBuilder() {
600      UnicodeString rulesString1 = "$Letters = [:L:];\n"
601                                   "$Numbers = [:N:];\n"
602                                   "$Letters+;\n"
603                                   "$Numbers+;\n"
604                                   "[^$Letters $Numbers];\n"
605                                   "!.*;\n";
606      UnicodeString testString1  = "abc123..abc";
607                                 // 01234567890
608      int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
609      UErrorCode status=U_ZERO_ERROR;
610      UParseError    parseError;
611 
612      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
613      if(U_FAILURE(status)) {
614          dataerrln("Fail : in construction - %s", u_errorName(status));
615      } else {
616          bi->setText(testString1);
617          doBoundaryTest(*bi, testString1, bounds1);
618      }
619      delete bi;
620 }
621 
622 
623 //
624 //  TestQuoteGrouping
625 //       Single quotes within rules imply a grouping, so that a modifier
626 //       following the quoted text (* or +) applies to all of the quoted chars.
627 //
TestQuoteGrouping()628 void RBBIAPITest::TestQuoteGrouping() {
629      UnicodeString rulesString1 = "#Here comes the rule...\n"
630                                   "'$@!'*;\n"   //  (\$\@\!)*
631                                   ".;\n";
632 
633      UnicodeString testString1  = "$@!$@!X$@!!X";
634                                 // 0123456789012
635      int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
636      UErrorCode status=U_ZERO_ERROR;
637      UParseError    parseError;
638 
639      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
640      if(U_FAILURE(status)) {
641          dataerrln("Fail : in construction - %s", u_errorName(status));
642      } else {
643          bi->setText(testString1);
644          doBoundaryTest(*bi, testString1, bounds1);
645      }
646      delete bi;
647 }
648 
649 //
650 //  TestRuleStatus
651 //      Test word break rule status constants.
652 //
TestRuleStatus()653 void RBBIAPITest::TestRuleStatus() {
654      char16_t str[30];
655      //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
656      // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
657      u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
658               // 012345678901234567  8      9    0
659               //                     Katakana
660                 str, 30);
661      UnicodeString testString1(str);
662      int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
663      int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
664                           UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
665                           UBRK_WORD_IDEO,     UBRK_WORD_NONE};
666 
667      int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
668                           UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
669                           UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
670 
671      UErrorCode status=U_ZERO_ERROR;
672 
673      BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
674      if(U_FAILURE(status)) {
675          errcheckln(status, "%s:%d Fail in construction - %s", __FILE__, __LINE__, u_errorName(status));
676      } else {
677          bi->setText(testString1);
678          // First test that the breaks are in the right spots.
679          doBoundaryTest(*bi, testString1, bounds1);
680 
681          // Then go back and check tag values
682          int32_t i = 0;
683          int32_t pos, tag;
684          for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
685              if (pos != bounds1[i]) {
686                  errln("%s:%d FAIL: unexpected word break at position %d", __FILE__, __LINE__, pos);
687                  break;
688              }
689              tag = bi->getRuleStatus();
690              if (tag < tag_lo[i] || tag >= tag_hi[i]) {
691                  errln("%s:%d FAIL: incorrect tag value %d at position %d", __FILE__, __LINE__, tag, pos);
692                  break;
693              }
694 
695              // Check that we get the same tag values from getRuleStatusVec()
696              int32_t vec[10];
697              int t = bi->getRuleStatusVec(vec, 10, status);
698              TEST_ASSERT_SUCCESS(status);
699              TEST_ASSERT(t==1);
700              TEST_ASSERT(vec[0] == tag);
701          }
702      }
703      delete bi;
704 
705      // Now test line break status.  This test mostly is to confirm that the status constants
706      //                              are correctly declared in the header.
707      testString1 =   "test line. \n";
708      // break type    s    s     h
709 
710      bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
711      if(U_FAILURE(status)) {
712          errcheckln(status, "%s:%d failed to create line break iterator. - %s", __FILE__, __LINE__, u_errorName(status));
713      } else {
714          int32_t i = 0;
715          int32_t pos, tag;
716          UBool   success;
717 
718          bi->setText(testString1);
719          pos = bi->current();
720          tag = bi->getRuleStatus();
721          for (i=0; i<3; i++) {
722              switch (i) {
723              case 0:
724                  success = pos==0  && tag==UBRK_LINE_SOFT; break;
725              case 1:
726                  success = pos==5  && tag==UBRK_LINE_SOFT; break;
727              case 2:
728                  success = pos==12 && tag==UBRK_LINE_HARD; break;
729              default:
730                  success = false; break;
731              }
732              if (success == false) {
733                  errln("%s:%d: incorrect line break status or position.  i=%d, pos=%d, tag=%d",
734                      __FILE__, __LINE__, i, pos, tag);
735                  break;
736              }
737              pos = bi->next();
738              tag = bi->getRuleStatus();
739          }
740          if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
741              UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
742              (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
743              errln("%s:%d UBRK_LINE_* constants from header are inconsistent.", __FILE__, __LINE__);
744          }
745      }
746      delete bi;
747 
748 }
749 
750 
751 //
752 //  TestRuleStatusVec
753 //      Test the vector form of  break rule status.
754 //
TestRuleStatusVec()755 void RBBIAPITest::TestRuleStatusVec() {
756     UnicodeString rulesString(   "[A-N]{100}; \n"
757                                  "[a-w]{200}; \n"
758                                  "[\\p{L}]{300}; \n"
759                                  "[\\p{N}]{400}; \n"
760                                  "[0-5]{500}; \n"
761                                   "!.*;\n", -1, US_INV);
762      UnicodeString testString1  = "Aapz5?";
763      int32_t  statusVals[10];
764      int32_t  numStatuses;
765      int32_t  pos;
766 
767      UErrorCode status=U_ZERO_ERROR;
768      UParseError    parseError;
769 
770      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
771      if (U_FAILURE(status)) {
772          dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
773      } else {
774          bi->setText(testString1);
775 
776          // A
777          pos = bi->next();
778          TEST_ASSERT(pos==1);
779          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
780          TEST_ASSERT_SUCCESS(status);
781          TEST_ASSERT(numStatuses == 2);
782          TEST_ASSERT(statusVals[0] == 100);
783          TEST_ASSERT(statusVals[1] == 300);
784 
785          // a
786          pos = bi->next();
787          TEST_ASSERT(pos==2);
788          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
789          TEST_ASSERT_SUCCESS(status);
790          TEST_ASSERT(numStatuses == 2);
791          TEST_ASSERT(statusVals[0] == 200);
792          TEST_ASSERT(statusVals[1] == 300);
793 
794          // p
795          pos = bi->next();
796          TEST_ASSERT(pos==3);
797          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
798          TEST_ASSERT_SUCCESS(status);
799          TEST_ASSERT(numStatuses == 2);
800          TEST_ASSERT(statusVals[0] == 200);
801          TEST_ASSERT(statusVals[1] == 300);
802 
803          // z
804          pos = bi->next();
805          TEST_ASSERT(pos==4);
806          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
807          TEST_ASSERT_SUCCESS(status);
808          TEST_ASSERT(numStatuses == 1);
809          TEST_ASSERT(statusVals[0] == 300);
810 
811          // 5
812          pos = bi->next();
813          TEST_ASSERT(pos==5);
814          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
815          TEST_ASSERT_SUCCESS(status);
816          TEST_ASSERT(numStatuses == 2);
817          TEST_ASSERT(statusVals[0] == 400);
818          TEST_ASSERT(statusVals[1] == 500);
819 
820          // ?
821          pos = bi->next();
822          TEST_ASSERT(pos==6);
823          numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
824          TEST_ASSERT_SUCCESS(status);
825          TEST_ASSERT(numStatuses == 1);
826          TEST_ASSERT(statusVals[0] == 0);
827 
828          //
829          //  Check buffer overflow error handling.   Char == A
830          //
831          bi->first();
832          pos = bi->next();
833          TEST_ASSERT(pos==1);
834          memset(statusVals, -1, sizeof(statusVals));
835          numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
836          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
837          TEST_ASSERT(numStatuses == 2);
838          TEST_ASSERT(statusVals[0] == -1);
839 
840          status = U_ZERO_ERROR;
841          memset(statusVals, -1, sizeof(statusVals));
842          numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
843          TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
844          TEST_ASSERT(numStatuses == 2);
845          TEST_ASSERT(statusVals[0] == 100);
846          TEST_ASSERT(statusVals[1] == -1);
847 
848          status = U_ZERO_ERROR;
849          memset(statusVals, -1, sizeof(statusVals));
850          numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
851          TEST_ASSERT_SUCCESS(status);
852          TEST_ASSERT(numStatuses == 2);
853          TEST_ASSERT(statusVals[0] == 100);
854          TEST_ASSERT(statusVals[1] == 300);
855          TEST_ASSERT(statusVals[2] == -1);
856      }
857      delete bi;
858 
859 }
860 
861 //
862 //   Bug 2190 Regression test.   Builder crash on rule consisting of only a
863 //                               $variable reference
TestBug2190()864 void RBBIAPITest::TestBug2190() {
865      UnicodeString rulesString1 = "$aaa = abcd;\n"
866                                   "$bbb = $aaa;\n"
867                                   "$bbb;\n";
868      UnicodeString testString1  = "abcdabcd";
869                                 // 01234567890
870      int32_t bounds1[] = {0, 4, 8};
871      UErrorCode status=U_ZERO_ERROR;
872      UParseError    parseError;
873 
874      RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
875      if(U_FAILURE(status)) {
876          dataerrln("Fail : in construction - %s", u_errorName(status));
877      } else {
878          bi->setText(testString1);
879          doBoundaryTest(*bi, testString1, bounds1);
880      }
881      delete bi;
882 }
883 
884 
TestRegistration()885 void RBBIAPITest::TestRegistration() {
886 #if !UCONFIG_NO_SERVICE
887     UErrorCode status = U_ZERO_ERROR;
888     BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
889     // ok to not delete these if we exit because of error?
890     BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
891     BreakIterator* root_word = BreakIterator::createWordInstance("", status);
892     BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
893 
894     if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
895         dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
896 
897         delete ja_word;
898         delete ja_char;
899         delete root_word;
900         delete root_char;
901 
902         return;
903     }
904 
905     URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
906     {
907 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
908         if (ja_word && *ja_word == *root_word) {
909             errln("japan not different from root");
910         }
911 #endif
912     }
913 
914     {
915         BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
916         UBool fail = true;
917         if(result){
918             fail = *result != *ja_word;
919         }
920         delete result;
921         if (fail) {
922             errln("bad result for xx_XX/word");
923         }
924     }
925 
926     {
927         BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
928         UBool fail = true;
929         if(result){
930             fail = *result != *ja_char;
931         }
932         delete result;
933         if (fail) {
934             errln("bad result for ja_JP/char");
935         }
936     }
937 
938     {
939         BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
940         UBool fail = true;
941         if(result){
942             fail = *result != *root_char;
943         }
944         delete result;
945         if (fail) {
946             errln("bad result for xx_XX/char");
947         }
948     }
949 
950     {
951         StringEnumeration* avail = BreakIterator::getAvailableLocales();
952         UBool found = false;
953         const UnicodeString* p;
954         while ((p = avail->snext(status))) {
955             if (p->compare("xx") == 0) {
956                 found = true;
957                 break;
958             }
959         }
960         delete avail;
961         if (!found) {
962             errln("did not find test locale");
963         }
964     }
965 
966     {
967         UBool unreg = BreakIterator::unregister(key, status);
968         if (!unreg) {
969             errln("unable to unregister");
970         }
971     }
972 
973     {
974         BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
975         BreakIterator* root = BreakIterator::createWordInstance("", status);
976         UBool fail = true;
977         if(root){
978           fail = *root != *result;
979         }
980         delete root;
981         delete result;
982         if (fail) {
983             errln("did not get root break");
984         }
985     }
986 
987     {
988         StringEnumeration* avail = BreakIterator::getAvailableLocales();
989         UBool found = false;
990         const UnicodeString* p;
991         while ((p = avail->snext(status))) {
992             if (p->compare("xx") == 0) {
993                 found = true;
994                 break;
995             }
996         }
997         delete avail;
998         if (found) {
999             errln("found test locale");
1000         }
1001     }
1002 
1003     {
1004         int32_t count;
1005         UBool   foundLocale = false;
1006         const Locale *avail = BreakIterator::getAvailableLocales(count);
1007         for (int i=0; i<count; i++) {
1008             if (avail[i] == Locale::getEnglish()) {
1009                 foundLocale = true;
1010                 break;
1011             }
1012         }
1013         if (foundLocale == false) {
1014             errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1015         }
1016     }
1017 
1018 
1019     // ja_word was adopted by factory
1020     delete ja_char;
1021     delete root_word;
1022     delete root_char;
1023 #endif
1024 }
1025 
RoundtripRule(const char * dataFile)1026 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1027     UErrorCode status = U_ZERO_ERROR;
1028     UParseError parseError;
1029     parseError.line = 0;
1030     parseError.offset = 0;
1031     LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1032     uint32_t length;
1033     UnicodeString builtSource;
1034     const uint8_t *rbbiRules;
1035     const uint8_t *builtRules;
1036 
1037     if (U_FAILURE(status)) {
1038         errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
1039         return;
1040     }
1041 
1042     builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1043     builtSource = UnicodeString::fromUTF8(
1044         (const char *)(builtRules + ((RBBIDataHeader *)builtRules)->fRuleSource));
1045     LocalPointer<RuleBasedBreakIterator> brkItr (new RuleBasedBreakIterator(builtSource, parseError, status));
1046     if (U_FAILURE(status)) {
1047         errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
1048                 __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
1049         errln(builtSource);
1050         return;
1051     }
1052     rbbiRules = brkItr->getBinaryRules(length);
1053     logln("Comparing \"%s\" len=%d", dataFile, length);
1054     if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1055         errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
1056         return;
1057     }
1058 }
1059 
TestRoundtripRules()1060 void RBBIAPITest::TestRoundtripRules() {
1061     RoundtripRule("word");
1062     RoundtripRule("title");
1063     RoundtripRule("sent");
1064     RoundtripRule("line");
1065     RoundtripRule("char");
1066     if (!quick) {
1067         RoundtripRule("word_POSIX");
1068     }
1069 }
1070 
1071 
1072 // Check getBinaryRules() and construction of a break iterator from those rules.
1073 
TestGetBinaryRules()1074 void RBBIAPITest::TestGetBinaryRules() {
1075     UErrorCode status=U_ZERO_ERROR;
1076     LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1077     if (U_FAILURE(status)) {
1078         dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status));
1079         return;
1080     }
1081     RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
1082     if (rbbi == nullptr) {
1083         dataerrln("FAIL: RuleBasedBreakIterator is nullptr");
1084         return;
1085     }
1086 
1087     // Check that the new line break iterator is nominally functional.
1088     UnicodeString helloWorld("Hello, World!");
1089     rbbi->setText(helloWorld);
1090     int n = 0;
1091     while (bi->next() != UBRK_DONE) {
1092         ++n;
1093     }
1094     TEST_ASSERT(n == 2);
1095 
1096     // Extract the binary rules as a uint8_t blob.
1097     uint32_t ruleLength;
1098     const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
1099     TEST_ASSERT(ruleLength > 0);
1100     TEST_ASSERT(binRules != nullptr);
1101 
1102     // Clone the binary rules, and create a break iterator from that.
1103     // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1104     uint8_t *clonedRules = new uint8_t[ruleLength];
1105     memcpy(clonedRules, binRules, ruleLength);
1106     RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
1107     TEST_ASSERT_SUCCESS(status);
1108 
1109     // Check that the cloned line break iterator is nominally alive.
1110     clonedBI.setText(helloWorld);
1111     n = 0;
1112     while (clonedBI.next() != UBRK_DONE) {
1113         ++n;
1114     }
1115     TEST_ASSERT(n == 2);
1116 
1117     delete[] clonedRules;
1118 }
1119 
1120 
TestRefreshInputText()1121 void RBBIAPITest::TestRefreshInputText() {
1122     /*
1123      *  RefreshInput changes out the input of a Break Iterator without
1124      *    changing anything else in the iterator's state.  Used with Java JNI,
1125      *    when Java moves the underlying string storage.   This test
1126      *    runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1127      *    The right set of boundaries should still be found.
1128      */
1129     char16_t testStr[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D"  */
1130     char16_t movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  0};
1131     UErrorCode status = U_ZERO_ERROR;
1132     UText ut1 = UTEXT_INITIALIZER;
1133     UText ut2 = UTEXT_INITIALIZER;
1134     RuleBasedBreakIterator *bi = dynamic_cast<RuleBasedBreakIterator *>(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1135     TEST_ASSERT_SUCCESS(status);
1136 
1137     utext_openUChars(&ut1, testStr, -1, &status);
1138     TEST_ASSERT_SUCCESS(status);
1139 
1140     if (U_SUCCESS(status)) {
1141         bi->setText(&ut1, status);
1142         TEST_ASSERT_SUCCESS(status);
1143 
1144         /* Line boundaries will occur before each letter in the original string */
1145         TEST_ASSERT(1 == bi->next());
1146         TEST_ASSERT(3 == bi->next());
1147 
1148         /* Move the string, kill the original string.  */
1149         u_strcpy(movedStr, testStr);
1150         u_memset(testStr, 0x20, u_strlen(testStr));
1151         utext_openUChars(&ut2, movedStr, -1, &status);
1152         TEST_ASSERT_SUCCESS(status);
1153         RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1154         TEST_ASSERT_SUCCESS(status);
1155         TEST_ASSERT(bi == returnedBI);
1156 
1157         /* Find the following matches, now working in the moved string. */
1158         TEST_ASSERT(5 == bi->next());
1159         TEST_ASSERT(7 == bi->next());
1160         TEST_ASSERT(8 == bi->next());
1161         TEST_ASSERT(UBRK_DONE == bi->next());
1162 
1163         utext_close(&ut1);
1164         utext_close(&ut2);
1165     }
1166     delete bi;
1167 
1168 }
1169 
1170 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
prtbrks(BreakIterator * brk,const UnicodeString & ustr,IntlTest & it)1171 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1172   static const char16_t PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1173   it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1174 
1175   int32_t *pos = new int32_t[ustr.length()];
1176   int32_t posCount = 0;
1177 
1178   // calculate breaks up front, so we can print out
1179   // sans any debugging
1180   for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1181     pos[posCount++] = n;
1182     if(posCount>=ustr.length()) {
1183       it.errln("brk count exceeds string length!");
1184       return;
1185     }
1186   }
1187   UnicodeString out;
1188   out.append((char16_t)CHSTR);
1189   int32_t prev = 0;
1190   for(int32_t i=0;i<posCount;i++) {
1191     int32_t n=pos[i];
1192     out.append(ustr.tempSubString(prev,n-prev));
1193     out.append((char16_t)PILCROW);
1194     prev=n;
1195   }
1196   out.append(ustr.tempSubString(prev,ustr.length()-prev));
1197   out.append((char16_t)CHEND);
1198   it.logln(out);
1199 
1200   out.remove();
1201   for(int32_t i=0;i<posCount;i++) {
1202     char tmp[100];
1203     snprintf(tmp, sizeof(tmp), "%d ",pos[i]);
1204     out.append(UnicodeString(tmp));
1205   }
1206   it.logln(out);
1207   delete [] pos;
1208 }
1209 #endif
1210 
TestFilteredBreakIteratorBuilder()1211 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1212 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1213   UErrorCode status = U_ZERO_ERROR;
1214   LocalPointer<FilteredBreakIteratorBuilder> builder;
1215   LocalPointer<BreakIterator> baseBI;
1216   LocalPointer<BreakIterator> filteredBI;
1217   LocalPointer<BreakIterator> frenchBI;
1218 
1219   const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1220   const UnicodeString ABBR_MR("Mr.");
1221   const UnicodeString ABBR_CAPT("Capt.");
1222 
1223   {
1224     logln("Constructing empty builder\n");
1225     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1226     TEST_ASSERT_SUCCESS(status);
1227 
1228     logln("Constructing base BI\n");
1229     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1230     TEST_ASSERT_SUCCESS(status);
1231 
1232 	logln("Building new BI\n");
1233     filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1234     TEST_ASSERT_SUCCESS(status);
1235 
1236 	if (U_SUCCESS(status)) {
1237         logln("Testing:");
1238         filteredBI->setText(text);
1239         TEST_ASSERT(20 == filteredBI->next()); // Mr.
1240         TEST_ASSERT(84 == filteredBI->next()); // recovered.
1241         TEST_ASSERT(90 == filteredBI->next()); // Capt.
1242         TEST_ASSERT(181 == filteredBI->next()); // Mr.
1243         TEST_ASSERT(278 == filteredBI->next()); // charge.
1244         filteredBI->first();
1245         prtbrks(filteredBI.getAlias(), text, *this);
1246     }
1247   }
1248 
1249   {
1250     logln("Constructing empty builder\n");
1251     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1252     TEST_ASSERT_SUCCESS(status);
1253 
1254     if (U_SUCCESS(status)) {
1255         logln("Adding Mr. as an exception\n");
1256         TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_MR, status));
1257         TEST_ASSERT(false == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1258         TEST_ASSERT(true == builder->unsuppressBreakAfter(ABBR_MR, status));
1259         TEST_ASSERT(false == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1260         TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_MR, status));
1261         TEST_ASSERT_SUCCESS(status);
1262 
1263         logln("Constructing base BI\n");
1264         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1265         TEST_ASSERT_SUCCESS(status);
1266 
1267         logln("Building new BI\n");
1268         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1269         TEST_ASSERT_SUCCESS(status);
1270 
1271         logln("Testing:");
1272         filteredBI->setText(text);
1273         TEST_ASSERT(84 == filteredBI->next());
1274         TEST_ASSERT(90 == filteredBI->next());// Capt.
1275         TEST_ASSERT(278 == filteredBI->next());
1276         filteredBI->first();
1277         prtbrks(filteredBI.getAlias(), text, *this);
1278     }
1279   }
1280 
1281 
1282   {
1283     logln("Constructing empty builder\n");
1284     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1285     TEST_ASSERT_SUCCESS(status);
1286 
1287     if (U_SUCCESS(status)) {
1288         logln("Adding Mr. and Capt as an exception\n");
1289         TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_MR, status));
1290         TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_CAPT, status));
1291         TEST_ASSERT_SUCCESS(status);
1292 
1293         logln("Constructing base BI\n");
1294         baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1295         TEST_ASSERT_SUCCESS(status);
1296 
1297         logln("Building new BI\n");
1298         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1299         TEST_ASSERT_SUCCESS(status);
1300 
1301         logln("Testing:");
1302         filteredBI->setText(text);
1303         TEST_ASSERT(84 == filteredBI->next());
1304         TEST_ASSERT(278 == filteredBI->next());
1305         filteredBI->first();
1306         prtbrks(filteredBI.getAlias(), text, *this);
1307     }
1308   }
1309 
1310 
1311   {
1312     logln("Constructing English builder\n");
1313     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1314     TEST_ASSERT_SUCCESS(status);
1315 
1316     logln("Constructing base BI\n");
1317     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1318     TEST_ASSERT_SUCCESS(status);
1319 
1320     if (U_SUCCESS(status)) {
1321         logln("unsuppressing 'Capt'");
1322         TEST_ASSERT(true == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1323 
1324         logln("Building new BI\n");
1325         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1326         TEST_ASSERT_SUCCESS(status);
1327 
1328         if(filteredBI.isValid()) {
1329           logln("Testing:");
1330           filteredBI->setText(text);
1331           TEST_ASSERT(84 == filteredBI->next());
1332           TEST_ASSERT(90 == filteredBI->next());
1333           TEST_ASSERT(278 == filteredBI->next());
1334           filteredBI->first();
1335           prtbrks(filteredBI.getAlias(), text, *this);
1336         }
1337     }
1338   }
1339 
1340 
1341   {
1342     logln("Constructing English builder\n");
1343     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1344     TEST_ASSERT_SUCCESS(status);
1345 
1346     logln("Constructing base BI\n");
1347     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1348     TEST_ASSERT_SUCCESS(status);
1349 
1350     if (U_SUCCESS(status)) {
1351         logln("Building new BI\n");
1352         filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1353         TEST_ASSERT_SUCCESS(status);
1354 
1355         if(filteredBI.isValid()) {
1356           logln("Testing:");
1357           filteredBI->setText(text);
1358           TEST_ASSERT(84 == filteredBI->next());
1359           TEST_ASSERT(278 == filteredBI->next());
1360           filteredBI->first();
1361           prtbrks(filteredBI.getAlias(), text, *this);
1362         }
1363     }
1364   }
1365 
1366   // reenable once french is in
1367   {
1368     logln("Constructing French builder");
1369     builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1370     TEST_ASSERT_SUCCESS(status);
1371 
1372     logln("Constructing base BI\n");
1373     baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1374     TEST_ASSERT_SUCCESS(status);
1375 
1376     if (U_SUCCESS(status)) {
1377         logln("Building new BI\n");
1378         frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1379         TEST_ASSERT_SUCCESS(status);
1380     }
1381 
1382     if(frenchBI.isValid()) {
1383       logln("Testing:");
1384       UnicodeString frText("C'est MM. Duval.");
1385       frenchBI->setText(frText);
1386       TEST_ASSERT(16 == frenchBI->next());
1387       TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1388       frenchBI->first();
1389       prtbrks(frenchBI.getAlias(), frText, *this);
1390       logln("Testing against English:");
1391       filteredBI->setText(frText);
1392       TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
1393       TEST_ASSERT(16 == filteredBI->next());
1394       TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
1395       filteredBI->first();
1396       prtbrks(filteredBI.getAlias(), frText, *this);
1397 
1398       // Verify ==
1399       assertTrue(WHERE, *frenchBI   == *frenchBI);
1400       assertTrue(WHERE, *filteredBI != *frenchBI);
1401       assertTrue(WHERE, *frenchBI   != *filteredBI);
1402     } else {
1403       dataerrln("French BI: not valid.");
1404 	}
1405   }
1406 
1407 #else
1408   logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1409 #endif
1410 }
1411 
1412 //---------------------------------------------
1413 // runIndexedTest
1414 //---------------------------------------------
1415 
// Dispatch table for this test suite.
//
// The body consists almost entirely of TESTCASE_AUTO* macros, which are
// defined outside this file; they consume index, exec and name.
// NOTE(review): presumably each TESTCASE_AUTO entry is assigned the next
// index in sequence, so the order of the entries (and which ones are compiled
// in) is significant - confirm against the macro definitions in intltest.h.
// The last parameter is unused (its name is commented out).
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1416 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1417 {
    // Log a suite banner only when a test is actually being executed.
1418     if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1419     TESTCASE_AUTO_BEGIN;
    // These entries are compiled only when file I/O is available.
1420 #if !UCONFIG_NO_FILE_IO
1421     TESTCASE_AUTO(TestCloneEquals);
1422     TESTCASE_AUTO(TestgetRules);
1423     TESTCASE_AUTO(TestHashCode);
1424     TESTCASE_AUTO(TestGetSetAdoptText);
1425     TESTCASE_AUTO(TestIteration);
1426 #endif
    // These entries are listed unconditionally.
1427     TESTCASE_AUTO(TestBuilder);
1428     TESTCASE_AUTO(TestQuoteGrouping);
1429     TESTCASE_AUTO(TestRuleStatusVec);
1430     TESTCASE_AUTO(TestBug2190);
    // A second group that is compiled only when file I/O is available.
1431 #if !UCONFIG_NO_FILE_IO
1432     TESTCASE_AUTO(TestRegistration);
1433     TESTCASE_AUTO(TestBoilerPlate);
1434     TESTCASE_AUTO(TestRuleStatus);
1435     TESTCASE_AUTO(TestRoundtripRules);
1436     TESTCASE_AUTO(TestGetBinaryRules);
1437 #endif
1438     TESTCASE_AUTO(TestRefreshInputText);
    // Listed only when break iteration is compiled in.
1439 #if !UCONFIG_NO_BREAK_ITERATION
1440     TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
1441 #endif
1442     TESTCASE_AUTO_END;
1443 }
1444 
1445 
1446 //---------------------------------------------
1447 //Internal subroutines
1448 //---------------------------------------------
1449 
doBoundaryTest(BreakIterator & bi,UnicodeString & text,int32_t * boundaries)1450 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1451      logln((UnicodeString)"testIsBoundary():");
1452         int32_t p = 0;
1453         UBool isB;
1454         for (int32_t i = 0; i < text.length(); i++) {
1455             isB = bi.isBoundary(i);
1456             logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1457 
1458             if (i == boundaries[p]) {
1459                 if (!isB)
1460                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1461                 p++;
1462             }
1463             else {
1464                 if (isB)
1465                     errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1466             }
1467         }
1468 }
doTest(UnicodeString & testString,int32_t start,int32_t gotoffset,int32_t expectedOffset,const char * expectedString)1469 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1470     UnicodeString selected;
1471     UnicodeString expected=CharsToUnicodeString(expectedString);
1472 
1473     if(gotoffset != expectedOffset)
1474          errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1475     if(start <= gotoffset){
1476         testString.extractBetween(start, gotoffset, selected);
1477     }
1478     else{
1479         testString.extractBetween(gotoffset, start, selected);
1480     }
1481     if(selected.compare(expected) != 0)
1482          errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1483     else
1484         logln(prettify("****selected \"" + selected + "\""));
1485 }
1486 
1487 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1488