1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1999-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
7 * Date Name Description
8 * 12/14/99 Madhu Creation.
9 * 01/12/2000 Madhu updated for changed API
10 ********************************************************************/
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_BREAK_ITERATION
15
16 #include "unicode/uchar.h"
17 #include "intltest.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
20 #include "rbbiapts.h"
21 #include "rbbidata.h"
22 #include "cstring.h"
23 #include "ubrkimpl.h"
24 #include "unicode/locid.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utext.h"
27 #include "cmemory.h"
28 #if !UCONFIG_NO_BREAK_ITERATION
29 #include "unicode/filteredbrk.h"
30 #include <stdio.h> // for snprintf
31 #endif
/**
 * API tests for the RuleBasedBreakIterator class.
 */
35
36
// Reports a data error, tagged with the current file and line and the ICU
// error name, whenever `status` does not hold a success code.
#define TEST_ASSERT_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \
    if (!U_SUCCESS(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \
    } \
} UPRV_BLOCK_MACRO_END
42
// Reports a test error, tagged with the current file and line and the text of
// the expression, whenever `expr` evaluates to false.
#define TEST_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
    if (!(expr)) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr); \
    } \
} UPRV_BLOCK_MACRO_END
48
TestCloneEquals()49 void RBBIAPITest::TestCloneEquals()
50 {
51
52 UErrorCode status=U_ZERO_ERROR;
53 RuleBasedBreakIterator* bi1 = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
54 RuleBasedBreakIterator* biequal = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
55 RuleBasedBreakIterator* bi3 = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
56 RuleBasedBreakIterator* bi2 = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
57 if(U_FAILURE(status)){
58 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
59 return;
60 }
61
62
63 UnicodeString testString="Testing word break iterators's clone() and equals()";
64 bi1->setText(testString);
65 bi2->setText(testString);
66 biequal->setText(testString);
67
68 bi3->setText("hello");
69
70 logln((UnicodeString)"Testing equals()");
71
72 logln((UnicodeString)"Testing == and !=");
73 bool b = (*bi1 != *biequal);
74 b |= *bi1 == *bi2;
75 b |= *bi1 == *bi3;
76 if (b) {
77 errln("%s:%d ERROR:1 RBBI's == and != operator failed.", __FILE__, __LINE__);
78 }
79
80 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3)
81 errln("%s:%d ERROR:2 RBBI's == and != operator failed.", __FILE__, __LINE__);
82
83
84 // Quick test of RulesBasedBreakIterator assignment -
85 // Check that
86 // two different iterators are !=
87 // they are == after assignment
88 // source and dest iterator produce the same next() after assignment.
89 // deleting one doesn't disable the other.
90 logln("Testing assignment");
91 RuleBasedBreakIterator *bix = dynamic_cast<RuleBasedBreakIterator *>(BreakIterator::createLineInstance(Locale::getDefault(), status));
92 if(U_FAILURE(status)){
93 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
94 return;
95 }
96
97 RuleBasedBreakIterator biDefault, biDefault2;
98 if(U_FAILURE(status)){
99 errln("%s:%d FAIL : in construction of default iterator", __FILE__, __LINE__);
100 return;
101 }
102 if (biDefault == *bix) {
103 errln("%s:%d ERROR: iterators should not compare ==", __FILE__, __LINE__);
104 return;
105 }
106 if (biDefault != biDefault2) {
107 errln("%s:%d ERROR: iterators should compare ==", __FILE__, __LINE__);
108 return;
109 }
110
111
112 UnicodeString HelloString("Hello Kitty");
113 bix->setText(HelloString);
114 if (*bix == *bi2) {
115 errln("%s:%d ERROR: strings should not be equal before assignment.", __FILE__, __LINE__);
116 }
117 *bix = *bi2;
118 if (*bix != *bi2) {
119 errln("%s:%d ERROR: strings should be equal before assignment.", __FILE__, __LINE__);
120 }
121
122 int bixnext = bix->next();
123 int bi2next = bi2->next();
124 if (! (bixnext == bi2next && bixnext == 7)) {
125 errln("%s:%d ERROR: iterators behaved differently after assignment.", __FILE__, __LINE__);
126 }
127 delete bix;
128 if (bi2->next() != 8) {
129 errln("%s:%d ERROR: iterator.next() failed after deleting copy.", __FILE__, __LINE__);
130 }
131
132
133
134 logln((UnicodeString)"Testing clone()");
135 RuleBasedBreakIterator* bi1clone = bi1->clone();
136 RuleBasedBreakIterator* bi2clone = bi2->clone();
137
138 if(*bi1clone != *bi1 || *bi1clone != *biequal ||
139 *bi1clone == *bi3 || *bi1clone == *bi2)
140 errln("%s:%d ERROR:1 RBBI's clone() method failed", __FILE__, __LINE__);
141
142 if(*bi2clone == *bi1 || *bi2clone == *biequal ||
143 *bi2clone == *bi3 || *bi2clone != *bi2)
144 errln("%s:%d ERROR:2 RBBI's clone() method failed", __FILE__, __LINE__);
145
146 if(bi1->getText() != bi1clone->getText() ||
147 bi2clone->getText() != bi2->getText() ||
148 *bi2clone == *bi1clone )
149 errln("%s:%d ERROR: RBBI's clone() method failed", __FILE__, __LINE__);
150
151 delete bi1clone;
152 delete bi2clone;
153 delete bi1;
154 delete bi3;
155 delete bi2;
156 delete biequal;
157 }
158
TestBoilerPlate()159 void RBBIAPITest::TestBoilerPlate()
160 {
161 UErrorCode status = U_ZERO_ERROR;
162 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
163 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
164 if (U_FAILURE(status)) {
165 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
166 return;
167 }
168 if(*a!=*b){
169 errln("Failed: boilerplate method operator!= does not return correct results");
170 }
171 // Japanese word break iterators are identical to root with
172 // a dictionary-based break iterator
173 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
174 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
175 if(c && d){
176 if(*c!=*d){
177 errln("Failed: boilerplate method operator== does not return correct results");
178 }
179 }else{
180 errln("creation of break iterator failed");
181 }
182 delete a;
183 delete b;
184 delete c;
185 delete d;
186 }
187
TestgetRules()188 void RBBIAPITest::TestgetRules()
189 {
190 UErrorCode status=U_ZERO_ERROR;
191
192 LocalPointer<RuleBasedBreakIterator> bi1(
193 dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)), status);
194 LocalPointer<RuleBasedBreakIterator> bi2(
195 dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status)), status);
196 if(U_FAILURE(status)){
197 errcheckln(status, "%s:%d, FAIL: in construction - %s", __FILE__, __LINE__, u_errorName(status));
198 return;
199 }
200
201 logln((UnicodeString)"Testing getRules()");
202
203 UnicodeString text(u"Hello there");
204 bi1->setText(text);
205
206 LocalPointer <RuleBasedBreakIterator> bi3(bi1->clone());
207
208 UnicodeString temp=bi1->getRules();
209 UnicodeString temp2=bi2->getRules();
210 UnicodeString temp3=bi3->getRules();
211 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
212 errln("%s:%d ERROR: error in getRules() method", __FILE__, __LINE__);
213
214 RuleBasedBreakIterator bi4; // Default RuleBasedBreakIterator constructor gives empty shell with empty rules.
215 if (!bi4.getRules().isEmpty()) {
216 errln("%s:%d Empty string expected.", __FILE__, __LINE__);
217 }
218 }
219
TestHashCode()220 void RBBIAPITest::TestHashCode()
221 {
222 UErrorCode status=U_ZERO_ERROR;
223 RuleBasedBreakIterator* bi1 = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
224 RuleBasedBreakIterator* bi3 = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
225 RuleBasedBreakIterator* bi2 = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
226 if(U_FAILURE(status)){
227 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
228 delete bi1;
229 delete bi2;
230 delete bi3;
231 return;
232 }
233
234
235 logln((UnicodeString)"Testing hashCode()");
236
237 bi1->setText((UnicodeString)"Hash code");
238 bi2->setText((UnicodeString)"Hash code");
239 bi3->setText((UnicodeString)"Hash code");
240
241 RuleBasedBreakIterator* bi1clone= bi1->clone();
242 RuleBasedBreakIterator* bi2clone= bi2->clone();
243
244 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() ||
245 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
246 errln((UnicodeString)"ERROR: identical objects have different hashcodes");
247
248 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() ||
249 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
250 errln((UnicodeString)"ERROR: different objects have same hashcodes");
251
252 delete bi1clone;
253 delete bi2clone;
254 delete bi1;
255 delete bi2;
256 delete bi3;
257
258 }
TestGetSetAdoptText()259 void RBBIAPITest::TestGetSetAdoptText()
260 {
261 logln((UnicodeString)"Testing getText setText ");
262 IcuTestErrorCode status(*this, "TestGetSetAdoptText");
263 UnicodeString str1="first string.";
264 UnicodeString str2="Second string.";
265 LocalPointer<RuleBasedBreakIterator> charIter1(dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)));
266 LocalPointer<RuleBasedBreakIterator> wordIter1(dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status)));
267 if(status.isFailure()){
268 errcheckln(status, "Fail : in construction - %s", status.errorName());
269 return;
270 }
271
272
273 CharacterIterator* text1= new StringCharacterIterator(str1);
274 CharacterIterator* text1Clone = text1->clone();
275 CharacterIterator* text2= new StringCharacterIterator(str2);
276 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"
277
278 wordIter1->setText(str1);
279 CharacterIterator *tci = &wordIter1->getText();
280 UnicodeString tstr;
281 tci->getText(tstr);
282 TEST_ASSERT(tstr == str1);
283 if(wordIter1->current() != 0)
284 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
285
286 wordIter1->next(2);
287
288 wordIter1->setText(str2);
289 if(wordIter1->current() != 0)
290 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
291
292
293 charIter1->adoptText(text1Clone);
294 TEST_ASSERT(wordIter1->getText() != charIter1->getText());
295 tci = &wordIter1->getText();
296 tci->getText(tstr);
297 TEST_ASSERT(tstr == str2);
298 tci = &charIter1->getText();
299 tci->getText(tstr);
300 TEST_ASSERT(tstr == str1);
301
302
303 LocalPointer<RuleBasedBreakIterator> rb(wordIter1->clone());
304 rb->adoptText(text1);
305 if(rb->getText() != *text1)
306 errln((UnicodeString)"ERROR:1 error in adoptText ");
307 rb->adoptText(text2);
308 if(rb->getText() != *text2)
309 errln((UnicodeString)"ERROR:2 error in adoptText ");
310
311 // Adopt where iterator range is less than the entire original source string.
312 // (With the change of the break engine to working with UText internally,
313 // CharacterIterators starting at positions other than zero are not supported)
314 rb->adoptText(text3);
315 TEST_ASSERT(rb->preceding(2) == 0);
316 TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
317 //if(rb->preceding(2) != 3) {
318 // errln((UnicodeString)"ERROR:3 error in adoptText ");
319 //}
320 //if(rb->following(11) != BreakIterator::DONE) {
321 // errln((UnicodeString)"ERROR:4 error in adoptText ");
322 //}
323
324 // UText API
325 //
326 // Quick test to see if UText is working at all.
327 //
328 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
329 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
330 // 012345678901
331
332 status.reset();
333 LocalUTextPointer ut(utext_openUTF8(nullptr, s1, -1, status));
334 wordIter1->setText(ut.getAlias(), status);
335 TEST_ASSERT_SUCCESS(status);
336
337 int32_t pos;
338 pos = wordIter1->first();
339 TEST_ASSERT(pos==0);
340 pos = wordIter1->next();
341 TEST_ASSERT(pos==5);
342 pos = wordIter1->next();
343 TEST_ASSERT(pos==6);
344 pos = wordIter1->next();
345 TEST_ASSERT(pos==11);
346 pos = wordIter1->next();
347 TEST_ASSERT(pos==UBRK_DONE);
348
349 status.reset();
350 LocalUTextPointer ut2(utext_openUTF8(nullptr, s2, -1, status));
351 TEST_ASSERT_SUCCESS(status);
352 wordIter1->setText(ut2.getAlias(), status);
353 TEST_ASSERT_SUCCESS(status);
354
355 pos = wordIter1->first();
356 TEST_ASSERT(pos==0);
357 pos = wordIter1->next();
358 TEST_ASSERT(pos==3);
359 pos = wordIter1->next();
360 TEST_ASSERT(pos==4);
361
362 pos = wordIter1->last();
363 TEST_ASSERT(pos==6);
364 pos = wordIter1->previous();
365 TEST_ASSERT(pos==4);
366 pos = wordIter1->previous();
367 TEST_ASSERT(pos==3);
368 pos = wordIter1->previous();
369 TEST_ASSERT(pos==0);
370 pos = wordIter1->previous();
371 TEST_ASSERT(pos==UBRK_DONE);
372
373 status.reset();
374 UnicodeString sEmpty;
375 LocalUTextPointer gut2(utext_openUnicodeString(nullptr, &sEmpty, status));
376 wordIter1->getUText(gut2.getAlias(), status);
377 TEST_ASSERT_SUCCESS(status);
378 status.reset();
379 }
380
381
TestIteration()382 void RBBIAPITest::TestIteration()
383 {
384 // This test just verifies that the API is present.
385 // Testing for correct operation of the break rules happens elsewhere.
386
387 UErrorCode status=U_ZERO_ERROR;
388 RuleBasedBreakIterator* bi = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
389 if (U_FAILURE(status) || bi == nullptr) {
390 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
391 }
392 delete bi;
393
394 status=U_ZERO_ERROR;
395 bi = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
396 if (U_FAILURE(status) || bi == nullptr) {
397 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status));
398 }
399 delete bi;
400
401 status=U_ZERO_ERROR;
402 bi = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status));
403 if (U_FAILURE(status) || bi == nullptr) {
404 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status));
405 }
406 delete bi;
407
408 status=U_ZERO_ERROR;
409 bi = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status));
410 if (U_FAILURE(status) || bi == nullptr) {
411 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status));
412 }
413 delete bi;
414
415 status=U_ZERO_ERROR;
416 bi = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status));
417 if (U_FAILURE(status) || bi == nullptr) {
418 errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status));
419 }
420 delete bi;
421
422 status=U_ZERO_ERROR;
423 bi = dynamic_cast<RuleBasedBreakIterator*>(RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
424 if (U_FAILURE(status) || bi == nullptr) {
425 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
426 return; // Skip the rest of these tests.
427 }
428
429
430 UnicodeString testString="0123456789";
431 bi->setText(testString);
432
433 int32_t i;
434 i = bi->first();
435 if (i != 0) {
436 errln("%s:%d Incorrect value from bi->first(). Expected 0, got %d.", __FILE__, __LINE__, i);
437 }
438
439 i = bi->last();
440 if (i != 10) {
441 errln("%s:%d Incorrect value from bi->last(). Expected 10, got %d", __FILE__, __LINE__, i);
442 }
443
444 //
445 // Previous
446 //
447 bi->last();
448 i = bi->previous();
449 if (i != 9) {
450 errln("%s:%d Incorrect value from bi->last(). Expected 9, got %d", __FILE__, __LINE__, i);
451 }
452
453
454 bi->first();
455 i = bi->previous();
456 if (i != BreakIterator::DONE) {
457 errln("%s:%d Incorrect value from bi->previous(). Expected DONE, got %d", __FILE__, __LINE__, i);
458 }
459
460 //
461 // next()
462 //
463 bi->first();
464 i = bi->next();
465 if (i != 1) {
466 errln("%s:%d Incorrect value from bi->next(). Expected 1, got %d", __FILE__, __LINE__, i);
467 }
468
469 bi->last();
470 i = bi->next();
471 if (i != BreakIterator::DONE) {
472 errln("%s:%d Incorrect value from bi->next(). Expected DONE, got %d", __FILE__, __LINE__, i);
473 }
474
475
476 //
477 // current()
478 //
479 bi->first();
480 i = bi->current();
481 if (i != 0) {
482 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__, __LINE__, i);
483 }
484
485 bi->next();
486 i = bi->current();
487 if (i != 1) {
488 errln("%s:%d Incorrect value from bi->current(). Expected 1, got %d", __FILE__, __LINE__, i);
489 }
490
491 bi->last();
492 bi->next();
493 i = bi->current();
494 if (i != 10) {
495 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__, __LINE__, i);
496 }
497
498 bi->first();
499 bi->previous();
500 i = bi->current();
501 if (i != 0) {
502 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__, __LINE__, i);
503 }
504
505
506 //
507 // Following()
508 //
509 i = bi->following(4);
510 if (i != 5) {
511 errln("%s:%d Incorrect value from bi->following(). Expected 5, got %d", __FILE__, __LINE__, i);
512 }
513
514 i = bi->following(9);
515 if (i != 10) {
516 errln("%s:%d Incorrect value from bi->following(). Expected 10, got %d", __FILE__, __LINE__, i);
517 }
518
519 i = bi->following(10);
520 if (i != BreakIterator::DONE) {
521 errln("%s:%d Incorrect value from bi->following(). Expected DONE, got %d", __FILE__, __LINE__, i);
522 }
523
524
525 //
526 // Preceding
527 //
528 i = bi->preceding(4);
529 if (i != 3) {
530 errln("%s:%d Incorrect value from bi->preceding(). Expected 3, got %d", __FILE__, __LINE__, i);
531 }
532
533 i = bi->preceding(10);
534 if (i != 9) {
535 errln("%s:%d Incorrect value from bi->preceding(). Expected 9, got %d", __FILE__, __LINE__, i);
536 }
537
538 i = bi->preceding(1);
539 if (i != 0) {
540 errln("%s:%d Incorrect value from bi->preceding(). Expected 0, got %d", __FILE__, __LINE__, i);
541 }
542
543 i = bi->preceding(0);
544 if (i != BreakIterator::DONE) {
545 errln("%s:%d Incorrect value from bi->preceding(). Expected DONE, got %d", __FILE__, __LINE__, i);
546 }
547
548
549 //
550 // isBoundary()
551 //
552 bi->first();
553 if (bi->isBoundary(3) != true) {
554 errln("%s:%d Incorrect value from bi->isBoundary(). Expected true, got false", __FILE__, __LINE__, i);
555 }
556 i = bi->current();
557 if (i != 3) {
558 errln("%s:%d Incorrect value from bi->current(). Expected 3, got %d", __FILE__, __LINE__, i);
559 }
560
561
562 if (bi->isBoundary(11) != false) {
563 errln("%s:%d Incorrect value from bi->isBoundary(). Expected false, got true", __FILE__, __LINE__, i);
564 }
565 i = bi->current();
566 if (i != 10) {
567 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__, __LINE__, i);
568 }
569
570 //
571 // next(n)
572 //
573 bi->first();
574 i = bi->next(4);
575 if (i != 4) {
576 errln("%s:%d Incorrect value from bi->next(). Expected 4, got %d", __FILE__, __LINE__, i);
577 }
578
579 i = bi->next(6);
580 if (i != 10) {
581 errln("%s:%d Incorrect value from bi->next(). Expected 10, got %d", __FILE__, __LINE__, i);
582 }
583
584 bi->first();
585 i = bi->next(11);
586 if (i != BreakIterator::DONE) {
587 errln("%s:%d Incorrect value from bi->next(). Expected BreakIterator::DONE, got %d", __FILE__, __LINE__, i);
588 }
589
590 delete bi;
591
592 }
593
594
595
596
597
598
TestBuilder()599 void RBBIAPITest::TestBuilder() {
600 UnicodeString rulesString1 = "$Letters = [:L:];\n"
601 "$Numbers = [:N:];\n"
602 "$Letters+;\n"
603 "$Numbers+;\n"
604 "[^$Letters $Numbers];\n"
605 "!.*;\n";
606 UnicodeString testString1 = "abc123..abc";
607 // 01234567890
608 int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
609 UErrorCode status=U_ZERO_ERROR;
610 UParseError parseError;
611
612 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
613 if(U_FAILURE(status)) {
614 dataerrln("Fail : in construction - %s", u_errorName(status));
615 } else {
616 bi->setText(testString1);
617 doBoundaryTest(*bi, testString1, bounds1);
618 }
619 delete bi;
620 }
621
622
623 //
624 // TestQuoteGrouping
625 // Single quotes within rules imply a grouping, so that a modifier
626 // following the quoted text (* or +) applies to all of the quoted chars.
627 //
TestQuoteGrouping()628 void RBBIAPITest::TestQuoteGrouping() {
629 UnicodeString rulesString1 = "#Here comes the rule...\n"
630 "'$@!'*;\n" // (\$\@\!)*
631 ".;\n";
632
633 UnicodeString testString1 = "$@!$@!X$@!!X";
634 // 0123456789012
635 int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
636 UErrorCode status=U_ZERO_ERROR;
637 UParseError parseError;
638
639 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
640 if(U_FAILURE(status)) {
641 dataerrln("Fail : in construction - %s", u_errorName(status));
642 } else {
643 bi->setText(testString1);
644 doBoundaryTest(*bi, testString1, bounds1);
645 }
646 delete bi;
647 }
648
649 //
650 // TestRuleStatus
651 // Test word break rule status constants.
652 //
TestRuleStatus()653 void RBBIAPITest::TestRuleStatus() {
654 char16_t str[30];
655 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
656 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
657 u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
658 // 012345678901234567 8 9 0
659 // Katakana
660 str, 30);
661 UnicodeString testString1(str);
662 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
663 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
664 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
665 UBRK_WORD_IDEO, UBRK_WORD_NONE};
666
667 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
668 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
669 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
670
671 UErrorCode status=U_ZERO_ERROR;
672
673 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
674 if(U_FAILURE(status)) {
675 errcheckln(status, "%s:%d Fail in construction - %s", __FILE__, __LINE__, u_errorName(status));
676 } else {
677 bi->setText(testString1);
678 // First test that the breaks are in the right spots.
679 doBoundaryTest(*bi, testString1, bounds1);
680
681 // Then go back and check tag values
682 int32_t i = 0;
683 int32_t pos, tag;
684 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
685 if (pos != bounds1[i]) {
686 errln("%s:%d FAIL: unexpected word break at position %d", __FILE__, __LINE__, pos);
687 break;
688 }
689 tag = bi->getRuleStatus();
690 if (tag < tag_lo[i] || tag >= tag_hi[i]) {
691 errln("%s:%d FAIL: incorrect tag value %d at position %d", __FILE__, __LINE__, tag, pos);
692 break;
693 }
694
695 // Check that we get the same tag values from getRuleStatusVec()
696 int32_t vec[10];
697 int t = bi->getRuleStatusVec(vec, 10, status);
698 TEST_ASSERT_SUCCESS(status);
699 TEST_ASSERT(t==1);
700 TEST_ASSERT(vec[0] == tag);
701 }
702 }
703 delete bi;
704
705 // Now test line break status. This test mostly is to confirm that the status constants
706 // are correctly declared in the header.
707 testString1 = "test line. \n";
708 // break type s s h
709
710 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
711 if(U_FAILURE(status)) {
712 errcheckln(status, "%s:%d failed to create line break iterator. - %s", __FILE__, __LINE__, u_errorName(status));
713 } else {
714 int32_t i = 0;
715 int32_t pos, tag;
716 UBool success;
717
718 bi->setText(testString1);
719 pos = bi->current();
720 tag = bi->getRuleStatus();
721 for (i=0; i<3; i++) {
722 switch (i) {
723 case 0:
724 success = pos==0 && tag==UBRK_LINE_SOFT; break;
725 case 1:
726 success = pos==5 && tag==UBRK_LINE_SOFT; break;
727 case 2:
728 success = pos==12 && tag==UBRK_LINE_HARD; break;
729 default:
730 success = false; break;
731 }
732 if (success == false) {
733 errln("%s:%d: incorrect line break status or position. i=%d, pos=%d, tag=%d",
734 __FILE__, __LINE__, i, pos, tag);
735 break;
736 }
737 pos = bi->next();
738 tag = bi->getRuleStatus();
739 }
740 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
741 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
742 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
743 errln("%s:%d UBRK_LINE_* constants from header are inconsistent.", __FILE__, __LINE__);
744 }
745 }
746 delete bi;
747
748 }
749
750
751 //
752 // TestRuleStatusVec
753 // Test the vector form of break rule status.
754 //
TestRuleStatusVec()755 void RBBIAPITest::TestRuleStatusVec() {
756 UnicodeString rulesString( "[A-N]{100}; \n"
757 "[a-w]{200}; \n"
758 "[\\p{L}]{300}; \n"
759 "[\\p{N}]{400}; \n"
760 "[0-5]{500}; \n"
761 "!.*;\n", -1, US_INV);
762 UnicodeString testString1 = "Aapz5?";
763 int32_t statusVals[10];
764 int32_t numStatuses;
765 int32_t pos;
766
767 UErrorCode status=U_ZERO_ERROR;
768 UParseError parseError;
769
770 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
771 if (U_FAILURE(status)) {
772 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
773 } else {
774 bi->setText(testString1);
775
776 // A
777 pos = bi->next();
778 TEST_ASSERT(pos==1);
779 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
780 TEST_ASSERT_SUCCESS(status);
781 TEST_ASSERT(numStatuses == 2);
782 TEST_ASSERT(statusVals[0] == 100);
783 TEST_ASSERT(statusVals[1] == 300);
784
785 // a
786 pos = bi->next();
787 TEST_ASSERT(pos==2);
788 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
789 TEST_ASSERT_SUCCESS(status);
790 TEST_ASSERT(numStatuses == 2);
791 TEST_ASSERT(statusVals[0] == 200);
792 TEST_ASSERT(statusVals[1] == 300);
793
794 // p
795 pos = bi->next();
796 TEST_ASSERT(pos==3);
797 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
798 TEST_ASSERT_SUCCESS(status);
799 TEST_ASSERT(numStatuses == 2);
800 TEST_ASSERT(statusVals[0] == 200);
801 TEST_ASSERT(statusVals[1] == 300);
802
803 // z
804 pos = bi->next();
805 TEST_ASSERT(pos==4);
806 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
807 TEST_ASSERT_SUCCESS(status);
808 TEST_ASSERT(numStatuses == 1);
809 TEST_ASSERT(statusVals[0] == 300);
810
811 // 5
812 pos = bi->next();
813 TEST_ASSERT(pos==5);
814 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
815 TEST_ASSERT_SUCCESS(status);
816 TEST_ASSERT(numStatuses == 2);
817 TEST_ASSERT(statusVals[0] == 400);
818 TEST_ASSERT(statusVals[1] == 500);
819
820 // ?
821 pos = bi->next();
822 TEST_ASSERT(pos==6);
823 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
824 TEST_ASSERT_SUCCESS(status);
825 TEST_ASSERT(numStatuses == 1);
826 TEST_ASSERT(statusVals[0] == 0);
827
828 //
829 // Check buffer overflow error handling. Char == A
830 //
831 bi->first();
832 pos = bi->next();
833 TEST_ASSERT(pos==1);
834 memset(statusVals, -1, sizeof(statusVals));
835 numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
836 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
837 TEST_ASSERT(numStatuses == 2);
838 TEST_ASSERT(statusVals[0] == -1);
839
840 status = U_ZERO_ERROR;
841 memset(statusVals, -1, sizeof(statusVals));
842 numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
843 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
844 TEST_ASSERT(numStatuses == 2);
845 TEST_ASSERT(statusVals[0] == 100);
846 TEST_ASSERT(statusVals[1] == -1);
847
848 status = U_ZERO_ERROR;
849 memset(statusVals, -1, sizeof(statusVals));
850 numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
851 TEST_ASSERT_SUCCESS(status);
852 TEST_ASSERT(numStatuses == 2);
853 TEST_ASSERT(statusVals[0] == 100);
854 TEST_ASSERT(statusVals[1] == 300);
855 TEST_ASSERT(statusVals[2] == -1);
856 }
857 delete bi;
858
859 }
860
861 //
862 // Bug 2190 Regression test. Builder crash on rule consisting of only a
863 // $variable reference
TestBug2190()864 void RBBIAPITest::TestBug2190() {
865 UnicodeString rulesString1 = "$aaa = abcd;\n"
866 "$bbb = $aaa;\n"
867 "$bbb;\n";
868 UnicodeString testString1 = "abcdabcd";
869 // 01234567890
870 int32_t bounds1[] = {0, 4, 8};
871 UErrorCode status=U_ZERO_ERROR;
872 UParseError parseError;
873
874 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
875 if(U_FAILURE(status)) {
876 dataerrln("Fail : in construction - %s", u_errorName(status));
877 } else {
878 bi->setText(testString1);
879 doBoundaryTest(*bi, testString1, bounds1);
880 }
881 delete bi;
882 }
883
884
TestRegistration()885 void RBBIAPITest::TestRegistration() {
886 #if !UCONFIG_NO_SERVICE
887 UErrorCode status = U_ZERO_ERROR;
888 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
889 // ok to not delete these if we exit because of error?
890 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
891 BreakIterator* root_word = BreakIterator::createWordInstance("", status);
892 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
893
894 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
895 dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
896
897 delete ja_word;
898 delete ja_char;
899 delete root_word;
900 delete root_char;
901
902 return;
903 }
904
905 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
906 {
907 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
908 if (ja_word && *ja_word == *root_word) {
909 errln("japan not different from root");
910 }
911 #endif
912 }
913
914 {
915 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
916 UBool fail = true;
917 if(result){
918 fail = *result != *ja_word;
919 }
920 delete result;
921 if (fail) {
922 errln("bad result for xx_XX/word");
923 }
924 }
925
926 {
927 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
928 UBool fail = true;
929 if(result){
930 fail = *result != *ja_char;
931 }
932 delete result;
933 if (fail) {
934 errln("bad result for ja_JP/char");
935 }
936 }
937
938 {
939 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
940 UBool fail = true;
941 if(result){
942 fail = *result != *root_char;
943 }
944 delete result;
945 if (fail) {
946 errln("bad result for xx_XX/char");
947 }
948 }
949
950 {
951 StringEnumeration* avail = BreakIterator::getAvailableLocales();
952 UBool found = false;
953 const UnicodeString* p;
954 while ((p = avail->snext(status))) {
955 if (p->compare("xx") == 0) {
956 found = true;
957 break;
958 }
959 }
960 delete avail;
961 if (!found) {
962 errln("did not find test locale");
963 }
964 }
965
966 {
967 UBool unreg = BreakIterator::unregister(key, status);
968 if (!unreg) {
969 errln("unable to unregister");
970 }
971 }
972
973 {
974 BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
975 BreakIterator* root = BreakIterator::createWordInstance("", status);
976 UBool fail = true;
977 if(root){
978 fail = *root != *result;
979 }
980 delete root;
981 delete result;
982 if (fail) {
983 errln("did not get root break");
984 }
985 }
986
987 {
988 StringEnumeration* avail = BreakIterator::getAvailableLocales();
989 UBool found = false;
990 const UnicodeString* p;
991 while ((p = avail->snext(status))) {
992 if (p->compare("xx") == 0) {
993 found = true;
994 break;
995 }
996 }
997 delete avail;
998 if (found) {
999 errln("found test locale");
1000 }
1001 }
1002
1003 {
1004 int32_t count;
1005 UBool foundLocale = false;
1006 const Locale *avail = BreakIterator::getAvailableLocales(count);
1007 for (int i=0; i<count; i++) {
1008 if (avail[i] == Locale::getEnglish()) {
1009 foundLocale = true;
1010 break;
1011 }
1012 }
1013 if (foundLocale == false) {
1014 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1015 }
1016 }
1017
1018
1019 // ja_word was adopted by factory
1020 delete ja_char;
1021 delete root_word;
1022 delete root_char;
1023 #endif
1024 }
1025
RoundtripRule(const char * dataFile)1026 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1027 UErrorCode status = U_ZERO_ERROR;
1028 UParseError parseError;
1029 parseError.line = 0;
1030 parseError.offset = 0;
1031 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1032 uint32_t length;
1033 UnicodeString builtSource;
1034 const uint8_t *rbbiRules;
1035 const uint8_t *builtRules;
1036
1037 if (U_FAILURE(status)) {
1038 errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
1039 return;
1040 }
1041
1042 builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1043 builtSource = UnicodeString::fromUTF8(
1044 (const char *)(builtRules + ((RBBIDataHeader *)builtRules)->fRuleSource));
1045 LocalPointer<RuleBasedBreakIterator> brkItr (new RuleBasedBreakIterator(builtSource, parseError, status));
1046 if (U_FAILURE(status)) {
1047 errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1048 __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
1049 errln(builtSource);
1050 return;
1051 }
1052 rbbiRules = brkItr->getBinaryRules(length);
1053 logln("Comparing \"%s\" len=%d", dataFile, length);
1054 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1055 errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
1056 return;
1057 }
1058 }
1059
TestRoundtripRules()1060 void RBBIAPITest::TestRoundtripRules() {
1061 RoundtripRule("word");
1062 RoundtripRule("title");
1063 RoundtripRule("sent");
1064 RoundtripRule("line");
1065 RoundtripRule("char");
1066 if (!quick) {
1067 RoundtripRule("word_POSIX");
1068 }
1069 }
1070
1071
1072 // Check getBinaryRules() and construction of a break iterator from those rules.
1073
TestGetBinaryRules()1074 void RBBIAPITest::TestGetBinaryRules() {
1075 UErrorCode status=U_ZERO_ERROR;
1076 LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1077 if (U_FAILURE(status)) {
1078 dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status));
1079 return;
1080 }
1081 RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
1082 if (rbbi == nullptr) {
1083 dataerrln("FAIL: RuleBasedBreakIterator is nullptr");
1084 return;
1085 }
1086
1087 // Check that the new line break iterator is nominally functional.
1088 UnicodeString helloWorld("Hello, World!");
1089 rbbi->setText(helloWorld);
1090 int n = 0;
1091 while (bi->next() != UBRK_DONE) {
1092 ++n;
1093 }
1094 TEST_ASSERT(n == 2);
1095
1096 // Extract the binary rules as a uint8_t blob.
1097 uint32_t ruleLength;
1098 const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
1099 TEST_ASSERT(ruleLength > 0);
1100 TEST_ASSERT(binRules != nullptr);
1101
1102 // Clone the binary rules, and create a break iterator from that.
1103 // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1104 uint8_t *clonedRules = new uint8_t[ruleLength];
1105 memcpy(clonedRules, binRules, ruleLength);
1106 RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
1107 TEST_ASSERT_SUCCESS(status);
1108
1109 // Check that the cloned line break iterator is nominally alive.
1110 clonedBI.setText(helloWorld);
1111 n = 0;
1112 while (clonedBI.next() != UBRK_DONE) {
1113 ++n;
1114 }
1115 TEST_ASSERT(n == 2);
1116
1117 delete[] clonedRules;
1118 }
1119
1120
TestRefreshInputText()1121 void RBBIAPITest::TestRefreshInputText() {
1122 /*
1123 * RefreshInput changes out the input of a Break Iterator without
1124 * changing anything else in the iterator's state. Used with Java JNI,
1125 * when Java moves the underlying string storage. This test
1126 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1127 * The right set of boundaries should still be found.
1128 */
1129 char16_t testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
1130 char16_t movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
1131 UErrorCode status = U_ZERO_ERROR;
1132 UText ut1 = UTEXT_INITIALIZER;
1133 UText ut2 = UTEXT_INITIALIZER;
1134 RuleBasedBreakIterator *bi = dynamic_cast<RuleBasedBreakIterator *>(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1135 TEST_ASSERT_SUCCESS(status);
1136
1137 utext_openUChars(&ut1, testStr, -1, &status);
1138 TEST_ASSERT_SUCCESS(status);
1139
1140 if (U_SUCCESS(status)) {
1141 bi->setText(&ut1, status);
1142 TEST_ASSERT_SUCCESS(status);
1143
1144 /* Line boundaries will occur before each letter in the original string */
1145 TEST_ASSERT(1 == bi->next());
1146 TEST_ASSERT(3 == bi->next());
1147
1148 /* Move the string, kill the original string. */
1149 u_strcpy(movedStr, testStr);
1150 u_memset(testStr, 0x20, u_strlen(testStr));
1151 utext_openUChars(&ut2, movedStr, -1, &status);
1152 TEST_ASSERT_SUCCESS(status);
1153 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1154 TEST_ASSERT_SUCCESS(status);
1155 TEST_ASSERT(bi == returnedBI);
1156
1157 /* Find the following matches, now working in the moved string. */
1158 TEST_ASSERT(5 == bi->next());
1159 TEST_ASSERT(7 == bi->next());
1160 TEST_ASSERT(8 == bi->next());
1161 TEST_ASSERT(UBRK_DONE == bi->next());
1162
1163 utext_close(&ut1);
1164 utext_close(&ut2);
1165 }
1166 delete bi;
1167
1168 }
1169
1170 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
prtbrks(BreakIterator * brk,const UnicodeString & ustr,IntlTest & it)1171 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1172 static const char16_t PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1173 it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1174
1175 int32_t *pos = new int32_t[ustr.length()];
1176 int32_t posCount = 0;
1177
1178 // calculate breaks up front, so we can print out
1179 // sans any debugging
1180 for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1181 pos[posCount++] = n;
1182 if(posCount>=ustr.length()) {
1183 it.errln("brk count exceeds string length!");
1184 return;
1185 }
1186 }
1187 UnicodeString out;
1188 out.append((char16_t)CHSTR);
1189 int32_t prev = 0;
1190 for(int32_t i=0;i<posCount;i++) {
1191 int32_t n=pos[i];
1192 out.append(ustr.tempSubString(prev,n-prev));
1193 out.append((char16_t)PILCROW);
1194 prev=n;
1195 }
1196 out.append(ustr.tempSubString(prev,ustr.length()-prev));
1197 out.append((char16_t)CHEND);
1198 it.logln(out);
1199
1200 out.remove();
1201 for(int32_t i=0;i<posCount;i++) {
1202 char tmp[100];
1203 snprintf(tmp, sizeof(tmp), "%d ",pos[i]);
1204 out.append(UnicodeString(tmp));
1205 }
1206 it.logln(out);
1207 delete [] pos;
1208 }
1209 #endif
1210
TestFilteredBreakIteratorBuilder()1211 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1212 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1213 UErrorCode status = U_ZERO_ERROR;
1214 LocalPointer<FilteredBreakIteratorBuilder> builder;
1215 LocalPointer<BreakIterator> baseBI;
1216 LocalPointer<BreakIterator> filteredBI;
1217 LocalPointer<BreakIterator> frenchBI;
1218
1219 const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1220 const UnicodeString ABBR_MR("Mr.");
1221 const UnicodeString ABBR_CAPT("Capt.");
1222
1223 {
1224 logln("Constructing empty builder\n");
1225 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1226 TEST_ASSERT_SUCCESS(status);
1227
1228 logln("Constructing base BI\n");
1229 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1230 TEST_ASSERT_SUCCESS(status);
1231
1232 logln("Building new BI\n");
1233 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1234 TEST_ASSERT_SUCCESS(status);
1235
1236 if (U_SUCCESS(status)) {
1237 logln("Testing:");
1238 filteredBI->setText(text);
1239 TEST_ASSERT(20 == filteredBI->next()); // Mr.
1240 TEST_ASSERT(84 == filteredBI->next()); // recovered.
1241 TEST_ASSERT(90 == filteredBI->next()); // Capt.
1242 TEST_ASSERT(181 == filteredBI->next()); // Mr.
1243 TEST_ASSERT(278 == filteredBI->next()); // charge.
1244 filteredBI->first();
1245 prtbrks(filteredBI.getAlias(), text, *this);
1246 }
1247 }
1248
1249 {
1250 logln("Constructing empty builder\n");
1251 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1252 TEST_ASSERT_SUCCESS(status);
1253
1254 if (U_SUCCESS(status)) {
1255 logln("Adding Mr. as an exception\n");
1256 TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_MR, status));
1257 TEST_ASSERT(false == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1258 TEST_ASSERT(true == builder->unsuppressBreakAfter(ABBR_MR, status));
1259 TEST_ASSERT(false == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1260 TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_MR, status));
1261 TEST_ASSERT_SUCCESS(status);
1262
1263 logln("Constructing base BI\n");
1264 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1265 TEST_ASSERT_SUCCESS(status);
1266
1267 logln("Building new BI\n");
1268 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1269 TEST_ASSERT_SUCCESS(status);
1270
1271 logln("Testing:");
1272 filteredBI->setText(text);
1273 TEST_ASSERT(84 == filteredBI->next());
1274 TEST_ASSERT(90 == filteredBI->next());// Capt.
1275 TEST_ASSERT(278 == filteredBI->next());
1276 filteredBI->first();
1277 prtbrks(filteredBI.getAlias(), text, *this);
1278 }
1279 }
1280
1281
1282 {
1283 logln("Constructing empty builder\n");
1284 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1285 TEST_ASSERT_SUCCESS(status);
1286
1287 if (U_SUCCESS(status)) {
1288 logln("Adding Mr. and Capt as an exception\n");
1289 TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_MR, status));
1290 TEST_ASSERT(true == builder->suppressBreakAfter(ABBR_CAPT, status));
1291 TEST_ASSERT_SUCCESS(status);
1292
1293 logln("Constructing base BI\n");
1294 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1295 TEST_ASSERT_SUCCESS(status);
1296
1297 logln("Building new BI\n");
1298 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1299 TEST_ASSERT_SUCCESS(status);
1300
1301 logln("Testing:");
1302 filteredBI->setText(text);
1303 TEST_ASSERT(84 == filteredBI->next());
1304 TEST_ASSERT(278 == filteredBI->next());
1305 filteredBI->first();
1306 prtbrks(filteredBI.getAlias(), text, *this);
1307 }
1308 }
1309
1310
1311 {
1312 logln("Constructing English builder\n");
1313 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1314 TEST_ASSERT_SUCCESS(status);
1315
1316 logln("Constructing base BI\n");
1317 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1318 TEST_ASSERT_SUCCESS(status);
1319
1320 if (U_SUCCESS(status)) {
1321 logln("unsuppressing 'Capt'");
1322 TEST_ASSERT(true == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1323
1324 logln("Building new BI\n");
1325 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1326 TEST_ASSERT_SUCCESS(status);
1327
1328 if(filteredBI.isValid()) {
1329 logln("Testing:");
1330 filteredBI->setText(text);
1331 TEST_ASSERT(84 == filteredBI->next());
1332 TEST_ASSERT(90 == filteredBI->next());
1333 TEST_ASSERT(278 == filteredBI->next());
1334 filteredBI->first();
1335 prtbrks(filteredBI.getAlias(), text, *this);
1336 }
1337 }
1338 }
1339
1340
1341 {
1342 logln("Constructing English builder\n");
1343 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1344 TEST_ASSERT_SUCCESS(status);
1345
1346 logln("Constructing base BI\n");
1347 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1348 TEST_ASSERT_SUCCESS(status);
1349
1350 if (U_SUCCESS(status)) {
1351 logln("Building new BI\n");
1352 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1353 TEST_ASSERT_SUCCESS(status);
1354
1355 if(filteredBI.isValid()) {
1356 logln("Testing:");
1357 filteredBI->setText(text);
1358 TEST_ASSERT(84 == filteredBI->next());
1359 TEST_ASSERT(278 == filteredBI->next());
1360 filteredBI->first();
1361 prtbrks(filteredBI.getAlias(), text, *this);
1362 }
1363 }
1364 }
1365
1366 // reenable once french is in
1367 {
1368 logln("Constructing French builder");
1369 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1370 TEST_ASSERT_SUCCESS(status);
1371
1372 logln("Constructing base BI\n");
1373 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1374 TEST_ASSERT_SUCCESS(status);
1375
1376 if (U_SUCCESS(status)) {
1377 logln("Building new BI\n");
1378 frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1379 TEST_ASSERT_SUCCESS(status);
1380 }
1381
1382 if(frenchBI.isValid()) {
1383 logln("Testing:");
1384 UnicodeString frText("C'est MM. Duval.");
1385 frenchBI->setText(frText);
1386 TEST_ASSERT(16 == frenchBI->next());
1387 TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1388 frenchBI->first();
1389 prtbrks(frenchBI.getAlias(), frText, *this);
1390 logln("Testing against English:");
1391 filteredBI->setText(frText);
1392 TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
1393 TEST_ASSERT(16 == filteredBI->next());
1394 TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
1395 filteredBI->first();
1396 prtbrks(filteredBI.getAlias(), frText, *this);
1397
1398 // Verify ==
1399 assertTrue(WHERE, *frenchBI == *frenchBI);
1400 assertTrue(WHERE, *filteredBI != *frenchBI);
1401 assertTrue(WHERE, *frenchBI != *filteredBI);
1402 } else {
1403 dataerrln("French BI: not valid.");
1404 }
1405 }
1406
1407 #else
1408 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1409 #endif
1410 }
1411
1412 //---------------------------------------------
1413 // runIndexedTest
1414 //---------------------------------------------
1415
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)1416 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1417 {
1418 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1419 TESTCASE_AUTO_BEGIN;
1420 #if !UCONFIG_NO_FILE_IO
1421 TESTCASE_AUTO(TestCloneEquals);
1422 TESTCASE_AUTO(TestgetRules);
1423 TESTCASE_AUTO(TestHashCode);
1424 TESTCASE_AUTO(TestGetSetAdoptText);
1425 TESTCASE_AUTO(TestIteration);
1426 #endif
1427 TESTCASE_AUTO(TestBuilder);
1428 TESTCASE_AUTO(TestQuoteGrouping);
1429 TESTCASE_AUTO(TestRuleStatusVec);
1430 TESTCASE_AUTO(TestBug2190);
1431 #if !UCONFIG_NO_FILE_IO
1432 TESTCASE_AUTO(TestRegistration);
1433 TESTCASE_AUTO(TestBoilerPlate);
1434 TESTCASE_AUTO(TestRuleStatus);
1435 TESTCASE_AUTO(TestRoundtripRules);
1436 TESTCASE_AUTO(TestGetBinaryRules);
1437 #endif
1438 TESTCASE_AUTO(TestRefreshInputText);
1439 #if !UCONFIG_NO_BREAK_ITERATION
1440 TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
1441 #endif
1442 TESTCASE_AUTO_END;
1443 }
1444
1445
1446 //---------------------------------------------
1447 //Internal subroutines
1448 //---------------------------------------------
1449
doBoundaryTest(BreakIterator & bi,UnicodeString & text,int32_t * boundaries)1450 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1451 logln((UnicodeString)"testIsBoundary():");
1452 int32_t p = 0;
1453 UBool isB;
1454 for (int32_t i = 0; i < text.length(); i++) {
1455 isB = bi.isBoundary(i);
1456 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1457
1458 if (i == boundaries[p]) {
1459 if (!isB)
1460 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1461 p++;
1462 }
1463 else {
1464 if (isB)
1465 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1466 }
1467 }
1468 }
doTest(UnicodeString & testString,int32_t start,int32_t gotoffset,int32_t expectedOffset,const char * expectedString)1469 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1470 UnicodeString selected;
1471 UnicodeString expected=CharsToUnicodeString(expectedString);
1472
1473 if(gotoffset != expectedOffset)
1474 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1475 if(start <= gotoffset){
1476 testString.extractBetween(start, gotoffset, selected);
1477 }
1478 else{
1479 testString.extractBetween(gotoffset, start, selected);
1480 }
1481 if(selected.compare(expected) != 0)
1482 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1483 else
1484 logln(prettify("****selected \"" + selected + "\""));
1485 }
1486
1487 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
1488