1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 2002-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 * file name: custrtst.c
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002oct09
16 * created by: Markus W. Scherer
17 *
18 * Tests of ustring.h Unicode string API functions.
19 */
20
21 #include "unicode/ustring.h"
22 #include "unicode/ucnv.h"
23 #include "unicode/uiter.h"
24 #include "cintltst.h"
25 #include "cstring.h"
26 #include "cmemory.h"
27 #include <stdbool.h>
28 #include <string.h>
29
/*
 * Get the sign of an integer: yields -1, 0 or +1.
 * The zero test uses the value as given; the negative test uses the value
 * converted to int32_t. The sign is derived with a comparison instead of
 * right-shifting a negative number, whose result is implementation-defined.
 * The argument is evaluated twice, so it must not have side effects.
 */
#define _SIGN(value) ((value)==0 ? 0 : ((int32_t)(value)<0 ? -1 : 1))
32
33 /* test setup --------------------------------------------------------------- */
34
/* Helper that builds the UChar test data table used by TestStringFunctions(). */
static void setUpDataTable(void);

/* The individual test functions; each one is registered in addUStringTest(). */
static void TestStringCopy(void);
static void TestStringFunctions(void);
static void TestStringSearching(void);
static void TestSurrogateSearching(void);
static void TestUnescape(void);
static void TestUnescapeRepeatedSurrogateLead20725(void);
static void TestCountChar32(void);
static void TestUCharIterator(void);

/* Non-static registration function, defined right below. */
void addUStringTest(TestNode** root);
46
/*
 * Registers all test functions of this file with addTest(), each under the
 * path "tsutil/custrtst/<function name>". The root pointer is passed through
 * to addTest() unchanged.
 */
void addUStringTest(TestNode** root)
{
/* The path is the common prefix followed by the stringized function name. */
#define ADD_CUSTRTST(testFn) addTest(root, &testFn, "tsutil/custrtst/" #testFn)

    ADD_CUSTRTST(TestStringCopy);
    ADD_CUSTRTST(TestStringFunctions);
    ADD_CUSTRTST(TestStringSearching);
    ADD_CUSTRTST(TestSurrogateSearching);
    ADD_CUSTRTST(TestUnescape);
    ADD_CUSTRTST(TestUnescapeRepeatedSurrogateLead20725);
    ADD_CUSTRTST(TestCountChar32);
    ADD_CUSTRTST(TestUCharIterator);

#undef ADD_CUSTRTST
}
59
60 /* test data for TestStringFunctions ---------------------------------------- */
61
/*
 * 3x4 table of NUL-terminated UChar strings, indexed dataTable[row][column].
 * It is NULL until setUpDataTable() allocates it and fills it from raw[][];
 * cleanUpDataTable() frees it and resets it to NULL.
 */
UChar*** dataTable = NULL;
63
/*
 * Source text for dataTable, indexed raw[row][column]:
 *   row 0 - first string
 *   row 1 - second string
 *   row 2 - concatenated string
 * NOTE(review): raw[1][3] is spelled "Unites States" while raw[2][3] contains
 * "United States"; this looks like a typo - confirm before comparing column 3
 * of rows 0+1 against row 2.
 */
static const char *raw[3][4] = {
    /* First String */
    {
        "English_",
        "French_",
        "Croatian_",
        "English_"
    },
    /* Second String */
    {
        "United States",
        "France",
        "Croatia",
        "Unites States"
    },
    /* Concatenated string */
    {
        "English_United States",
        "French_France",
        "Croatian_Croatia",
        "English_United States"
    }
};
74
setUpDataTable()75 static void setUpDataTable()
76 {
77 int32_t i,j;
78 if(dataTable == NULL) {
79 dataTable = (UChar***)calloc(sizeof(UChar**),3);
80
81 for (i = 0; i < 3; i++) {
82 dataTable[i] = (UChar**)calloc(sizeof(UChar*),4);
83 for (j = 0; j < 4; j++){
84 dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(raw[i][j])+1));
85 u_uastrcpy(dataTable[i][j],raw[i][j]);
86 }
87 }
88 }
89 }
90
cleanUpDataTable()91 static void cleanUpDataTable()
92 {
93 int32_t i,j;
94 if(dataTable != NULL) {
95 for (i=0; i<3; i++) {
96 for(j = 0; j<4; j++) {
97 free(dataTable[i][j]);
98 }
99 free(dataTable[i]);
100 }
101 free(dataTable);
102 }
103 dataTable = NULL;
104 }
105
106 /*Tests for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */
TestStringFunctions()107 static void TestStringFunctions()
108 {
109 int32_t i,j,k;
110 UChar temp[512];
111 UChar nullTemp[512];
112 char test[512];
113 char tempOut[512];
114
115 setUpDataTable();
116
117 log_verbose("Testing u_strlen()\n");
118 if( u_strlen(dataTable[0][0])!= u_strlen(dataTable[0][3]) || u_strlen(dataTable[0][0]) == u_strlen(dataTable[0][2]))
119 log_err("There is an error in u_strlen()");
120
121 log_verbose("Testing u_memcpy() and u_memcmp()\n");
122
123 for(i=0;i<3;++i)
124 {
125 for(j=0;j<4;++j)
126 {
127 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j]));
128 temp[0] = 0;
129 temp[7] = 0xA4; /* Mark the end */
130 u_memcpy(temp,dataTable[i][j], 7);
131
132 if(temp[7] != 0xA4)
133 log_err("an error occurred in u_memcpy()\n");
134 if(u_memcmp(temp, dataTable[i][j], 7)!=0)
135 log_err("an error occurred in u_memcpy() or u_memcmp()\n");
136 }
137 }
138 if(u_memcmp(dataTable[0][0], dataTable[1][1], 7)==0)
139 log_err("an error occurred in u_memcmp()\n");
140
141 log_verbose("Testing u_memset()\n");
142 nullTemp[0] = 0;
143 nullTemp[7] = 0;
144 u_memset(nullTemp, 0xa4, 7);
145 for (i = 0; i < 7; i++) {
146 if(nullTemp[i] != 0xa4) {
147 log_err("an error occurred in u_memset()\n");
148 }
149 }
150 if(nullTemp[7] != 0) {
151 log_err("u_memset() went too far\n");
152 }
153
154 u_memset(nullTemp, 0, 7);
155 nullTemp[7] = 0xa4;
156 temp[7] = 0;
157 u_memcpy(temp,nullTemp, 7);
158 if(u_memcmp(temp, nullTemp, 7)!=0 || temp[7]!=0)
159 log_err("an error occurred in u_memcpy() or u_memcmp()\n");
160
161
162 log_verbose("Testing u_memmove()\n");
163 for (i = 0; i < 7; i++) {
164 temp[i] = (UChar)i;
165 }
166 u_memmove(temp + 1, temp, 7);
167 if(temp[0] != 0) {
168 log_err("an error occurred in u_memmove()\n");
169 }
170 for (i = 1; i <= 7; i++) {
171 if(temp[i] != (i - 1)) {
172 log_err("an error occurred in u_memmove()\n");
173 }
174 }
175
176 log_verbose("Testing u_strcpy() and u_strcmp()\n");
177
178 for(i=0;i<3;++i)
179 {
180 for(j=0;j<4;++j)
181 {
182 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j]));
183 temp[0] = 0;
184 u_strcpy(temp,dataTable[i][j]);
185
186 if(u_strcmp(temp,dataTable[i][j])!=0)
187 log_err("something threw an error in u_strcpy() or u_strcmp()\n");
188 }
189 }
190 if(u_strcmp(dataTable[0][0], dataTable[1][1])==0)
191 log_err("an error occurred in u_memcmp()\n");
192
193 log_verbose("testing u_strcat()\n");
194 i=0;
195 for(j=0; j<2;++j)
196 {
197 u_uastrcpy(temp, "");
198 u_strcpy(temp,dataTable[i][j]);
199 u_strcat(temp,dataTable[i+1][j]);
200 if(u_strcmp(temp,dataTable[i+2][j])!=0)
201 log_err("something threw an error in u_strcat()\n");
202
203 }
204 log_verbose("Testing u_strncmp()\n");
205 for(i=0,j=0;j<4; ++j)
206 {
207 k=u_strlen(dataTable[i][j]);
208 if(u_strncmp(dataTable[i][j],dataTable[i+2][j],k)!=0)
209 log_err("Something threw an error in u_strncmp\n");
210 }
211 if(u_strncmp(dataTable[0][0], dataTable[1][1], 7)==0)
212 log_err("an error occurred in u_memcmp()\n");
213
214
215 log_verbose("Testing u_strncat\n");
216 for(i=0,j=0;j<4; ++j)
217 {
218 k=u_strlen(dataTable[i][j]);
219
220 u_uastrcpy(temp,"");
221
222 if(u_strcmp(u_strncat(temp,dataTable[i+2][j],k),dataTable[i][j])!=0)
223 log_err("something threw an error in u_strncat or u_uastrcpy()\n");
224
225 }
226
227 log_verbose("Testing u_strncpy() and u_uastrcpy()\n");
228 for(i=2,j=0;j<4; ++j)
229 {
230 k=u_strlen(dataTable[i][j]);
231 u_strncpy(temp, dataTable[i][j],k);
232 temp[k] = 0xa4;
233
234 if(u_strncmp(temp, dataTable[i][j],k)!=0)
235 log_err("something threw an error in u_strncpy()\n");
236
237 if(temp[k] != 0xa4)
238 log_err("something threw an error in u_strncpy()\n");
239
240 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
241 u_uastrncpy(temp, raw[i][j], k-1);
242 if(u_strncmp(temp, dataTable[i][j],k-1)!=0)
243 log_err("something threw an error in u_uastrncpy(k-1)\n");
244
245 if(temp[k-1] != 0x3F)
246 log_err("something threw an error in u_uastrncpy(k-1)\n");
247
248 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
249 u_uastrncpy(temp, raw[i][j], k+1);
250 if(u_strcmp(temp, dataTable[i][j])!=0)
251 log_err("something threw an error in u_uastrncpy(k+1)\n");
252
253 if(temp[k] != 0)
254 log_err("something threw an error in u_uastrncpy(k+1)\n");
255
256 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
257 u_uastrncpy(temp, raw[i][j], k);
258 if(u_strncmp(temp, dataTable[i][j], k)!=0)
259 log_err("something threw an error in u_uastrncpy(k)\n");
260
261 if(temp[k] != 0x3F)
262 log_err("something threw an error in u_uastrncpy(k)\n");
263 }
264
265 log_verbose("Testing u_strchr() and u_memchr()\n");
266
267 for(i=2,j=0;j<4;j++)
268 {
269 UChar saveVal = dataTable[i][j][0];
270 UChar *findPtr = u_strchr(dataTable[i][j], 0x005F);
271 int32_t dataSize = (int32_t)(u_strlen(dataTable[i][j]) + 1);
272
273 log_verbose("%s ", u_austrcpy(tempOut, findPtr));
274
275 if (findPtr == NULL || *findPtr != 0x005F) {
276 log_err("u_strchr can't find '_' in the string\n");
277 }
278
279 findPtr = u_strchr32(dataTable[i][j], 0x005F);
280 if (findPtr == NULL || *findPtr != 0x005F) {
281 log_err("u_strchr32 can't find '_' in the string\n");
282 }
283
284 findPtr = u_strchr(dataTable[i][j], 0);
285 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
286 log_err("u_strchr can't find NULL in the string\n");
287 }
288
289 findPtr = u_strchr32(dataTable[i][j], 0);
290 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
291 log_err("u_strchr32 can't find NULL in the string\n");
292 }
293
294 findPtr = u_memchr(dataTable[i][j], 0, dataSize);
295 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
296 log_err("u_memchr can't find NULL in the string\n");
297 }
298
299 findPtr = u_memchr32(dataTable[i][j], 0, dataSize);
300 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
301 log_err("u_memchr32 can't find NULL in the string\n");
302 }
303
304 dataTable[i][j][0] = 0;
305 /* Make sure we skip over the NULL termination */
306 findPtr = u_memchr(dataTable[i][j], 0x005F, dataSize);
307 if (findPtr == NULL || *findPtr != 0x005F) {
308 log_err("u_memchr can't find '_' in the string\n");
309 }
310
311 findPtr = u_memchr32(dataTable[i][j], 0x005F, dataSize);
312 if (findPtr == NULL || *findPtr != 0x005F) {
313 log_err("u_memchr32 can't find '_' in the string\n");
314 }
315 findPtr = u_memchr32(dataTable[i][j], 0xFFFD, dataSize);
316 if (findPtr != NULL) {
317 log_err("Should have found NULL when the character is not there.\n");
318 }
319 dataTable[i][j][0] = saveVal; /* Put it back for the other tests */
320 }
321
322 /*
323 * test that u_strchr32()
324 * does not find surrogate code points when they are part of matched pairs
325 * (= part of supplementary code points)
326 * Jitterbug 1542
327 */
328 {
329 static const UChar s[]={
330 /* 0 1 2 3 4 5 6 7 8 9 */
331 0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0
332 };
333
334 if(u_strchr32(s, 0xd841)!=(s+3) || u_strchr32(s, 0xdc02)!=(s+5)) {
335 log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n");
336 }
337 if(u_memchr32(s, 0xd841, 9)!=(s+3) || u_memchr32(s, 0xdc02, 9)!=(s+5)) {
338 log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n");
339 }
340 }
341
342 log_verbose("Testing u_austrcpy()");
343 u_austrcpy(test,dataTable[0][0]);
344 if(strcmp(test,raw[0][0])!=0)
345 log_err("There is an error in u_austrcpy()");
346
347
348 log_verbose("Testing u_strtok_r()");
349 {
350 const char tokString[] = " , 1 2 3 AHHHHH! 5.5 6 7 , 8\n";
351 const char *tokens[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"};
352 UChar delimBuf[sizeof(test)];
353 UChar currTokenBuf[sizeof(tokString)];
354 UChar *state;
355 uint32_t currToken = 0;
356 UChar *ptr;
357
358 u_uastrcpy(temp, tokString);
359 u_uastrcpy(delimBuf, " ");
360
361 ptr = u_strtok_r(temp, delimBuf, &state);
362 u_uastrcpy(delimBuf, " ,");
363 while (ptr != NULL) {
364 u_uastrcpy(currTokenBuf, tokens[currToken]);
365 if (u_strcmp(ptr, currTokenBuf) != 0) {
366 log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken, ptr, tokens[currToken]);
367 }
368 ptr = u_strtok_r(NULL, delimBuf, &state);
369 currToken++;
370 }
371
372 if (currToken != UPRV_LENGTHOF(tokens)) {
373 log_err("Didn't get correct number of tokens\n");
374 }
375 state = delimBuf; /* Give it an "invalid" saveState */
376 u_uastrcpy(currTokenBuf, "");
377 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
378 log_err("Didn't get NULL for empty string\n");
379 }
380 if (state != NULL) {
381 log_err("State should be NULL for empty string\n");
382 }
383 state = delimBuf; /* Give it an "invalid" saveState */
384 u_uastrcpy(currTokenBuf, ", ,");
385 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
386 log_err("Didn't get NULL for a string of delimiters\n");
387 }
388 if (state != NULL) {
389 log_err("State should be NULL for a string of delimiters\n");
390 }
391
392 state = delimBuf; /* Give it an "invalid" saveState */
393 u_uastrcpy(currTokenBuf, "q, ,");
394 if (u_strtok_r(currTokenBuf, delimBuf, &state) == NULL) {
395 log_err("Got NULL for a string that does not begin with delimiters\n");
396 }
397 if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
398 log_err("Didn't get NULL for a string that ends in delimiters\n");
399 }
400 if (state != NULL) {
401 log_err("State should be NULL for empty string\n");
402 }
403
404 state = delimBuf; /* Give it an "invalid" saveState */
405 u_uastrcpy(currTokenBuf, tokString);
406 u_uastrcpy(temp, tokString);
407 u_uastrcpy(delimBuf, "q"); /* Give it a delimiter that it can't find. */
408 ptr = u_strtok_r(currTokenBuf, delimBuf, &state);
409 if (ptr == NULL || u_strcmp(ptr, temp) != 0) {
410 log_err("Should have received the same string when there are no delimiters\n");
411 }
412 if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
413 log_err("Should not have found another token in a one token string\n");
414 }
415 }
416
417 /* test u_strcmpCodePointOrder() */
418 {
419 /* these strings are in ascending order */
420 static const UChar strings[][4]={
421 { 0x61, 0 }, /* U+0061 */
422 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
423 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
424 { 0xd800, 0 }, /* U+d800 */
425 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
426 { 0xdfff, 0 }, /* U+dfff */
427 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
428 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
429 { 0xd800, 0xdc02, 0 }, /* U+10002 */
430 { 0xd84d, 0xdc56, 0 } /* U+23456 */
431 };
432
433 UCharIterator iter1, iter2;
434 int32_t len1, len2, r1, r2;
435
436 for(i=0; i<(UPRV_LENGTHOF(strings)-1); ++i) {
437 if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) {
438 log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i);
439 }
440 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 10)>=0) {
441 log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i);
442 }
443
444 /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */
445 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 2)!=u_memcmpCodePointOrder(strings[i], strings[i+1], 2)) {
446 log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i);
447 }
448
449 /* test u_strCompare(true) */
450 len1=u_strlen(strings[i]);
451 len2=u_strlen(strings[i+1]);
452 if( u_strCompare(strings[i], -1, strings[i+1], -1, true)>=0 ||
453 u_strCompare(strings[i], -1, strings[i+1], len2, true)>=0 ||
454 u_strCompare(strings[i], len1, strings[i+1], -1, true)>=0 ||
455 u_strCompare(strings[i], len1, strings[i+1], len2, true)>=0
456 ) {
457 log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i);
458 }
459
460 /* test u_strCompare(false) */
461 r1=u_strCompare(strings[i], -1, strings[i+1], -1, false);
462 r2=u_strcmp(strings[i], strings[i+1]);
463 if(_SIGN(r1)!=_SIGN(r2)) {
464 log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
465 }
466
467 /* test u_strCompareIter() */
468 uiter_setString(&iter1, strings[i], len1);
469 uiter_setString(&iter2, strings[i+1], len2);
470 if(u_strCompareIter(&iter1, &iter2, true)>=0) {
471 log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i);
472 }
473 r1=u_strCompareIter(&iter1, &iter2, false);
474 if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
475 log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i);
476 }
477 }
478 }
479
480 cleanUpDataTable();
481 }
482
TestStringSearching()483 static void TestStringSearching()
484 {
485 const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0};
486 const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0};
487 const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0};
488 const UChar surrMatchSet2[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0};
489 const UChar surrMatchSet3[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0};
490 const UChar surrMatchSet4[] = {0x0000};
491 const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0};
492 const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0};
493 const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0}; /* has partial surrogate */
494 const UChar
495 empty[] = { 0 },
496 a[] = { 0x61, 0 },
497 ab[] = { 0x61, 0x62, 0 },
498 ba[] = { 0x62, 0x61, 0 },
499 abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 },
500 cd[] = { 0x63, 0x64, 0 },
501 dc[] = { 0x64, 0x63, 0 },
502 cdh[] = { 0x63, 0x64, 0x68, 0 },
503 f[] = { 0x66, 0 },
504 fg[] = { 0x66, 0x67, 0 },
505 gf[] = { 0x67, 0x66, 0 };
506
507 log_verbose("Testing u_strpbrk()");
508
509 if (u_strpbrk(testString, a) != &testString[0]) {
510 log_err("u_strpbrk couldn't find first letter a.\n");
511 }
512 if (u_strpbrk(testString, dc) != &testString[2]) {
513 log_err("u_strpbrk couldn't find d or c.\n");
514 }
515 if (u_strpbrk(testString, cd) != &testString[2]) {
516 log_err("u_strpbrk couldn't find c or d.\n");
517 }
518 if (u_strpbrk(testString, cdh) != &testString[2]) {
519 log_err("u_strpbrk couldn't find c, d or h.\n");
520 }
521 if (u_strpbrk(testString, f) != NULL) {
522 log_err("u_strpbrk didn't return NULL for \"f\".\n");
523 }
524 if (u_strpbrk(testString, fg) != NULL) {
525 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
526 }
527 if (u_strpbrk(testString, gf) != NULL) {
528 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
529 }
530 if (u_strpbrk(testString, empty) != NULL) {
531 log_err("u_strpbrk didn't return NULL for \"\".\n");
532 }
533
534 log_verbose("Testing u_strpbrk() with surrogates");
535
536 if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) {
537 log_err("u_strpbrk couldn't find first letter a.\n");
538 }
539 if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) {
540 log_err("u_strpbrk couldn't find d or c.\n");
541 }
542 if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) {
543 log_err("u_strpbrk couldn't find c or d.\n");
544 }
545 if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) {
546 log_err("u_strpbrk couldn't find c, d or h.\n");
547 }
548 if (u_strpbrk(testSurrogateString, f) != NULL) {
549 log_err("u_strpbrk didn't return NULL for \"f\".\n");
550 }
551 if (u_strpbrk(testSurrogateString, fg) != NULL) {
552 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
553 }
554 if (u_strpbrk(testSurrogateString, gf) != NULL) {
555 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
556 }
557 if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]) {
558 log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n");
559 }
560 if (u_strpbrk(testSurrogateString, surrMatchSet2) != &testSurrogateString[1]) {
561 log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
562 }
563 if (u_strpbrk(testSurrogateString, surrMatchSet3) != &testSurrogateString[3]) {
564 log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
565 }
566 if (u_strpbrk(testSurrogateString, surrMatchSet4) != NULL) {
567 log_err("u_strpbrk should have returned NULL for empty string.\n");
568 }
569 if (u_strpbrk(testSurrogateString, surrMatchSetBad) != &testSurrogateString[0]) {
570 log_err("u_strpbrk should have found bad surrogate.\n");
571 }
572
573 log_verbose("Testing u_strcspn()");
574
575 if (u_strcspn(testString, a) != 0) {
576 log_err("u_strcspn couldn't find first letter a.\n");
577 }
578 if (u_strcspn(testString, dc) != 2) {
579 log_err("u_strcspn couldn't find d or c.\n");
580 }
581 if (u_strcspn(testString, cd) != 2) {
582 log_err("u_strcspn couldn't find c or d.\n");
583 }
584 if (u_strcspn(testString, cdh) != 2) {
585 log_err("u_strcspn couldn't find c, d or h.\n");
586 }
587 if (u_strcspn(testString, f) != u_strlen(testString)) {
588 log_err("u_strcspn didn't return NULL for \"f\".\n");
589 }
590 if (u_strcspn(testString, fg) != u_strlen(testString)) {
591 log_err("u_strcspn didn't return NULL for \"fg\".\n");
592 }
593 if (u_strcspn(testString, gf) != u_strlen(testString)) {
594 log_err("u_strcspn didn't return NULL for \"gf\".\n");
595 }
596
597 log_verbose("Testing u_strcspn() with surrogates");
598
599 if (u_strcspn(testSurrogateString, a) != 1) {
600 log_err("u_strcspn couldn't find first letter a.\n");
601 }
602 if (u_strcspn(testSurrogateString, dc) != 5) {
603 log_err("u_strcspn couldn't find d or c.\n");
604 }
605 if (u_strcspn(testSurrogateString, cd) != 5) {
606 log_err("u_strcspn couldn't find c or d.\n");
607 }
608 if (u_strcspn(testSurrogateString, cdh) != 5) {
609 log_err("u_strcspn couldn't find c, d or h.\n");
610 }
611 if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) {
612 log_err("u_strcspn didn't return NULL for \"f\".\n");
613 }
614 if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) {
615 log_err("u_strcspn didn't return NULL for \"fg\".\n");
616 }
617 if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) {
618 log_err("u_strcspn didn't return NULL for \"gf\".\n");
619 }
620 if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) {
621 log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n");
622 }
623 if (u_strcspn(testSurrogateString, surrMatchSet2) != 1) {
624 log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n");
625 }
626 if (u_strcspn(testSurrogateString, surrMatchSet3) != 3) {
627 log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
628 }
629 if (u_strcspn(testSurrogateString, surrMatchSet4) != u_strlen(testSurrogateString)) {
630 log_err("u_strcspn should have returned strlen for empty string.\n");
631 }
632
633
634 log_verbose("Testing u_strspn()");
635
636 if (u_strspn(testString, a) != 1) {
637 log_err("u_strspn couldn't skip first letter a.\n");
638 }
639 if (u_strspn(testString, ab) != 2) {
640 log_err("u_strspn couldn't skip a or b.\n");
641 }
642 if (u_strspn(testString, ba) != 2) {
643 log_err("u_strspn couldn't skip a or b.\n");
644 }
645 if (u_strspn(testString, f) != 0) {
646 log_err("u_strspn didn't return 0 for \"f\".\n");
647 }
648 if (u_strspn(testString, dc) != 0) {
649 log_err("u_strspn couldn't find first letter a (skip d or c).\n");
650 }
651 if (u_strspn(testString, abcd) != u_strlen(testString)) {
652 log_err("u_strspn couldn't skip over the whole string.\n");
653 }
654 if (u_strspn(testString, empty) != 0) {
655 log_err("u_strspn should have returned 0 for empty string.\n");
656 }
657
658 log_verbose("Testing u_strspn() with surrogates");
659 if (u_strspn(testSurrogateString, surrMatchSetBad) != 2) {
660 log_err("u_strspn couldn't skip 0xdbff or a.\n");
661 }
662 if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) {
663 log_err("u_strspn couldn't skip 0xdbff or a.\n");
664 }
665 if (u_strspn(testSurrogateString, f) != 0) {
666 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
667 }
668 if (u_strspn(testSurrogateString, dc) != 0) {
669 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
670 }
671 if (u_strspn(testSurrogateString, cd) != 0) {
672 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
673 }
674 if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurrogateString)) {
675 log_err("u_strspn couldn't skip whole string.\n");
676 }
677 if (u_strspn(testSurrogateString, surrMatchSet1) != 0) {
678 log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n");
679 }
680 if (u_strspn(testSurrogateString, surrMatchSetBad3) != 5) {
681 log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
682 }
683 if (u_strspn(testSurrogateString, surrMatchSet4) != 0) {
684 log_err("u_strspn should have returned 0 for empty string.\n");
685 }
686 }
687
688 /*
689 * All binary Unicode string searches should behave the same for equivalent input.
690 * See Jitterbug 2145.
691 * There are some new functions, too - just test them all.
692 */
693 static void
TestSurrogateSearching()694 TestSurrogateSearching() {
695 static const UChar s[]={
696 /* 0 1 2 3 4 5 6 7 8 9 10 11 */
697 0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0
698 }, sub_a[]={
699 0x61, 0
700 }, sub_b[]={
701 0x62, 0
702 }, sub_lead[]={
703 0xd801, 0
704 }, sub_trail[]={
705 0xdc02, 0
706 }, sub_supp[]={
707 0xd801, 0xdc02, 0
708 }, sub_supp2[]={
709 0xd801, 0xdc03, 0
710 }, sub_a_lead[]={
711 0x61, 0xd801, 0
712 }, sub_trail_a[]={
713 0xdc02, 0x61, 0
714 }, sub_aba[]={
715 0x61, 0x62, 0x61, 0
716 };
717 static const UChar a=0x61, b=0x62, lead=0xd801, trail=0xdc02, nul=0;
718 static const UChar32 supp=0x10402, supp2=0x10403, ill=0x123456;
719
720 const UChar *first, *last;
721
722 /* search for NUL code point: find end of string */
723 first=s+u_strlen(s);
724
725 if(
726 first!=u_strchr(s, nul) ||
727 first!=u_strchr32(s, nul) ||
728 first!=u_memchr(s, nul, UPRV_LENGTHOF(s)) ||
729 first!=u_memchr32(s, nul, UPRV_LENGTHOF(s)) ||
730 first!=u_strrchr(s, nul) ||
731 first!=u_strrchr32(s, nul) ||
732 first!=u_memrchr(s, nul, UPRV_LENGTHOF(s)) ||
733 first!=u_memrchr32(s, nul, UPRV_LENGTHOF(s))
734 ) {
735 log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n");
736 }
737
738 /* search for empty substring: find beginning of string */
739 if(
740 s!=u_strstr(s, &nul) ||
741 s!=u_strFindFirst(s, -1, &nul, -1) ||
742 s!=u_strFindFirst(s, -1, &nul, 0) ||
743 s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, -1) ||
744 s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, 0) ||
745 s!=u_strrstr(s, &nul) ||
746 s!=u_strFindLast(s, -1, &nul, -1) ||
747 s!=u_strFindLast(s, -1, &nul, 0) ||
748 s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, -1) ||
749 s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, 0)
750 ) {
751 log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n");
752 }
753
754 /* find 'a' in s[1..10[ */
755 first=s+3;
756 last=s+7;
757 if(
758 first!=u_strchr(s+1, a) ||
759 first!=u_strchr32(s+1, a) ||
760 first!=u_memchr(s+1, a, 9) ||
761 first!=u_memchr32(s+1, a, 9) ||
762 first!=u_strstr(s+1, sub_a) ||
763 first!=u_strFindFirst(s+1, -1, sub_a, -1) ||
764 first!=u_strFindFirst(s+1, -1, &a, 1) ||
765 first!=u_strFindFirst(s+1, 9, sub_a, -1) ||
766 first!=u_strFindFirst(s+1, 9, &a, 1) ||
767 (s+10)!=u_strrchr(s+1, a) ||
768 (s+10)!=u_strrchr32(s+1, a) ||
769 last!=u_memrchr(s+1, a, 9) ||
770 last!=u_memrchr32(s+1, a, 9) ||
771 (s+10)!=u_strrstr(s+1, sub_a) ||
772 (s+10)!=u_strFindLast(s+1, -1, sub_a, -1) ||
773 (s+10)!=u_strFindLast(s+1, -1, &a, 1) ||
774 last!=u_strFindLast(s+1, 9, sub_a, -1) ||
775 last!=u_strFindLast(s+1, 9, &a, 1)
776 ) {
777 log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n");
778 }
779
780 /* do not find 'b' in s[1..10[ */
781 if(
782 NULL!=u_strchr(s+1, b) ||
783 NULL!=u_strchr32(s+1, b) ||
784 NULL!=u_memchr(s+1, b, 9) ||
785 NULL!=u_memchr32(s+1, b, 9) ||
786 NULL!=u_strstr(s+1, sub_b) ||
787 NULL!=u_strFindFirst(s+1, -1, sub_b, -1) ||
788 NULL!=u_strFindFirst(s+1, -1, &b, 1) ||
789 NULL!=u_strFindFirst(s+1, 9, sub_b, -1) ||
790 NULL!=u_strFindFirst(s+1, 9, &b, 1) ||
791 NULL!=u_strrchr(s+1, b) ||
792 NULL!=u_strrchr32(s+1, b) ||
793 NULL!=u_memrchr(s+1, b, 9) ||
794 NULL!=u_memrchr32(s+1, b, 9) ||
795 NULL!=u_strrstr(s+1, sub_b) ||
796 NULL!=u_strFindLast(s+1, -1, sub_b, -1) ||
797 NULL!=u_strFindLast(s+1, -1, &b, 1) ||
798 NULL!=u_strFindLast(s+1, 9, sub_b, -1) ||
799 NULL!=u_strFindLast(s+1, 9, &b, 1)
800 ) {
801 log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n");
802 }
803
804 /* do not find a non-code point in s[1..10[ */
805 if(
806 NULL!=u_strchr32(s+1, ill) ||
807 NULL!=u_memchr32(s+1, ill, 9) ||
808 NULL!=u_strrchr32(s+1, ill) ||
809 NULL!=u_memrchr32(s+1, ill, 9)
810 ) {
811 log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n");
812 }
813
814 /* find U+d801 in s[1..10[ */
815 first=s+6;
816 if(
817 first!=u_strchr(s+1, lead) ||
818 first!=u_strchr32(s+1, lead) ||
819 first!=u_memchr(s+1, lead, 9) ||
820 first!=u_memchr32(s+1, lead, 9) ||
821 first!=u_strstr(s+1, sub_lead) ||
822 first!=u_strFindFirst(s+1, -1, sub_lead, -1) ||
823 first!=u_strFindFirst(s+1, -1, &lead, 1) ||
824 first!=u_strFindFirst(s+1, 9, sub_lead, -1) ||
825 first!=u_strFindFirst(s+1, 9, &lead, 1) ||
826 first!=u_strrchr(s+1, lead) ||
827 first!=u_strrchr32(s+1, lead) ||
828 first!=u_memrchr(s+1, lead, 9) ||
829 first!=u_memrchr32(s+1, lead, 9) ||
830 first!=u_strrstr(s+1, sub_lead) ||
831 first!=u_strFindLast(s+1, -1, sub_lead, -1) ||
832 first!=u_strFindLast(s+1, -1, &lead, 1) ||
833 first!=u_strFindLast(s+1, 9, sub_lead, -1) ||
834 first!=u_strFindLast(s+1, 9, &lead, 1)
835 ) {
836 log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n");
837 }
838
839 /* find U+dc02 in s[1..10[ */
840 first=s+4;
841 if(
842 first!=u_strchr(s+1, trail) ||
843 first!=u_strchr32(s+1, trail) ||
844 first!=u_memchr(s+1, trail, 9) ||
845 first!=u_memchr32(s+1, trail, 9) ||
846 first!=u_strstr(s+1, sub_trail) ||
847 first!=u_strFindFirst(s+1, -1, sub_trail, -1) ||
848 first!=u_strFindFirst(s+1, -1, &trail, 1) ||
849 first!=u_strFindFirst(s+1, 9, sub_trail, -1) ||
850 first!=u_strFindFirst(s+1, 9, &trail, 1) ||
851 first!=u_strrchr(s+1, trail) ||
852 first!=u_strrchr32(s+1, trail) ||
853 first!=u_memrchr(s+1, trail, 9) ||
854 first!=u_memrchr32(s+1, trail, 9) ||
855 first!=u_strrstr(s+1, sub_trail) ||
856 first!=u_strFindLast(s+1, -1, sub_trail, -1) ||
857 first!=u_strFindLast(s+1, -1, &trail, 1) ||
858 first!=u_strFindLast(s+1, 9, sub_trail, -1) ||
859 first!=u_strFindLast(s+1, 9, &trail, 1)
860 ) {
861 log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n");
862 }
863
864 /* find U+10402 in s[1..10[ */
865 first=s+1;
866 last=s+8;
867 if(
868 first!=u_strchr32(s+1, supp) ||
869 first!=u_memchr32(s+1, supp, 9) ||
870 first!=u_strstr(s+1, sub_supp) ||
871 first!=u_strFindFirst(s+1, -1, sub_supp, -1) ||
872 first!=u_strFindFirst(s+1, -1, sub_supp, 2) ||
873 first!=u_strFindFirst(s+1, 9, sub_supp, -1) ||
874 first!=u_strFindFirst(s+1, 9, sub_supp, 2) ||
875 last!=u_strrchr32(s+1, supp) ||
876 last!=u_memrchr32(s+1, supp, 9) ||
877 last!=u_strrstr(s+1, sub_supp) ||
878 last!=u_strFindLast(s+1, -1, sub_supp, -1) ||
879 last!=u_strFindLast(s+1, -1, sub_supp, 2) ||
880 last!=u_strFindLast(s+1, 9, sub_supp, -1) ||
881 last!=u_strFindLast(s+1, 9, sub_supp, 2)
882 ) {
883 log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n");
884 }
885
886 /* do not find U+10402 in a single UChar */
887 if(
888 NULL!=u_memchr32(s+1, supp, 1) ||
889 NULL!=u_strFindFirst(s+1, 1, sub_supp, -1) ||
890 NULL!=u_strFindFirst(s+1, 1, sub_supp, 2) ||
891 NULL!=u_memrchr32(s+1, supp, 1) ||
892 NULL!=u_strFindLast(s+1, 1, sub_supp, -1) ||
893 NULL!=u_strFindLast(s+1, 1, sub_supp, 2) ||
894 NULL!=u_memrchr32(s+2, supp, 1) ||
895 NULL!=u_strFindLast(s+2, 1, sub_supp, -1) ||
896 NULL!=u_strFindLast(s+2, 1, sub_supp, 2)
897 ) {
898 log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n");
899 }
900
901 /* do not find U+10403 in s[1..10[ */
902 if(
903 NULL!=u_strchr32(s+1, supp2) ||
904 NULL!=u_memchr32(s+1, supp2, 9) ||
905 NULL!=u_strstr(s+1, sub_supp2) ||
906 NULL!=u_strFindFirst(s+1, -1, sub_supp2, -1) ||
907 NULL!=u_strFindFirst(s+1, -1, sub_supp2, 2) ||
908 NULL!=u_strFindFirst(s+1, 9, sub_supp2, -1) ||
909 NULL!=u_strFindFirst(s+1, 9, sub_supp2, 2) ||
910 NULL!=u_strrchr32(s+1, supp2) ||
911 NULL!=u_memrchr32(s+1, supp2, 9) ||
912 NULL!=u_strrstr(s+1, sub_supp2) ||
913 NULL!=u_strFindLast(s+1, -1, sub_supp2, -1) ||
914 NULL!=u_strFindLast(s+1, -1, sub_supp2, 2) ||
915 NULL!=u_strFindLast(s+1, 9, sub_supp2, -1) ||
916 NULL!=u_strFindLast(s+1, 9, sub_supp2, 2)
917 ) {
918 log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n");
919 }
920
921 /* find <0061 d801> in s[1..10[ */
922 first=s+5;
923 if(
924 first!=u_strstr(s+1, sub_a_lead) ||
925 first!=u_strFindFirst(s+1, -1, sub_a_lead, -1) ||
926 first!=u_strFindFirst(s+1, -1, sub_a_lead, 2) ||
927 first!=u_strFindFirst(s+1, 9, sub_a_lead, -1) ||
928 first!=u_strFindFirst(s+1, 9, sub_a_lead, 2) ||
929 first!=u_strrstr(s+1, sub_a_lead) ||
930 first!=u_strFindLast(s+1, -1, sub_a_lead, -1) ||
931 first!=u_strFindLast(s+1, -1, sub_a_lead, 2) ||
932 first!=u_strFindLast(s+1, 9, sub_a_lead, -1) ||
933 first!=u_strFindLast(s+1, 9, sub_a_lead, 2)
934 ) {
935 log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n");
936 }
937
938 /* find <dc02 0061> in s[1..10[ */
939 first=s+4;
940 if(
941 first!=u_strstr(s+1, sub_trail_a) ||
942 first!=u_strFindFirst(s+1, -1, sub_trail_a, -1) ||
943 first!=u_strFindFirst(s+1, -1, sub_trail_a, 2) ||
944 first!=u_strFindFirst(s+1, 9, sub_trail_a, -1) ||
945 first!=u_strFindFirst(s+1, 9, sub_trail_a, 2) ||
946 first!=u_strrstr(s+1, sub_trail_a) ||
947 first!=u_strFindLast(s+1, -1, sub_trail_a, -1) ||
948 first!=u_strFindLast(s+1, -1, sub_trail_a, 2) ||
949 first!=u_strFindLast(s+1, 9, sub_trail_a, -1) ||
950 first!=u_strFindLast(s+1, 9, sub_trail_a, 2)
951 ) {
952 log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n");
953 }
954
955 /* do not find "aba" in s[1..10[ */
956 if(
957 NULL!=u_strstr(s+1, sub_aba) ||
958 NULL!=u_strFindFirst(s+1, -1, sub_aba, -1) ||
959 NULL!=u_strFindFirst(s+1, -1, sub_aba, 3) ||
960 NULL!=u_strFindFirst(s+1, 9, sub_aba, -1) ||
961 NULL!=u_strFindFirst(s+1, 9, sub_aba, 3) ||
962 NULL!=u_strrstr(s+1, sub_aba) ||
963 NULL!=u_strFindLast(s+1, -1, sub_aba, -1) ||
964 NULL!=u_strFindLast(s+1, -1, sub_aba, 3) ||
965 NULL!=u_strFindLast(s+1, 9, sub_aba, -1) ||
966 NULL!=u_strFindLast(s+1, 9, sub_aba, 3)
967 ) {
968 log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n");
969 }
    /* Regression test for ICU-20684 Use-of-uninitialized-value in isMatchAtCPBoundary
     * Condition: search the same string while the first char is not a
     * surrogate and the last char is a leading surrogate.
     */
974 {
975 static const UChar s[]={ 0x0020, 0xD9C1 };
976 if (u_strFindFirst(s, 2, s, 2) != s) {
977 log_err("error: ending with a partial supplementary code point should match\n");
978 }
979 }
980 }
981
TestStringCopy()982 static void TestStringCopy()
983 {
984 UChar temp[40];
985 UChar *result=0;
986 UChar subString[5];
987 UChar uchars[]={0x61, 0x62, 0x63, 0x00};
988 char charOut[40];
989 char chars[]="abc"; /* needs default codepage */
990
991 log_verbose("Testing u_uastrncpy() and u_uastrcpy()");
992
993 u_uastrcpy(temp, "abc");
994 if(u_strcmp(temp, uchars) != 0) {
995 log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
996 }
997
998 temp[0] = 0xFB; /* load garbage into it */
999 temp[1] = 0xFB;
1000 temp[2] = 0xFB;
1001 temp[3] = 0xFB;
1002
1003 u_uastrncpy(temp, "abcabcabc", 3);
1004 if(u_strncmp(uchars, temp, 3) != 0){
1005 log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
1006 }
1007 if(temp[3] != 0xFB) {
1008 log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1009 }
1010
1011 charOut[0] = (char)0x7B; /* load garbage into it */
1012 charOut[1] = (char)0x7B;
1013 charOut[2] = (char)0x7B;
1014 charOut[3] = (char)0x7B;
1015
1016 temp[0] = 0x0061;
1017 temp[1] = 0x0062;
1018 temp[2] = 0x0063;
1019 temp[3] = 0x0061;
1020 temp[4] = 0x0062;
1021 temp[5] = 0x0063;
1022 temp[6] = 0x0000;
1023
1024 u_austrncpy(charOut, temp, 3);
1025 if(strncmp(chars, charOut, 3) != 0){
1026 log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
1027 }
1028 if(charOut[3] != (char)0x7B) {
1029 log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1030 }
1031
1032 /*Testing u_strchr()*/
1033 log_verbose("Testing u_strchr\n");
1034 temp[0]=0x42;
1035 temp[1]=0x62;
1036 temp[2]=0x62;
1037 temp[3]=0x63;
1038 temp[4]=0xd841;
1039 temp[5]=0xd841;
1040 temp[6]=0xdc02;
1041 temp[7]=0;
1042 result=u_strchr(temp, (UChar)0x62);
1043 if(result != temp+1){
1044 log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1045 }
1046 /*Testing u_strstr()*/
1047 log_verbose("Testing u_strstr\n");
1048 subString[0]=0x62;
1049 subString[1]=0x63;
1050 subString[2]=0;
1051 result=u_strstr(temp, subString);
1052 if(result != temp+2){
1053 log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result-temp, result);
1054 }
1055 result=u_strstr(temp, subString+2); /* subString+2 is an empty string */
1056 if(result != temp){
1057 log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result-temp, result);
1058 }
1059 result=u_strstr(subString, temp);
1060 if(result != NULL){
1061 log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1062 }
1063
1064 /*Testing u_strchr32*/
1065 log_verbose("Testing u_strchr32\n");
1066 result=u_strchr32(temp, (UChar32)0x62);
1067 if(result != temp+1){
1068 log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1069 }
1070 result=u_strchr32(temp, (UChar32)0xfb);
1071 if(result != NULL){
1072 log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1073 }
1074 result=u_strchr32(temp, (UChar32)0x20402);
1075 if(result != temp+5){
1076 log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1077 }
1078
1079 temp[7]=0xfc00;
1080 result=u_memchr32(temp, (UChar32)0x20402, 7);
1081 if(result != temp+5){
1082 log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1083 }
1084 result=u_memchr32(temp, (UChar32)0x20402, 6);
1085 if(result != NULL){
1086 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1087 }
1088 result=u_memchr32(temp, (UChar32)0x20402, 1);
1089 if(result != NULL){
1090 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1091 }
1092 result=u_memchr32(temp, (UChar32)0xfc00, 8);
1093 if(result != temp+7){
1094 log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result-temp, result);
1095 }
1096 }
1097
1098 /* test u_unescape() and u_unescapeAt() ------------------------------------- */
1099
1100 static void
TestUnescape()1101 TestUnescape() {
1102 static UChar buffer[200];
1103
1104 static const char* input =
1105 "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}";
1106
1107 static const UChar expect[]={
1108 0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
1109 0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
1110 0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
1111 0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0
1112 };
1113 static const int32_t explength = UPRV_LENGTHOF(expect)-1;
1114 int32_t length;
1115
1116 /* test u_unescape() */
1117 length=u_unescape(input, buffer, UPRV_LENGTHOF(buffer));
1118 if(length!=explength || u_strcmp(buffer, expect)!=0) {
1119 log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length,
1120 explength);
1121 }
1122
1123 /* try preflighting */
1124 length=u_unescape(input, NULL, UPRV_LENGTHOF(buffer));
1125 if(length!=explength || u_strcmp(buffer, expect)!=0) {
1126 log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength);
1127 }
1128
1129 /* ### TODO: test u_unescapeAt() */
1130 }
1131
1132 static void
TestUnescapeRepeatedSurrogateLead20725()1133 TestUnescapeRepeatedSurrogateLead20725() {
1134 const int32_t repeat = 20000;
1135 const int32_t srclen = repeat * 6 + 1;
1136 char *src = (char*)malloc(srclen);
1137 UChar *dest = (UChar*) malloc(sizeof(UChar) * (repeat + 1));
1138 if (src == NULL || dest == NULL) {
1139 log_err("memory allocation error");
1140 }
1141 for (int32_t i = 0; i < repeat; i++) {
1142 uprv_strcpy(src + (i * 6), "\\ud841");
1143 }
1144 int32_t len = u_unescape(src, dest, repeat);
1145 if (len != repeat) {
1146 log_err("failure in u_unescape()");
1147 }
1148 for (int32_t i = 0; i < repeat; i++) {
1149 if (dest[i] != 0xd841) {
1150 log_err("failure in u_unescape() return value");
1151 }
1152 }
1153 free(src);
1154
1155 // A few simple test cases to make sure that the code recovers properly
1156 u_unescape("\\ud841\\x5A", dest, repeat);
1157 const UChar expected1[] = {0xd841, 'Z', 0};
1158 if (u_strcmp(dest, expected1)!=0) {
1159 log_err("u_unescape() should return u\"\\ud841Z\" but got %s", dest);
1160 }
1161
1162 u_unescape("\\ud841\\U00050005", dest, repeat);
1163 const UChar expected2[] = {0xd841, 0xd900, 0xdc05, 0};
1164 if (u_strcmp(dest, expected2)!=0) {
1165 log_err("u_unescape() should return u\"\\ud841\\ud900\\udc05\" "
1166 "but got %s", dest);
1167 }
1168
1169 // \\xXX is ill-formed. The documentation states:
1170 // If an escape sequence is ill-formed, this method returns an empty string.
1171 u_unescape("\\ud841\\xXX", dest, repeat);
1172 const UChar expected3[] = { 0 };
1173 if (u_strcmp(dest, expected3)!=0) {
1174 log_err("u_unescape() should return empty string");
1175 }
1176
1177 free(dest);
1178
1179 }
1180
1181 /* test code point counting functions --------------------------------------- */
1182
1183 /* reference implementation of u_strHasMoreChar32Than() */
1184 static int32_t
_refStrHasMoreChar32Than(const UChar * s,int32_t length,int32_t number)1185 _refStrHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
1186 int32_t count=u_countChar32(s, length);
1187 return count>number;
1188 }
1189
1190 /* compare the real function against the reference */
1191 static void
_testStrHasMoreChar32Than(const UChar * s,int32_t i,int32_t length,int32_t number)1192 _testStrHasMoreChar32Than(const UChar *s, int32_t i, int32_t length, int32_t number) {
1193 if(u_strHasMoreChar32Than(s, length, number)!=_refStrHasMoreChar32Than(s, length, number)) {
1194 log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n",
1195 i, length, number, u_strHasMoreChar32Than(s, length, number));
1196 }
1197 }
1198
1199 static void
TestCountChar32()1200 TestCountChar32() {
1201 static const UChar string[]={
1202 0x61, 0x62, 0xd800, 0xdc00,
1203 0xd801, 0xdc01, 0x63, 0xd802,
1204 0x64, 0xdc03, 0x65, 0x66,
1205 0xd804, 0xdc04, 0xd805, 0xdc05,
1206 0x67
1207 };
1208 UChar buffer[100];
1209 int32_t i, length, number;
1210
1211 /* test u_strHasMoreChar32Than() with length>=0 */
1212 length=UPRV_LENGTHOF(string);
1213 while(length>=0) {
1214 for(i=0; i<=length; ++i) {
1215 for(number=-1; number<=((length-i)+2); ++number) {
1216 _testStrHasMoreChar32Than(string+i, i, length-i, number);
1217 }
1218 }
1219 --length;
1220 }
1221
1222 /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */
1223 length=UPRV_LENGTHOF(string);
1224 u_memcpy(buffer, string, length);
1225 while(length>=0) {
1226 buffer[length]=0;
1227 for(i=0; i<=length; ++i) {
1228 for(number=-1; number<=((length-i)+2); ++number) {
1229 _testStrHasMoreChar32Than(buffer+i, i, -1, number);
1230 }
1231 }
1232 --length;
1233 }
1234
1235 /* test u_strHasMoreChar32Than() with NULL string (bad input) */
1236 for(length=-1; length<=1; ++length) {
1237 for(i=0; i<=length; ++i) {
1238 for(number=-2; number<=2; ++number) {
1239 _testStrHasMoreChar32Than(NULL, 0, length, number);
1240 }
1241 }
1242 }
1243 }
1244
1245 /* UCharIterator ------------------------------------------------------------ */
1246
1247 /*
1248 * Compare results from two iterators, should be same.
1249 * Assume that the text is not empty and that
1250 * iteration start==0 and iteration limit==length.
1251 */
1252 static void
compareIterators(UCharIterator * iter1,const char * n1,UCharIterator * iter2,const char * n2)1253 compareIterators(UCharIterator *iter1, const char *n1,
1254 UCharIterator *iter2, const char *n2) {
1255 int32_t i, pos1, pos2, middle, length;
1256 UChar32 c1, c2;
1257
1258 /* compare lengths */
1259 length=iter1->getIndex(iter1, UITER_LENGTH);
1260 pos2=iter2->getIndex(iter2, UITER_LENGTH);
1261 if(length!=pos2) {
1262 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, length, pos2, n2);
1263 return;
1264 }
1265
1266 /* set into the middle */
1267 middle=length/2;
1268
1269 pos1=iter1->move(iter1, middle, UITER_ZERO);
1270 if(pos1!=middle) {
1271 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1272 return;
1273 }
1274
1275 pos2=iter2->move(iter2, middle, UITER_ZERO);
1276 if(pos2!=middle) {
1277 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1278 return;
1279 }
1280
1281 /* test current() */
1282 c1=iter1->current(iter1);
1283 c2=iter2->current(iter2);
1284 if(c1!=c2) {
1285 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle);
1286 return;
1287 }
1288
1289 /* move forward 3 UChars */
1290 for(i=0; i<3; ++i) {
1291 c1=iter1->next(iter1);
1292 c2=iter2->next(iter2);
1293 if(c1!=c2) {
1294 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1295 return;
1296 }
1297 }
1298
1299 /* move backward 5 UChars */
1300 for(i=0; i<5; ++i) {
1301 c1=iter1->previous(iter1);
1302 c2=iter2->previous(iter2);
1303 if(c1!=c2) {
1304 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1305 return;
1306 }
1307 }
1308
1309 /* iterate forward from the beginning */
1310 pos1=iter1->move(iter1, 0, UITER_START);
1311 if(pos1<0) {
1312 log_err("%s->move(start) failed\n", n1);
1313 return;
1314 }
1315 if(!iter1->hasNext(iter1)) {
1316 log_err("%s->hasNext() at the start returns false\n", n1);
1317 return;
1318 }
1319
1320 pos2=iter2->move(iter2, 0, UITER_START);
1321 if(pos2<0) {
1322 log_err("%s->move(start) failed\n", n2);
1323 return;
1324 }
1325 if(!iter2->hasNext(iter2)) {
1326 log_err("%s->hasNext() at the start returns false\n", n2);
1327 return;
1328 }
1329
1330 do {
1331 c1=iter1->next(iter1);
1332 c2=iter2->next(iter2);
1333 if(c1!=c2) {
1334 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1335 return;
1336 }
1337 } while(c1>=0);
1338
1339 if(iter1->hasNext(iter1)) {
1340 log_err("%s->hasNext() at the end returns true\n", n1);
1341 return;
1342 }
1343 if(iter2->hasNext(iter2)) {
1344 log_err("%s->hasNext() at the end returns true\n", n2);
1345 return;
1346 }
1347
1348 /* back to the middle */
1349 pos1=iter1->move(iter1, middle, UITER_ZERO);
1350 if(pos1!=middle) {
1351 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1352 return;
1353 }
1354
1355 pos2=iter2->move(iter2, middle, UITER_ZERO);
1356 if(pos2!=middle) {
1357 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1358 return;
1359 }
1360
1361 /* move to index 1 */
1362 pos1=iter1->move(iter1, 1, UITER_ZERO);
1363 if(pos1!=1) {
1364 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, middle, pos1);
1365 return;
1366 }
1367
1368 pos2=iter2->move(iter2, 1, UITER_ZERO);
1369 if(pos2!=1) {
1370 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, middle, pos2);
1371 return;
1372 }
1373
1374 /* iterate backward from the end */
1375 pos1=iter1->move(iter1, 0, UITER_LIMIT);
1376 if(pos1<0) {
1377 log_err("%s->move(limit) failed\n", n1);
1378 return;
1379 }
1380 if(!iter1->hasPrevious(iter1)) {
1381 log_err("%s->hasPrevious() at the end returns false\n", n1);
1382 return;
1383 }
1384
1385 pos2=iter2->move(iter2, 0, UITER_LIMIT);
1386 if(pos2<0) {
1387 log_err("%s->move(limit) failed\n", n2);
1388 return;
1389 }
1390 if(!iter2->hasPrevious(iter2)) {
1391 log_err("%s->hasPrevious() at the end returns false\n", n2);
1392 return;
1393 }
1394
1395 do {
1396 c1=iter1->previous(iter1);
1397 c2=iter2->previous(iter2);
1398 if(c1!=c2) {
1399 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1400 return;
1401 }
1402 } while(c1>=0);
1403
1404 if(iter1->hasPrevious(iter1)) {
1405 log_err("%s->hasPrevious() at the start returns true\n", n1);
1406 return;
1407 }
1408 if(iter2->hasPrevious(iter2)) {
1409 log_err("%s->hasPrevious() at the start returns true\n", n2);
1410 return;
1411 }
1412 }
1413
1414 /*
1415 * Test the iterator's getState() and setState() functions.
1416 * iter1 and iter2 must be set up for the same iterator type and the same string
1417 * but may be physically different structs (different addresses).
1418 *
1419 * Assume that the text is not empty and that
1420 * iteration start==0 and iteration limit==length.
1421 * It must be 2<=middle<=length-2.
1422 */
1423 static void
testIteratorState(UCharIterator * iter1,UCharIterator * iter2,const char * n,int32_t middle)1424 testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int32_t middle) {
1425 UChar32 u[4];
1426
1427 UErrorCode errorCode;
1428 UChar32 c;
1429 uint32_t state;
1430 int32_t i, j;
1431
1432 /* get four UChars from the middle of the string */
1433 iter1->move(iter1, middle-2, UITER_ZERO);
1434 for(i=0; i<4; ++i) {
1435 c=iter1->next(iter1);
1436 if(c<0) {
1437 /* the test violates the assumptions, see comment above */
1438 log_err("test error: %s[%d]=%d\n", n, middle-2+i, c);
1439 return;
1440 }
1441 u[i]=c;
1442 }
1443
1444 /* move to the middle and get the state */
1445 iter1->move(iter1, -2, UITER_CURRENT);
1446 state=uiter_getState(iter1);
1447
1448 /* set the state into the second iterator and compare the results */
1449 errorCode=U_ZERO_ERROR;
1450 uiter_setState(iter2, state, &errorCode);
1451 if(U_FAILURE(errorCode)) {
1452 log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCode));
1453 return;
1454 }
1455
1456 c=iter2->current(iter2);
1457 if(c!=u[2]) {
1458 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]);
1459 }
1460
1461 c=iter2->previous(iter2);
1462 if(c!=u[1]) {
1463 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]);
1464 }
1465
1466 iter2->move(iter2, 2, UITER_CURRENT);
1467 c=iter2->next(iter2);
1468 if(c!=u[3]) {
1469 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]);
1470 }
1471
1472 iter2->move(iter2, -3, UITER_CURRENT);
1473 c=iter2->previous(iter2);
1474 if(c!=u[0]) {
1475 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]);
1476 }
1477
1478 /* move the second iterator back to the middle */
1479 iter2->move(iter2, 1, UITER_CURRENT);
1480 iter2->next(iter2);
1481
1482 /* check that both are in the middle */
1483 i=iter1->getIndex(iter1, UITER_CURRENT);
1484 j=iter2->getIndex(iter2, UITER_CURRENT);
1485 if(i!=middle) {
1486 log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle);
1487 }
1488 if(i!=j) {
1489 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i);
1490 }
1491
1492 /* compare lengths */
1493 i=iter1->getIndex(iter1, UITER_LENGTH);
1494 j=iter2->getIndex(iter2, UITER_LENGTH);
1495 if(i!=j) {
1496 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j);
1497 }
1498 }
1499
1500 static void
TestUCharIterator()1501 TestUCharIterator() {
1502 static const UChar text[]={
1503 0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0
1504 };
1505 char bytes[40];
1506
1507 UCharIterator iter, iter1, iter2;
1508 UConverter *cnv;
1509 UErrorCode errorCode;
1510 int32_t length;
1511
1512 /* simple API/code coverage - test NOOP UCharIterator */
1513 uiter_setString(&iter, NULL, 0);
1514 if( iter.current(&iter)!=-1 || iter.next(&iter)!=-1 || iter.previous(&iter)!=-1 ||
1515 iter.move(&iter, 1, UITER_CURRENT) || iter.getIndex(&iter, UITER_CURRENT)!=0 ||
1516 iter.hasNext(&iter) || iter.hasPrevious(&iter)
1517 ) {
1518 log_err("NOOP UCharIterator behaves unexpectedly\n");
1519 }
1520
1521 /* test get/set state */
1522 length=UPRV_LENGTHOF(text)-1;
1523 uiter_setString(&iter1, text, -1);
1524 uiter_setString(&iter2, text, length);
1525 testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2);
1526 testIteratorState(&iter1, &iter2, "UTF16IteratorStatePlus1", length/2+1);
1527
1528 /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */
1529 errorCode=U_ZERO_ERROR;
1530 u_strToUTF8(bytes, sizeof(bytes), &length, text, -1, &errorCode);
1531 if(U_FAILURE(errorCode)) {
1532 log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode));
1533 return;
1534 }
1535
1536 uiter_setString(&iter1, text, -1);
1537 uiter_setUTF8(&iter2, bytes, length);
1538 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator");
1539
1540 /* try again with length=-1 */
1541 uiter_setUTF8(&iter2, bytes, -1);
1542 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1");
1543
1544 /* test get/set state */
1545 length=UPRV_LENGTHOF(text)-1;
1546 uiter_setUTF8(&iter1, bytes, -1);
1547 testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2);
1548 testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1);
1549
1550 /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */
1551 errorCode=U_ZERO_ERROR;
1552 cnv=ucnv_open("UTF-16BE", &errorCode);
1553 length=ucnv_fromUChars(cnv, bytes, sizeof(bytes), text, -1, &errorCode);
1554 ucnv_close(cnv);
1555 if(U_FAILURE(errorCode)) {
1556 log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode));
1557 return;
1558 }
1559
1560 /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */
1561 bytes[length]=bytes[length+1]=0;
1562
1563 uiter_setString(&iter1, text, -1);
1564 uiter_setUTF16BE(&iter2, bytes, length);
1565 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator");
1566
1567 /* try again with length=-1 */
1568 uiter_setUTF16BE(&iter2, bytes, -1);
1569 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator_1");
1570
1571 /* try again after moving the bytes up one, and with length=-1 */
1572 memmove(bytes+1, bytes, length+2);
1573 uiter_setUTF16BE(&iter2, bytes+1, -1);
1574 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIteratorMoved1");
1575
1576 /* ### TODO test other iterators: CharacterIterator, Replaceable */
1577 }
1578