1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 * Copyright (C) 1999-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ******************************************************************************
8 *
9 * File unistr.cpp
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 09/25/98 stephen Creation.
15 * 04/20/99 stephen Overhauled per 4/16 code review.
16 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
17 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
18 * Replaceable.
19 * 06/25/01 grhoten Removed the dependency on iostream
20 ******************************************************************************
21 */
22
23 #include "unicode/utypes.h"
24 #include "unicode/appendable.h"
25 #include "unicode/putil.h"
26 #include "cstring.h"
27 #include "cmemory.h"
28 #include "unicode/ustring.h"
29 #include "unicode/unistr.h"
30 #include "unicode/utf.h"
31 #include "unicode/utf16.h"
32 #include "uelement.h"
33 #include "ustr_imp.h"
34 #include "umutex.h"
35 #include "uassert.h"
36
37 #if 0
38
39 #include <iostream>
40 using namespace std;
41
42 //DEBUGGING
43 void
44 print(const UnicodeString& s,
45 const char *name)
46 {
47 char16_t c;
48 cout << name << ":|";
49 for(int i = 0; i < s.length(); ++i) {
50 c = s[i];
51 if(c>= 0x007E || c < 0x0020)
52 cout << "[0x" << hex << s[i] << "]";
53 else
54 cout << (char) s[i];
55 }
56 cout << '|' << endl;
57 }
58
59 void
60 print(const char16_t *s,
61 int32_t len,
62 const char *name)
63 {
64 char16_t c;
65 cout << name << ":|";
66 for(int i = 0; i < len; ++i) {
67 c = s[i];
68 if(c>= 0x007E || c < 0x0020)
69 cout << "[0x" << hex << s[i] << "]";
70 else
71 cout << (char) s[i];
72 }
73 cout << '|' << endl;
74 }
75 // END DEBUGGING
76 #endif
77
78 // Local function definitions for now
79
80 // need to copy areas that may overlap
81 static
82 inline void
us_arrayCopy(const char16_t * src,int32_t srcStart,char16_t * dst,int32_t dstStart,int32_t count)83 us_arrayCopy(const char16_t *src, int32_t srcStart,
84 char16_t *dst, int32_t dstStart, int32_t count)
85 {
86 if(count>0) {
87 uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
88 }
89 }
90
91 // u_unescapeAt() callback to get a char16_t from a UnicodeString
92 U_CDECL_BEGIN
93 static char16_t U_CALLCONV
UnicodeString_charAt(int32_t offset,void * context)94 UnicodeString_charAt(int32_t offset, void *context) {
95 return ((icu::UnicodeString*) context)->charAt(offset);
96 }
97 U_CDECL_END
98
99 U_NAMESPACE_BEGIN
100
101 /* The Replaceable virtual destructor can't be defined in the header
102 due to how AIX works with multiple definitions of virtual functions.
103 */
~Replaceable()104 Replaceable::~Replaceable() {}
105
106 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
107
108 UnicodeString U_EXPORT2
109 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
110 return
111 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
112 append(s1).
113 append(s2);
114 }
115
116 //========================================
117 // Reference Counting functions, put at top of file so that optimizing compilers
118 // have a chance to automatically inline.
119 //========================================
120
121 void
addRef()122 UnicodeString::addRef() {
123 umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
124 }
125
126 int32_t
removeRef()127 UnicodeString::removeRef() {
128 return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
129 }
130
131 int32_t
refCount() const132 UnicodeString::refCount() const {
133 return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
134 }
135
136 void
releaseArray()137 UnicodeString::releaseArray() {
138 if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
139 uprv_free((int32_t *)fUnion.fFields.fArray - 1);
140 }
141 }
142
143
144
145 //========================================
146 // Constructors
147 //========================================
148
149 // The default constructor is inline in unistr.h.
150
UnicodeString(int32_t capacity,UChar32 c,int32_t count)151 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
152 fUnion.fFields.fLengthAndFlags = 0;
153 if(count <= 0 || (uint32_t)c > 0x10ffff) {
154 // just allocate and do not do anything else
155 allocate(capacity);
156 } else if(c <= 0xffff) {
157 int32_t length = count;
158 if(capacity < length) {
159 capacity = length;
160 }
161 if(allocate(capacity)) {
162 char16_t *array = getArrayStart();
163 char16_t unit = (char16_t)c;
164 for(int32_t i = 0; i < length; ++i) {
165 array[i] = unit;
166 }
167 setLength(length);
168 }
169 } else { // supplementary code point, write surrogate pairs
170 if(count > (INT32_MAX / 2)) {
171 // We would get more than 2G UChars.
172 allocate(capacity);
173 return;
174 }
175 int32_t length = count * 2;
176 if(capacity < length) {
177 capacity = length;
178 }
179 if(allocate(capacity)) {
180 char16_t *array = getArrayStart();
181 char16_t lead = U16_LEAD(c);
182 char16_t trail = U16_TRAIL(c);
183 for(int32_t i = 0; i < length; i += 2) {
184 array[i] = lead;
185 array[i + 1] = trail;
186 }
187 setLength(length);
188 }
189 }
190 }
191
UnicodeString(char16_t ch)192 UnicodeString::UnicodeString(char16_t ch) {
193 fUnion.fFields.fLengthAndFlags = kLength1 | kShortString;
194 fUnion.fStackFields.fBuffer[0] = ch;
195 }
196
UnicodeString(UChar32 ch)197 UnicodeString::UnicodeString(UChar32 ch) {
198 fUnion.fFields.fLengthAndFlags = kShortString;
199 int32_t i = 0;
200 UBool isError = false;
201 U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError);
202 // We test isError so that the compiler does not complain that we don't.
203 // If isError then i==0 which is what we want anyway.
204 if(!isError) {
205 setShortLength(i);
206 }
207 }
208
UnicodeString(const char16_t * text)209 UnicodeString::UnicodeString(const char16_t *text) {
210 fUnion.fFields.fLengthAndFlags = kShortString;
211 doAppend(text, 0, -1);
212 }
213
UnicodeString(const char16_t * text,int32_t textLength)214 UnicodeString::UnicodeString(const char16_t *text,
215 int32_t textLength) {
216 fUnion.fFields.fLengthAndFlags = kShortString;
217 doAppend(text, 0, textLength);
218 }
219
UnicodeString(UBool isTerminated,ConstChar16Ptr textPtr,int32_t textLength)220 UnicodeString::UnicodeString(UBool isTerminated,
221 ConstChar16Ptr textPtr,
222 int32_t textLength) {
223 fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
224 const char16_t *text = textPtr;
225 if(text == nullptr) {
226 // treat as an empty string, do not alias
227 setToEmpty();
228 } else if(textLength < -1 ||
229 (textLength == -1 && !isTerminated) ||
230 (textLength >= 0 && isTerminated && text[textLength] != 0)
231 ) {
232 setToBogus();
233 } else {
234 if(textLength == -1) {
235 // text is terminated, or else it would have failed the above test
236 textLength = u_strlen(text);
237 }
238 setArray(const_cast<char16_t *>(text), textLength,
239 isTerminated ? textLength + 1 : textLength);
240 }
241 }
242
UnicodeString(char16_t * buff,int32_t buffLength,int32_t buffCapacity)243 UnicodeString::UnicodeString(char16_t *buff,
244 int32_t buffLength,
245 int32_t buffCapacity) {
246 fUnion.fFields.fLengthAndFlags = kWritableAlias;
247 if(buff == nullptr) {
248 // treat as an empty string, do not alias
249 setToEmpty();
250 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
251 setToBogus();
252 } else {
253 if(buffLength == -1) {
254 // fLength = u_strlen(buff); but do not look beyond buffCapacity
255 const char16_t *p = buff, *limit = buff + buffCapacity;
256 while(p != limit && *p != 0) {
257 ++p;
258 }
259 buffLength = (int32_t)(p - buff);
260 }
261 setArray(buff, buffLength, buffCapacity);
262 }
263 }
264
UnicodeString(const char * src,int32_t length,EInvariant)265 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
266 fUnion.fFields.fLengthAndFlags = kShortString;
267 if(src==nullptr) {
268 // treat as an empty string
269 } else {
270 if(length<0) {
271 length=(int32_t)uprv_strlen(src);
272 }
273 if(cloneArrayIfNeeded(length, length, false)) {
274 u_charsToUChars(src, getArrayStart(), length);
275 setLength(length);
276 } else {
277 setToBogus();
278 }
279 }
280 }
281
282 #if U_CHARSET_IS_UTF8
283
UnicodeString(const char * codepageData)284 UnicodeString::UnicodeString(const char *codepageData) {
285 fUnion.fFields.fLengthAndFlags = kShortString;
286 if(codepageData != 0) {
287 setToUTF8(codepageData);
288 }
289 }
290
UnicodeString(const char * codepageData,int32_t dataLength)291 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
292 fUnion.fFields.fLengthAndFlags = kShortString;
293 // if there's nothing to convert, do nothing
294 if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
295 return;
296 }
297 if(dataLength == -1) {
298 dataLength = (int32_t)uprv_strlen(codepageData);
299 }
300 setToUTF8(StringPiece(codepageData, dataLength));
301 }
302
303 // else see unistr_cnv.cpp
304 #endif
305
// Copy constructor: start as an empty short string, then let copyFrom()
// take over the contents of that.
UnicodeString::UnicodeString(const UnicodeString& that) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  this->copyFrom(that);
}
310
UnicodeString(UnicodeString && src)311 UnicodeString::UnicodeString(UnicodeString &&src) noexcept {
312 copyFieldsFrom(src, true);
313 }
314
// Constructs from the part of that selected by setTo(that, srcStart).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  this->setTo(that, srcStart);
}
320
// Constructs from the part of that selected by
// setTo(that, srcStart, srcLength).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart,
                             int32_t srcLength) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  this->setTo(that, srcStart, srcLength);
}
327
328 // Replaceable base class clone() default implementation, does not clone
329 Replaceable *
clone() const330 Replaceable::clone() const {
331 return nullptr;
332 }
333
334 // UnicodeString overrides clone() with a real implementation
335 UnicodeString *
clone() const336 UnicodeString::clone() const {
337 LocalPointer<UnicodeString> clonedString(new UnicodeString(*this));
338 return clonedString.isValid() && !clonedString->isBogus() ? clonedString.orphan() : nullptr;
339 }
340
341 //========================================
342 // array allocation
343 //========================================
344
namespace {

// Fixed part of the extra room added when a buffer has to grow.
const int32_t kGrowSize = 128;

// Upper bound for a heap buffer's capacity.
// One int32_t reference counter plus capacity UChars must fit into a 32-bit
// size_t (at least on a 32-bit platform). One more unit is reserved for the
// NUL terminator, so that getTerminatedBuffer() need not reallocate, and the
// byte count is rounded up to a multiple of 16.
// Hence capacity <= (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
// (More complicated checks could allow up to 0x7ffffffd without rounding up,
// but that does not seem worth it.)
const int32_t kMaxCapacity = 0x7ffffff5;

// Returns the capacity to request for a string of newLength units:
// newLength plus a quarter of it plus kGrowSize, capped at kMaxCapacity.
int32_t getGrowCapacity(int32_t newLength) {
  const int32_t headroom = (newLength >> 2) + kGrowSize;
  const int32_t available = kMaxCapacity - newLength;
  return headroom > available ? kMaxCapacity : newLength + headroom;
}

}  // namespace
368
369 UBool
allocate(int32_t capacity)370 UnicodeString::allocate(int32_t capacity) {
371 if(capacity <= US_STACKBUF_SIZE) {
372 fUnion.fFields.fLengthAndFlags = kShortString;
373 return true;
374 }
375 if(capacity <= kMaxCapacity) {
376 ++capacity; // for the NUL
377 // Switch to size_t which is unsigned so that we can allocate up to 4GB.
378 // Reference counter + UChars.
379 size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
380 // Round up to a multiple of 16.
381 numBytes = (numBytes + 15) & ~15;
382 int32_t *array = (int32_t *) uprv_malloc(numBytes);
383 if(array != nullptr) {
384 // set initial refCount and point behind the refCount
385 *array++ = 1;
386 numBytes -= sizeof(int32_t);
387
388 // have fArray point to the first char16_t
389 fUnion.fFields.fArray = (char16_t *)array;
390 fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);
391 fUnion.fFields.fLengthAndFlags = kLongString;
392 return true;
393 }
394 }
395 fUnion.fFields.fLengthAndFlags = kIsBogus;
396 fUnion.fFields.fArray = 0;
397 fUnion.fFields.fCapacity = 0;
398 return false;
399 }
400
401 //========================================
402 // Destructor
403 //========================================
404
405 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
406 static u_atomic_int32_t finalLengthCounts[0x400]; // UnicodeString::kMaxShortLength+1
407 static u_atomic_int32_t beyondCount(0);
408
unistr_printLengths()409 U_CAPI void unistr_printLengths() {
410 int32_t i;
411 for(i = 0; i <= 59; ++i) {
412 printf("%2d, %9d\n", i, (int32_t)finalLengthCounts[i]);
413 }
414 int32_t beyond = beyondCount;
415 for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) {
416 beyond += finalLengthCounts[i];
417 }
418 printf(">59, %9d\n", beyond);
419 }
420 #endif
421
~UnicodeString()422 UnicodeString::~UnicodeString()
423 {
424 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
425 // Count lengths of strings at the end of their lifetime.
426 // Useful for discussion of a desirable stack buffer size.
427 // Count the contents length, not the optional NUL terminator nor further capacity.
428 // Ignore open-buffer strings and strings which alias external storage.
429 if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kReadonlyAlias|kWritableAlias)) == 0) {
430 if(hasShortLength()) {
431 umtx_atomic_inc(finalLengthCounts + getShortLength());
432 } else {
433 umtx_atomic_inc(&beyondCount);
434 }
435 }
436 #endif
437
438 releaseArray();
439 }
440
441 //========================================
442 // Factory methods
443 //========================================
444
fromUTF8(StringPiece utf8)445 UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
446 UnicodeString result;
447 result.setToUTF8(utf8);
448 return result;
449 }
450
fromUTF32(const UChar32 * utf32,int32_t length)451 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
452 UnicodeString result;
453 int32_t capacity;
454 // Most UTF-32 strings will be BMP-only and result in a same-length
455 // UTF-16 string. We overestimate the capacity just slightly,
456 // just in case there are a few supplementary characters.
457 if(length <= US_STACKBUF_SIZE) {
458 capacity = US_STACKBUF_SIZE;
459 } else {
460 capacity = length + (length >> 4) + 4;
461 }
462 do {
463 char16_t *utf16 = result.getBuffer(capacity);
464 int32_t length16;
465 UErrorCode errorCode = U_ZERO_ERROR;
466 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
467 utf32, length,
468 0xfffd, // Substitution character.
469 nullptr, // Don't care about number of substitutions.
470 &errorCode);
471 result.releaseBuffer(length16);
472 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
473 capacity = length16 + 1; // +1 for the terminating NUL.
474 continue;
475 } else if(U_FAILURE(errorCode)) {
476 result.setToBogus();
477 }
478 break;
479 } while(true);
480 return result;
481 }
482
483 //========================================
484 // Assignment
485 //========================================
486
487 UnicodeString &
operator =(const UnicodeString & src)488 UnicodeString::operator=(const UnicodeString &src) {
489 return copyFrom(src);
490 }
491
492 UnicodeString &
fastCopyFrom(const UnicodeString & src)493 UnicodeString::fastCopyFrom(const UnicodeString &src) {
494 return copyFrom(src, true);
495 }
496
497 UnicodeString &
copyFrom(const UnicodeString & src,UBool fastCopy)498 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
499 // if assigning to ourselves, do nothing
500 if(this == &src) {
501 return *this;
502 }
503
504 // is the right side bogus?
505 if(src.isBogus()) {
506 setToBogus();
507 return *this;
508 }
509
510 // delete the current contents
511 releaseArray();
512
513 if(src.isEmpty()) {
514 // empty string - use the stack buffer
515 setToEmpty();
516 return *this;
517 }
518
519 // fLength>0 and not an "open" src.getBuffer(minCapacity)
520 fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
521 switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
522 case kShortString:
523 // short string using the stack buffer, do the same
524 uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
525 getShortLength() * U_SIZEOF_UCHAR);
526 break;
527 case kLongString:
528 // src uses a refCounted string buffer, use that buffer with refCount
529 // src is const, use a cast - we don't actually change it
530 const_cast<UnicodeString &>(src).addRef();
531 // copy all fields, share the reference-counted buffer
532 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
533 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
534 if(!hasShortLength()) {
535 fUnion.fFields.fLength = src.fUnion.fFields.fLength;
536 }
537 break;
538 case kReadonlyAlias:
539 if(fastCopy) {
540 // src is a readonly alias, do the same
541 // -> maintain the readonly alias as such
542 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
543 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
544 if(!hasShortLength()) {
545 fUnion.fFields.fLength = src.fUnion.fFields.fLength;
546 }
547 break;
548 }
549 // else if(!fastCopy) fall through to case kWritableAlias
550 // -> allocate a new buffer and copy the contents
551 U_FALLTHROUGH;
552 case kWritableAlias: {
553 // src is a writable alias; we make a copy of that instead
554 int32_t srcLength = src.length();
555 if(allocate(srcLength)) {
556 u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
557 setLength(srcLength);
558 break;
559 }
560 // if there is not enough memory, then fall through to setting to bogus
561 U_FALLTHROUGH;
562 }
563 default:
564 // if src is bogus, set ourselves to bogus
565 // do not call setToBogus() here because fArray and flags are not consistent here
566 fUnion.fFields.fLengthAndFlags = kIsBogus;
567 fUnion.fFields.fArray = 0;
568 fUnion.fFields.fCapacity = 0;
569 break;
570 }
571
572 return *this;
573 }
574
operator =(UnicodeString && src)575 UnicodeString &UnicodeString::operator=(UnicodeString &&src) noexcept {
576 // No explicit check for self move assignment, consistent with standard library.
577 // Self move assignment causes no crash nor leak but might make the object bogus.
578 releaseArray();
579 copyFieldsFrom(src, true);
580 return *this;
581 }
582
583 // Same as move assignment except without memory management.
copyFieldsFrom(UnicodeString & src,UBool setSrcToBogus)584 void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept {
585 int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
586 if(lengthAndFlags & kUsingStackBuffer) {
587 // Short string using the stack buffer, copy the contents.
588 // Check for self assignment to prevent "overlap in memcpy" warnings,
589 // although it should be harmless to copy a buffer to itself exactly.
590 if(this != &src) {
591 uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
592 getShortLength() * U_SIZEOF_UCHAR);
593 }
594 } else {
595 // In all other cases, copy all fields.
596 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
597 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
598 if(!hasShortLength()) {
599 fUnion.fFields.fLength = src.fUnion.fFields.fLength;
600 }
601 if(setSrcToBogus) {
602 // Set src to bogus without releasing any memory.
603 src.fUnion.fFields.fLengthAndFlags = kIsBogus;
604 src.fUnion.fFields.fArray = nullptr;
605 src.fUnion.fFields.fCapacity = 0;
606 }
607 }
608 }
609
swap(UnicodeString & other)610 void UnicodeString::swap(UnicodeString &other) noexcept {
611 UnicodeString temp; // Empty short string: Known not to need releaseArray().
612 // Copy fields without resetting source values in between.
613 temp.copyFieldsFrom(*this, false);
614 this->copyFieldsFrom(other, false);
615 other.copyFieldsFrom(temp, false);
616 // Set temp to an empty string so that other's memory is not released twice.
617 temp.fUnion.fFields.fLengthAndFlags = kShortString;
618 }
619
620 //========================================
621 // Miscellaneous operations
622 //========================================
623
unescape() const624 UnicodeString UnicodeString::unescape() const {
625 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
626 if (result.isBogus()) {
627 return result;
628 }
629 const char16_t *array = getBuffer();
630 int32_t len = length();
631 int32_t prev = 0;
632 for (int32_t i=0;;) {
633 if (i == len) {
634 result.append(array, prev, len - prev);
635 break;
636 }
637 if (array[i++] == 0x5C /*'\\'*/) {
638 result.append(array, prev, (i - 1) - prev);
639 UChar32 c = unescapeAt(i); // advances i
640 if (c < 0) {
641 result.remove(); // return empty string
642 break; // invalid escape sequence
643 }
644 result.append(c);
645 prev = i;
646 }
647 }
648 return result;
649 }
650
unescapeAt(int32_t & offset) const651 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
652 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
653 }
654
655 //========================================
656 // Read-only implementation
657 //========================================
658 UBool
doEquals(const UnicodeString & text,int32_t len) const659 UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
660 // Requires: this & text not bogus and have same lengths.
661 // Byte-wise comparison works for equality regardless of endianness.
662 return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
663 }
664
665 UBool
doEqualsSubstring(int32_t start,int32_t length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength) const666 UnicodeString::doEqualsSubstring( int32_t start,
667 int32_t length,
668 const char16_t *srcChars,
669 int32_t srcStart,
670 int32_t srcLength) const
671 {
672 // compare illegal string values
673 if(isBogus()) {
674 return false;
675 }
676
677 // pin indices to legal values
678 pinIndices(start, length);
679
680 if(srcChars == nullptr) {
681 // treat const char16_t *srcChars==nullptr as an empty string
682 return length == 0 ? true : false;
683 }
684
685 // get the correct pointer
686 const char16_t *chars = getArrayStart();
687
688 chars += start;
689 srcChars += srcStart;
690
691 // get the srcLength if necessary
692 if(srcLength < 0) {
693 srcLength = u_strlen(srcChars + srcStart);
694 }
695
696 if (length != srcLength) {
697 return false;
698 }
699
700 if(length == 0 || chars == srcChars) {
701 return true;
702 }
703
704 return u_memcmp(chars, srcChars, srcLength) == 0;
705 }
706
707 int8_t
doCompare(int32_t start,int32_t length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength) const708 UnicodeString::doCompare( int32_t start,
709 int32_t length,
710 const char16_t *srcChars,
711 int32_t srcStart,
712 int32_t srcLength) const
713 {
714 // compare illegal string values
715 if(isBogus()) {
716 return -1;
717 }
718
719 // pin indices to legal values
720 pinIndices(start, length);
721
722 if(srcChars == nullptr) {
723 // treat const char16_t *srcChars==nullptr as an empty string
724 return length == 0 ? 0 : 1;
725 }
726
727 // get the correct pointer
728 const char16_t *chars = getArrayStart();
729
730 chars += start;
731 srcChars += srcStart;
732
733 int32_t minLength;
734 int8_t lengthResult;
735
736 // get the srcLength if necessary
737 if(srcLength < 0) {
738 srcLength = u_strlen(srcChars + srcStart);
739 }
740
741 // are we comparing different lengths?
742 if(length != srcLength) {
743 if(length < srcLength) {
744 minLength = length;
745 lengthResult = -1;
746 } else {
747 minLength = srcLength;
748 lengthResult = 1;
749 }
750 } else {
751 minLength = length;
752 lengthResult = 0;
753 }
754
755 /*
756 * note that uprv_memcmp() returns an int but we return an int8_t;
757 * we need to take care not to truncate the result -
758 * one way to do this is to right-shift the value to
759 * move the sign bit into the lower 8 bits and making sure that this
760 * does not become 0 itself
761 */
762
763 if(minLength > 0 && chars != srcChars) {
764 int32_t result;
765
766 # if U_IS_BIG_ENDIAN
767 // big-endian: byte comparison works
768 result = uprv_memcmp(chars, srcChars, minLength * sizeof(char16_t));
769 if(result != 0) {
770 return (int8_t)(result >> 15 | 1);
771 }
772 # else
773 // little-endian: compare char16_t units
774 do {
775 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
776 if(result != 0) {
777 return (int8_t)(result >> 15 | 1);
778 }
779 } while(--minLength > 0);
780 # endif
781 }
782 return lengthResult;
783 }
784
785 /* String compare in code point order - doCompare() compares in code unit order. */
786 int8_t
doCompareCodePointOrder(int32_t start,int32_t length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength) const787 UnicodeString::doCompareCodePointOrder(int32_t start,
788 int32_t length,
789 const char16_t *srcChars,
790 int32_t srcStart,
791 int32_t srcLength) const
792 {
793 // compare illegal string values
794 // treat const char16_t *srcChars==nullptr as an empty string
795 if(isBogus()) {
796 return -1;
797 }
798
799 // pin indices to legal values
800 pinIndices(start, length);
801
802 if(srcChars == nullptr) {
803 srcStart = srcLength = 0;
804 }
805
806 int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=nullptr)?(srcChars + srcStart):nullptr, srcLength, false, true);
807 /* translate the 32-bit result into an 8-bit one */
808 if(diff!=0) {
809 return (int8_t)(diff >> 15 | 1);
810 } else {
811 return 0;
812 }
813 }
814
815 int32_t
getLength() const816 UnicodeString::getLength() const {
817 return length();
818 }
819
820 char16_t
getCharAt(int32_t offset) const821 UnicodeString::getCharAt(int32_t offset) const {
822 return charAt(offset);
823 }
824
825 UChar32
getChar32At(int32_t offset) const826 UnicodeString::getChar32At(int32_t offset) const {
827 return char32At(offset);
828 }
829
830 UChar32
char32At(int32_t offset) const831 UnicodeString::char32At(int32_t offset) const
832 {
833 int32_t len = length();
834 if((uint32_t)offset < (uint32_t)len) {
835 const char16_t *array = getArrayStart();
836 UChar32 c;
837 U16_GET(array, 0, offset, len, c);
838 return c;
839 } else {
840 return kInvalidUChar;
841 }
842 }
843
844 int32_t
getChar32Start(int32_t offset) const845 UnicodeString::getChar32Start(int32_t offset) const {
846 if((uint32_t)offset < (uint32_t)length()) {
847 const char16_t *array = getArrayStart();
848 U16_SET_CP_START(array, 0, offset);
849 return offset;
850 } else {
851 return 0;
852 }
853 }
854
855 int32_t
getChar32Limit(int32_t offset) const856 UnicodeString::getChar32Limit(int32_t offset) const {
857 int32_t len = length();
858 if((uint32_t)offset < (uint32_t)len) {
859 const char16_t *array = getArrayStart();
860 U16_SET_CP_LIMIT(array, 0, offset, len);
861 return offset;
862 } else {
863 return len;
864 }
865 }
866
867 int32_t
countChar32(int32_t start,int32_t length) const868 UnicodeString::countChar32(int32_t start, int32_t length) const {
869 pinIndices(start, length);
870 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for nullptr
871 return u_countChar32(getArrayStart()+start, length);
872 }
873
874 UBool
hasMoreChar32Than(int32_t start,int32_t length,int32_t number) const875 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
876 pinIndices(start, length);
877 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for nullptr
878 return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
879 }
880
881 int32_t
moveIndex32(int32_t index,int32_t delta) const882 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
883 // pin index
884 int32_t len = length();
885 if(index<0) {
886 index=0;
887 } else if(index>len) {
888 index=len;
889 }
890
891 const char16_t *array = getArrayStart();
892 if(delta>0) {
893 U16_FWD_N(array, index, len, delta);
894 } else {
895 U16_BACK_N(array, 0, index, -delta);
896 }
897
898 return index;
899 }
900
901 void
doExtract(int32_t start,int32_t length,char16_t * dst,int32_t dstStart) const902 UnicodeString::doExtract(int32_t start,
903 int32_t length,
904 char16_t *dst,
905 int32_t dstStart) const
906 {
907 // pin indices to legal values
908 pinIndices(start, length);
909
910 // do not copy anything if we alias dst itself
911 const char16_t *array = getArrayStart();
912 if(array + start != dst + dstStart) {
913 us_arrayCopy(array, start, dst, dstStart, length);
914 }
915 }
916
917 int32_t
extract(Char16Ptr dest,int32_t destCapacity,UErrorCode & errorCode) const918 UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
919 UErrorCode &errorCode) const {
920 int32_t len = length();
921 if(U_SUCCESS(errorCode)) {
922 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
923 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
924 } else {
925 const char16_t *array = getArrayStart();
926 if(len>0 && len<=destCapacity && array!=dest) {
927 u_memcpy(dest, array, len);
928 }
929 return u_terminateUChars(dest, destCapacity, len, &errorCode);
930 }
931 }
932
933 return len;
934 }
935
936 int32_t
extract(int32_t start,int32_t length,char * target,int32_t targetCapacity,enum EInvariant) const937 UnicodeString::extract(int32_t start,
938 int32_t length,
939 char *target,
940 int32_t targetCapacity,
941 enum EInvariant) const
942 {
943 // if the arguments are illegal, then do nothing
944 if(targetCapacity < 0 || (targetCapacity > 0 && target == nullptr)) {
945 return 0;
946 }
947
948 // pin the indices to legal values
949 pinIndices(start, length);
950
951 if(length <= targetCapacity) {
952 u_UCharsToChars(getArrayStart() + start, target, length);
953 }
954 UErrorCode status = U_ZERO_ERROR;
955 return u_terminateChars(target, targetCapacity, length, &status);
956 }
957
958 UnicodeString
tempSubString(int32_t start,int32_t len) const959 UnicodeString::tempSubString(int32_t start, int32_t len) const {
960 pinIndices(start, len);
961 const char16_t *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
962 if(array==nullptr) {
963 array=fUnion.fStackFields.fBuffer; // anything not nullptr because that would make an empty string
964 len=-2; // bogus result string
965 }
966 return UnicodeString(false, array + start, len);
967 }
968
969 int32_t
toUTF8(int32_t start,int32_t len,char * target,int32_t capacity) const970 UnicodeString::toUTF8(int32_t start, int32_t len,
971 char *target, int32_t capacity) const {
972 pinIndices(start, len);
973 int32_t length8;
974 UErrorCode errorCode = U_ZERO_ERROR;
975 u_strToUTF8WithSub(target, capacity, &length8,
976 getBuffer() + start, len,
977 0xFFFD, // Standard substitution character.
978 nullptr, // Don't care about number of substitutions.
979 &errorCode);
980 return length8;
981 }
982
983 #if U_CHARSET_IS_UTF8
984
985 int32_t
extract(int32_t start,int32_t len,char * target,uint32_t dstSize) const986 UnicodeString::extract(int32_t start, int32_t len,
987 char *target, uint32_t dstSize) const {
988 // if the arguments are illegal, then do nothing
989 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
990 return 0;
991 }
992 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
993 }
994
995 // else see unistr_cnv.cpp
996 #endif
997
998 void
extractBetween(int32_t start,int32_t limit,UnicodeString & target) const999 UnicodeString::extractBetween(int32_t start,
1000 int32_t limit,
1001 UnicodeString& target) const {
1002 pinIndex(start);
1003 pinIndex(limit);
1004 doExtract(start, limit - start, target);
1005 }
1006
1007 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
1008 // as many bytes as the source has UChars.
1009 // The "worst cases" are writing systems like Indic, Thai and CJK with
1010 // 3:1 bytes:UChars.
1011 void
toUTF8(ByteSink & sink) const1012 UnicodeString::toUTF8(ByteSink &sink) const {
1013 int32_t length16 = length();
1014 if(length16 != 0) {
1015 char stackBuffer[1024];
1016 int32_t capacity = (int32_t)sizeof(stackBuffer);
1017 UBool utf8IsOwned = false;
1018 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
1019 3*length16,
1020 stackBuffer, capacity,
1021 &capacity);
1022 int32_t length8 = 0;
1023 UErrorCode errorCode = U_ZERO_ERROR;
1024 u_strToUTF8WithSub(utf8, capacity, &length8,
1025 getBuffer(), length16,
1026 0xFFFD, // Standard substitution character.
1027 nullptr, // Don't care about number of substitutions.
1028 &errorCode);
1029 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
1030 utf8 = (char *)uprv_malloc(length8);
1031 if(utf8 != nullptr) {
1032 utf8IsOwned = true;
1033 errorCode = U_ZERO_ERROR;
1034 u_strToUTF8WithSub(utf8, length8, &length8,
1035 getBuffer(), length16,
1036 0xFFFD, // Standard substitution character.
1037 nullptr, // Don't care about number of substitutions.
1038 &errorCode);
1039 } else {
1040 errorCode = U_MEMORY_ALLOCATION_ERROR;
1041 }
1042 }
1043 if(U_SUCCESS(errorCode)) {
1044 sink.Append(utf8, length8);
1045 sink.Flush();
1046 }
1047 if(utf8IsOwned) {
1048 uprv_free(utf8);
1049 }
1050 }
1051 }
1052
1053 int32_t
toUTF32(UChar32 * utf32,int32_t capacity,UErrorCode & errorCode) const1054 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
1055 int32_t length32=0;
1056 if(U_SUCCESS(errorCode)) {
1057 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
1058 u_strToUTF32WithSub(utf32, capacity, &length32,
1059 getBuffer(), length(),
1060 0xfffd, // Substitution character.
1061 nullptr, // Don't care about number of substitutions.
1062 &errorCode);
1063 }
1064 return length32;
1065 }
1066
1067 int32_t
indexOf(const char16_t * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const1068 UnicodeString::indexOf(const char16_t *srcChars,
1069 int32_t srcStart,
1070 int32_t srcLength,
1071 int32_t start,
1072 int32_t length) const
1073 {
1074 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1075 return -1;
1076 }
1077
1078 // UnicodeString does not find empty substrings
1079 if(srcLength < 0 && srcChars[srcStart] == 0) {
1080 return -1;
1081 }
1082
1083 // get the indices within bounds
1084 pinIndices(start, length);
1085
1086 // find the first occurrence of the substring
1087 const char16_t *array = getArrayStart();
1088 const char16_t *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
1089 if(match == nullptr) {
1090 return -1;
1091 } else {
1092 return (int32_t)(match - array);
1093 }
1094 }
1095
1096 int32_t
doIndexOf(char16_t c,int32_t start,int32_t length) const1097 UnicodeString::doIndexOf(char16_t c,
1098 int32_t start,
1099 int32_t length) const
1100 {
1101 // pin indices
1102 pinIndices(start, length);
1103
1104 // find the first occurrence of c
1105 const char16_t *array = getArrayStart();
1106 const char16_t *match = u_memchr(array + start, c, length);
1107 if(match == nullptr) {
1108 return -1;
1109 } else {
1110 return (int32_t)(match - array);
1111 }
1112 }
1113
1114 int32_t
doIndexOf(UChar32 c,int32_t start,int32_t length) const1115 UnicodeString::doIndexOf(UChar32 c,
1116 int32_t start,
1117 int32_t length) const {
1118 // pin indices
1119 pinIndices(start, length);
1120
1121 // find the first occurrence of c
1122 const char16_t *array = getArrayStart();
1123 const char16_t *match = u_memchr32(array + start, c, length);
1124 if(match == nullptr) {
1125 return -1;
1126 } else {
1127 return (int32_t)(match - array);
1128 }
1129 }
1130
1131 int32_t
lastIndexOf(const char16_t * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const1132 UnicodeString::lastIndexOf(const char16_t *srcChars,
1133 int32_t srcStart,
1134 int32_t srcLength,
1135 int32_t start,
1136 int32_t length) const
1137 {
1138 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1139 return -1;
1140 }
1141
1142 // UnicodeString does not find empty substrings
1143 if(srcLength < 0 && srcChars[srcStart] == 0) {
1144 return -1;
1145 }
1146
1147 // get the indices within bounds
1148 pinIndices(start, length);
1149
1150 // find the last occurrence of the substring
1151 const char16_t *array = getArrayStart();
1152 const char16_t *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
1153 if(match == nullptr) {
1154 return -1;
1155 } else {
1156 return (int32_t)(match - array);
1157 }
1158 }
1159
1160 int32_t
doLastIndexOf(char16_t c,int32_t start,int32_t length) const1161 UnicodeString::doLastIndexOf(char16_t c,
1162 int32_t start,
1163 int32_t length) const
1164 {
1165 if(isBogus()) {
1166 return -1;
1167 }
1168
1169 // pin indices
1170 pinIndices(start, length);
1171
1172 // find the last occurrence of c
1173 const char16_t *array = getArrayStart();
1174 const char16_t *match = u_memrchr(array + start, c, length);
1175 if(match == nullptr) {
1176 return -1;
1177 } else {
1178 return (int32_t)(match - array);
1179 }
1180 }
1181
1182 int32_t
doLastIndexOf(UChar32 c,int32_t start,int32_t length) const1183 UnicodeString::doLastIndexOf(UChar32 c,
1184 int32_t start,
1185 int32_t length) const {
1186 // pin indices
1187 pinIndices(start, length);
1188
1189 // find the last occurrence of c
1190 const char16_t *array = getArrayStart();
1191 const char16_t *match = u_memrchr32(array + start, c, length);
1192 if(match == nullptr) {
1193 return -1;
1194 } else {
1195 return (int32_t)(match - array);
1196 }
1197 }
1198
1199 //========================================
1200 // Write implementation
1201 //========================================
1202
1203 UnicodeString&
findAndReplace(int32_t start,int32_t length,const UnicodeString & oldText,int32_t oldStart,int32_t oldLength,const UnicodeString & newText,int32_t newStart,int32_t newLength)1204 UnicodeString::findAndReplace(int32_t start,
1205 int32_t length,
1206 const UnicodeString& oldText,
1207 int32_t oldStart,
1208 int32_t oldLength,
1209 const UnicodeString& newText,
1210 int32_t newStart,
1211 int32_t newLength)
1212 {
1213 if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1214 return *this;
1215 }
1216
1217 pinIndices(start, length);
1218 oldText.pinIndices(oldStart, oldLength);
1219 newText.pinIndices(newStart, newLength);
1220
1221 if(oldLength == 0) {
1222 return *this;
1223 }
1224
1225 while(length > 0 && length >= oldLength) {
1226 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1227 if(pos < 0) {
1228 // no more oldText's here: done
1229 break;
1230 } else {
1231 // we found oldText, replace it by newText and go beyond it
1232 replace(pos, oldLength, newText, newStart, newLength);
1233 length -= pos + oldLength - start;
1234 start = pos + newLength;
1235 }
1236 }
1237
1238 return *this;
1239 }
1240
1241
1242 void
setToBogus()1243 UnicodeString::setToBogus()
1244 {
1245 releaseArray();
1246
1247 fUnion.fFields.fLengthAndFlags = kIsBogus;
1248 fUnion.fFields.fArray = 0;
1249 fUnion.fFields.fCapacity = 0;
1250 }
1251
1252 // turn a bogus string into an empty one
1253 void
unBogus()1254 UnicodeString::unBogus() {
1255 if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
1256 setToEmpty();
1257 }
1258 }
1259
1260 const char16_t *
getTerminatedBuffer()1261 UnicodeString::getTerminatedBuffer() {
1262 if(!isWritable()) {
1263 return nullptr;
1264 }
1265 char16_t *array = getArrayStart();
1266 int32_t len = length();
1267 if(len < getCapacity()) {
1268 if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
1269 // If len<capacity on a read-only alias, then array[len] is
1270 // either the original NUL (if constructed with (true, s, length))
1271 // or one of the original string contents characters (if later truncated),
1272 // therefore we can assume that array[len] is initialized memory.
1273 if(array[len] == 0) {
1274 return array;
1275 }
1276 } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) {
1277 // kRefCounted: Do not write the NUL if the buffer is shared.
1278 // That is mostly safe, except when the length of one copy was modified
1279 // without copy-on-write, e.g., via truncate(newLength) or remove().
1280 // Then the NUL would be written into the middle of another copy's string.
1281
1282 // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1283 // Do not test if there is a NUL already because it might be uninitialized memory.
1284 // (That would be safe, but tools like valgrind & Purify would complain.)
1285 array[len] = 0;
1286 return array;
1287 }
1288 }
1289 if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
1290 array = getArrayStart();
1291 array[len] = 0;
1292 return array;
1293 } else {
1294 return nullptr;
1295 }
1296 }
1297
1298 // setTo() analogous to the readonly-aliasing constructor with the same signature
1299 UnicodeString &
setTo(UBool isTerminated,ConstChar16Ptr textPtr,int32_t textLength)1300 UnicodeString::setTo(UBool isTerminated,
1301 ConstChar16Ptr textPtr,
1302 int32_t textLength)
1303 {
1304 if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
1305 // do not modify a string that has an "open" getBuffer(minCapacity)
1306 return *this;
1307 }
1308
1309 const char16_t *text = textPtr;
1310 if(text == nullptr) {
1311 // treat as an empty string, do not alias
1312 releaseArray();
1313 setToEmpty();
1314 return *this;
1315 }
1316
1317 if( textLength < -1 ||
1318 (textLength == -1 && !isTerminated) ||
1319 (textLength >= 0 && isTerminated && text[textLength] != 0)
1320 ) {
1321 setToBogus();
1322 return *this;
1323 }
1324
1325 releaseArray();
1326
1327 if(textLength == -1) {
1328 // text is terminated, or else it would have failed the above test
1329 textLength = u_strlen(text);
1330 }
1331 fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
1332 setArray((char16_t *)text, textLength, isTerminated ? textLength + 1 : textLength);
1333 return *this;
1334 }
1335
1336 // setTo() analogous to the writable-aliasing constructor with the same signature
1337 UnicodeString &
setTo(char16_t * buffer,int32_t buffLength,int32_t buffCapacity)1338 UnicodeString::setTo(char16_t *buffer,
1339 int32_t buffLength,
1340 int32_t buffCapacity) {
1341 if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
1342 // do not modify a string that has an "open" getBuffer(minCapacity)
1343 return *this;
1344 }
1345
1346 if(buffer == nullptr) {
1347 // treat as an empty string, do not alias
1348 releaseArray();
1349 setToEmpty();
1350 return *this;
1351 }
1352
1353 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1354 setToBogus();
1355 return *this;
1356 } else if(buffLength == -1) {
1357 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1358 const char16_t *p = buffer, *limit = buffer + buffCapacity;
1359 while(p != limit && *p != 0) {
1360 ++p;
1361 }
1362 buffLength = (int32_t)(p - buffer);
1363 }
1364
1365 releaseArray();
1366
1367 fUnion.fFields.fLengthAndFlags = kWritableAlias;
1368 setArray(buffer, buffLength, buffCapacity);
1369 return *this;
1370 }
1371
setToUTF8(StringPiece utf8)1372 UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
1373 unBogus();
1374 int32_t length = utf8.length();
1375 int32_t capacity;
1376 // The UTF-16 string will be at most as long as the UTF-8 string.
1377 if(length <= US_STACKBUF_SIZE) {
1378 capacity = US_STACKBUF_SIZE;
1379 } else {
1380 capacity = length + 1; // +1 for the terminating NUL.
1381 }
1382 char16_t *utf16 = getBuffer(capacity);
1383 int32_t length16;
1384 UErrorCode errorCode = U_ZERO_ERROR;
1385 u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1386 utf8.data(), length,
1387 0xfffd, // Substitution character.
1388 nullptr, // Don't care about number of substitutions.
1389 &errorCode);
1390 releaseBuffer(length16);
1391 if(U_FAILURE(errorCode)) {
1392 setToBogus();
1393 }
1394 return *this;
1395 }
1396
1397 UnicodeString&
setCharAt(int32_t offset,char16_t c)1398 UnicodeString::setCharAt(int32_t offset,
1399 char16_t c)
1400 {
1401 int32_t len = length();
1402 if(cloneArrayIfNeeded() && len > 0) {
1403 if(offset < 0) {
1404 offset = 0;
1405 } else if(offset >= len) {
1406 offset = len - 1;
1407 }
1408
1409 getArrayStart()[offset] = c;
1410 }
1411 return *this;
1412 }
1413
1414 UnicodeString&
replace(int32_t start,int32_t _length,UChar32 srcChar)1415 UnicodeString::replace(int32_t start,
1416 int32_t _length,
1417 UChar32 srcChar) {
1418 char16_t buffer[U16_MAX_LENGTH];
1419 int32_t count = 0;
1420 UBool isError = false;
1421 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
1422 // We test isError so that the compiler does not complain that we don't.
1423 // If isError (srcChar is not a valid code point) then count==0 which means
1424 // we remove the source segment rather than replacing it with srcChar.
1425 return doReplace(start, _length, buffer, 0, isError ? 0 : count);
1426 }
1427
1428 UnicodeString&
append(UChar32 srcChar)1429 UnicodeString::append(UChar32 srcChar) {
1430 char16_t buffer[U16_MAX_LENGTH];
1431 int32_t _length = 0;
1432 UBool isError = false;
1433 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
1434 // We test isError so that the compiler does not complain that we don't.
1435 // If isError then _length==0 which turns the doAppend() into a no-op anyway.
1436 return isError ? *this : doAppend(buffer, 0, _length);
1437 }
1438
1439 UnicodeString&
doReplace(int32_t start,int32_t length,const UnicodeString & src,int32_t srcStart,int32_t srcLength)1440 UnicodeString::doReplace( int32_t start,
1441 int32_t length,
1442 const UnicodeString& src,
1443 int32_t srcStart,
1444 int32_t srcLength)
1445 {
1446 // pin the indices to legal values
1447 src.pinIndices(srcStart, srcLength);
1448
1449 // get the characters from src
1450 // and replace the range in ourselves with them
1451 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1452 }
1453
1454 UnicodeString&
doReplace(int32_t start,int32_t length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength)1455 UnicodeString::doReplace(int32_t start,
1456 int32_t length,
1457 const char16_t *srcChars,
1458 int32_t srcStart,
1459 int32_t srcLength)
1460 {
1461 if(!isWritable()) {
1462 return *this;
1463 }
1464
1465 int32_t oldLength = this->length();
1466
1467 // optimize (read-only alias).remove(0, start) and .remove(start, end)
1468 if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) {
1469 if(start == 0) {
1470 // remove prefix by adjusting the array pointer
1471 pinIndex(length);
1472 fUnion.fFields.fArray += length;
1473 fUnion.fFields.fCapacity -= length;
1474 setLength(oldLength - length);
1475 return *this;
1476 } else {
1477 pinIndex(start);
1478 if(length >= (oldLength - start)) {
1479 // remove suffix by reducing the length (like truncate())
1480 setLength(start);
1481 fUnion.fFields.fCapacity = start; // not NUL-terminated any more
1482 return *this;
1483 }
1484 }
1485 }
1486
1487 if(start == oldLength) {
1488 return doAppend(srcChars, srcStart, srcLength);
1489 }
1490
1491 if(srcChars == 0) {
1492 srcLength = 0;
1493 } else {
1494 // Perform all remaining operations relative to srcChars + srcStart.
1495 // From this point forward, do not use srcStart.
1496 srcChars += srcStart;
1497 if (srcLength < 0) {
1498 // get the srcLength if necessary
1499 srcLength = u_strlen(srcChars);
1500 }
1501 }
1502
1503 // pin the indices to legal values
1504 pinIndices(start, length);
1505
1506 // Calculate the size of the string after the replace.
1507 // Avoid int32_t overflow.
1508 int32_t newLength = oldLength - length;
1509 if(srcLength > (INT32_MAX - newLength)) {
1510 setToBogus();
1511 return *this;
1512 }
1513 newLength += srcLength;
1514
1515 // Check for insertion into ourself
1516 const char16_t *oldArray = getArrayStart();
1517 if (isBufferWritable() &&
1518 oldArray < srcChars + srcLength &&
1519 srcChars < oldArray + oldLength) {
1520 // Copy into a new UnicodeString and start over
1521 UnicodeString copy(srcChars, srcLength);
1522 if (copy.isBogus()) {
1523 setToBogus();
1524 return *this;
1525 }
1526 return doReplace(start, length, copy.getArrayStart(), 0, srcLength);
1527 }
1528
1529 // cloneArrayIfNeeded(doCopyArray=false) may change fArray but will not copy the current contents;
1530 // therefore we need to keep the current fArray
1531 char16_t oldStackBuffer[US_STACKBUF_SIZE];
1532 if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
1533 // copy the stack buffer contents because it will be overwritten with
1534 // fUnion.fFields values
1535 u_memcpy(oldStackBuffer, oldArray, oldLength);
1536 oldArray = oldStackBuffer;
1537 }
1538
1539 // clone our array and allocate a bigger array if needed
1540 int32_t *bufferToDelete = 0;
1541 if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
1542 false, &bufferToDelete)
1543 ) {
1544 return *this;
1545 }
1546
1547 // now do the replace
1548
1549 char16_t *newArray = getArrayStart();
1550 if(newArray != oldArray) {
1551 // if fArray changed, then we need to copy everything except what will change
1552 us_arrayCopy(oldArray, 0, newArray, 0, start);
1553 us_arrayCopy(oldArray, start + length,
1554 newArray, start + srcLength,
1555 oldLength - (start + length));
1556 } else if(length != srcLength) {
1557 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1558 us_arrayCopy(oldArray, start + length,
1559 newArray, start + srcLength,
1560 oldLength - (start + length));
1561 }
1562
1563 // now fill in the hole with the new string
1564 us_arrayCopy(srcChars, 0, newArray, start, srcLength);
1565
1566 setLength(newLength);
1567
1568 // delayed delete in case srcChars == fArray when we started, and
1569 // to keep oldArray alive for the above operations
1570 if (bufferToDelete) {
1571 uprv_free(bufferToDelete);
1572 }
1573
1574 return *this;
1575 }
1576
1577 // Versions of doReplace() only for append() variants.
1578 // doReplace() and doAppend() optimize for different cases.
1579
1580 UnicodeString&
doAppend(const UnicodeString & src,int32_t srcStart,int32_t srcLength)1581 UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
1582 if(srcLength == 0) {
1583 return *this;
1584 }
1585
1586 // pin the indices to legal values
1587 src.pinIndices(srcStart, srcLength);
1588 return doAppend(src.getArrayStart(), srcStart, srcLength);
1589 }
1590
1591 UnicodeString&
doAppend(const char16_t * srcChars,int32_t srcStart,int32_t srcLength)1592 UnicodeString::doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) {
1593 if(!isWritable() || srcLength == 0 || srcChars == nullptr) {
1594 return *this;
1595 }
1596
1597 // Perform all remaining operations relative to srcChars + srcStart.
1598 // From this point forward, do not use srcStart.
1599 srcChars += srcStart;
1600
1601 if(srcLength < 0) {
1602 // get the srcLength if necessary
1603 if((srcLength = u_strlen(srcChars)) == 0) {
1604 return *this;
1605 }
1606 }
1607
1608 int32_t oldLength = length();
1609 int32_t newLength;
1610 if (uprv_add32_overflow(oldLength, srcLength, &newLength)) {
1611 setToBogus();
1612 return *this;
1613 }
1614
1615 // Check for append onto ourself
1616 const char16_t* oldArray = getArrayStart();
1617 if (isBufferWritable() &&
1618 oldArray < srcChars + srcLength &&
1619 srcChars < oldArray + oldLength) {
1620 // Copy into a new UnicodeString and start over
1621 UnicodeString copy(srcChars, srcLength);
1622 if (copy.isBogus()) {
1623 setToBogus();
1624 return *this;
1625 }
1626 return doAppend(copy.getArrayStart(), 0, srcLength);
1627 }
1628
1629 // optimize append() onto a large-enough, owned string
1630 if((newLength <= getCapacity() && isBufferWritable()) ||
1631 cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
1632 char16_t *newArray = getArrayStart();
1633 // Do not copy characters when
1634 // char16_t *buffer=str.getAppendBuffer(...);
1635 // is followed by
1636 // str.append(buffer, length);
1637 // or
1638 // str.appendString(buffer, length)
1639 // or similar.
1640 if(srcChars != newArray + oldLength) {
1641 us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength);
1642 }
1643 setLength(newLength);
1644 }
1645 return *this;
1646 }
1647
1648 /**
1649 * Replaceable API
1650 */
1651 void
handleReplaceBetween(int32_t start,int32_t limit,const UnicodeString & text)1652 UnicodeString::handleReplaceBetween(int32_t start,
1653 int32_t limit,
1654 const UnicodeString& text) {
1655 replaceBetween(start, limit, text);
1656 }
1657
1658 /**
1659 * Replaceable API
1660 */
1661 void
copy(int32_t start,int32_t limit,int32_t dest)1662 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1663 if (limit <= start) {
1664 return; // Nothing to do; avoid bogus malloc call
1665 }
1666 char16_t* text = (char16_t*) uprv_malloc( sizeof(char16_t) * (limit - start) );
1667 // Check to make sure text is not null.
1668 if (text != nullptr) {
1669 extractBetween(start, limit, text, 0);
1670 insert(dest, text, 0, limit - start);
1671 uprv_free(text);
1672 }
1673 }
1674
1675 /**
1676 * Replaceable API
1677 *
1678 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1679 * so we implement this function here.
1680 */
hasMetaData() const1681 UBool Replaceable::hasMetaData() const {
1682 return true;
1683 }
1684
1685 /**
1686 * Replaceable API
1687 */
hasMetaData() const1688 UBool UnicodeString::hasMetaData() const {
1689 return false;
1690 }
1691
1692 UnicodeString&
doReverse(int32_t start,int32_t length)1693 UnicodeString::doReverse(int32_t start, int32_t length) {
1694 if(length <= 1 || !cloneArrayIfNeeded()) {
1695 return *this;
1696 }
1697
1698 // pin the indices to legal values
1699 pinIndices(start, length);
1700 if(length <= 1) { // pinIndices() might have shrunk the length
1701 return *this;
1702 }
1703
1704 char16_t *left = getArrayStart() + start;
1705 char16_t *right = left + length - 1; // -1 for inclusive boundary (length>=2)
1706 char16_t swap;
1707 UBool hasSupplementary = false;
1708
1709 // Before the loop we know left<right because length>=2.
1710 do {
1711 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1712 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1713 *right-- = swap;
1714 } while(left < right);
1715 // Make sure to test the middle code unit of an odd-length string.
1716 // Redundant if the length is even.
1717 hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1718
1719 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1720 if(hasSupplementary) {
1721 char16_t swap2;
1722
1723 left = getArrayStart() + start;
1724 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1725 while(left < right) {
1726 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1727 *left++ = swap2;
1728 *left++ = swap;
1729 } else {
1730 ++left;
1731 }
1732 }
1733 }
1734
1735 return *this;
1736 }
1737
1738 UBool
padLeading(int32_t targetLength,char16_t padChar)1739 UnicodeString::padLeading(int32_t targetLength,
1740 char16_t padChar)
1741 {
1742 int32_t oldLength = length();
1743 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1744 return false;
1745 } else {
1746 // move contents up by padding width
1747 char16_t *array = getArrayStart();
1748 int32_t start = targetLength - oldLength;
1749 us_arrayCopy(array, 0, array, start, oldLength);
1750
1751 // fill in padding character
1752 while(--start >= 0) {
1753 array[start] = padChar;
1754 }
1755 setLength(targetLength);
1756 return true;
1757 }
1758 }
1759
1760 UBool
padTrailing(int32_t targetLength,char16_t padChar)1761 UnicodeString::padTrailing(int32_t targetLength,
1762 char16_t padChar)
1763 {
1764 int32_t oldLength = length();
1765 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1766 return false;
1767 } else {
1768 // fill in padding character
1769 char16_t *array = getArrayStart();
1770 int32_t length = targetLength;
1771 while(--length >= oldLength) {
1772 array[length] = padChar;
1773 }
1774 setLength(targetLength);
1775 return true;
1776 }
1777 }
1778
1779 //========================================
1780 // Hashing
1781 //========================================
1782 int32_t
doHashCode() const1783 UnicodeString::doHashCode() const
1784 {
1785 /* Delegate hash computation to uhash. This makes UnicodeString
1786 * hashing consistent with char16_t* hashing. */
1787 int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
1788 if (hashCode == kInvalidHashCode) {
1789 hashCode = kEmptyHashCode;
1790 }
1791 return hashCode;
1792 }
1793
1794 //========================================
1795 // External Buffer
1796 //========================================
1797
1798 char16_t *
getBuffer(int32_t minCapacity)1799 UnicodeString::getBuffer(int32_t minCapacity) {
1800 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1801 fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer;
1802 setZeroLength();
1803 return getArrayStart();
1804 } else {
1805 return nullptr;
1806 }
1807 }
1808
1809 void
releaseBuffer(int32_t newLength)1810 UnicodeString::releaseBuffer(int32_t newLength) {
1811 if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {
1812 // set the new fLength
1813 int32_t capacity=getCapacity();
1814 if(newLength==-1) {
1815 // the new length is the string length, capped by fCapacity
1816 const char16_t *array=getArrayStart(), *p=array, *limit=array+capacity;
1817 while(p<limit && *p!=0) {
1818 ++p;
1819 }
1820 newLength=(int32_t)(p-array);
1821 } else if(newLength>capacity) {
1822 newLength=capacity;
1823 }
1824 setLength(newLength);
1825 fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
1826 }
1827 }
1828
1829 //========================================
1830 // Miscellaneous
1831 //========================================
1832 UBool
cloneArrayIfNeeded(int32_t newCapacity,int32_t growCapacity,UBool doCopyArray,int32_t ** pBufferToDelete,UBool forceClone)1833 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1834 int32_t growCapacity,
1835 UBool doCopyArray,
1836 int32_t **pBufferToDelete,
1837 UBool forceClone) {
1838 // default parameters need to be static, therefore
1839 // the defaults are -1 to have convenience defaults
1840 if(newCapacity == -1) {
1841 newCapacity = getCapacity();
1842 }
1843
1844 // while a getBuffer(minCapacity) is "open",
1845 // prevent any modifications of the string by returning false here
1846 // if the string is bogus, then only an assignment or similar can revive it
1847 if(!isWritable()) {
1848 return false;
1849 }
1850
1851 /*
1852 * We need to make a copy of the array if
1853 * the buffer is read-only, or
1854 * the buffer is refCounted (shared), and refCount>1, or
1855 * the buffer is too small.
1856 * Return false if memory could not be allocated.
1857 */
1858 if(forceClone ||
1859 fUnion.fFields.fLengthAndFlags & kBufferIsReadonly ||
1860 (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) ||
1861 newCapacity > getCapacity()
1862 ) {
1863 // check growCapacity for default value and use of the stack buffer
1864 if(growCapacity < 0) {
1865 growCapacity = newCapacity;
1866 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1867 growCapacity = US_STACKBUF_SIZE;
1868 }
1869
1870 // save old values
1871 char16_t oldStackBuffer[US_STACKBUF_SIZE];
1872 char16_t *oldArray;
1873 int32_t oldLength = length();
1874 int16_t flags = fUnion.fFields.fLengthAndFlags;
1875
1876 if(flags&kUsingStackBuffer) {
1877 U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
1878 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1879 // copy the stack buffer contents because it will be overwritten with
1880 // fUnion.fFields values
1881 us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
1882 oldArray = oldStackBuffer;
1883 } else {
1884 oldArray = nullptr; // no need to copy from the stack buffer to itself
1885 }
1886 } else {
1887 oldArray = fUnion.fFields.fArray;
1888 U_ASSERT(oldArray!=nullptr); /* when stack buffer is not used, oldArray must have a non-nullptr reference */
1889 }
1890
1891 // allocate a new array
1892 if(allocate(growCapacity) ||
1893 (newCapacity < growCapacity && allocate(newCapacity))
1894 ) {
1895 if(doCopyArray) {
1896 // copy the contents
1897 // do not copy more than what fits - it may be smaller than before
1898 int32_t minLength = oldLength;
1899 newCapacity = getCapacity();
1900 if(newCapacity < minLength) {
1901 minLength = newCapacity;
1902 }
1903 if(oldArray != nullptr) {
1904 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1905 }
1906 setLength(minLength);
1907 } else {
1908 setZeroLength();
1909 }
1910
1911 // release the old array
1912 if(flags & kRefCounted) {
1913 // the array is refCounted; decrement and release if 0
1914 u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
1915 if(umtx_atomic_dec(pRefCount) == 0) {
1916 if(pBufferToDelete == 0) {
1917 // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
1918 // is defined as volatile. (Volatile has useful non-standard behavior
1919 // with this compiler.)
1920 uprv_free((void *)pRefCount);
1921 } else {
1922 // the caller requested to delete it himself
1923 *pBufferToDelete = (int32_t *)pRefCount;
1924 }
1925 }
1926 }
1927 } else {
1928 // not enough memory for growCapacity and not even for the smaller newCapacity
1929 // reset the old values for setToBogus() to release the array
1930 if(!(flags&kUsingStackBuffer)) {
1931 fUnion.fFields.fArray = oldArray;
1932 }
1933 fUnion.fFields.fLengthAndFlags = flags;
1934 setToBogus();
1935 return false;
1936 }
1937 }
1938 return true;
1939 }
1940
1941 // UnicodeStringAppendable ------------------------------------------------- ***
1942
~UnicodeStringAppendable()1943 UnicodeStringAppendable::~UnicodeStringAppendable() {}
1944
1945 UBool
appendCodeUnit(char16_t c)1946 UnicodeStringAppendable::appendCodeUnit(char16_t c) {
1947 return str.doAppend(&c, 0, 1).isWritable();
1948 }
1949
1950 UBool
appendCodePoint(UChar32 c)1951 UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1952 char16_t buffer[U16_MAX_LENGTH];
1953 int32_t cLength = 0;
1954 UBool isError = false;
1955 U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
1956 return !isError && str.doAppend(buffer, 0, cLength).isWritable();
1957 }
1958
1959 UBool
appendString(const char16_t * s,int32_t length)1960 UnicodeStringAppendable::appendString(const char16_t *s, int32_t length) {
1961 return str.doAppend(s, 0, length).isWritable();
1962 }
1963
1964 UBool
reserveAppendCapacity(int32_t appendCapacity)1965 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1966 return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1967 }
1968
1969 char16_t *
getAppendBuffer(int32_t minCapacity,int32_t desiredCapacityHint,char16_t * scratch,int32_t scratchCapacity,int32_t * resultCapacity)1970 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1971 int32_t desiredCapacityHint,
1972 char16_t *scratch, int32_t scratchCapacity,
1973 int32_t *resultCapacity) {
1974 if(minCapacity < 1 || scratchCapacity < minCapacity) {
1975 *resultCapacity = 0;
1976 return nullptr;
1977 }
1978 int32_t oldLength = str.length();
1979 if(minCapacity <= (kMaxCapacity - oldLength) &&
1980 desiredCapacityHint <= (kMaxCapacity - oldLength) &&
1981 str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
1982 *resultCapacity = str.getCapacity() - oldLength;
1983 return str.getArrayStart() + oldLength;
1984 }
1985 *resultCapacity = scratchCapacity;
1986 return scratch;
1987 }
1988
1989 U_NAMESPACE_END
1990
1991 U_NAMESPACE_USE
1992
1993 U_CAPI int32_t U_EXPORT2
uhash_hashUnicodeString(const UElement key)1994 uhash_hashUnicodeString(const UElement key) {
1995 const UnicodeString *str = (const UnicodeString*) key.pointer;
1996 return (str == nullptr) ? 0 : str->hashCode();
1997 }
1998
1999 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
2000 // does not depend on hashtable code.
2001 U_CAPI UBool U_EXPORT2
uhash_compareUnicodeString(const UElement key1,const UElement key2)2002 uhash_compareUnicodeString(const UElement key1, const UElement key2) {
2003 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
2004 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
2005 if (str1 == str2) {
2006 return true;
2007 }
2008 if (str1 == nullptr || str2 == nullptr) {
2009 return false;
2010 }
2011 return *str1 == *str2;
2012 }
2013
2014 #ifdef U_STATIC_IMPLEMENTATION
2015 /*
2016 This should never be called. It is defined here to make sure that the
2017 virtual vector deleting destructor is defined within unistr.cpp.
2018 The vector deleting destructor is already a part of UObject,
2019 but defining it here makes sure that it is included with this object file.
2020 This makes sure that static library dependencies are kept to a minimum.
2021 */
2022 #if defined(__clang__) || U_GCC_MAJOR_MINOR >= 1100
2023 #pragma GCC diagnostic push
2024 #pragma GCC diagnostic ignored "-Wunused-function"
uprv_UnicodeStringDummy()2025 static void uprv_UnicodeStringDummy() {
2026 delete [] (new UnicodeString[2]);
2027 }
2028 #pragma GCC diagnostic pop
2029 #endif
2030 #endif
2031