xref: /aosp_15_r20/external/cronet/third_party/icu/source/common/unistr.cpp (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 * Copyright (C) 1999-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ******************************************************************************
8 *
9 * File unistr.cpp
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   09/25/98    stephen     Creation.
15 *   04/20/99    stephen     Overhauled per 4/16 code review.
16 *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
17 *   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
18 *                           Replaceable.
19 *   06/25/01    grhoten     Removed the dependency on iostream
20 ******************************************************************************
21 */
22 
23 #include "unicode/utypes.h"
24 #include "unicode/appendable.h"
25 #include "unicode/putil.h"
26 #include "cstring.h"
27 #include "cmemory.h"
28 #include "unicode/ustring.h"
29 #include "unicode/unistr.h"
30 #include "unicode/utf.h"
31 #include "unicode/utf16.h"
32 #include "uelement.h"
33 #include "ustr_imp.h"
34 #include "umutex.h"
35 #include "uassert.h"
36 
37 #if 0
38 
39 #include <iostream>
40 using namespace std;
41 
42 //DEBUGGING
43 void
44 print(const UnicodeString& s,
45       const char *name)
46 {
47   char16_t c;
48   cout << name << ":|";
49   for(int i = 0; i < s.length(); ++i) {
50     c = s[i];
51     if(c>= 0x007E || c < 0x0020)
52       cout << "[0x" << hex << s[i] << "]";
53     else
54       cout << (char) s[i];
55   }
56   cout << '|' << endl;
57 }
58 
59 void
60 print(const char16_t *s,
61       int32_t len,
62       const char *name)
63 {
64   char16_t c;
65   cout << name << ":|";
66   for(int i = 0; i < len; ++i) {
67     c = s[i];
68     if(c>= 0x007E || c < 0x0020)
69       cout << "[0x" << hex << s[i] << "]";
70     else
71       cout << (char) s[i];
72   }
73   cout << '|' << endl;
74 }
75 // END DEBUGGING
76 #endif
77 
78 // Local function definitions for now
79 
80 // need to copy areas that may overlap
81 static
82 inline void
us_arrayCopy(const char16_t * src,int32_t srcStart,char16_t * dst,int32_t dstStart,int32_t count)83 us_arrayCopy(const char16_t *src, int32_t srcStart,
84          char16_t *dst, int32_t dstStart, int32_t count)
85 {
86   if(count>0) {
87     uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
88   }
89 }
90 
91 // u_unescapeAt() callback to get a char16_t from a UnicodeString
92 U_CDECL_BEGIN
93 static char16_t U_CALLCONV
UnicodeString_charAt(int32_t offset,void * context)94 UnicodeString_charAt(int32_t offset, void *context) {
95     return ((icu::UnicodeString*) context)->charAt(offset);
96 }
97 U_CDECL_END
98 
99 U_NAMESPACE_BEGIN
100 
101 /* The Replaceable virtual destructor can't be defined in the header
102    due to how AIX works with multiple definitions of virtual functions.
103 */
~Replaceable()104 Replaceable::~Replaceable() {}
105 
106 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
107 
108 UnicodeString U_EXPORT2
109 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
110     return
111         UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
112             append(s1).
113                 append(s2);
114 }
115 
116 //========================================
117 // Reference Counting functions, put at top of file so that optimizing compilers
118 //                               have a chance to automatically inline.
119 //========================================
120 
121 void
addRef()122 UnicodeString::addRef() {
123   umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
124 }
125 
126 int32_t
removeRef()127 UnicodeString::removeRef() {
128   return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
129 }
130 
131 int32_t
refCount() const132 UnicodeString::refCount() const {
133   return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
134 }
135 
136 void
releaseArray()137 UnicodeString::releaseArray() {
138   if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
139     uprv_free((int32_t *)fUnion.fFields.fArray - 1);
140   }
141 }
142 
143 
144 
145 //========================================
146 // Constructors
147 //========================================
148 
149 // The default constructor is inline in unistr.h.
150 
UnicodeString(int32_t capacity,UChar32 c,int32_t count)151 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
152   fUnion.fFields.fLengthAndFlags = 0;
153   if(count <= 0 || (uint32_t)c > 0x10ffff) {
154     // just allocate and do not do anything else
155     allocate(capacity);
156   } else if(c <= 0xffff) {
157     int32_t length = count;
158     if(capacity < length) {
159       capacity = length;
160     }
161     if(allocate(capacity)) {
162       char16_t *array = getArrayStart();
163       char16_t unit = (char16_t)c;
164       for(int32_t i = 0; i < length; ++i) {
165         array[i] = unit;
166       }
167       setLength(length);
168     }
169   } else {  // supplementary code point, write surrogate pairs
170     if(count > (INT32_MAX / 2)) {
171       // We would get more than 2G UChars.
172       allocate(capacity);
173       return;
174     }
175     int32_t length = count * 2;
176     if(capacity < length) {
177       capacity = length;
178     }
179     if(allocate(capacity)) {
180       char16_t *array = getArrayStart();
181       char16_t lead = U16_LEAD(c);
182       char16_t trail = U16_TRAIL(c);
183       for(int32_t i = 0; i < length; i += 2) {
184         array[i] = lead;
185         array[i + 1] = trail;
186       }
187       setLength(length);
188     }
189   }
190 }
191 
UnicodeString(char16_t ch)192 UnicodeString::UnicodeString(char16_t ch) {
193   fUnion.fFields.fLengthAndFlags = kLength1 | kShortString;
194   fUnion.fStackFields.fBuffer[0] = ch;
195 }
196 
UnicodeString(UChar32 ch)197 UnicodeString::UnicodeString(UChar32 ch) {
198   fUnion.fFields.fLengthAndFlags = kShortString;
199   int32_t i = 0;
200   UBool isError = false;
201   U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError);
202   // We test isError so that the compiler does not complain that we don't.
203   // If isError then i==0 which is what we want anyway.
204   if(!isError) {
205     setShortLength(i);
206   }
207 }
208 
UnicodeString(const char16_t * text)209 UnicodeString::UnicodeString(const char16_t *text) {
210   fUnion.fFields.fLengthAndFlags = kShortString;
211   doAppend(text, 0, -1);
212 }
213 
UnicodeString(const char16_t * text,int32_t textLength)214 UnicodeString::UnicodeString(const char16_t *text,
215                              int32_t textLength) {
216   fUnion.fFields.fLengthAndFlags = kShortString;
217   doAppend(text, 0, textLength);
218 }
219 
UnicodeString(UBool isTerminated,ConstChar16Ptr textPtr,int32_t textLength)220 UnicodeString::UnicodeString(UBool isTerminated,
221                              ConstChar16Ptr textPtr,
222                              int32_t textLength) {
223   fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
224   const char16_t *text = textPtr;
225   if(text == nullptr) {
226     // treat as an empty string, do not alias
227     setToEmpty();
228   } else if(textLength < -1 ||
229             (textLength == -1 && !isTerminated) ||
230             (textLength >= 0 && isTerminated && text[textLength] != 0)
231   ) {
232     setToBogus();
233   } else {
234     if(textLength == -1) {
235       // text is terminated, or else it would have failed the above test
236       textLength = u_strlen(text);
237     }
238     setArray(const_cast<char16_t *>(text), textLength,
239              isTerminated ? textLength + 1 : textLength);
240   }
241 }
242 
UnicodeString(char16_t * buff,int32_t buffLength,int32_t buffCapacity)243 UnicodeString::UnicodeString(char16_t *buff,
244                              int32_t buffLength,
245                              int32_t buffCapacity) {
246   fUnion.fFields.fLengthAndFlags = kWritableAlias;
247   if(buff == nullptr) {
248     // treat as an empty string, do not alias
249     setToEmpty();
250   } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
251     setToBogus();
252   } else {
253     if(buffLength == -1) {
254       // fLength = u_strlen(buff); but do not look beyond buffCapacity
255       const char16_t *p = buff, *limit = buff + buffCapacity;
256       while(p != limit && *p != 0) {
257         ++p;
258       }
259       buffLength = (int32_t)(p - buff);
260     }
261     setArray(buff, buffLength, buffCapacity);
262   }
263 }
264 
UnicodeString(const char * src,int32_t length,EInvariant)265 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
266   fUnion.fFields.fLengthAndFlags = kShortString;
267   if(src==nullptr) {
268     // treat as an empty string
269   } else {
270     if(length<0) {
271       length=(int32_t)uprv_strlen(src);
272     }
273     if(cloneArrayIfNeeded(length, length, false)) {
274       u_charsToUChars(src, getArrayStart(), length);
275       setLength(length);
276     } else {
277       setToBogus();
278     }
279   }
280 }
281 
282 #if U_CHARSET_IS_UTF8
283 
UnicodeString(const char * codepageData)284 UnicodeString::UnicodeString(const char *codepageData) {
285   fUnion.fFields.fLengthAndFlags = kShortString;
286   if(codepageData != 0) {
287     setToUTF8(codepageData);
288   }
289 }
290 
UnicodeString(const char * codepageData,int32_t dataLength)291 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
292   fUnion.fFields.fLengthAndFlags = kShortString;
293   // if there's nothing to convert, do nothing
294   if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
295     return;
296   }
297   if(dataLength == -1) {
298     dataLength = (int32_t)uprv_strlen(codepageData);
299   }
300   setToUTF8(StringPiece(codepageData, dataLength));
301 }
302 
303 // else see unistr_cnv.cpp
304 #endif
305 
// Copy constructor: begins as an empty short string so that copyFrom()
// sees consistent fields, then copies `that`.
UnicodeString::UnicodeString(const UnicodeString& that) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  const UnicodeString &source = that;
  copyFrom(source);
}
310 
UnicodeString(UnicodeString && src)311 UnicodeString::UnicodeString(UnicodeString &&src) noexcept {
312   copyFieldsFrom(src, true);
313 }
314 
// Constructs a string via setTo(that, srcStart), starting from an empty
// short string.
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  const UnicodeString &source = that;
  setTo(source, srcStart);
}
320 
// Constructs a string via setTo(that, srcStart, srcLength), starting from
// an empty short string.
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart,
                             int32_t srcLength) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  const UnicodeString &source = that;
  setTo(source, srcStart, srcLength);
}
327 
328 // Replaceable base class clone() default implementation, does not clone
329 Replaceable *
clone() const330 Replaceable::clone() const {
331   return nullptr;
332 }
333 
334 // UnicodeString overrides clone() with a real implementation
335 UnicodeString *
clone() const336 UnicodeString::clone() const {
337   LocalPointer<UnicodeString> clonedString(new UnicodeString(*this));
338   return clonedString.isValid() && !clonedString->isBogus() ? clonedString.orphan() : nullptr;
339 }
340 
341 //========================================
342 // array allocation
343 //========================================
344 
namespace {

// Constant part of the growth increment used by getGrowCapacity().
const int32_t kGrowSize = 128;

// Upper bound for a string capacity.
// One int32_t reference counter plus `capacity` UChars must fit, in bytes,
// into a 32-bit size_t (at least on 32-bit platforms). One more unit is
// added for the NUL terminator, so that getTerminatedBuffer() need not
// reallocate, and the byte count is rounded up to a multiple of 16.
// Hence capacity can be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
// (More elaborate checks would allow up to 0x7ffffffd without rounding up,
// but that does not seem worth the effort.)
const int32_t kMaxCapacity = 0x7ffffff5;

// Returns newLength plus a quarter of newLength plus kGrowSize,
// or kMaxCapacity when that sum would exceed kMaxCapacity.
int32_t getGrowCapacity(int32_t newLength) {
  const int32_t growSize = (newLength >> 2) + kGrowSize;
  const int32_t headroom = kMaxCapacity - newLength;
  return growSize <= headroom ? newLength + growSize : kMaxCapacity;
}

}  // namespace
368 
369 UBool
allocate(int32_t capacity)370 UnicodeString::allocate(int32_t capacity) {
371   if(capacity <= US_STACKBUF_SIZE) {
372     fUnion.fFields.fLengthAndFlags = kShortString;
373     return true;
374   }
375   if(capacity <= kMaxCapacity) {
376     ++capacity;  // for the NUL
377     // Switch to size_t which is unsigned so that we can allocate up to 4GB.
378     // Reference counter + UChars.
379     size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
380     // Round up to a multiple of 16.
381     numBytes = (numBytes + 15) & ~15;
382     int32_t *array = (int32_t *) uprv_malloc(numBytes);
383     if(array != nullptr) {
384       // set initial refCount and point behind the refCount
385       *array++ = 1;
386       numBytes -= sizeof(int32_t);
387 
388       // have fArray point to the first char16_t
389       fUnion.fFields.fArray = (char16_t *)array;
390       fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);
391       fUnion.fFields.fLengthAndFlags = kLongString;
392       return true;
393     }
394   }
395   fUnion.fFields.fLengthAndFlags = kIsBogus;
396   fUnion.fFields.fArray = 0;
397   fUnion.fFields.fCapacity = 0;
398   return false;
399 }
400 
401 //========================================
402 // Destructor
403 //========================================
404 
405 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
406 static u_atomic_int32_t finalLengthCounts[0x400];  // UnicodeString::kMaxShortLength+1
407 static u_atomic_int32_t beyondCount(0);
408 
unistr_printLengths()409 U_CAPI void unistr_printLengths() {
410   int32_t i;
411   for(i = 0; i <= 59; ++i) {
412     printf("%2d,  %9d\n", i, (int32_t)finalLengthCounts[i]);
413   }
414   int32_t beyond = beyondCount;
415   for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) {
416     beyond += finalLengthCounts[i];
417   }
418   printf(">59, %9d\n", beyond);
419 }
420 #endif
421 
~UnicodeString()422 UnicodeString::~UnicodeString()
423 {
424 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
425   // Count lengths of strings at the end of their lifetime.
426   // Useful for discussion of a desirable stack buffer size.
427   // Count the contents length, not the optional NUL terminator nor further capacity.
428   // Ignore open-buffer strings and strings which alias external storage.
429   if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kReadonlyAlias|kWritableAlias)) == 0) {
430     if(hasShortLength()) {
431       umtx_atomic_inc(finalLengthCounts + getShortLength());
432     } else {
433       umtx_atomic_inc(&beyondCount);
434     }
435   }
436 #endif
437 
438   releaseArray();
439 }
440 
441 //========================================
442 // Factory methods
443 //========================================
444 
fromUTF8(StringPiece utf8)445 UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
446   UnicodeString result;
447   result.setToUTF8(utf8);
448   return result;
449 }
450 
fromUTF32(const UChar32 * utf32,int32_t length)451 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
452   UnicodeString result;
453   int32_t capacity;
454   // Most UTF-32 strings will be BMP-only and result in a same-length
455   // UTF-16 string. We overestimate the capacity just slightly,
456   // just in case there are a few supplementary characters.
457   if(length <= US_STACKBUF_SIZE) {
458     capacity = US_STACKBUF_SIZE;
459   } else {
460     capacity = length + (length >> 4) + 4;
461   }
462   do {
463     char16_t *utf16 = result.getBuffer(capacity);
464     int32_t length16;
465     UErrorCode errorCode = U_ZERO_ERROR;
466     u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
467         utf32, length,
468         0xfffd,  // Substitution character.
469         nullptr,    // Don't care about number of substitutions.
470         &errorCode);
471     result.releaseBuffer(length16);
472     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
473       capacity = length16 + 1;  // +1 for the terminating NUL.
474       continue;
475     } else if(U_FAILURE(errorCode)) {
476       result.setToBogus();
477     }
478     break;
479   } while(true);
480   return result;
481 }
482 
483 //========================================
484 // Assignment
485 //========================================
486 
487 UnicodeString &
operator =(const UnicodeString & src)488 UnicodeString::operator=(const UnicodeString &src) {
489   return copyFrom(src);
490 }
491 
492 UnicodeString &
fastCopyFrom(const UnicodeString & src)493 UnicodeString::fastCopyFrom(const UnicodeString &src) {
494   return copyFrom(src, true);
495 }
496 
497 UnicodeString &
copyFrom(const UnicodeString & src,UBool fastCopy)498 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
499   // if assigning to ourselves, do nothing
500   if(this == &src) {
501     return *this;
502   }
503 
504   // is the right side bogus?
505   if(src.isBogus()) {
506     setToBogus();
507     return *this;
508   }
509 
510   // delete the current contents
511   releaseArray();
512 
513   if(src.isEmpty()) {
514     // empty string - use the stack buffer
515     setToEmpty();
516     return *this;
517   }
518 
519   // fLength>0 and not an "open" src.getBuffer(minCapacity)
520   fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
521   switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
522   case kShortString:
523     // short string using the stack buffer, do the same
524     uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
525                 getShortLength() * U_SIZEOF_UCHAR);
526     break;
527   case kLongString:
528     // src uses a refCounted string buffer, use that buffer with refCount
529     // src is const, use a cast - we don't actually change it
530     const_cast<UnicodeString &>(src).addRef();
531     // copy all fields, share the reference-counted buffer
532     fUnion.fFields.fArray = src.fUnion.fFields.fArray;
533     fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
534     if(!hasShortLength()) {
535       fUnion.fFields.fLength = src.fUnion.fFields.fLength;
536     }
537     break;
538   case kReadonlyAlias:
539     if(fastCopy) {
540       // src is a readonly alias, do the same
541       // -> maintain the readonly alias as such
542       fUnion.fFields.fArray = src.fUnion.fFields.fArray;
543       fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
544       if(!hasShortLength()) {
545         fUnion.fFields.fLength = src.fUnion.fFields.fLength;
546       }
547       break;
548     }
549     // else if(!fastCopy) fall through to case kWritableAlias
550     // -> allocate a new buffer and copy the contents
551     U_FALLTHROUGH;
552   case kWritableAlias: {
553     // src is a writable alias; we make a copy of that instead
554     int32_t srcLength = src.length();
555     if(allocate(srcLength)) {
556       u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
557       setLength(srcLength);
558       break;
559     }
560     // if there is not enough memory, then fall through to setting to bogus
561     U_FALLTHROUGH;
562   }
563   default:
564     // if src is bogus, set ourselves to bogus
565     // do not call setToBogus() here because fArray and flags are not consistent here
566     fUnion.fFields.fLengthAndFlags = kIsBogus;
567     fUnion.fFields.fArray = 0;
568     fUnion.fFields.fCapacity = 0;
569     break;
570   }
571 
572   return *this;
573 }
574 
operator =(UnicodeString && src)575 UnicodeString &UnicodeString::operator=(UnicodeString &&src) noexcept {
576   // No explicit check for self move assignment, consistent with standard library.
577   // Self move assignment causes no crash nor leak but might make the object bogus.
578   releaseArray();
579   copyFieldsFrom(src, true);
580   return *this;
581 }
582 
583 // Same as move assignment except without memory management.
copyFieldsFrom(UnicodeString & src,UBool setSrcToBogus)584 void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept {
585   int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
586   if(lengthAndFlags & kUsingStackBuffer) {
587     // Short string using the stack buffer, copy the contents.
588     // Check for self assignment to prevent "overlap in memcpy" warnings,
589     // although it should be harmless to copy a buffer to itself exactly.
590     if(this != &src) {
591       uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
592                   getShortLength() * U_SIZEOF_UCHAR);
593     }
594   } else {
595     // In all other cases, copy all fields.
596     fUnion.fFields.fArray = src.fUnion.fFields.fArray;
597     fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
598     if(!hasShortLength()) {
599       fUnion.fFields.fLength = src.fUnion.fFields.fLength;
600     }
601     if(setSrcToBogus) {
602       // Set src to bogus without releasing any memory.
603       src.fUnion.fFields.fLengthAndFlags = kIsBogus;
604       src.fUnion.fFields.fArray = nullptr;
605       src.fUnion.fFields.fCapacity = 0;
606     }
607   }
608 }
609 
swap(UnicodeString & other)610 void UnicodeString::swap(UnicodeString &other) noexcept {
611   UnicodeString temp;  // Empty short string: Known not to need releaseArray().
612   // Copy fields without resetting source values in between.
613   temp.copyFieldsFrom(*this, false);
614   this->copyFieldsFrom(other, false);
615   other.copyFieldsFrom(temp, false);
616   // Set temp to an empty string so that other's memory is not released twice.
617   temp.fUnion.fFields.fLengthAndFlags = kShortString;
618 }
619 
620 //========================================
621 // Miscellaneous operations
622 //========================================
623 
unescape() const624 UnicodeString UnicodeString::unescape() const {
625     UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
626     if (result.isBogus()) {
627         return result;
628     }
629     const char16_t *array = getBuffer();
630     int32_t len = length();
631     int32_t prev = 0;
632     for (int32_t i=0;;) {
633         if (i == len) {
634             result.append(array, prev, len - prev);
635             break;
636         }
637         if (array[i++] == 0x5C /*'\\'*/) {
638             result.append(array, prev, (i - 1) - prev);
639             UChar32 c = unescapeAt(i); // advances i
640             if (c < 0) {
641                 result.remove(); // return empty string
642                 break; // invalid escape sequence
643             }
644             result.append(c);
645             prev = i;
646         }
647     }
648     return result;
649 }
650 
unescapeAt(int32_t & offset) const651 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
652     return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
653 }
654 
655 //========================================
656 // Read-only implementation
657 //========================================
658 UBool
doEquals(const UnicodeString & text,int32_t len) const659 UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
660   // Requires: this & text not bogus and have same lengths.
661   // Byte-wise comparison works for equality regardless of endianness.
662   return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
663 }
664 
665 UBool
doEqualsSubstring(int32_t start,int32_t length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength) const666 UnicodeString::doEqualsSubstring( int32_t start,
667               int32_t length,
668               const char16_t *srcChars,
669               int32_t srcStart,
670               int32_t srcLength) const
671 {
672   // compare illegal string values
673   if(isBogus()) {
674     return false;
675   }
676 
677   // pin indices to legal values
678   pinIndices(start, length);
679 
680   if(srcChars == nullptr) {
681     // treat const char16_t *srcChars==nullptr as an empty string
682     return length == 0 ? true : false;
683   }
684 
685   // get the correct pointer
686   const char16_t *chars = getArrayStart();
687 
688   chars += start;
689   srcChars += srcStart;
690 
691   // get the srcLength if necessary
692   if(srcLength < 0) {
693     srcLength = u_strlen(srcChars + srcStart);
694   }
695 
696   if (length != srcLength) {
697     return false;
698   }
699 
700   if(length == 0 || chars == srcChars) {
701     return true;
702   }
703 
704   return u_memcmp(chars, srcChars, srcLength) == 0;
705 }
706 
707 int8_t
doCompare(int32_t start,int32_t length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength) const708 UnicodeString::doCompare( int32_t start,
709               int32_t length,
710               const char16_t *srcChars,
711               int32_t srcStart,
712               int32_t srcLength) const
713 {
714   // compare illegal string values
715   if(isBogus()) {
716     return -1;
717   }
718 
719   // pin indices to legal values
720   pinIndices(start, length);
721 
722   if(srcChars == nullptr) {
723     // treat const char16_t *srcChars==nullptr as an empty string
724     return length == 0 ? 0 : 1;
725   }
726 
727   // get the correct pointer
728   const char16_t *chars = getArrayStart();
729 
730   chars += start;
731   srcChars += srcStart;
732 
733   int32_t minLength;
734   int8_t lengthResult;
735 
736   // get the srcLength if necessary
737   if(srcLength < 0) {
738     srcLength = u_strlen(srcChars + srcStart);
739   }
740 
741   // are we comparing different lengths?
742   if(length != srcLength) {
743     if(length < srcLength) {
744       minLength = length;
745       lengthResult = -1;
746     } else {
747       minLength = srcLength;
748       lengthResult = 1;
749     }
750   } else {
751     minLength = length;
752     lengthResult = 0;
753   }
754 
755   /*
756    * note that uprv_memcmp() returns an int but we return an int8_t;
757    * we need to take care not to truncate the result -
758    * one way to do this is to right-shift the value to
759    * move the sign bit into the lower 8 bits and making sure that this
760    * does not become 0 itself
761    */
762 
763   if(minLength > 0 && chars != srcChars) {
764     int32_t result;
765 
766 #   if U_IS_BIG_ENDIAN
767       // big-endian: byte comparison works
768       result = uprv_memcmp(chars, srcChars, minLength * sizeof(char16_t));
769       if(result != 0) {
770         return (int8_t)(result >> 15 | 1);
771       }
772 #   else
773       // little-endian: compare char16_t units
774       do {
775         result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
776         if(result != 0) {
777           return (int8_t)(result >> 15 | 1);
778         }
779       } while(--minLength > 0);
780 #   endif
781   }
782   return lengthResult;
783 }
784 
785 /* String compare in code point order - doCompare() compares in code unit order. */
786 int8_t
doCompareCodePointOrder(int32_t start,int32_t length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength) const787 UnicodeString::doCompareCodePointOrder(int32_t start,
788                                        int32_t length,
789                                        const char16_t *srcChars,
790                                        int32_t srcStart,
791                                        int32_t srcLength) const
792 {
793   // compare illegal string values
794   // treat const char16_t *srcChars==nullptr as an empty string
795   if(isBogus()) {
796     return -1;
797   }
798 
799   // pin indices to legal values
800   pinIndices(start, length);
801 
802   if(srcChars == nullptr) {
803     srcStart = srcLength = 0;
804   }
805 
806   int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=nullptr)?(srcChars + srcStart):nullptr, srcLength, false, true);
807   /* translate the 32-bit result into an 8-bit one */
808   if(diff!=0) {
809     return (int8_t)(diff >> 15 | 1);
810   } else {
811     return 0;
812   }
813 }
814 
815 int32_t
getLength() const816 UnicodeString::getLength() const {
817     return length();
818 }
819 
820 char16_t
getCharAt(int32_t offset) const821 UnicodeString::getCharAt(int32_t offset) const {
822   return charAt(offset);
823 }
824 
825 UChar32
getChar32At(int32_t offset) const826 UnicodeString::getChar32At(int32_t offset) const {
827   return char32At(offset);
828 }
829 
830 UChar32
char32At(int32_t offset) const831 UnicodeString::char32At(int32_t offset) const
832 {
833   int32_t len = length();
834   if((uint32_t)offset < (uint32_t)len) {
835     const char16_t *array = getArrayStart();
836     UChar32 c;
837     U16_GET(array, 0, offset, len, c);
838     return c;
839   } else {
840     return kInvalidUChar;
841   }
842 }
843 
844 int32_t
getChar32Start(int32_t offset) const845 UnicodeString::getChar32Start(int32_t offset) const {
846   if((uint32_t)offset < (uint32_t)length()) {
847     const char16_t *array = getArrayStart();
848     U16_SET_CP_START(array, 0, offset);
849     return offset;
850   } else {
851     return 0;
852   }
853 }
854 
855 int32_t
getChar32Limit(int32_t offset) const856 UnicodeString::getChar32Limit(int32_t offset) const {
857   int32_t len = length();
858   if((uint32_t)offset < (uint32_t)len) {
859     const char16_t *array = getArrayStart();
860     U16_SET_CP_LIMIT(array, 0, offset, len);
861     return offset;
862   } else {
863     return len;
864   }
865 }
866 
867 int32_t
countChar32(int32_t start,int32_t length) const868 UnicodeString::countChar32(int32_t start, int32_t length) const {
869   pinIndices(start, length);
870   // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for nullptr
871   return u_countChar32(getArrayStart()+start, length);
872 }
873 
874 UBool
hasMoreChar32Than(int32_t start,int32_t length,int32_t number) const875 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
876   pinIndices(start, length);
877   // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for nullptr
878   return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
879 }
880 
881 int32_t
moveIndex32(int32_t index,int32_t delta) const882 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
883   // pin index
884   int32_t len = length();
885   if(index<0) {
886     index=0;
887   } else if(index>len) {
888     index=len;
889   }
890 
891   const char16_t *array = getArrayStart();
892   if(delta>0) {
893     U16_FWD_N(array, index, len, delta);
894   } else {
895     U16_BACK_N(array, 0, index, -delta);
896   }
897 
898   return index;
899 }
900 
901 void
doExtract(int32_t start,int32_t length,char16_t * dst,int32_t dstStart) const902 UnicodeString::doExtract(int32_t start,
903              int32_t length,
904              char16_t *dst,
905              int32_t dstStart) const
906 {
907   // pin indices to legal values
908   pinIndices(start, length);
909 
910   // do not copy anything if we alias dst itself
911   const char16_t *array = getArrayStart();
912   if(array + start != dst + dstStart) {
913     us_arrayCopy(array, start, dst, dstStart, length);
914   }
915 }
916 
917 int32_t
extract(Char16Ptr dest,int32_t destCapacity,UErrorCode & errorCode) const918 UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
919                        UErrorCode &errorCode) const {
920   int32_t len = length();
921   if(U_SUCCESS(errorCode)) {
922     if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
923       errorCode=U_ILLEGAL_ARGUMENT_ERROR;
924     } else {
925       const char16_t *array = getArrayStart();
926       if(len>0 && len<=destCapacity && array!=dest) {
927         u_memcpy(dest, array, len);
928       }
929       return u_terminateUChars(dest, destCapacity, len, &errorCode);
930     }
931   }
932 
933   return len;
934 }
935 
936 int32_t
extract(int32_t start,int32_t length,char * target,int32_t targetCapacity,enum EInvariant) const937 UnicodeString::extract(int32_t start,
938                        int32_t length,
939                        char *target,
940                        int32_t targetCapacity,
941                        enum EInvariant) const
942 {
943   // if the arguments are illegal, then do nothing
944   if(targetCapacity < 0 || (targetCapacity > 0 && target == nullptr)) {
945     return 0;
946   }
947 
948   // pin the indices to legal values
949   pinIndices(start, length);
950 
951   if(length <= targetCapacity) {
952     u_UCharsToChars(getArrayStart() + start, target, length);
953   }
954   UErrorCode status = U_ZERO_ERROR;
955   return u_terminateChars(target, targetCapacity, length, &status);
956 }
957 
958 UnicodeString
tempSubString(int32_t start,int32_t len) const959 UnicodeString::tempSubString(int32_t start, int32_t len) const {
960   pinIndices(start, len);
961   const char16_t *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
962   if(array==nullptr) {
963     array=fUnion.fStackFields.fBuffer;  // anything not nullptr because that would make an empty string
964     len=-2;  // bogus result string
965   }
966   return UnicodeString(false, array + start, len);
967 }
968 
969 int32_t
toUTF8(int32_t start,int32_t len,char * target,int32_t capacity) const970 UnicodeString::toUTF8(int32_t start, int32_t len,
971                       char *target, int32_t capacity) const {
972   pinIndices(start, len);
973   int32_t length8;
974   UErrorCode errorCode = U_ZERO_ERROR;
975   u_strToUTF8WithSub(target, capacity, &length8,
976                      getBuffer() + start, len,
977                      0xFFFD,  // Standard substitution character.
978                      nullptr,    // Don't care about number of substitutions.
979                      &errorCode);
980   return length8;
981 }
982 
983 #if U_CHARSET_IS_UTF8
984 
985 int32_t
extract(int32_t start,int32_t len,char * target,uint32_t dstSize) const986 UnicodeString::extract(int32_t start, int32_t len,
987                        char *target, uint32_t dstSize) const {
988   // if the arguments are illegal, then do nothing
989   if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
990     return 0;
991   }
992   return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
993 }
994 
995 // else see unistr_cnv.cpp
996 #endif
997 
998 void
extractBetween(int32_t start,int32_t limit,UnicodeString & target) const999 UnicodeString::extractBetween(int32_t start,
1000                   int32_t limit,
1001                   UnicodeString& target) const {
1002   pinIndex(start);
1003   pinIndex(limit);
1004   doExtract(start, limit - start, target);
1005 }
1006 
1007 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
1008 // as many bytes as the source has UChars.
1009 // The "worst cases" are writing systems like Indic, Thai and CJK with
1010 // 3:1 bytes:UChars.
1011 void
toUTF8(ByteSink & sink) const1012 UnicodeString::toUTF8(ByteSink &sink) const {
1013   int32_t length16 = length();
1014   if(length16 != 0) {
1015     char stackBuffer[1024];
1016     int32_t capacity = (int32_t)sizeof(stackBuffer);
1017     UBool utf8IsOwned = false;
1018     char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
1019                                       3*length16,
1020                                       stackBuffer, capacity,
1021                                       &capacity);
1022     int32_t length8 = 0;
1023     UErrorCode errorCode = U_ZERO_ERROR;
1024     u_strToUTF8WithSub(utf8, capacity, &length8,
1025                        getBuffer(), length16,
1026                        0xFFFD,  // Standard substitution character.
1027                        nullptr,    // Don't care about number of substitutions.
1028                        &errorCode);
1029     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
1030       utf8 = (char *)uprv_malloc(length8);
1031       if(utf8 != nullptr) {
1032         utf8IsOwned = true;
1033         errorCode = U_ZERO_ERROR;
1034         u_strToUTF8WithSub(utf8, length8, &length8,
1035                            getBuffer(), length16,
1036                            0xFFFD,  // Standard substitution character.
1037                            nullptr,    // Don't care about number of substitutions.
1038                            &errorCode);
1039       } else {
1040         errorCode = U_MEMORY_ALLOCATION_ERROR;
1041       }
1042     }
1043     if(U_SUCCESS(errorCode)) {
1044       sink.Append(utf8, length8);
1045       sink.Flush();
1046     }
1047     if(utf8IsOwned) {
1048       uprv_free(utf8);
1049     }
1050   }
1051 }
1052 
1053 int32_t
toUTF32(UChar32 * utf32,int32_t capacity,UErrorCode & errorCode) const1054 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
1055   int32_t length32=0;
1056   if(U_SUCCESS(errorCode)) {
1057     // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
1058     u_strToUTF32WithSub(utf32, capacity, &length32,
1059         getBuffer(), length(),
1060         0xfffd,  // Substitution character.
1061         nullptr,    // Don't care about number of substitutions.
1062         &errorCode);
1063   }
1064   return length32;
1065 }
1066 
1067 int32_t
indexOf(const char16_t * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const1068 UnicodeString::indexOf(const char16_t *srcChars,
1069                int32_t srcStart,
1070                int32_t srcLength,
1071                int32_t start,
1072                int32_t length) const
1073 {
1074   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1075     return -1;
1076   }
1077 
1078   // UnicodeString does not find empty substrings
1079   if(srcLength < 0 && srcChars[srcStart] == 0) {
1080     return -1;
1081   }
1082 
1083   // get the indices within bounds
1084   pinIndices(start, length);
1085 
1086   // find the first occurrence of the substring
1087   const char16_t *array = getArrayStart();
1088   const char16_t *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
1089   if(match == nullptr) {
1090     return -1;
1091   } else {
1092     return (int32_t)(match - array);
1093   }
1094 }
1095 
1096 int32_t
doIndexOf(char16_t c,int32_t start,int32_t length) const1097 UnicodeString::doIndexOf(char16_t c,
1098              int32_t start,
1099              int32_t length) const
1100 {
1101   // pin indices
1102   pinIndices(start, length);
1103 
1104   // find the first occurrence of c
1105   const char16_t *array = getArrayStart();
1106   const char16_t *match = u_memchr(array + start, c, length);
1107   if(match == nullptr) {
1108     return -1;
1109   } else {
1110     return (int32_t)(match - array);
1111   }
1112 }
1113 
1114 int32_t
doIndexOf(UChar32 c,int32_t start,int32_t length) const1115 UnicodeString::doIndexOf(UChar32 c,
1116                          int32_t start,
1117                          int32_t length) const {
1118   // pin indices
1119   pinIndices(start, length);
1120 
1121   // find the first occurrence of c
1122   const char16_t *array = getArrayStart();
1123   const char16_t *match = u_memchr32(array + start, c, length);
1124   if(match == nullptr) {
1125     return -1;
1126   } else {
1127     return (int32_t)(match - array);
1128   }
1129 }
1130 
1131 int32_t
lastIndexOf(const char16_t * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const1132 UnicodeString::lastIndexOf(const char16_t *srcChars,
1133                int32_t srcStart,
1134                int32_t srcLength,
1135                int32_t start,
1136                int32_t length) const
1137 {
1138   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1139     return -1;
1140   }
1141 
1142   // UnicodeString does not find empty substrings
1143   if(srcLength < 0 && srcChars[srcStart] == 0) {
1144     return -1;
1145   }
1146 
1147   // get the indices within bounds
1148   pinIndices(start, length);
1149 
1150   // find the last occurrence of the substring
1151   const char16_t *array = getArrayStart();
1152   const char16_t *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
1153   if(match == nullptr) {
1154     return -1;
1155   } else {
1156     return (int32_t)(match - array);
1157   }
1158 }
1159 
1160 int32_t
doLastIndexOf(char16_t c,int32_t start,int32_t length) const1161 UnicodeString::doLastIndexOf(char16_t c,
1162                  int32_t start,
1163                  int32_t length) const
1164 {
1165   if(isBogus()) {
1166     return -1;
1167   }
1168 
1169   // pin indices
1170   pinIndices(start, length);
1171 
1172   // find the last occurrence of c
1173   const char16_t *array = getArrayStart();
1174   const char16_t *match = u_memrchr(array + start, c, length);
1175   if(match == nullptr) {
1176     return -1;
1177   } else {
1178     return (int32_t)(match - array);
1179   }
1180 }
1181 
1182 int32_t
doLastIndexOf(UChar32 c,int32_t start,int32_t length) const1183 UnicodeString::doLastIndexOf(UChar32 c,
1184                              int32_t start,
1185                              int32_t length) const {
1186   // pin indices
1187   pinIndices(start, length);
1188 
1189   // find the last occurrence of c
1190   const char16_t *array = getArrayStart();
1191   const char16_t *match = u_memrchr32(array + start, c, length);
1192   if(match == nullptr) {
1193     return -1;
1194   } else {
1195     return (int32_t)(match - array);
1196   }
1197 }
1198 
1199 //========================================
1200 // Write implementation
1201 //========================================
1202 
1203 UnicodeString&
findAndReplace(int32_t start,int32_t length,const UnicodeString & oldText,int32_t oldStart,int32_t oldLength,const UnicodeString & newText,int32_t newStart,int32_t newLength)1204 UnicodeString::findAndReplace(int32_t start,
1205                   int32_t length,
1206                   const UnicodeString& oldText,
1207                   int32_t oldStart,
1208                   int32_t oldLength,
1209                   const UnicodeString& newText,
1210                   int32_t newStart,
1211                   int32_t newLength)
1212 {
1213   if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1214     return *this;
1215   }
1216 
1217   pinIndices(start, length);
1218   oldText.pinIndices(oldStart, oldLength);
1219   newText.pinIndices(newStart, newLength);
1220 
1221   if(oldLength == 0) {
1222     return *this;
1223   }
1224 
1225   while(length > 0 && length >= oldLength) {
1226     int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1227     if(pos < 0) {
1228       // no more oldText's here: done
1229       break;
1230     } else {
1231       // we found oldText, replace it by newText and go beyond it
1232       replace(pos, oldLength, newText, newStart, newLength);
1233       length -= pos + oldLength - start;
1234       start = pos + newLength;
1235     }
1236   }
1237 
1238   return *this;
1239 }
1240 
1241 
1242 void
setToBogus()1243 UnicodeString::setToBogus()
1244 {
1245   releaseArray();
1246 
1247   fUnion.fFields.fLengthAndFlags = kIsBogus;
1248   fUnion.fFields.fArray = 0;
1249   fUnion.fFields.fCapacity = 0;
1250 }
1251 
1252 // turn a bogus string into an empty one
1253 void
unBogus()1254 UnicodeString::unBogus() {
1255   if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
1256     setToEmpty();
1257   }
1258 }
1259 
1260 const char16_t *
getTerminatedBuffer()1261 UnicodeString::getTerminatedBuffer() {
1262   if(!isWritable()) {
1263     return nullptr;
1264   }
1265   char16_t *array = getArrayStart();
1266   int32_t len = length();
1267   if(len < getCapacity()) {
1268     if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
1269       // If len<capacity on a read-only alias, then array[len] is
1270       // either the original NUL (if constructed with (true, s, length))
1271       // or one of the original string contents characters (if later truncated),
1272       // therefore we can assume that array[len] is initialized memory.
1273       if(array[len] == 0) {
1274         return array;
1275       }
1276     } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) {
1277       // kRefCounted: Do not write the NUL if the buffer is shared.
1278       // That is mostly safe, except when the length of one copy was modified
1279       // without copy-on-write, e.g., via truncate(newLength) or remove().
1280       // Then the NUL would be written into the middle of another copy's string.
1281 
1282       // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1283       // Do not test if there is a NUL already because it might be uninitialized memory.
1284       // (That would be safe, but tools like valgrind & Purify would complain.)
1285       array[len] = 0;
1286       return array;
1287     }
1288   }
1289   if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
1290     array = getArrayStart();
1291     array[len] = 0;
1292     return array;
1293   } else {
1294     return nullptr;
1295   }
1296 }
1297 
1298 // setTo() analogous to the readonly-aliasing constructor with the same signature
1299 UnicodeString &
setTo(UBool isTerminated,ConstChar16Ptr textPtr,int32_t textLength)1300 UnicodeString::setTo(UBool isTerminated,
1301                      ConstChar16Ptr textPtr,
1302                      int32_t textLength)
1303 {
1304   if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
1305     // do not modify a string that has an "open" getBuffer(minCapacity)
1306     return *this;
1307   }
1308 
1309   const char16_t *text = textPtr;
1310   if(text == nullptr) {
1311     // treat as an empty string, do not alias
1312     releaseArray();
1313     setToEmpty();
1314     return *this;
1315   }
1316 
1317   if( textLength < -1 ||
1318       (textLength == -1 && !isTerminated) ||
1319       (textLength >= 0 && isTerminated && text[textLength] != 0)
1320   ) {
1321     setToBogus();
1322     return *this;
1323   }
1324 
1325   releaseArray();
1326 
1327   if(textLength == -1) {
1328     // text is terminated, or else it would have failed the above test
1329     textLength = u_strlen(text);
1330   }
1331   fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
1332   setArray((char16_t *)text, textLength, isTerminated ? textLength + 1 : textLength);
1333   return *this;
1334 }
1335 
1336 // setTo() analogous to the writable-aliasing constructor with the same signature
1337 UnicodeString &
setTo(char16_t * buffer,int32_t buffLength,int32_t buffCapacity)1338 UnicodeString::setTo(char16_t *buffer,
1339                      int32_t buffLength,
1340                      int32_t buffCapacity) {
1341   if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
1342     // do not modify a string that has an "open" getBuffer(minCapacity)
1343     return *this;
1344   }
1345 
1346   if(buffer == nullptr) {
1347     // treat as an empty string, do not alias
1348     releaseArray();
1349     setToEmpty();
1350     return *this;
1351   }
1352 
1353   if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1354     setToBogus();
1355     return *this;
1356   } else if(buffLength == -1) {
1357     // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1358     const char16_t *p = buffer, *limit = buffer + buffCapacity;
1359     while(p != limit && *p != 0) {
1360       ++p;
1361     }
1362     buffLength = (int32_t)(p - buffer);
1363   }
1364 
1365   releaseArray();
1366 
1367   fUnion.fFields.fLengthAndFlags = kWritableAlias;
1368   setArray(buffer, buffLength, buffCapacity);
1369   return *this;
1370 }
1371 
setToUTF8(StringPiece utf8)1372 UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
1373   unBogus();
1374   int32_t length = utf8.length();
1375   int32_t capacity;
1376   // The UTF-16 string will be at most as long as the UTF-8 string.
1377   if(length <= US_STACKBUF_SIZE) {
1378     capacity = US_STACKBUF_SIZE;
1379   } else {
1380     capacity = length + 1;  // +1 for the terminating NUL.
1381   }
1382   char16_t *utf16 = getBuffer(capacity);
1383   int32_t length16;
1384   UErrorCode errorCode = U_ZERO_ERROR;
1385   u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1386       utf8.data(), length,
1387       0xfffd,  // Substitution character.
1388       nullptr,    // Don't care about number of substitutions.
1389       &errorCode);
1390   releaseBuffer(length16);
1391   if(U_FAILURE(errorCode)) {
1392     setToBogus();
1393   }
1394   return *this;
1395 }
1396 
1397 UnicodeString&
setCharAt(int32_t offset,char16_t c)1398 UnicodeString::setCharAt(int32_t offset,
1399              char16_t c)
1400 {
1401   int32_t len = length();
1402   if(cloneArrayIfNeeded() && len > 0) {
1403     if(offset < 0) {
1404       offset = 0;
1405     } else if(offset >= len) {
1406       offset = len - 1;
1407     }
1408 
1409     getArrayStart()[offset] = c;
1410   }
1411   return *this;
1412 }
1413 
1414 UnicodeString&
replace(int32_t start,int32_t _length,UChar32 srcChar)1415 UnicodeString::replace(int32_t start,
1416                int32_t _length,
1417                UChar32 srcChar) {
1418   char16_t buffer[U16_MAX_LENGTH];
1419   int32_t count = 0;
1420   UBool isError = false;
1421   U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
1422   // We test isError so that the compiler does not complain that we don't.
1423   // If isError (srcChar is not a valid code point) then count==0 which means
1424   // we remove the source segment rather than replacing it with srcChar.
1425   return doReplace(start, _length, buffer, 0, isError ? 0 : count);
1426 }
1427 
1428 UnicodeString&
append(UChar32 srcChar)1429 UnicodeString::append(UChar32 srcChar) {
1430   char16_t buffer[U16_MAX_LENGTH];
1431   int32_t _length = 0;
1432   UBool isError = false;
1433   U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
1434   // We test isError so that the compiler does not complain that we don't.
1435   // If isError then _length==0 which turns the doAppend() into a no-op anyway.
1436   return isError ? *this : doAppend(buffer, 0, _length);
1437 }
1438 
1439 UnicodeString&
doReplace(int32_t start,int32_t length,const UnicodeString & src,int32_t srcStart,int32_t srcLength)1440 UnicodeString::doReplace( int32_t start,
1441               int32_t length,
1442               const UnicodeString& src,
1443               int32_t srcStart,
1444               int32_t srcLength)
1445 {
1446   // pin the indices to legal values
1447   src.pinIndices(srcStart, srcLength);
1448 
1449   // get the characters from src
1450   // and replace the range in ourselves with them
1451   return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1452 }
1453 
1454 UnicodeString&
doReplace(int32_t start,int32_t length,const char16_t * srcChars,int32_t srcStart,int32_t srcLength)1455 UnicodeString::doReplace(int32_t start,
1456              int32_t length,
1457              const char16_t *srcChars,
1458              int32_t srcStart,
1459              int32_t srcLength)
1460 {
1461   if(!isWritable()) {
1462     return *this;
1463   }
1464 
1465   int32_t oldLength = this->length();
1466 
1467   // optimize (read-only alias).remove(0, start) and .remove(start, end)
1468   if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) {
1469     if(start == 0) {
1470       // remove prefix by adjusting the array pointer
1471       pinIndex(length);
1472       fUnion.fFields.fArray += length;
1473       fUnion.fFields.fCapacity -= length;
1474       setLength(oldLength - length);
1475       return *this;
1476     } else {
1477       pinIndex(start);
1478       if(length >= (oldLength - start)) {
1479         // remove suffix by reducing the length (like truncate())
1480         setLength(start);
1481         fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
1482         return *this;
1483       }
1484     }
1485   }
1486 
1487   if(start == oldLength) {
1488     return doAppend(srcChars, srcStart, srcLength);
1489   }
1490 
1491   if(srcChars == 0) {
1492     srcLength = 0;
1493   } else {
1494     // Perform all remaining operations relative to srcChars + srcStart.
1495     // From this point forward, do not use srcStart.
1496     srcChars += srcStart;
1497     if (srcLength < 0) {
1498       // get the srcLength if necessary
1499       srcLength = u_strlen(srcChars);
1500     }
1501   }
1502 
1503   // pin the indices to legal values
1504   pinIndices(start, length);
1505 
1506   // Calculate the size of the string after the replace.
1507   // Avoid int32_t overflow.
1508   int32_t newLength = oldLength - length;
1509   if(srcLength > (INT32_MAX - newLength)) {
1510     setToBogus();
1511     return *this;
1512   }
1513   newLength += srcLength;
1514 
1515   // Check for insertion into ourself
1516   const char16_t *oldArray = getArrayStart();
1517   if (isBufferWritable() &&
1518       oldArray < srcChars + srcLength &&
1519       srcChars < oldArray + oldLength) {
1520     // Copy into a new UnicodeString and start over
1521     UnicodeString copy(srcChars, srcLength);
1522     if (copy.isBogus()) {
1523       setToBogus();
1524       return *this;
1525     }
1526     return doReplace(start, length, copy.getArrayStart(), 0, srcLength);
1527   }
1528 
1529   // cloneArrayIfNeeded(doCopyArray=false) may change fArray but will not copy the current contents;
1530   // therefore we need to keep the current fArray
1531   char16_t oldStackBuffer[US_STACKBUF_SIZE];
1532   if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
1533     // copy the stack buffer contents because it will be overwritten with
1534     // fUnion.fFields values
1535     u_memcpy(oldStackBuffer, oldArray, oldLength);
1536     oldArray = oldStackBuffer;
1537   }
1538 
1539   // clone our array and allocate a bigger array if needed
1540   int32_t *bufferToDelete = 0;
1541   if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
1542                          false, &bufferToDelete)
1543   ) {
1544     return *this;
1545   }
1546 
1547   // now do the replace
1548 
1549   char16_t *newArray = getArrayStart();
1550   if(newArray != oldArray) {
1551     // if fArray changed, then we need to copy everything except what will change
1552     us_arrayCopy(oldArray, 0, newArray, 0, start);
1553     us_arrayCopy(oldArray, start + length,
1554                  newArray, start + srcLength,
1555                  oldLength - (start + length));
1556   } else if(length != srcLength) {
1557     // fArray did not change; copy only the portion that isn't changing, leaving a hole
1558     us_arrayCopy(oldArray, start + length,
1559                  newArray, start + srcLength,
1560                  oldLength - (start + length));
1561   }
1562 
1563   // now fill in the hole with the new string
1564   us_arrayCopy(srcChars, 0, newArray, start, srcLength);
1565 
1566   setLength(newLength);
1567 
1568   // delayed delete in case srcChars == fArray when we started, and
1569   // to keep oldArray alive for the above operations
1570   if (bufferToDelete) {
1571     uprv_free(bufferToDelete);
1572   }
1573 
1574   return *this;
1575 }
1576 
1577 // Versions of doReplace() only for append() variants.
1578 // doReplace() and doAppend() optimize for different cases.
1579 
1580 UnicodeString&
doAppend(const UnicodeString & src,int32_t srcStart,int32_t srcLength)1581 UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
1582   if(srcLength == 0) {
1583     return *this;
1584   }
1585 
1586   // pin the indices to legal values
1587   src.pinIndices(srcStart, srcLength);
1588   return doAppend(src.getArrayStart(), srcStart, srcLength);
1589 }
1590 
1591 UnicodeString&
doAppend(const char16_t * srcChars,int32_t srcStart,int32_t srcLength)1592 UnicodeString::doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) {
1593   if(!isWritable() || srcLength == 0 || srcChars == nullptr) {
1594     return *this;
1595   }
1596 
1597   // Perform all remaining operations relative to srcChars + srcStart.
1598   // From this point forward, do not use srcStart.
1599   srcChars += srcStart;
1600 
1601   if(srcLength < 0) {
1602     // get the srcLength if necessary
1603     if((srcLength = u_strlen(srcChars)) == 0) {
1604       return *this;
1605     }
1606   }
1607 
1608   int32_t oldLength = length();
1609   int32_t newLength;
1610   if (uprv_add32_overflow(oldLength, srcLength, &newLength)) {
1611     setToBogus();
1612     return *this;
1613   }
1614 
1615   // Check for append onto ourself
1616   const char16_t* oldArray = getArrayStart();
1617   if (isBufferWritable() &&
1618       oldArray < srcChars + srcLength &&
1619       srcChars < oldArray + oldLength) {
1620     // Copy into a new UnicodeString and start over
1621     UnicodeString copy(srcChars, srcLength);
1622     if (copy.isBogus()) {
1623       setToBogus();
1624       return *this;
1625     }
1626     return doAppend(copy.getArrayStart(), 0, srcLength);
1627   }
1628 
1629   // optimize append() onto a large-enough, owned string
1630   if((newLength <= getCapacity() && isBufferWritable()) ||
1631       cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
1632     char16_t *newArray = getArrayStart();
1633     // Do not copy characters when
1634     //   char16_t *buffer=str.getAppendBuffer(...);
1635     // is followed by
1636     //   str.append(buffer, length);
1637     // or
1638     //   str.appendString(buffer, length)
1639     // or similar.
1640     if(srcChars != newArray + oldLength) {
1641       us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength);
1642     }
1643     setLength(newLength);
1644   }
1645   return *this;
1646 }
1647 
1648 /**
1649  * Replaceable API
1650  */
1651 void
handleReplaceBetween(int32_t start,int32_t limit,const UnicodeString & text)1652 UnicodeString::handleReplaceBetween(int32_t start,
1653                                     int32_t limit,
1654                                     const UnicodeString& text) {
1655     replaceBetween(start, limit, text);
1656 }
1657 
1658 /**
1659  * Replaceable API
1660  */
1661 void
copy(int32_t start,int32_t limit,int32_t dest)1662 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1663     if (limit <= start) {
1664         return; // Nothing to do; avoid bogus malloc call
1665     }
1666     char16_t* text = (char16_t*) uprv_malloc( sizeof(char16_t) * (limit - start) );
1667     // Check to make sure text is not null.
1668     if (text != nullptr) {
1669 	    extractBetween(start, limit, text, 0);
1670 	    insert(dest, text, 0, limit - start);
1671 	    uprv_free(text);
1672     }
1673 }
1674 
1675 /**
1676  * Replaceable API
1677  *
1678  * NOTE: This is for the Replaceable class.  There is no rep.cpp,
1679  * so we implement this function here.
1680  */
hasMetaData() const1681 UBool Replaceable::hasMetaData() const {
1682     return true;
1683 }
1684 
1685 /**
1686  * Replaceable API
1687  */
hasMetaData() const1688 UBool UnicodeString::hasMetaData() const {
1689     return false;
1690 }
1691 
1692 UnicodeString&
doReverse(int32_t start,int32_t length)1693 UnicodeString::doReverse(int32_t start, int32_t length) {
1694   if(length <= 1 || !cloneArrayIfNeeded()) {
1695     return *this;
1696   }
1697 
1698   // pin the indices to legal values
1699   pinIndices(start, length);
1700   if(length <= 1) {  // pinIndices() might have shrunk the length
1701     return *this;
1702   }
1703 
1704   char16_t *left = getArrayStart() + start;
1705   char16_t *right = left + length - 1;  // -1 for inclusive boundary (length>=2)
1706   char16_t swap;
1707   UBool hasSupplementary = false;
1708 
1709   // Before the loop we know left<right because length>=2.
1710   do {
1711     hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1712     hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1713     *right-- = swap;
1714   } while(left < right);
1715   // Make sure to test the middle code unit of an odd-length string.
1716   // Redundant if the length is even.
1717   hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1718 
1719   /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1720   if(hasSupplementary) {
1721     char16_t swap2;
1722 
1723     left = getArrayStart() + start;
1724     right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1725     while(left < right) {
1726       if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1727         *left++ = swap2;
1728         *left++ = swap;
1729       } else {
1730         ++left;
1731       }
1732     }
1733   }
1734 
1735   return *this;
1736 }
1737 
1738 UBool
padLeading(int32_t targetLength,char16_t padChar)1739 UnicodeString::padLeading(int32_t targetLength,
1740                           char16_t padChar)
1741 {
1742   int32_t oldLength = length();
1743   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1744     return false;
1745   } else {
1746     // move contents up by padding width
1747     char16_t *array = getArrayStart();
1748     int32_t start = targetLength - oldLength;
1749     us_arrayCopy(array, 0, array, start, oldLength);
1750 
1751     // fill in padding character
1752     while(--start >= 0) {
1753       array[start] = padChar;
1754     }
1755     setLength(targetLength);
1756     return true;
1757   }
1758 }
1759 
1760 UBool
padTrailing(int32_t targetLength,char16_t padChar)1761 UnicodeString::padTrailing(int32_t targetLength,
1762                            char16_t padChar)
1763 {
1764   int32_t oldLength = length();
1765   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1766     return false;
1767   } else {
1768     // fill in padding character
1769     char16_t *array = getArrayStart();
1770     int32_t length = targetLength;
1771     while(--length >= oldLength) {
1772       array[length] = padChar;
1773     }
1774     setLength(targetLength);
1775     return true;
1776   }
1777 }
1778 
1779 //========================================
1780 // Hashing
1781 //========================================
1782 int32_t
doHashCode() const1783 UnicodeString::doHashCode() const
1784 {
1785     /* Delegate hash computation to uhash.  This makes UnicodeString
1786      * hashing consistent with char16_t* hashing.  */
1787     int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
1788     if (hashCode == kInvalidHashCode) {
1789         hashCode = kEmptyHashCode;
1790     }
1791     return hashCode;
1792 }
1793 
1794 //========================================
1795 // External Buffer
1796 //========================================
1797 
1798 char16_t *
getBuffer(int32_t minCapacity)1799 UnicodeString::getBuffer(int32_t minCapacity) {
1800   if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1801     fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer;
1802     setZeroLength();
1803     return getArrayStart();
1804   } else {
1805     return nullptr;
1806   }
1807 }
1808 
1809 void
releaseBuffer(int32_t newLength)1810 UnicodeString::releaseBuffer(int32_t newLength) {
1811   if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {
1812     // set the new fLength
1813     int32_t capacity=getCapacity();
1814     if(newLength==-1) {
1815       // the new length is the string length, capped by fCapacity
1816       const char16_t *array=getArrayStart(), *p=array, *limit=array+capacity;
1817       while(p<limit && *p!=0) {
1818         ++p;
1819       }
1820       newLength=(int32_t)(p-array);
1821     } else if(newLength>capacity) {
1822       newLength=capacity;
1823     }
1824     setLength(newLength);
1825     fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
1826   }
1827 }
1828 
//========================================
// Miscellaneous
//========================================
1832 UBool
cloneArrayIfNeeded(int32_t newCapacity,int32_t growCapacity,UBool doCopyArray,int32_t ** pBufferToDelete,UBool forceClone)1833 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1834                                   int32_t growCapacity,
1835                                   UBool doCopyArray,
1836                                   int32_t **pBufferToDelete,
1837                                   UBool forceClone) {
1838   // default parameters need to be static, therefore
1839   // the defaults are -1 to have convenience defaults
1840   if(newCapacity == -1) {
1841     newCapacity = getCapacity();
1842   }
1843 
1844   // while a getBuffer(minCapacity) is "open",
1845   // prevent any modifications of the string by returning false here
1846   // if the string is bogus, then only an assignment or similar can revive it
1847   if(!isWritable()) {
1848     return false;
1849   }
1850 
1851   /*
1852    * We need to make a copy of the array if
1853    * the buffer is read-only, or
1854    * the buffer is refCounted (shared), and refCount>1, or
1855    * the buffer is too small.
1856    * Return false if memory could not be allocated.
1857    */
1858   if(forceClone ||
1859      fUnion.fFields.fLengthAndFlags & kBufferIsReadonly ||
1860      (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) ||
1861      newCapacity > getCapacity()
1862   ) {
1863     // check growCapacity for default value and use of the stack buffer
1864     if(growCapacity < 0) {
1865       growCapacity = newCapacity;
1866     } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1867       growCapacity = US_STACKBUF_SIZE;
1868     }
1869 
1870     // save old values
1871     char16_t oldStackBuffer[US_STACKBUF_SIZE];
1872     char16_t *oldArray;
1873     int32_t oldLength = length();
1874     int16_t flags = fUnion.fFields.fLengthAndFlags;
1875 
1876     if(flags&kUsingStackBuffer) {
1877       U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
1878       if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1879         // copy the stack buffer contents because it will be overwritten with
1880         // fUnion.fFields values
1881         us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
1882         oldArray = oldStackBuffer;
1883       } else {
1884         oldArray = nullptr; // no need to copy from the stack buffer to itself
1885       }
1886     } else {
1887       oldArray = fUnion.fFields.fArray;
1888       U_ASSERT(oldArray!=nullptr); /* when stack buffer is not used, oldArray must have a non-nullptr reference */
1889     }
1890 
1891     // allocate a new array
1892     if(allocate(growCapacity) ||
1893        (newCapacity < growCapacity && allocate(newCapacity))
1894     ) {
1895       if(doCopyArray) {
1896         // copy the contents
1897         // do not copy more than what fits - it may be smaller than before
1898         int32_t minLength = oldLength;
1899         newCapacity = getCapacity();
1900         if(newCapacity < minLength) {
1901           minLength = newCapacity;
1902         }
1903         if(oldArray != nullptr) {
1904           us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1905         }
1906         setLength(minLength);
1907       } else {
1908         setZeroLength();
1909       }
1910 
1911       // release the old array
1912       if(flags & kRefCounted) {
1913         // the array is refCounted; decrement and release if 0
1914         u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
1915         if(umtx_atomic_dec(pRefCount) == 0) {
1916           if(pBufferToDelete == 0) {
1917               // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
1918               // is defined as volatile. (Volatile has useful non-standard behavior
1919               //   with this compiler.)
1920             uprv_free((void *)pRefCount);
1921           } else {
1922             // the caller requested to delete it himself
1923             *pBufferToDelete = (int32_t *)pRefCount;
1924           }
1925         }
1926       }
1927     } else {
1928       // not enough memory for growCapacity and not even for the smaller newCapacity
1929       // reset the old values for setToBogus() to release the array
1930       if(!(flags&kUsingStackBuffer)) {
1931         fUnion.fFields.fArray = oldArray;
1932       }
1933       fUnion.fFields.fLengthAndFlags = flags;
1934       setToBogus();
1935       return false;
1936     }
1937   }
1938   return true;
1939 }
1940 
// UnicodeStringAppendable ------------------------------------------------- ***
1942 
~UnicodeStringAppendable()1943 UnicodeStringAppendable::~UnicodeStringAppendable() {}
1944 
1945 UBool
appendCodeUnit(char16_t c)1946 UnicodeStringAppendable::appendCodeUnit(char16_t c) {
1947   return str.doAppend(&c, 0, 1).isWritable();
1948 }
1949 
1950 UBool
appendCodePoint(UChar32 c)1951 UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1952   char16_t buffer[U16_MAX_LENGTH];
1953   int32_t cLength = 0;
1954   UBool isError = false;
1955   U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
1956   return !isError && str.doAppend(buffer, 0, cLength).isWritable();
1957 }
1958 
1959 UBool
appendString(const char16_t * s,int32_t length)1960 UnicodeStringAppendable::appendString(const char16_t *s, int32_t length) {
1961   return str.doAppend(s, 0, length).isWritable();
1962 }
1963 
1964 UBool
reserveAppendCapacity(int32_t appendCapacity)1965 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1966   return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1967 }
1968 
1969 char16_t *
getAppendBuffer(int32_t minCapacity,int32_t desiredCapacityHint,char16_t * scratch,int32_t scratchCapacity,int32_t * resultCapacity)1970 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1971                                          int32_t desiredCapacityHint,
1972                                          char16_t *scratch, int32_t scratchCapacity,
1973                                          int32_t *resultCapacity) {
1974   if(minCapacity < 1 || scratchCapacity < minCapacity) {
1975     *resultCapacity = 0;
1976     return nullptr;
1977   }
1978   int32_t oldLength = str.length();
1979   if(minCapacity <= (kMaxCapacity - oldLength) &&
1980       desiredCapacityHint <= (kMaxCapacity - oldLength) &&
1981       str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
1982     *resultCapacity = str.getCapacity() - oldLength;
1983     return str.getArrayStart() + oldLength;
1984   }
1985   *resultCapacity = scratchCapacity;
1986   return scratch;
1987 }
1988 
1989 U_NAMESPACE_END
1990 
1991 U_NAMESPACE_USE
1992 
1993 U_CAPI int32_t U_EXPORT2
uhash_hashUnicodeString(const UElement key)1994 uhash_hashUnicodeString(const UElement key) {
1995     const UnicodeString *str = (const UnicodeString*) key.pointer;
1996     return (str == nullptr) ? 0 : str->hashCode();
1997 }
1998 
// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
// does not depend on hashtable code.
2001 U_CAPI UBool U_EXPORT2
uhash_compareUnicodeString(const UElement key1,const UElement key2)2002 uhash_compareUnicodeString(const UElement key1, const UElement key2) {
2003     const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
2004     const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
2005     if (str1 == str2) {
2006         return true;
2007     }
2008     if (str1 == nullptr || str2 == nullptr) {
2009         return false;
2010     }
2011     return *str1 == *str2;
2012 }
2013 
#ifdef U_STATIC_IMPLEMENTATION
/*
 * This function should never be called. It exists only so that the virtual
 * vector deleting destructor is defined within unistr.cpp.
 * The vector deleting destructor is already a part of UObject, but defining
 * it here makes sure that it is included with this object file, which keeps
 * static library dependencies to a minimum.
 * The unused-function diagnostic is silenced because nothing calls it.
 */
#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 1100
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-function"
static void uprv_UnicodeStringDummy() {
    UnicodeString *dummyArray = new UnicodeString[2];
    delete [] dummyArray;
}
#pragma GCC diagnostic pop
#endif
#endif
2031