xref: /aosp_15_r20/external/pdfium/core/fxcrt/widestring.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fxcrt/widestring.h"
8 
9 #include <stddef.h>
10 #include <string.h>
11 
12 #include <algorithm>
13 #include <sstream>
14 
15 #include "core/fxcrt/fx_codepage.h"
16 #include "core/fxcrt/fx_extension.h"
17 #include "core/fxcrt/fx_memcpy_wrappers.h"
18 #include "core/fxcrt/fx_safe_types.h"
19 #include "core/fxcrt/fx_system.h"
20 #include "core/fxcrt/string_pool_template.h"
21 #include "third_party/base/check.h"
22 #include "third_party/base/check_op.h"
23 #include "third_party/base/numerics/safe_math.h"
24 
// Explicit instantiation definitions for the wchar_t flavours of the shared
// string templates, the WideString string pool, and std::hash<WideString>.
template class fxcrt::StringDataTemplate<wchar_t>;
template class fxcrt::StringViewTemplate<wchar_t>;
template class fxcrt::StringPoolTemplate<WideString>;
template struct std::hash<WideString>;
29 
// Length-modifier flags used by GuessSizeForVSWPrintf(). Each is a distinct
// single bit (bits 16, 17 and 18) that is ORed with the conversion character
// before switching on it.
#define FORCE_ANSI 0x10000
#define FORCE_UNICODE 0x20000
#define FORCE_INT64 0x40000
33 
34 namespace {
35 
// Tab, line feed, vertical tab, form feed, carriage return and space.
// NOTE(review): presumably the default set for trimming; not used in the
// portion of the file visible here.
constexpr wchar_t kWideTrimChars[] = L"\x09\x0a\x0b\x0c\x0d\x20";
37 
// Locates the first occurrence of the |needle_len| characters at |needle|
// inside the |haystack_len| characters at |haystack|. Neither buffer needs to
// be NUL-terminated. Returns a pointer to the start of the match, or nullptr
// when the needle is empty, longer than the haystack, or not present.
const wchar_t* FX_wcsstr(const wchar_t* haystack,
                         size_t haystack_len,
                         const wchar_t* needle,
                         size_t needle_len) {
  if (needle_len == 0 || needle_len > haystack_len)
    return nullptr;

  const wchar_t* const haystack_end = haystack + haystack_len;
  const wchar_t* const match =
      std::search(haystack, haystack_end, needle, needle + needle_len);
  return match == haystack_end ? nullptr : match;
}
60 
GuessSizeForVSWPrintf(const wchar_t * pFormat,va_list argList)61 absl::optional<size_t> GuessSizeForVSWPrintf(const wchar_t* pFormat,
62                                              va_list argList) {
63   size_t nMaxLen = 0;
64   for (const wchar_t* pStr = pFormat; *pStr != 0; pStr++) {
65     if (*pStr != '%' || *(pStr = pStr + 1) == '%') {
66       ++nMaxLen;
67       continue;
68     }
69     int iWidth = 0;
70     for (; *pStr != 0; pStr++) {
71       if (*pStr == '#') {
72         nMaxLen += 2;
73       } else if (*pStr == '*') {
74         iWidth = va_arg(argList, int);
75       } else if (*pStr != '-' && *pStr != '+' && *pStr != '0' && *pStr != ' ') {
76         break;
77       }
78     }
79     if (iWidth == 0) {
80       iWidth = FXSYS_wtoi(pStr);
81       while (FXSYS_IsDecimalDigit(*pStr))
82         ++pStr;
83     }
84     if (iWidth < 0 || iWidth > 128 * 1024)
85       return absl::nullopt;
86     uint32_t nWidth = static_cast<uint32_t>(iWidth);
87     int iPrecision = 0;
88     if (*pStr == '.') {
89       pStr++;
90       if (*pStr == '*') {
91         iPrecision = va_arg(argList, int);
92         pStr++;
93       } else {
94         iPrecision = FXSYS_wtoi(pStr);
95         while (FXSYS_IsDecimalDigit(*pStr))
96           ++pStr;
97       }
98     }
99     if (iPrecision < 0 || iPrecision > 128 * 1024)
100       return absl::nullopt;
101     uint32_t nPrecision = static_cast<uint32_t>(iPrecision);
102     int nModifier = 0;
103     if (*pStr == L'I' && *(pStr + 1) == L'6' && *(pStr + 2) == L'4') {
104       pStr += 3;
105       nModifier = FORCE_INT64;
106     } else {
107       switch (*pStr) {
108         case 'h':
109           nModifier = FORCE_ANSI;
110           pStr++;
111           break;
112         case 'l':
113           nModifier = FORCE_UNICODE;
114           pStr++;
115           break;
116         case 'F':
117         case 'N':
118         case 'L':
119           pStr++;
120           break;
121       }
122     }
123     size_t nItemLen = 0;
124     switch (*pStr | nModifier) {
125       case 'c':
126       case 'C':
127         nItemLen = 2;
128         va_arg(argList, int);
129         break;
130       case 'c' | FORCE_ANSI:
131       case 'C' | FORCE_ANSI:
132         nItemLen = 2;
133         va_arg(argList, int);
134         break;
135       case 'c' | FORCE_UNICODE:
136       case 'C' | FORCE_UNICODE:
137         nItemLen = 2;
138         va_arg(argList, int);
139         break;
140       case 's': {
141         const wchar_t* pstrNextArg = va_arg(argList, const wchar_t*);
142         if (pstrNextArg) {
143           nItemLen = wcslen(pstrNextArg);
144           if (nItemLen < 1) {
145             nItemLen = 1;
146           }
147         } else {
148           nItemLen = 6;
149         }
150       } break;
151       case 'S': {
152         const char* pstrNextArg = va_arg(argList, const char*);
153         if (pstrNextArg) {
154           nItemLen = strlen(pstrNextArg);
155           if (nItemLen < 1) {
156             nItemLen = 1;
157           }
158         } else {
159           nItemLen = 6;
160         }
161       } break;
162       case 's' | FORCE_ANSI:
163       case 'S' | FORCE_ANSI: {
164         const char* pstrNextArg = va_arg(argList, const char*);
165         if (pstrNextArg) {
166           nItemLen = strlen(pstrNextArg);
167           if (nItemLen < 1) {
168             nItemLen = 1;
169           }
170         } else {
171           nItemLen = 6;
172         }
173       } break;
174       case 's' | FORCE_UNICODE:
175       case 'S' | FORCE_UNICODE: {
176         const wchar_t* pstrNextArg = va_arg(argList, wchar_t*);
177         if (pstrNextArg) {
178           nItemLen = wcslen(pstrNextArg);
179           if (nItemLen < 1) {
180             nItemLen = 1;
181           }
182         } else {
183           nItemLen = 6;
184         }
185       } break;
186     }
187     if (nItemLen != 0) {
188       if (nPrecision != 0 && nItemLen > nPrecision) {
189         nItemLen = nPrecision;
190       }
191       if (nItemLen < nWidth) {
192         nItemLen = nWidth;
193       }
194     } else {
195       switch (*pStr) {
196         case 'd':
197         case 'i':
198         case 'u':
199         case 'x':
200         case 'X':
201         case 'o':
202           if (nModifier & FORCE_INT64) {
203             va_arg(argList, int64_t);
204           } else {
205             va_arg(argList, int);
206           }
207           nItemLen = 32;
208           if (nItemLen < nWidth + nPrecision) {
209             nItemLen = nWidth + nPrecision;
210           }
211           break;
212         case 'a':
213         case 'A':
214         case 'e':
215         case 'E':
216         case 'g':
217         case 'G':
218           va_arg(argList, double);
219           nItemLen = 128;
220           if (nItemLen < nWidth + nPrecision) {
221             nItemLen = nWidth + nPrecision;
222           }
223           break;
224         case 'f':
225           if (nWidth + nPrecision > 100) {
226             nItemLen = nPrecision + nWidth + 128;
227           } else {
228             double f;
229             char pszTemp[256];
230             f = va_arg(argList, double);
231             FXSYS_snprintf(pszTemp, sizeof(pszTemp), "%*.*f", nWidth,
232                            nPrecision + 6, f);
233             nItemLen = strlen(pszTemp);
234           }
235           break;
236         case 'p':
237           va_arg(argList, void*);
238           nItemLen = 32;
239           if (nItemLen < nWidth + nPrecision) {
240             nItemLen = nWidth + nPrecision;
241           }
242           break;
243         case 'n':
244           va_arg(argList, int*);
245           break;
246       }
247     }
248     nMaxLen += nItemLen;
249   }
250   nMaxLen += 32;  // Fudge factor.
251   return nMaxLen;
252 }
253 
254 // Returns string unless we ran out of space.
TryVSWPrintf(size_t size,const wchar_t * pFormat,va_list argList)255 absl::optional<WideString> TryVSWPrintf(size_t size,
256                                         const wchar_t* pFormat,
257                                         va_list argList) {
258   if (!size)
259     return absl::nullopt;
260 
261   WideString str;
262   {
263     // Span's lifetime must end before ReleaseBuffer() below.
264     pdfium::span<wchar_t> buffer = str.GetBuffer(size);
265 
266     // In the following two calls, there's always space in the WideString
267     // for a terminating NUL that's not included in the span.
268     // For vswprintf(), MSAN won't untaint the buffer on a truncated write's
269     // -1 return code even though the buffer is written. Probably just as well
270     // not to trust the vendor's implementation to write anything anyways.
271     // See https://crbug.com/705912.
272     memset(buffer.data(), 0, (size + 1) * sizeof(wchar_t));
273     int ret = vswprintf(buffer.data(), size + 1, pFormat, argList);
274 
275     bool bSufficientBuffer = ret >= 0 || buffer[size - 1] == 0;
276     if (!bSufficientBuffer)
277       return absl::nullopt;
278   }
279   str.ReleaseBuffer(str.GetStringLength());
280   return str;
281 }
282 
283 }  // namespace
284 
285 namespace fxcrt {
286 
// Compile-time guard: a WideString object may be no larger than one pointer.
static_assert(sizeof(WideString) <= sizeof(wchar_t*),
              "Strings must not require more space than pointers");
289 
290 // static
FormatInteger(int i)291 WideString WideString::FormatInteger(int i) {
292   wchar_t wbuf[32];
293   swprintf(wbuf, std::size(wbuf), L"%d", i);
294   return WideString(wbuf);
295 }
296 
297 // static
FormatV(const wchar_t * format,va_list argList)298 WideString WideString::FormatV(const wchar_t* format, va_list argList) {
299   va_list argListCopy;
300   va_copy(argListCopy, argList);
301   auto guess = GuessSizeForVSWPrintf(format, argListCopy);
302   va_end(argListCopy);
303 
304   if (!guess.has_value()) {
305     return WideString();
306   }
307   int maxLen = pdfium::base::checked_cast<int>(guess.value());
308 
309   while (maxLen < 32 * 1024) {
310     va_copy(argListCopy, argList);
311     absl::optional<WideString> ret =
312         TryVSWPrintf(static_cast<size_t>(maxLen), format, argListCopy);
313     va_end(argListCopy);
314     if (ret.has_value())
315       return ret.value();
316 
317     maxLen *= 2;
318   }
319   return WideString();
320 }
321 
322 // static
Format(const wchar_t * pFormat,...)323 WideString WideString::Format(const wchar_t* pFormat, ...) {
324   va_list argList;
325   va_start(argList, pFormat);
326   WideString ret = FormatV(pFormat, argList);
327   va_end(argList);
328   return ret;
329 }
330 
// Default construction is compiler-generated.
WideString::WideString() = default;
332 
// Copying shares |other|'s data pointer; no characters are copied here.
WideString::WideString(const WideString& other) : m_pData(other.m_pData) {}
334 
// Move construction swaps the data pointer of the new object with |other|'s,
// taking over whatever |other| held.
WideString::WideString(WideString&& other) noexcept {
  m_pData.Swap(other.m_pData);
}
338 
WideString(const wchar_t * pStr,size_t nLen)339 WideString::WideString(const wchar_t* pStr, size_t nLen) {
340   if (nLen)
341     m_pData.Reset(StringData::Create(pStr, nLen));
342 }
343 
WideString(wchar_t ch)344 WideString::WideString(wchar_t ch) {
345   m_pData.Reset(StringData::Create(1));
346   m_pData->m_String[0] = ch;
347 }
348 
WideString(const wchar_t * ptr)349 WideString::WideString(const wchar_t* ptr)
350     : WideString(ptr, ptr ? wcslen(ptr) : 0) {}
351 
WideString(WideStringView stringSrc)352 WideString::WideString(WideStringView stringSrc) {
353   if (!stringSrc.IsEmpty()) {
354     m_pData.Reset(StringData::Create(stringSrc.unterminated_c_str(),
355                                      stringSrc.GetLength()));
356   }
357 }
358 
WideString(WideStringView str1,WideStringView str2)359 WideString::WideString(WideStringView str1, WideStringView str2) {
360   FX_SAFE_SIZE_T nSafeLen = str1.GetLength();
361   nSafeLen += str2.GetLength();
362 
363   size_t nNewLen = nSafeLen.ValueOrDie();
364   if (nNewLen == 0)
365     return;
366 
367   m_pData.Reset(StringData::Create(nNewLen));
368   m_pData->CopyContents(str1.unterminated_c_str(), str1.GetLength());
369   m_pData->CopyContentsAt(str1.GetLength(), str2.unterminated_c_str(),
370                           str2.GetLength());
371 }
372 
WideString(const std::initializer_list<WideStringView> & list)373 WideString::WideString(const std::initializer_list<WideStringView>& list) {
374   FX_SAFE_SIZE_T nSafeLen = 0;
375   for (const auto& item : list)
376     nSafeLen += item.GetLength();
377 
378   size_t nNewLen = nSafeLen.ValueOrDie();
379   if (nNewLen == 0)
380     return;
381 
382   m_pData.Reset(StringData::Create(nNewLen));
383 
384   size_t nOffset = 0;
385   for (const auto& item : list) {
386     m_pData->CopyContentsAt(nOffset, item.unterminated_c_str(),
387                             item.GetLength());
388     nOffset += item.GetLength();
389   }
390 }
391 
// Destruction is compiler-generated.
WideString::~WideString() = default;
393 
clear()394 void WideString::clear() {
395   if (m_pData && m_pData->CanOperateInPlace(0)) {
396     m_pData->m_nDataLength = 0;
397     return;
398   }
399   m_pData.Reset();
400 }
401 
operator =(const wchar_t * str)402 WideString& WideString::operator=(const wchar_t* str) {
403   if (!str || !str[0])
404     clear();
405   else
406     AssignCopy(str, wcslen(str));
407 
408   return *this;
409 }
410 
operator =(WideStringView str)411 WideString& WideString::operator=(WideStringView str) {
412   if (str.IsEmpty())
413     clear();
414   else
415     AssignCopy(str.unterminated_c_str(), str.GetLength());
416 
417   return *this;
418 }
419 
// Copy assignment: shares |that|'s data pointer. Nothing happens when both
// strings already refer to the same data.
WideString& WideString::operator=(const WideString& that) {
  if (m_pData == that.m_pData)
    return *this;

  m_pData = that.m_pData;
  return *this;
}
426 
operator =(WideString && that)427 WideString& WideString::operator=(WideString&& that) noexcept {
428   if (m_pData != that.m_pData)
429     m_pData = std::move(that.m_pData);
430 
431   return *this;
432 }
433 
operator +=(const wchar_t * str)434 WideString& WideString::operator+=(const wchar_t* str) {
435   if (str)
436     Concat(str, wcslen(str));
437 
438   return *this;
439 }
440 
// Appends the single character |ch|.
WideString& WideString::operator+=(wchar_t ch) {
  Concat(&ch, 1);
  return *this;
}
445 
// Appends the contents of |str|. A string without data is ignored.
WideString& WideString::operator+=(const WideString& str) {
  if (!str.m_pData)
    return *this;

  Concat(str.m_pData->m_String, str.m_pData->m_nDataLength);
  return *this;
}
452 
operator +=(WideStringView str)453 WideString& WideString::operator+=(WideStringView str) {
454   if (!str.IsEmpty())
455     Concat(str.unterminated_c_str(), str.GetLength());
456 
457   return *this;
458 }
459 
operator ==(const wchar_t * ptr) const460 bool WideString::operator==(const wchar_t* ptr) const {
461   if (!m_pData)
462     return !ptr || !ptr[0];
463 
464   if (!ptr)
465     return m_pData->m_nDataLength == 0;
466 
467   return wcslen(ptr) == m_pData->m_nDataLength &&
468          FXSYS_wmemcmp(ptr, m_pData->m_String, m_pData->m_nDataLength) == 0;
469 }
470 
operator ==(WideStringView str) const471 bool WideString::operator==(WideStringView str) const {
472   if (!m_pData)
473     return str.IsEmpty();
474 
475   return m_pData->m_nDataLength == str.GetLength() &&
476          FXSYS_wmemcmp(m_pData->m_String, str.unterminated_c_str(),
477                        str.GetLength()) == 0;
478 }
479 
operator ==(const WideString & other) const480 bool WideString::operator==(const WideString& other) const {
481   if (m_pData == other.m_pData)
482     return true;
483 
484   if (IsEmpty())
485     return other.IsEmpty();
486 
487   if (other.IsEmpty())
488     return false;
489 
490   return other.m_pData->m_nDataLength == m_pData->m_nDataLength &&
491          wmemcmp(other.m_pData->m_String, m_pData->m_String,
492                  m_pData->m_nDataLength) == 0;
493 }
494 
// Ordering against a NUL-terminated buffer, defined in terms of Compare().
bool WideString::operator<(const wchar_t* ptr) const {
  return Compare(ptr) < 0;
}
498 
operator <(WideStringView str) const499 bool WideString::operator<(WideStringView str) const {
500   if (!m_pData && !str.unterminated_c_str())
501     return false;
502   if (c_str() == str.unterminated_c_str())
503     return false;
504 
505   size_t len = GetLength();
506   size_t other_len = str.GetLength();
507   int result = FXSYS_wmemcmp(c_str(), str.unterminated_c_str(),
508                              std::min(len, other_len));
509   return result < 0 || (result == 0 && len < other_len);
510 }
511 
// Ordering against another string, defined in terms of Compare().
bool WideString::operator<(const WideString& other) const {
  return Compare(other) < 0;
}
515 
AssignCopy(const wchar_t * pSrcData,size_t nSrcLen)516 void WideString::AssignCopy(const wchar_t* pSrcData, size_t nSrcLen) {
517   AllocBeforeWrite(nSrcLen);
518   m_pData->CopyContents(pSrcData, nSrcLen);
519   m_pData->m_nDataLength = nSrcLen;
520 }
521 
ReallocBeforeWrite(size_t nNewLength)522 void WideString::ReallocBeforeWrite(size_t nNewLength) {
523   if (m_pData && m_pData->CanOperateInPlace(nNewLength))
524     return;
525 
526   if (nNewLength == 0) {
527     clear();
528     return;
529   }
530 
531   RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
532   if (m_pData) {
533     size_t nCopyLength = std::min(m_pData->m_nDataLength, nNewLength);
534     pNewData->CopyContents(m_pData->m_String, nCopyLength);
535     pNewData->m_nDataLength = nCopyLength;
536   } else {
537     pNewData->m_nDataLength = 0;
538   }
539   pNewData->m_String[pNewData->m_nDataLength] = 0;
540   m_pData.Swap(pNewData);
541 }
542 
AllocBeforeWrite(size_t nNewLength)543 void WideString::AllocBeforeWrite(size_t nNewLength) {
544   if (m_pData && m_pData->CanOperateInPlace(nNewLength))
545     return;
546 
547   if (nNewLength == 0) {
548     clear();
549     return;
550   }
551 
552   m_pData.Reset(StringData::Create(nNewLength));
553 }
554 
ReleaseBuffer(size_t nNewLength)555 void WideString::ReleaseBuffer(size_t nNewLength) {
556   if (!m_pData)
557     return;
558 
559   nNewLength = std::min(nNewLength, m_pData->m_nAllocLength);
560   if (nNewLength == 0) {
561     clear();
562     return;
563   }
564 
565   DCHECK_EQ(m_pData->m_nRefs, 1);
566   m_pData->m_nDataLength = nNewLength;
567   m_pData->m_String[nNewLength] = 0;
568   if (m_pData->m_nAllocLength - nNewLength >= 32) {
569     // Over arbitrary threshold, so pay the price to relocate.  Force copy to
570     // always occur by holding a second reference to the string.
571     WideString preserve(*this);
572     ReallocBeforeWrite(nNewLength);
573   }
574 }
575 
// Requests a buffer of at least |len| characters via GetBuffer(); the
// returned span is discarded.
void WideString::Reserve(size_t len) {
  GetBuffer(len);
}
579 
GetBuffer(size_t nMinBufLength)580 pdfium::span<wchar_t> WideString::GetBuffer(size_t nMinBufLength) {
581   if (!m_pData) {
582     if (nMinBufLength == 0)
583       return pdfium::span<wchar_t>();
584 
585     m_pData.Reset(StringData::Create(nMinBufLength));
586     m_pData->m_nDataLength = 0;
587     m_pData->m_String[0] = 0;
588     return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
589   }
590 
591   if (m_pData->CanOperateInPlace(nMinBufLength))
592     return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
593 
594   nMinBufLength = std::max(nMinBufLength, m_pData->m_nDataLength);
595   if (nMinBufLength == 0)
596     return pdfium::span<wchar_t>();
597 
598   RetainPtr<StringData> pNewData(StringData::Create(nMinBufLength));
599   pNewData->CopyContents(*m_pData);
600   pNewData->m_nDataLength = m_pData->m_nDataLength;
601   m_pData.Swap(pNewData);
602   return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
603 }
604 
Delete(size_t index,size_t count)605 size_t WideString::Delete(size_t index, size_t count) {
606   if (!m_pData)
607     return 0;
608 
609   size_t old_length = m_pData->m_nDataLength;
610   if (count == 0 || index != std::clamp<size_t>(index, 0, old_length)) {
611     return old_length;
612   }
613 
614   size_t removal_length = index + count;
615   if (removal_length > old_length)
616     return old_length;
617 
618   ReallocBeforeWrite(old_length);
619   size_t chars_to_copy = old_length - removal_length + 1;
620   wmemmove(m_pData->m_String + index, m_pData->m_String + removal_length,
621            chars_to_copy);
622   m_pData->m_nDataLength = old_length - count;
623   return m_pData->m_nDataLength;
624 }
625 
Concat(const wchar_t * pSrcData,size_t nSrcLen)626 void WideString::Concat(const wchar_t* pSrcData, size_t nSrcLen) {
627   if (!pSrcData || nSrcLen == 0)
628     return;
629 
630   if (!m_pData) {
631     m_pData.Reset(StringData::Create(pSrcData, nSrcLen));
632     return;
633   }
634 
635   if (m_pData->CanOperateInPlace(m_pData->m_nDataLength + nSrcLen)) {
636     m_pData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
637     m_pData->m_nDataLength += nSrcLen;
638     return;
639   }
640 
641   size_t nConcatLen = std::max(m_pData->m_nDataLength / 2, nSrcLen);
642   RetainPtr<StringData> pNewData(
643       StringData::Create(m_pData->m_nDataLength + nConcatLen));
644   pNewData->CopyContents(*m_pData);
645   pNewData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
646   pNewData->m_nDataLength = m_pData->m_nDataLength + nSrcLen;
647   m_pData.Swap(pNewData);
648 }
649 
ReferenceCountForTesting() const650 intptr_t WideString::ReferenceCountForTesting() const {
651   return m_pData ? m_pData->m_nRefs : 0;
652 }
653 
ToASCII() const654 ByteString WideString::ToASCII() const {
655   ByteString result;
656   result.Reserve(GetLength());
657   for (wchar_t wc : *this)
658     result.InsertAtBack(static_cast<char>(wc & 0x7f));
659   return result;
660 }
661 
ToLatin1() const662 ByteString WideString::ToLatin1() const {
663   ByteString result;
664   result.Reserve(GetLength());
665   for (wchar_t wc : *this)
666     result.InsertAtBack(static_cast<char>(wc & 0xff));
667   return result;
668 }
669 
ToDefANSI() const670 ByteString WideString::ToDefANSI() const {
671   size_t dest_len =
672       FX_WideCharToMultiByte(FX_CodePage::kDefANSI, AsStringView(), {});
673   if (!dest_len)
674     return ByteString();
675 
676   ByteString bstr;
677   {
678     // Span's lifetime must end before ReleaseBuffer() below.
679     pdfium::span<char> dest_buf = bstr.GetBuffer(dest_len);
680     FX_WideCharToMultiByte(FX_CodePage::kDefANSI, AsStringView(), dest_buf);
681   }
682   bstr.ReleaseBuffer(dest_len);
683   return bstr;
684 }
685 
// Returns the result of FX_UTF8Encode() on this string's contents.
ByteString WideString::ToUTF8() const {
  return FX_UTF8Encode(AsStringView());
}
689 
ToUTF16LE() const690 ByteString WideString::ToUTF16LE() const {
691   if (!m_pData)
692     return ByteString("\0\0", 2);
693 
694   ByteString result;
695   size_t len = m_pData->m_nDataLength;
696   {
697     // Span's lifetime must end before ReleaseBuffer() below.
698     pdfium::span<char> buffer = result.GetBuffer(len * 2 + 2);
699     for (size_t i = 0; i < len; i++) {
700       buffer[i * 2] = m_pData->m_String[i] & 0xff;
701       buffer[i * 2 + 1] = m_pData->m_String[i] >> 8;
702     }
703     buffer[len * 2] = 0;
704     buffer[len * 2 + 1] = 0;
705   }
706   result.ReleaseBuffer(len * 2 + 2);
707   return result;
708 }
709 
EncodeEntities() const710 WideString WideString::EncodeEntities() const {
711   WideString ret = *this;
712   ret.Replace(L"&", L"&amp;");
713   ret.Replace(L"<", L"&lt;");
714   ret.Replace(L">", L"&gt;");
715   ret.Replace(L"\'", L"&apos;");
716   ret.Replace(L"\"", L"&quot;");
717   return ret;
718 }
719 
Substr(size_t offset) const720 WideString WideString::Substr(size_t offset) const {
721   // Unsigned underflow is well-defined and out-of-range is handled by Substr().
722   return Substr(offset, GetLength() - offset);
723 }
724 
Substr(size_t first,size_t count) const725 WideString WideString::Substr(size_t first, size_t count) const {
726   if (!m_pData)
727     return WideString();
728 
729   if (!IsValidIndex(first))
730     return WideString();
731 
732   if (count == 0 || !IsValidLength(count))
733     return WideString();
734 
735   if (!IsValidIndex(first + count - 1))
736     return WideString();
737 
738   if (first == 0 && count == GetLength())
739     return *this;
740 
741   WideString dest;
742   AllocCopy(dest, count, first);
743   return dest;
744 }
745 
// Returns the leading |count| characters, subject to Substr()'s range checks.
WideString WideString::First(size_t count) const {
  return Substr(0, count);
}
749 
Last(size_t count) const750 WideString WideString::Last(size_t count) const {
751   // Unsigned underflow is well-defined and out-of-range is handled by Substr().
752   return Substr(GetLength() - count, count);
753 }
754 
AllocCopy(WideString & dest,size_t nCopyLen,size_t nCopyIndex) const755 void WideString::AllocCopy(WideString& dest,
756                            size_t nCopyLen,
757                            size_t nCopyIndex) const {
758   if (nCopyLen == 0)
759     return;
760 
761   RetainPtr<StringData> pNewData(
762       StringData::Create(m_pData->m_String + nCopyIndex, nCopyLen));
763   dest.m_pData.Swap(pNewData);
764 }
765 
Insert(size_t index,wchar_t ch)766 size_t WideString::Insert(size_t index, wchar_t ch) {
767   const size_t cur_length = GetLength();
768   if (!IsValidLength(index))
769     return cur_length;
770 
771   const size_t new_length = cur_length + 1;
772   ReallocBeforeWrite(new_length);
773   FXSYS_wmemmove(m_pData->m_String + index + 1, m_pData->m_String + index,
774                  new_length - index);
775   m_pData->m_String[index] = ch;
776   m_pData->m_nDataLength = new_length;
777   return new_length;
778 }
779 
Find(wchar_t ch,size_t start) const780 absl::optional<size_t> WideString::Find(wchar_t ch, size_t start) const {
781   if (!m_pData)
782     return absl::nullopt;
783 
784   if (!IsValidIndex(start))
785     return absl::nullopt;
786 
787   const wchar_t* pStr = FXSYS_wmemchr(m_pData->m_String + start, ch,
788                                       m_pData->m_nDataLength - start);
789   return pStr ? absl::optional<size_t>(
790                     static_cast<size_t>(pStr - m_pData->m_String))
791               : absl::nullopt;
792 }
793 
Find(WideStringView subStr,size_t start) const794 absl::optional<size_t> WideString::Find(WideStringView subStr,
795                                         size_t start) const {
796   if (!m_pData)
797     return absl::nullopt;
798 
799   if (!IsValidIndex(start))
800     return absl::nullopt;
801 
802   const wchar_t* pStr =
803       FX_wcsstr(m_pData->m_String + start, m_pData->m_nDataLength - start,
804                 subStr.unterminated_c_str(), subStr.GetLength());
805   return pStr ? absl::optional<size_t>(
806                     static_cast<size_t>(pStr - m_pData->m_String))
807               : absl::nullopt;
808 }
809 
ReverseFind(wchar_t ch) const810 absl::optional<size_t> WideString::ReverseFind(wchar_t ch) const {
811   if (!m_pData)
812     return absl::nullopt;
813 
814   size_t nLength = m_pData->m_nDataLength;
815   while (nLength--) {
816     if (m_pData->m_String[nLength] == ch)
817       return nLength;
818   }
819   return absl::nullopt;
820 }
821 
MakeLower()822 void WideString::MakeLower() {
823   if (IsEmpty())
824     return;
825 
826   ReallocBeforeWrite(m_pData->m_nDataLength);
827   FXSYS_wcslwr(m_pData->m_String);
828 }
829 
MakeUpper()830 void WideString::MakeUpper() {
831   if (IsEmpty())
832     return;
833 
834   ReallocBeforeWrite(m_pData->m_nDataLength);
835   FXSYS_wcsupr(m_pData->m_String);
836 }
837 
Remove(wchar_t chRemove)838 size_t WideString::Remove(wchar_t chRemove) {
839   if (IsEmpty())
840     return 0;
841 
842   wchar_t* pstrSource = m_pData->m_String;
843   wchar_t* pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
844   while (pstrSource < pstrEnd) {
845     if (*pstrSource == chRemove)
846       break;
847     pstrSource++;
848   }
849   if (pstrSource == pstrEnd)
850     return 0;
851 
852   ptrdiff_t copied = pstrSource - m_pData->m_String;
853   ReallocBeforeWrite(m_pData->m_nDataLength);
854   pstrSource = m_pData->m_String + copied;
855   pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
856 
857   wchar_t* pstrDest = pstrSource;
858   while (pstrSource < pstrEnd) {
859     if (*pstrSource != chRemove) {
860       *pstrDest = *pstrSource;
861       pstrDest++;
862     }
863     pstrSource++;
864   }
865 
866   *pstrDest = 0;
867   size_t count = static_cast<size_t>(pstrSource - pstrDest);
868   m_pData->m_nDataLength -= count;
869   return count;
870 }
871 
Replace(WideStringView pOld,WideStringView pNew)872 size_t WideString::Replace(WideStringView pOld, WideStringView pNew) {
873   if (!m_pData || pOld.IsEmpty())
874     return 0;
875 
876   size_t nSourceLen = pOld.GetLength();
877   size_t nReplacementLen = pNew.GetLength();
878   size_t count = 0;
879   const wchar_t* pStart = m_pData->m_String;
880   wchar_t* pEnd = m_pData->m_String + m_pData->m_nDataLength;
881   while (true) {
882     const wchar_t* pTarget =
883         FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
884                   pOld.unterminated_c_str(), nSourceLen);
885     if (!pTarget)
886       break;
887 
888     count++;
889     pStart = pTarget + nSourceLen;
890   }
891   if (count == 0)
892     return 0;
893 
894   size_t nNewLength =
895       m_pData->m_nDataLength + (nReplacementLen - nSourceLen) * count;
896 
897   if (nNewLength == 0) {
898     clear();
899     return count;
900   }
901 
902   RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
903   pStart = m_pData->m_String;
904   wchar_t* pDest = pNewData->m_String;
905   for (size_t i = 0; i < count; i++) {
906     const wchar_t* pTarget =
907         FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
908                   pOld.unterminated_c_str(), nSourceLen);
909     FXSYS_wmemcpy(pDest, pStart, pTarget - pStart);
910     pDest += pTarget - pStart;
911     FXSYS_wmemcpy(pDest, pNew.unterminated_c_str(), pNew.GetLength());
912     pDest += pNew.GetLength();
913     pStart = pTarget + nSourceLen;
914   }
915   FXSYS_wmemcpy(pDest, pStart, pEnd - pStart);
916   m_pData.Swap(pNewData);
917   return count;
918 }
919 
920 // static
FromASCII(ByteStringView bstr)921 WideString WideString::FromASCII(ByteStringView bstr) {
922   WideString result;
923   result.Reserve(bstr.GetLength());
924   for (char c : bstr)
925     result.InsertAtBack(static_cast<wchar_t>(c & 0x7f));
926   return result;
927 }
928 
929 // static
FromLatin1(ByteStringView bstr)930 WideString WideString::FromLatin1(ByteStringView bstr) {
931   WideString result;
932   result.Reserve(bstr.GetLength());
933   for (char c : bstr)
934     result.InsertAtBack(static_cast<wchar_t>(c & 0xff));
935   return result;
936 }
937 
938 // static
FromDefANSI(ByteStringView bstr)939 WideString WideString::FromDefANSI(ByteStringView bstr) {
940   size_t dest_len = FX_MultiByteToWideChar(FX_CodePage::kDefANSI, bstr, {});
941   if (!dest_len)
942     return WideString();
943 
944   WideString wstr;
945   {
946     // Span's lifetime must end before ReleaseBuffer() below.
947     pdfium::span<wchar_t> dest_buf = wstr.GetBuffer(dest_len);
948     FX_MultiByteToWideChar(FX_CodePage::kDefANSI, bstr, dest_buf);
949   }
950   wstr.ReleaseBuffer(dest_len);
951   return wstr;
952 }
953 
// static
// Returns the result of FX_UTF8Decode() on |str|.
WideString WideString::FromUTF8(ByteStringView str) {
  return FX_UTF8Decode(str);
}
958 
959 // static
FromUTF16LE(const unsigned short * wstr,size_t wlen)960 WideString WideString::FromUTF16LE(const unsigned short* wstr, size_t wlen) {
961   if (!wstr || wlen == 0)
962     return WideString();
963 
964   WideString result;
965   {
966     // Span's lifetime must end before ReleaseBuffer() below.
967     pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
968     for (size_t i = 0; i < wlen; i++)
969       buf[i] = wstr[i];
970   }
971   result.ReleaseBuffer(wlen);
972   return result;
973 }
974 
FromUTF16BE(const unsigned short * wstr,size_t wlen)975 WideString WideString::FromUTF16BE(const unsigned short* wstr, size_t wlen) {
976   if (!wstr || wlen == 0)
977     return WideString();
978 
979   WideString result;
980   {
981     // Span's lifetime must end before ReleaseBuffer() below.
982     pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
983     for (size_t i = 0; i < wlen; i++) {
984       auto wch = wstr[i];
985       wch = (wch >> 8) | (wch << 8);
986       buf[i] = wch;
987     }
988   }
989   result.ReleaseBuffer(wlen);
990   return result;
991 }
992 
SetAt(size_t index,wchar_t c)993 void WideString::SetAt(size_t index, wchar_t c) {
994   DCHECK(IsValidIndex(index));
995   ReallocBeforeWrite(m_pData->m_nDataLength);
996   m_pData->m_String[index] = c;
997 }
998 
Compare(const wchar_t * str) const999 int WideString::Compare(const wchar_t* str) const {
1000   if (m_pData)
1001     return str ? wcscmp(m_pData->m_String, str) : 1;
1002   return (!str || str[0] == 0) ? 0 : -1;
1003 }
1004 
Compare(const WideString & str) const1005 int WideString::Compare(const WideString& str) const {
1006   if (!m_pData)
1007     return str.m_pData ? -1 : 0;
1008   if (!str.m_pData)
1009     return 1;
1010 
1011   size_t this_len = m_pData->m_nDataLength;
1012   size_t that_len = str.m_pData->m_nDataLength;
1013   size_t min_len = std::min(this_len, that_len);
1014   int result = FXSYS_wmemcmp(m_pData->m_String, str.m_pData->m_String, min_len);
1015   if (result != 0)
1016     return result;
1017   if (this_len == that_len)
1018     return 0;
1019   return this_len < that_len ? -1 : 1;
1020 }
1021 
CompareNoCase(const wchar_t * str) const1022 int WideString::CompareNoCase(const wchar_t* str) const {
1023   if (m_pData)
1024     return str ? FXSYS_wcsicmp(m_pData->m_String, str) : 1;
1025   return (!str || str[0] == 0) ? 0 : -1;
1026 }
1027 
WStringLength(const unsigned short * str)1028 size_t WideString::WStringLength(const unsigned short* str) {
1029   size_t len = 0;
1030   if (str)
1031     while (str[len])
1032       len++;
1033   return len;
1034 }
1035 
Trim()1036 void WideString::Trim() {
1037   TrimRight(kWideTrimChars);
1038   TrimLeft(kWideTrimChars);
1039 }
1040 
Trim(wchar_t target)1041 void WideString::Trim(wchar_t target) {
1042   wchar_t str[2] = {target, 0};
1043   TrimRight(str);
1044   TrimLeft(str);
1045 }
1046 
Trim(WideStringView targets)1047 void WideString::Trim(WideStringView targets) {
1048   TrimRight(targets);
1049   TrimLeft(targets);
1050 }
1051 
TrimLeft()1052 void WideString::TrimLeft() {
1053   TrimLeft(kWideTrimChars);
1054 }
1055 
TrimLeft(wchar_t target)1056 void WideString::TrimLeft(wchar_t target) {
1057   wchar_t str[2] = {target, 0};
1058   TrimLeft(str);
1059 }
1060 
TrimLeft(WideStringView targets)1061 void WideString::TrimLeft(WideStringView targets) {
1062   if (!m_pData || targets.IsEmpty())
1063     return;
1064 
1065   size_t len = GetLength();
1066   if (len == 0)
1067     return;
1068 
1069   size_t pos = 0;
1070   while (pos < len) {
1071     size_t i = 0;
1072     while (i < targets.GetLength() &&
1073            targets.CharAt(i) != m_pData->m_String[pos]) {
1074       i++;
1075     }
1076     if (i == targets.GetLength())
1077       break;
1078     pos++;
1079   }
1080   if (!pos)
1081     return;
1082 
1083   ReallocBeforeWrite(len);
1084   size_t nDataLength = len - pos;
1085   memmove(m_pData->m_String, m_pData->m_String + pos,
1086           (nDataLength + 1) * sizeof(wchar_t));
1087   m_pData->m_nDataLength = nDataLength;
1088 }
1089 
TrimRight()1090 void WideString::TrimRight() {
1091   TrimRight(kWideTrimChars);
1092 }
1093 
TrimRight(wchar_t target)1094 void WideString::TrimRight(wchar_t target) {
1095   wchar_t str[2] = {target, 0};
1096   TrimRight(str);
1097 }
1098 
TrimRight(WideStringView targets)1099 void WideString::TrimRight(WideStringView targets) {
1100   if (IsEmpty() || targets.IsEmpty())
1101     return;
1102 
1103   size_t pos = GetLength();
1104   while (pos && targets.Contains(m_pData->m_String[pos - 1]))
1105     pos--;
1106 
1107   if (pos < m_pData->m_nDataLength) {
1108     ReallocBeforeWrite(m_pData->m_nDataLength);
1109     m_pData->m_String[pos] = 0;
1110     m_pData->m_nDataLength = pos;
1111   }
1112 }
1113 
GetInteger() const1114 int WideString::GetInteger() const {
1115   return m_pData ? FXSYS_wtoi(m_pData->m_String) : 0;
1116 }
1117 
operator <<(std::wostream & os,const WideString & str)1118 std::wostream& operator<<(std::wostream& os, const WideString& str) {
1119   return os.write(str.c_str(), str.GetLength());
1120 }
1121 
operator <<(std::ostream & os,const WideString & str)1122 std::ostream& operator<<(std::ostream& os, const WideString& str) {
1123   os << str.ToUTF8();
1124   return os;
1125 }
1126 
operator <<(std::wostream & os,WideStringView str)1127 std::wostream& operator<<(std::wostream& os, WideStringView str) {
1128   return os.write(str.unterminated_c_str(), str.GetLength());
1129 }
1130 
operator <<(std::ostream & os,WideStringView str)1131 std::ostream& operator<<(std::ostream& os, WideStringView str) {
1132   os << FX_UTF8Encode(str);
1133   return os;
1134 }
1135 
1136 }  // namespace fxcrt
1137 
FX_HashCode_GetW(WideStringView str)1138 uint32_t FX_HashCode_GetW(WideStringView str) {
1139   uint32_t dwHashCode = 0;
1140   for (WideStringView::UnsignedType c : str)
1141     dwHashCode = 1313 * dwHashCode + c;
1142   return dwHashCode;
1143 }
1144 
FX_HashCode_GetLoweredW(WideStringView str)1145 uint32_t FX_HashCode_GetLoweredW(WideStringView str) {
1146   uint32_t dwHashCode = 0;
1147   for (wchar_t c : str)  // match FXSYS_towlower() arg type.
1148     dwHashCode = 1313 * dwHashCode + FXSYS_towlower(c);
1149   return dwHashCode;
1150 }
1151