1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fxcrt/widestring.h"
8
9 #include <stddef.h>
10 #include <string.h>
11
12 #include <algorithm>
13 #include <sstream>
14
15 #include "core/fxcrt/fx_codepage.h"
16 #include "core/fxcrt/fx_extension.h"
17 #include "core/fxcrt/fx_memcpy_wrappers.h"
18 #include "core/fxcrt/fx_safe_types.h"
19 #include "core/fxcrt/fx_system.h"
20 #include "core/fxcrt/string_pool_template.h"
21 #include "third_party/base/check.h"
22 #include "third_party/base/check_op.h"
23 #include "third_party/base/numerics/safe_math.h"
24
// Explicit instantiations of the string templates for the wide-character
// types used in this file, and of the std::hash specialization for WideString.
template class fxcrt::StringDataTemplate<wchar_t>;
template class fxcrt::StringViewTemplate<wchar_t>;
template class fxcrt::StringPoolTemplate<WideString>;
template struct std::hash<WideString>;
29
// Size-modifier flags used by GuessSizeForVSWPrintf(). They are OR-ed with
// the conversion character before the switch, so "%hs", "%ls" and "%s" land
// on distinct case labels. The values are all above 0xFFFF.
#define FORCE_ANSI 0x10000
#define FORCE_UNICODE 0x20000
#define FORCE_INT64 0x40000
33
34 namespace {
35
// Characters stripped by the argument-less Trim(), TrimLeft() and
// TrimRight(): tab, line feed, vertical tab, form feed, carriage return and
// space.
constexpr wchar_t kWideTrimChars[] = L"\x09\x0a\x0b\x0c\x0d\x20";
37
// Finds the first occurrence of the |needle_len|-character sequence |needle|
// within the first |haystack_len| characters of |haystack|. Both lengths are
// explicit, so neither buffer has to be NUL-terminated.
// Returns a pointer to the start of the match, or nullptr when there is no
// match, when |needle| is empty, or when |needle| is longer than |haystack|.
const wchar_t* FX_wcsstr(const wchar_t* haystack,
                         size_t haystack_len,
                         const wchar_t* needle,
                         size_t needle_len) {
  if (needle_len == 0 || needle_len > haystack_len)
    return nullptr;

  // Last position at which a full-length match can still begin.
  const wchar_t* const last_start = haystack + haystack_len - needle_len;
  for (const wchar_t* candidate = haystack; candidate <= last_start;
       ++candidate) {
    size_t matched = 0;
    while (matched < needle_len && candidate[matched] == needle[matched])
      ++matched;
    if (matched == needle_len)
      return candidate;
  }
  return nullptr;
}
60
GuessSizeForVSWPrintf(const wchar_t * pFormat,va_list argList)61 absl::optional<size_t> GuessSizeForVSWPrintf(const wchar_t* pFormat,
62 va_list argList) {
63 size_t nMaxLen = 0;
64 for (const wchar_t* pStr = pFormat; *pStr != 0; pStr++) {
65 if (*pStr != '%' || *(pStr = pStr + 1) == '%') {
66 ++nMaxLen;
67 continue;
68 }
69 int iWidth = 0;
70 for (; *pStr != 0; pStr++) {
71 if (*pStr == '#') {
72 nMaxLen += 2;
73 } else if (*pStr == '*') {
74 iWidth = va_arg(argList, int);
75 } else if (*pStr != '-' && *pStr != '+' && *pStr != '0' && *pStr != ' ') {
76 break;
77 }
78 }
79 if (iWidth == 0) {
80 iWidth = FXSYS_wtoi(pStr);
81 while (FXSYS_IsDecimalDigit(*pStr))
82 ++pStr;
83 }
84 if (iWidth < 0 || iWidth > 128 * 1024)
85 return absl::nullopt;
86 uint32_t nWidth = static_cast<uint32_t>(iWidth);
87 int iPrecision = 0;
88 if (*pStr == '.') {
89 pStr++;
90 if (*pStr == '*') {
91 iPrecision = va_arg(argList, int);
92 pStr++;
93 } else {
94 iPrecision = FXSYS_wtoi(pStr);
95 while (FXSYS_IsDecimalDigit(*pStr))
96 ++pStr;
97 }
98 }
99 if (iPrecision < 0 || iPrecision > 128 * 1024)
100 return absl::nullopt;
101 uint32_t nPrecision = static_cast<uint32_t>(iPrecision);
102 int nModifier = 0;
103 if (*pStr == L'I' && *(pStr + 1) == L'6' && *(pStr + 2) == L'4') {
104 pStr += 3;
105 nModifier = FORCE_INT64;
106 } else {
107 switch (*pStr) {
108 case 'h':
109 nModifier = FORCE_ANSI;
110 pStr++;
111 break;
112 case 'l':
113 nModifier = FORCE_UNICODE;
114 pStr++;
115 break;
116 case 'F':
117 case 'N':
118 case 'L':
119 pStr++;
120 break;
121 }
122 }
123 size_t nItemLen = 0;
124 switch (*pStr | nModifier) {
125 case 'c':
126 case 'C':
127 nItemLen = 2;
128 va_arg(argList, int);
129 break;
130 case 'c' | FORCE_ANSI:
131 case 'C' | FORCE_ANSI:
132 nItemLen = 2;
133 va_arg(argList, int);
134 break;
135 case 'c' | FORCE_UNICODE:
136 case 'C' | FORCE_UNICODE:
137 nItemLen = 2;
138 va_arg(argList, int);
139 break;
140 case 's': {
141 const wchar_t* pstrNextArg = va_arg(argList, const wchar_t*);
142 if (pstrNextArg) {
143 nItemLen = wcslen(pstrNextArg);
144 if (nItemLen < 1) {
145 nItemLen = 1;
146 }
147 } else {
148 nItemLen = 6;
149 }
150 } break;
151 case 'S': {
152 const char* pstrNextArg = va_arg(argList, const char*);
153 if (pstrNextArg) {
154 nItemLen = strlen(pstrNextArg);
155 if (nItemLen < 1) {
156 nItemLen = 1;
157 }
158 } else {
159 nItemLen = 6;
160 }
161 } break;
162 case 's' | FORCE_ANSI:
163 case 'S' | FORCE_ANSI: {
164 const char* pstrNextArg = va_arg(argList, const char*);
165 if (pstrNextArg) {
166 nItemLen = strlen(pstrNextArg);
167 if (nItemLen < 1) {
168 nItemLen = 1;
169 }
170 } else {
171 nItemLen = 6;
172 }
173 } break;
174 case 's' | FORCE_UNICODE:
175 case 'S' | FORCE_UNICODE: {
176 const wchar_t* pstrNextArg = va_arg(argList, wchar_t*);
177 if (pstrNextArg) {
178 nItemLen = wcslen(pstrNextArg);
179 if (nItemLen < 1) {
180 nItemLen = 1;
181 }
182 } else {
183 nItemLen = 6;
184 }
185 } break;
186 }
187 if (nItemLen != 0) {
188 if (nPrecision != 0 && nItemLen > nPrecision) {
189 nItemLen = nPrecision;
190 }
191 if (nItemLen < nWidth) {
192 nItemLen = nWidth;
193 }
194 } else {
195 switch (*pStr) {
196 case 'd':
197 case 'i':
198 case 'u':
199 case 'x':
200 case 'X':
201 case 'o':
202 if (nModifier & FORCE_INT64) {
203 va_arg(argList, int64_t);
204 } else {
205 va_arg(argList, int);
206 }
207 nItemLen = 32;
208 if (nItemLen < nWidth + nPrecision) {
209 nItemLen = nWidth + nPrecision;
210 }
211 break;
212 case 'a':
213 case 'A':
214 case 'e':
215 case 'E':
216 case 'g':
217 case 'G':
218 va_arg(argList, double);
219 nItemLen = 128;
220 if (nItemLen < nWidth + nPrecision) {
221 nItemLen = nWidth + nPrecision;
222 }
223 break;
224 case 'f':
225 if (nWidth + nPrecision > 100) {
226 nItemLen = nPrecision + nWidth + 128;
227 } else {
228 double f;
229 char pszTemp[256];
230 f = va_arg(argList, double);
231 FXSYS_snprintf(pszTemp, sizeof(pszTemp), "%*.*f", nWidth,
232 nPrecision + 6, f);
233 nItemLen = strlen(pszTemp);
234 }
235 break;
236 case 'p':
237 va_arg(argList, void*);
238 nItemLen = 32;
239 if (nItemLen < nWidth + nPrecision) {
240 nItemLen = nWidth + nPrecision;
241 }
242 break;
243 case 'n':
244 va_arg(argList, int*);
245 break;
246 }
247 }
248 nMaxLen += nItemLen;
249 }
250 nMaxLen += 32; // Fudge factor.
251 return nMaxLen;
252 }
253
254 // Returns string unless we ran out of space.
TryVSWPrintf(size_t size,const wchar_t * pFormat,va_list argList)255 absl::optional<WideString> TryVSWPrintf(size_t size,
256 const wchar_t* pFormat,
257 va_list argList) {
258 if (!size)
259 return absl::nullopt;
260
261 WideString str;
262 {
263 // Span's lifetime must end before ReleaseBuffer() below.
264 pdfium::span<wchar_t> buffer = str.GetBuffer(size);
265
266 // In the following two calls, there's always space in the WideString
267 // for a terminating NUL that's not included in the span.
268 // For vswprintf(), MSAN won't untaint the buffer on a truncated write's
269 // -1 return code even though the buffer is written. Probably just as well
270 // not to trust the vendor's implementation to write anything anyways.
271 // See https://crbug.com/705912.
272 memset(buffer.data(), 0, (size + 1) * sizeof(wchar_t));
273 int ret = vswprintf(buffer.data(), size + 1, pFormat, argList);
274
275 bool bSufficientBuffer = ret >= 0 || buffer[size - 1] == 0;
276 if (!bSufficientBuffer)
277 return absl::nullopt;
278 }
279 str.ReleaseBuffer(str.GetStringLength());
280 return str;
281 }
282
283 } // namespace
284
285 namespace fxcrt {
286
// Compile-time guard: a WideString object must be no larger than a raw
// wchar_t pointer.
static_assert(sizeof(WideString) <= sizeof(wchar_t*),
              "Strings must not require more space than pointers");
289
290 // static
FormatInteger(int i)291 WideString WideString::FormatInteger(int i) {
292 wchar_t wbuf[32];
293 swprintf(wbuf, std::size(wbuf), L"%d", i);
294 return WideString(wbuf);
295 }
296
297 // static
FormatV(const wchar_t * format,va_list argList)298 WideString WideString::FormatV(const wchar_t* format, va_list argList) {
299 va_list argListCopy;
300 va_copy(argListCopy, argList);
301 auto guess = GuessSizeForVSWPrintf(format, argListCopy);
302 va_end(argListCopy);
303
304 if (!guess.has_value()) {
305 return WideString();
306 }
307 int maxLen = pdfium::base::checked_cast<int>(guess.value());
308
309 while (maxLen < 32 * 1024) {
310 va_copy(argListCopy, argList);
311 absl::optional<WideString> ret =
312 TryVSWPrintf(static_cast<size_t>(maxLen), format, argListCopy);
313 va_end(argListCopy);
314 if (ret.has_value())
315 return ret.value();
316
317 maxLen *= 2;
318 }
319 return WideString();
320 }
321
322 // static
Format(const wchar_t * pFormat,...)323 WideString WideString::Format(const wchar_t* pFormat, ...) {
324 va_list argList;
325 va_start(argList, pFormat);
326 WideString ret = FormatV(pFormat, argList);
327 va_end(argList);
328 return ret;
329 }
330
// Default construction uses the compiler-generated constructor.
WideString::WideString() = default;

// Copy construction copies |other|'s data pointer; no characters are copied
// here.
WideString::WideString(const WideString& other) : m_pData(other.m_pData) {}

// Move construction swaps data pointers with |other|.
WideString::WideString(WideString&& other) noexcept {
  m_pData.Swap(other.m_pData);
}
338
WideString(const wchar_t * pStr,size_t nLen)339 WideString::WideString(const wchar_t* pStr, size_t nLen) {
340 if (nLen)
341 m_pData.Reset(StringData::Create(pStr, nLen));
342 }
343
// Constructs a one-character string holding |ch|.
WideString::WideString(wchar_t ch) {
  m_pData.Reset(StringData::Create(1));
  m_pData->m_String[0] = ch;
}
348
// Constructs from the NUL-terminated string |ptr|. A null |ptr| is treated as
// length zero.
WideString::WideString(const wchar_t* ptr)
    : WideString(ptr, ptr ? wcslen(ptr) : 0) {}
351
WideString(WideStringView stringSrc)352 WideString::WideString(WideStringView stringSrc) {
353 if (!stringSrc.IsEmpty()) {
354 m_pData.Reset(StringData::Create(stringSrc.unterminated_c_str(),
355 stringSrc.GetLength()));
356 }
357 }
358
WideString(WideStringView str1,WideStringView str2)359 WideString::WideString(WideStringView str1, WideStringView str2) {
360 FX_SAFE_SIZE_T nSafeLen = str1.GetLength();
361 nSafeLen += str2.GetLength();
362
363 size_t nNewLen = nSafeLen.ValueOrDie();
364 if (nNewLen == 0)
365 return;
366
367 m_pData.Reset(StringData::Create(nNewLen));
368 m_pData->CopyContents(str1.unterminated_c_str(), str1.GetLength());
369 m_pData->CopyContentsAt(str1.GetLength(), str2.unterminated_c_str(),
370 str2.GetLength());
371 }
372
WideString(const std::initializer_list<WideStringView> & list)373 WideString::WideString(const std::initializer_list<WideStringView>& list) {
374 FX_SAFE_SIZE_T nSafeLen = 0;
375 for (const auto& item : list)
376 nSafeLen += item.GetLength();
377
378 size_t nNewLen = nSafeLen.ValueOrDie();
379 if (nNewLen == 0)
380 return;
381
382 m_pData.Reset(StringData::Create(nNewLen));
383
384 size_t nOffset = 0;
385 for (const auto& item : list) {
386 m_pData->CopyContentsAt(nOffset, item.unterminated_c_str(),
387 item.GetLength());
388 nOffset += item.GetLength();
389 }
390 }
391
// Destruction uses the compiler-generated destructor.
WideString::~WideString() = default;
393
clear()394 void WideString::clear() {
395 if (m_pData && m_pData->CanOperateInPlace(0)) {
396 m_pData->m_nDataLength = 0;
397 return;
398 }
399 m_pData.Reset();
400 }
401
operator =(const wchar_t * str)402 WideString& WideString::operator=(const wchar_t* str) {
403 if (!str || !str[0])
404 clear();
405 else
406 AssignCopy(str, wcslen(str));
407
408 return *this;
409 }
410
operator =(WideStringView str)411 WideString& WideString::operator=(WideStringView str) {
412 if (str.IsEmpty())
413 clear();
414 else
415 AssignCopy(str.unterminated_c_str(), str.GetLength());
416
417 return *this;
418 }
419
// Copy assignment: copies |that|'s data pointer. Nothing happens when both
// strings already point at the same data.
WideString& WideString::operator=(const WideString& that) {
  const bool bSameData = m_pData == that.m_pData;
  if (!bSameData)
    m_pData = that.m_pData;

  return *this;
}
426
operator =(WideString && that)427 WideString& WideString::operator=(WideString&& that) noexcept {
428 if (m_pData != that.m_pData)
429 m_pData = std::move(that.m_pData);
430
431 return *this;
432 }
433
operator +=(const wchar_t * str)434 WideString& WideString::operator+=(const wchar_t* str) {
435 if (str)
436 Concat(str, wcslen(str));
437
438 return *this;
439 }
440
// Appends the single character |ch|.
WideString& WideString::operator+=(wchar_t ch) {
  Concat(&ch, 1);
  return *this;
}
445
// Appends the contents of |str|. A |str| without a data block is ignored.
WideString& WideString::operator+=(const WideString& str) {
  if (!str.m_pData)
    return *this;

  Concat(str.m_pData->m_String, str.m_pData->m_nDataLength);
  return *this;
}
452
operator +=(WideStringView str)453 WideString& WideString::operator+=(WideStringView str) {
454 if (!str.IsEmpty())
455 Concat(str.unterminated_c_str(), str.GetLength());
456
457 return *this;
458 }
459
operator ==(const wchar_t * ptr) const460 bool WideString::operator==(const wchar_t* ptr) const {
461 if (!m_pData)
462 return !ptr || !ptr[0];
463
464 if (!ptr)
465 return m_pData->m_nDataLength == 0;
466
467 return wcslen(ptr) == m_pData->m_nDataLength &&
468 FXSYS_wmemcmp(ptr, m_pData->m_String, m_pData->m_nDataLength) == 0;
469 }
470
operator ==(WideStringView str) const471 bool WideString::operator==(WideStringView str) const {
472 if (!m_pData)
473 return str.IsEmpty();
474
475 return m_pData->m_nDataLength == str.GetLength() &&
476 FXSYS_wmemcmp(m_pData->m_String, str.unterminated_c_str(),
477 str.GetLength()) == 0;
478 }
479
operator ==(const WideString & other) const480 bool WideString::operator==(const WideString& other) const {
481 if (m_pData == other.m_pData)
482 return true;
483
484 if (IsEmpty())
485 return other.IsEmpty();
486
487 if (other.IsEmpty())
488 return false;
489
490 return other.m_pData->m_nDataLength == m_pData->m_nDataLength &&
491 wmemcmp(other.m_pData->m_String, m_pData->m_String,
492 m_pData->m_nDataLength) == 0;
493 }
494
operator <(const wchar_t * ptr) const495 bool WideString::operator<(const wchar_t* ptr) const {
496 return Compare(ptr) < 0;
497 }
498
operator <(WideStringView str) const499 bool WideString::operator<(WideStringView str) const {
500 if (!m_pData && !str.unterminated_c_str())
501 return false;
502 if (c_str() == str.unterminated_c_str())
503 return false;
504
505 size_t len = GetLength();
506 size_t other_len = str.GetLength();
507 int result = FXSYS_wmemcmp(c_str(), str.unterminated_c_str(),
508 std::min(len, other_len));
509 return result < 0 || (result == 0 && len < other_len);
510 }
511
operator <(const WideString & other) const512 bool WideString::operator<(const WideString& other) const {
513 return Compare(other) < 0;
514 }
515
// Replaces the contents with the first |nSrcLen| characters at |pSrcData|.
// NOTE(review): both callers in this file pass a non-zero |nSrcLen|. With
// zero, AllocBeforeWrite() may leave |m_pData| null before it is
// dereferenced below - confirm no caller does that.
void WideString::AssignCopy(const wchar_t* pSrcData, size_t nSrcLen) {
  AllocBeforeWrite(nSrcLen);
  m_pData->CopyContents(pSrcData, nSrcLen);
  m_pData->m_nDataLength = nSrcLen;
}
521
ReallocBeforeWrite(size_t nNewLength)522 void WideString::ReallocBeforeWrite(size_t nNewLength) {
523 if (m_pData && m_pData->CanOperateInPlace(nNewLength))
524 return;
525
526 if (nNewLength == 0) {
527 clear();
528 return;
529 }
530
531 RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
532 if (m_pData) {
533 size_t nCopyLength = std::min(m_pData->m_nDataLength, nNewLength);
534 pNewData->CopyContents(m_pData->m_String, nCopyLength);
535 pNewData->m_nDataLength = nCopyLength;
536 } else {
537 pNewData->m_nDataLength = 0;
538 }
539 pNewData->m_String[pNewData->m_nDataLength] = 0;
540 m_pData.Swap(pNewData);
541 }
542
AllocBeforeWrite(size_t nNewLength)543 void WideString::AllocBeforeWrite(size_t nNewLength) {
544 if (m_pData && m_pData->CanOperateInPlace(nNewLength))
545 return;
546
547 if (nNewLength == 0) {
548 clear();
549 return;
550 }
551
552 m_pData.Reset(StringData::Create(nNewLength));
553 }
554
ReleaseBuffer(size_t nNewLength)555 void WideString::ReleaseBuffer(size_t nNewLength) {
556 if (!m_pData)
557 return;
558
559 nNewLength = std::min(nNewLength, m_pData->m_nAllocLength);
560 if (nNewLength == 0) {
561 clear();
562 return;
563 }
564
565 DCHECK_EQ(m_pData->m_nRefs, 1);
566 m_pData->m_nDataLength = nNewLength;
567 m_pData->m_String[nNewLength] = 0;
568 if (m_pData->m_nAllocLength - nNewLength >= 32) {
569 // Over arbitrary threshold, so pay the price to relocate. Force copy to
570 // always occur by holding a second reference to the string.
571 WideString preserve(*this);
572 ReallocBeforeWrite(nNewLength);
573 }
574 }
575
// Requests a buffer of at least |len| characters via GetBuffer() and
// discards the returned span.
void WideString::Reserve(size_t len) {
  GetBuffer(len);
}
579
GetBuffer(size_t nMinBufLength)580 pdfium::span<wchar_t> WideString::GetBuffer(size_t nMinBufLength) {
581 if (!m_pData) {
582 if (nMinBufLength == 0)
583 return pdfium::span<wchar_t>();
584
585 m_pData.Reset(StringData::Create(nMinBufLength));
586 m_pData->m_nDataLength = 0;
587 m_pData->m_String[0] = 0;
588 return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
589 }
590
591 if (m_pData->CanOperateInPlace(nMinBufLength))
592 return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
593
594 nMinBufLength = std::max(nMinBufLength, m_pData->m_nDataLength);
595 if (nMinBufLength == 0)
596 return pdfium::span<wchar_t>();
597
598 RetainPtr<StringData> pNewData(StringData::Create(nMinBufLength));
599 pNewData->CopyContents(*m_pData);
600 pNewData->m_nDataLength = m_pData->m_nDataLength;
601 m_pData.Swap(pNewData);
602 return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
603 }
604
Delete(size_t index,size_t count)605 size_t WideString::Delete(size_t index, size_t count) {
606 if (!m_pData)
607 return 0;
608
609 size_t old_length = m_pData->m_nDataLength;
610 if (count == 0 || index != std::clamp<size_t>(index, 0, old_length)) {
611 return old_length;
612 }
613
614 size_t removal_length = index + count;
615 if (removal_length > old_length)
616 return old_length;
617
618 ReallocBeforeWrite(old_length);
619 size_t chars_to_copy = old_length - removal_length + 1;
620 wmemmove(m_pData->m_String + index, m_pData->m_String + removal_length,
621 chars_to_copy);
622 m_pData->m_nDataLength = old_length - count;
623 return m_pData->m_nDataLength;
624 }
625
Concat(const wchar_t * pSrcData,size_t nSrcLen)626 void WideString::Concat(const wchar_t* pSrcData, size_t nSrcLen) {
627 if (!pSrcData || nSrcLen == 0)
628 return;
629
630 if (!m_pData) {
631 m_pData.Reset(StringData::Create(pSrcData, nSrcLen));
632 return;
633 }
634
635 if (m_pData->CanOperateInPlace(m_pData->m_nDataLength + nSrcLen)) {
636 m_pData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
637 m_pData->m_nDataLength += nSrcLen;
638 return;
639 }
640
641 size_t nConcatLen = std::max(m_pData->m_nDataLength / 2, nSrcLen);
642 RetainPtr<StringData> pNewData(
643 StringData::Create(m_pData->m_nDataLength + nConcatLen));
644 pNewData->CopyContents(*m_pData);
645 pNewData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
646 pNewData->m_nDataLength = m_pData->m_nDataLength + nSrcLen;
647 m_pData.Swap(pNewData);
648 }
649
ReferenceCountForTesting() const650 intptr_t WideString::ReferenceCountForTesting() const {
651 return m_pData ? m_pData->m_nRefs : 0;
652 }
653
ToASCII() const654 ByteString WideString::ToASCII() const {
655 ByteString result;
656 result.Reserve(GetLength());
657 for (wchar_t wc : *this)
658 result.InsertAtBack(static_cast<char>(wc & 0x7f));
659 return result;
660 }
661
ToLatin1() const662 ByteString WideString::ToLatin1() const {
663 ByteString result;
664 result.Reserve(GetLength());
665 for (wchar_t wc : *this)
666 result.InsertAtBack(static_cast<char>(wc & 0xff));
667 return result;
668 }
669
ToDefANSI() const670 ByteString WideString::ToDefANSI() const {
671 size_t dest_len =
672 FX_WideCharToMultiByte(FX_CodePage::kDefANSI, AsStringView(), {});
673 if (!dest_len)
674 return ByteString();
675
676 ByteString bstr;
677 {
678 // Span's lifetime must end before ReleaseBuffer() below.
679 pdfium::span<char> dest_buf = bstr.GetBuffer(dest_len);
680 FX_WideCharToMultiByte(FX_CodePage::kDefANSI, AsStringView(), dest_buf);
681 }
682 bstr.ReleaseBuffer(dest_len);
683 return bstr;
684 }
685
// Returns the result of FX_UTF8Encode() on this string's contents.
ByteString WideString::ToUTF8() const {
  return FX_UTF8Encode(AsStringView());
}
689
ToUTF16LE() const690 ByteString WideString::ToUTF16LE() const {
691 if (!m_pData)
692 return ByteString("\0\0", 2);
693
694 ByteString result;
695 size_t len = m_pData->m_nDataLength;
696 {
697 // Span's lifetime must end before ReleaseBuffer() below.
698 pdfium::span<char> buffer = result.GetBuffer(len * 2 + 2);
699 for (size_t i = 0; i < len; i++) {
700 buffer[i * 2] = m_pData->m_String[i] & 0xff;
701 buffer[i * 2 + 1] = m_pData->m_String[i] >> 8;
702 }
703 buffer[len * 2] = 0;
704 buffer[len * 2 + 1] = 0;
705 }
706 result.ReleaseBuffer(len * 2 + 2);
707 return result;
708 }
709
EncodeEntities() const710 WideString WideString::EncodeEntities() const {
711 WideString ret = *this;
712 ret.Replace(L"&", L"&");
713 ret.Replace(L"<", L"<");
714 ret.Replace(L">", L">");
715 ret.Replace(L"\'", L"'");
716 ret.Replace(L"\"", L""");
717 return ret;
718 }
719
Substr(size_t offset) const720 WideString WideString::Substr(size_t offset) const {
721 // Unsigned underflow is well-defined and out-of-range is handled by Substr().
722 return Substr(offset, GetLength() - offset);
723 }
724
Substr(size_t first,size_t count) const725 WideString WideString::Substr(size_t first, size_t count) const {
726 if (!m_pData)
727 return WideString();
728
729 if (!IsValidIndex(first))
730 return WideString();
731
732 if (count == 0 || !IsValidLength(count))
733 return WideString();
734
735 if (!IsValidIndex(first + count - 1))
736 return WideString();
737
738 if (first == 0 && count == GetLength())
739 return *this;
740
741 WideString dest;
742 AllocCopy(dest, count, first);
743 return dest;
744 }
745
// Returns the first |count| characters; range handling is that of Substr().
WideString WideString::First(size_t count) const {
  return Substr(0, count);
}
749
Last(size_t count) const750 WideString WideString::Last(size_t count) const {
751 // Unsigned underflow is well-defined and out-of-range is handled by Substr().
752 return Substr(GetLength() - count, count);
753 }
754
AllocCopy(WideString & dest,size_t nCopyLen,size_t nCopyIndex) const755 void WideString::AllocCopy(WideString& dest,
756 size_t nCopyLen,
757 size_t nCopyIndex) const {
758 if (nCopyLen == 0)
759 return;
760
761 RetainPtr<StringData> pNewData(
762 StringData::Create(m_pData->m_String + nCopyIndex, nCopyLen));
763 dest.m_pData.Swap(pNewData);
764 }
765
Insert(size_t index,wchar_t ch)766 size_t WideString::Insert(size_t index, wchar_t ch) {
767 const size_t cur_length = GetLength();
768 if (!IsValidLength(index))
769 return cur_length;
770
771 const size_t new_length = cur_length + 1;
772 ReallocBeforeWrite(new_length);
773 FXSYS_wmemmove(m_pData->m_String + index + 1, m_pData->m_String + index,
774 new_length - index);
775 m_pData->m_String[index] = ch;
776 m_pData->m_nDataLength = new_length;
777 return new_length;
778 }
779
Find(wchar_t ch,size_t start) const780 absl::optional<size_t> WideString::Find(wchar_t ch, size_t start) const {
781 if (!m_pData)
782 return absl::nullopt;
783
784 if (!IsValidIndex(start))
785 return absl::nullopt;
786
787 const wchar_t* pStr = FXSYS_wmemchr(m_pData->m_String + start, ch,
788 m_pData->m_nDataLength - start);
789 return pStr ? absl::optional<size_t>(
790 static_cast<size_t>(pStr - m_pData->m_String))
791 : absl::nullopt;
792 }
793
Find(WideStringView subStr,size_t start) const794 absl::optional<size_t> WideString::Find(WideStringView subStr,
795 size_t start) const {
796 if (!m_pData)
797 return absl::nullopt;
798
799 if (!IsValidIndex(start))
800 return absl::nullopt;
801
802 const wchar_t* pStr =
803 FX_wcsstr(m_pData->m_String + start, m_pData->m_nDataLength - start,
804 subStr.unterminated_c_str(), subStr.GetLength());
805 return pStr ? absl::optional<size_t>(
806 static_cast<size_t>(pStr - m_pData->m_String))
807 : absl::nullopt;
808 }
809
ReverseFind(wchar_t ch) const810 absl::optional<size_t> WideString::ReverseFind(wchar_t ch) const {
811 if (!m_pData)
812 return absl::nullopt;
813
814 size_t nLength = m_pData->m_nDataLength;
815 while (nLength--) {
816 if (m_pData->m_String[nLength] == ch)
817 return nLength;
818 }
819 return absl::nullopt;
820 }
821
MakeLower()822 void WideString::MakeLower() {
823 if (IsEmpty())
824 return;
825
826 ReallocBeforeWrite(m_pData->m_nDataLength);
827 FXSYS_wcslwr(m_pData->m_String);
828 }
829
MakeUpper()830 void WideString::MakeUpper() {
831 if (IsEmpty())
832 return;
833
834 ReallocBeforeWrite(m_pData->m_nDataLength);
835 FXSYS_wcsupr(m_pData->m_String);
836 }
837
Remove(wchar_t chRemove)838 size_t WideString::Remove(wchar_t chRemove) {
839 if (IsEmpty())
840 return 0;
841
842 wchar_t* pstrSource = m_pData->m_String;
843 wchar_t* pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
844 while (pstrSource < pstrEnd) {
845 if (*pstrSource == chRemove)
846 break;
847 pstrSource++;
848 }
849 if (pstrSource == pstrEnd)
850 return 0;
851
852 ptrdiff_t copied = pstrSource - m_pData->m_String;
853 ReallocBeforeWrite(m_pData->m_nDataLength);
854 pstrSource = m_pData->m_String + copied;
855 pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
856
857 wchar_t* pstrDest = pstrSource;
858 while (pstrSource < pstrEnd) {
859 if (*pstrSource != chRemove) {
860 *pstrDest = *pstrSource;
861 pstrDest++;
862 }
863 pstrSource++;
864 }
865
866 *pstrDest = 0;
867 size_t count = static_cast<size_t>(pstrSource - pstrDest);
868 m_pData->m_nDataLength -= count;
869 return count;
870 }
871
Replace(WideStringView pOld,WideStringView pNew)872 size_t WideString::Replace(WideStringView pOld, WideStringView pNew) {
873 if (!m_pData || pOld.IsEmpty())
874 return 0;
875
876 size_t nSourceLen = pOld.GetLength();
877 size_t nReplacementLen = pNew.GetLength();
878 size_t count = 0;
879 const wchar_t* pStart = m_pData->m_String;
880 wchar_t* pEnd = m_pData->m_String + m_pData->m_nDataLength;
881 while (true) {
882 const wchar_t* pTarget =
883 FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
884 pOld.unterminated_c_str(), nSourceLen);
885 if (!pTarget)
886 break;
887
888 count++;
889 pStart = pTarget + nSourceLen;
890 }
891 if (count == 0)
892 return 0;
893
894 size_t nNewLength =
895 m_pData->m_nDataLength + (nReplacementLen - nSourceLen) * count;
896
897 if (nNewLength == 0) {
898 clear();
899 return count;
900 }
901
902 RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
903 pStart = m_pData->m_String;
904 wchar_t* pDest = pNewData->m_String;
905 for (size_t i = 0; i < count; i++) {
906 const wchar_t* pTarget =
907 FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
908 pOld.unterminated_c_str(), nSourceLen);
909 FXSYS_wmemcpy(pDest, pStart, pTarget - pStart);
910 pDest += pTarget - pStart;
911 FXSYS_wmemcpy(pDest, pNew.unterminated_c_str(), pNew.GetLength());
912 pDest += pNew.GetLength();
913 pStart = pTarget + nSourceLen;
914 }
915 FXSYS_wmemcpy(pDest, pStart, pEnd - pStart);
916 m_pData.Swap(pNewData);
917 return count;
918 }
919
920 // static
FromASCII(ByteStringView bstr)921 WideString WideString::FromASCII(ByteStringView bstr) {
922 WideString result;
923 result.Reserve(bstr.GetLength());
924 for (char c : bstr)
925 result.InsertAtBack(static_cast<wchar_t>(c & 0x7f));
926 return result;
927 }
928
929 // static
FromLatin1(ByteStringView bstr)930 WideString WideString::FromLatin1(ByteStringView bstr) {
931 WideString result;
932 result.Reserve(bstr.GetLength());
933 for (char c : bstr)
934 result.InsertAtBack(static_cast<wchar_t>(c & 0xff));
935 return result;
936 }
937
938 // static
FromDefANSI(ByteStringView bstr)939 WideString WideString::FromDefANSI(ByteStringView bstr) {
940 size_t dest_len = FX_MultiByteToWideChar(FX_CodePage::kDefANSI, bstr, {});
941 if (!dest_len)
942 return WideString();
943
944 WideString wstr;
945 {
946 // Span's lifetime must end before ReleaseBuffer() below.
947 pdfium::span<wchar_t> dest_buf = wstr.GetBuffer(dest_len);
948 FX_MultiByteToWideChar(FX_CodePage::kDefANSI, bstr, dest_buf);
949 }
950 wstr.ReleaseBuffer(dest_len);
951 return wstr;
952 }
953
// static
// Returns the result of FX_UTF8Decode() on |str|.
WideString WideString::FromUTF8(ByteStringView str) {
  return FX_UTF8Decode(str);
}
958
959 // static
FromUTF16LE(const unsigned short * wstr,size_t wlen)960 WideString WideString::FromUTF16LE(const unsigned short* wstr, size_t wlen) {
961 if (!wstr || wlen == 0)
962 return WideString();
963
964 WideString result;
965 {
966 // Span's lifetime must end before ReleaseBuffer() below.
967 pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
968 for (size_t i = 0; i < wlen; i++)
969 buf[i] = wstr[i];
970 }
971 result.ReleaseBuffer(wlen);
972 return result;
973 }
974
FromUTF16BE(const unsigned short * wstr,size_t wlen)975 WideString WideString::FromUTF16BE(const unsigned short* wstr, size_t wlen) {
976 if (!wstr || wlen == 0)
977 return WideString();
978
979 WideString result;
980 {
981 // Span's lifetime must end before ReleaseBuffer() below.
982 pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
983 for (size_t i = 0; i < wlen; i++) {
984 auto wch = wstr[i];
985 wch = (wch >> 8) | (wch << 8);
986 buf[i] = wch;
987 }
988 }
989 result.ReleaseBuffer(wlen);
990 return result;
991 }
992
// Overwrites the character at |index| with |c|. |index| must be a valid
// index; this is only enforced by a DCHECK.
void WideString::SetAt(size_t index, wchar_t c) {
  DCHECK(IsValidIndex(index));
  // Make the data block safe to modify before writing into it.
  ReallocBeforeWrite(m_pData->m_nDataLength);
  m_pData->m_String[index] = c;
}
998
Compare(const wchar_t * str) const999 int WideString::Compare(const wchar_t* str) const {
1000 if (m_pData)
1001 return str ? wcscmp(m_pData->m_String, str) : 1;
1002 return (!str || str[0] == 0) ? 0 : -1;
1003 }
1004
Compare(const WideString & str) const1005 int WideString::Compare(const WideString& str) const {
1006 if (!m_pData)
1007 return str.m_pData ? -1 : 0;
1008 if (!str.m_pData)
1009 return 1;
1010
1011 size_t this_len = m_pData->m_nDataLength;
1012 size_t that_len = str.m_pData->m_nDataLength;
1013 size_t min_len = std::min(this_len, that_len);
1014 int result = FXSYS_wmemcmp(m_pData->m_String, str.m_pData->m_String, min_len);
1015 if (result != 0)
1016 return result;
1017 if (this_len == that_len)
1018 return 0;
1019 return this_len < that_len ? -1 : 1;
1020 }
1021
CompareNoCase(const wchar_t * str) const1022 int WideString::CompareNoCase(const wchar_t* str) const {
1023 if (m_pData)
1024 return str ? FXSYS_wcsicmp(m_pData->m_String, str) : 1;
1025 return (!str || str[0] == 0) ? 0 : -1;
1026 }
1027
WStringLength(const unsigned short * str)1028 size_t WideString::WStringLength(const unsigned short* str) {
1029 size_t len = 0;
1030 if (str)
1031 while (str[len])
1032 len++;
1033 return len;
1034 }
1035
// Strips the characters in kWideTrimChars from both ends of the string.
void WideString::Trim() {
  TrimRight(kWideTrimChars);
  TrimLeft(kWideTrimChars);
}

// Strips every leading and trailing occurrence of |target|.
void WideString::Trim(wchar_t target) {
  // Wrap the character in a NUL-terminated array so the view-based overloads
  // can be reused.
  wchar_t str[2] = {target, 0};
  TrimRight(str);
  TrimLeft(str);
}

// Strips from both ends every character that appears in |targets|.
void WideString::Trim(WideStringView targets) {
  TrimRight(targets);
  TrimLeft(targets);
}
1051
// Strips the characters in kWideTrimChars from the start of the string.
void WideString::TrimLeft() {
  TrimLeft(kWideTrimChars);
}

// Strips every leading occurrence of |target|.
void WideString::TrimLeft(wchar_t target) {
  // Wrap the character in a NUL-terminated array so the view-based overload
  // can be reused.
  wchar_t str[2] = {target, 0};
  TrimLeft(str);
}
1060
TrimLeft(WideStringView targets)1061 void WideString::TrimLeft(WideStringView targets) {
1062 if (!m_pData || targets.IsEmpty())
1063 return;
1064
1065 size_t len = GetLength();
1066 if (len == 0)
1067 return;
1068
1069 size_t pos = 0;
1070 while (pos < len) {
1071 size_t i = 0;
1072 while (i < targets.GetLength() &&
1073 targets.CharAt(i) != m_pData->m_String[pos]) {
1074 i++;
1075 }
1076 if (i == targets.GetLength())
1077 break;
1078 pos++;
1079 }
1080 if (!pos)
1081 return;
1082
1083 ReallocBeforeWrite(len);
1084 size_t nDataLength = len - pos;
1085 memmove(m_pData->m_String, m_pData->m_String + pos,
1086 (nDataLength + 1) * sizeof(wchar_t));
1087 m_pData->m_nDataLength = nDataLength;
1088 }
1089
// Strips the characters in kWideTrimChars from the end of the string.
void WideString::TrimRight() {
  TrimRight(kWideTrimChars);
}

// Strips every trailing occurrence of |target|.
void WideString::TrimRight(wchar_t target) {
  // Wrap the character in a NUL-terminated array so the view-based overload
  // can be reused.
  wchar_t str[2] = {target, 0};
  TrimRight(str);
}
1098
TrimRight(WideStringView targets)1099 void WideString::TrimRight(WideStringView targets) {
1100 if (IsEmpty() || targets.IsEmpty())
1101 return;
1102
1103 size_t pos = GetLength();
1104 while (pos && targets.Contains(m_pData->m_String[pos - 1]))
1105 pos--;
1106
1107 if (pos < m_pData->m_nDataLength) {
1108 ReallocBeforeWrite(m_pData->m_nDataLength);
1109 m_pData->m_String[pos] = 0;
1110 m_pData->m_nDataLength = pos;
1111 }
1112 }
1113
GetInteger() const1114 int WideString::GetInteger() const {
1115 return m_pData ? FXSYS_wtoi(m_pData->m_String) : 0;
1116 }
1117
operator <<(std::wostream & os,const WideString & str)1118 std::wostream& operator<<(std::wostream& os, const WideString& str) {
1119 return os.write(str.c_str(), str.GetLength());
1120 }
1121
operator <<(std::ostream & os,const WideString & str)1122 std::ostream& operator<<(std::ostream& os, const WideString& str) {
1123 os << str.ToUTF8();
1124 return os;
1125 }
1126
operator <<(std::wostream & os,WideStringView str)1127 std::wostream& operator<<(std::wostream& os, WideStringView str) {
1128 return os.write(str.unterminated_c_str(), str.GetLength());
1129 }
1130
operator <<(std::ostream & os,WideStringView str)1131 std::ostream& operator<<(std::ostream& os, WideStringView str) {
1132 os << FX_UTF8Encode(str);
1133 return os;
1134 }
1135
1136 } // namespace fxcrt
1137
FX_HashCode_GetW(WideStringView str)1138 uint32_t FX_HashCode_GetW(WideStringView str) {
1139 uint32_t dwHashCode = 0;
1140 for (WideStringView::UnsignedType c : str)
1141 dwHashCode = 1313 * dwHashCode + c;
1142 return dwHashCode;
1143 }
1144
FX_HashCode_GetLoweredW(WideStringView str)1145 uint32_t FX_HashCode_GetLoweredW(WideStringView str) {
1146 uint32_t dwHashCode = 0;
1147 for (wchar_t c : str) // match FXSYS_towlower() arg type.
1148 dwHashCode = 1313 * dwHashCode + FXSYS_towlower(c);
1149 return dwHashCode;
1150 }
1151