xref: /aosp_15_r20/external/pdfium/core/fxcrt/widestring.h (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2017 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FXCRT_WIDESTRING_H_
8 #define CORE_FXCRT_WIDESTRING_H_
9 
10 #include <stdarg.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 #include <wchar.h>
14 
15 #include <functional>
16 #include <iosfwd>
17 #include <iterator>
18 #include <utility>
19 
20 #include "core/fxcrt/retain_ptr.h"
21 #include "core/fxcrt/string_data_template.h"
22 #include "core/fxcrt/string_view_template.h"
23 #include "third_party/abseil-cpp/absl/types/optional.h"
24 #include "third_party/base/check.h"
25 #include "third_party/base/containers/span.h"
26 
27 namespace fxcrt {
28 
29 class ByteString;
30 
31 // A mutable string with shared buffers using copy-on-write semantics that
32 // avoids the cost of std::string's iterator stability guarantees.
33 class WideString {
34  public:
35   // TODO(crbug.com/pdfium/2031): Consider switching to `char16_t` instead.
36   using CharType = wchar_t;
37   using const_iterator = const CharType*;
38   using const_reverse_iterator = std::reverse_iterator<const_iterator>;
39 
40   [[nodiscard]] static WideString FormatInteger(int i);
41   [[nodiscard]] static WideString Format(const wchar_t* pFormat, ...);
42   [[nodiscard]] static WideString FormatV(const wchar_t* lpszFormat,
43                                           va_list argList);
44 
45   WideString();
46   WideString(const WideString& other);
47 
48   // Move-construct a WideString. After construction, |other| is empty.
49   WideString(WideString&& other) noexcept;
50 
51   // Make a one-character string from one wide char.
52   explicit WideString(wchar_t ch);
53 
54   // Deliberately implicit to avoid calling on every string literal.
55   // NOLINTNEXTLINE(runtime/explicit)
56   WideString(const wchar_t* ptr);
57 
58   // No implicit conversions from byte strings.
59   // NOLINTNEXTLINE(runtime/explicit)
60   WideString(char) = delete;
61 
62   WideString(const wchar_t* pStr, size_t len);
63 
64   explicit WideString(WideStringView str);
65   WideString(WideStringView str1, WideStringView str2);
66   WideString(const std::initializer_list<WideStringView>& list);
67 
68   ~WideString();
69 
70   [[nodiscard]] static WideString FromASCII(ByteStringView str);
71   [[nodiscard]] static WideString FromLatin1(ByteStringView str);
72   [[nodiscard]] static WideString FromDefANSI(ByteStringView str);
73   [[nodiscard]] static WideString FromUTF8(ByteStringView str);
74   [[nodiscard]] static WideString FromUTF16LE(const unsigned short* str,
75                                               size_t len);
76   [[nodiscard]] static WideString FromUTF16BE(const unsigned short* wstr,
77                                               size_t wlen);
78 
79   [[nodiscard]] static size_t WStringLength(const unsigned short* str);
80 
81   // Explicit conversion to C-style wide string.
82   // Note: Any subsequent modification of |this| will invalidate the result.
c_str()83   const wchar_t* c_str() const { return m_pData ? m_pData->m_String : L""; }
84 
85   // Explicit conversion to WideStringView.
86   // Note: Any subsequent modification of |this| will invalidate the result.
AsStringView()87   WideStringView AsStringView() const {
88     return WideStringView(c_str(), GetLength());
89   }
90 
91   // Explicit conversion to span.
92   // Note: Any subsequent modification of |this| will invalidate the result.
span()93   pdfium::span<const wchar_t> span() const {
94     return pdfium::make_span(m_pData ? m_pData->m_String : nullptr,
95                              GetLength());
96   }
97 
98   // Note: Any subsequent modification of |this| will invalidate iterators.
begin()99   const_iterator begin() const { return m_pData ? m_pData->m_String : nullptr; }
end()100   const_iterator end() const {
101     return m_pData ? m_pData->m_String + m_pData->m_nDataLength : nullptr;
102   }
103 
104   // Note: Any subsequent modification of |this| will invalidate iterators.
rbegin()105   const_reverse_iterator rbegin() const {
106     return const_reverse_iterator(end());
107   }
rend()108   const_reverse_iterator rend() const {
109     return const_reverse_iterator(begin());
110   }
111 
112   // Holds on to buffer if possible for later re-use. Assign WideString()
113   // to force immediate release if desired.
114   void clear();
115 
GetLength()116   size_t GetLength() const { return m_pData ? m_pData->m_nDataLength : 0; }
GetStringLength()117   size_t GetStringLength() const {
118     return m_pData ? wcslen(m_pData->m_String) : 0;
119   }
IsEmpty()120   bool IsEmpty() const { return !GetLength(); }
IsValidIndex(size_t index)121   bool IsValidIndex(size_t index) const { return index < GetLength(); }
IsValidLength(size_t length)122   bool IsValidLength(size_t length) const { return length <= GetLength(); }
123 
124   WideString& operator=(const wchar_t* str);
125   WideString& operator=(WideStringView str);
126   WideString& operator=(const WideString& that);
127 
128   // Move-assign a WideString. After assignment, |that| is empty.
129   WideString& operator=(WideString&& that) noexcept;
130 
131   WideString& operator+=(const wchar_t* str);
132   WideString& operator+=(wchar_t ch);
133   WideString& operator+=(const WideString& str);
134   WideString& operator+=(WideStringView str);
135 
136   bool operator==(const wchar_t* ptr) const;
137   bool operator==(WideStringView str) const;
138   bool operator==(const WideString& other) const;
139 
140   bool operator!=(const wchar_t* ptr) const { return !(*this == ptr); }
141   bool operator!=(WideStringView str) const { return !(*this == str); }
142   bool operator!=(const WideString& other) const { return !(*this == other); }
143 
144   bool operator<(const wchar_t* ptr) const;
145   bool operator<(WideStringView str) const;
146   bool operator<(const WideString& other) const;
147 
148   CharType operator[](const size_t index) const {
149     CHECK(IsValidIndex(index));
150     return m_pData->m_String[index];
151   }
152 
Front()153   CharType Front() const { return GetLength() ? (*this)[0] : 0; }
Back()154   CharType Back() const { return GetLength() ? (*this)[GetLength() - 1] : 0; }
155 
156   void SetAt(size_t index, wchar_t c);
157 
158   int Compare(const wchar_t* str) const;
159   int Compare(const WideString& str) const;
160   int CompareNoCase(const wchar_t* str) const;
161 
162   WideString Substr(size_t offset) const;
163   WideString Substr(size_t first, size_t count) const;
164   WideString First(size_t count) const;
165   WideString Last(size_t count) const;
166 
167   size_t Insert(size_t index, wchar_t ch);
InsertAtFront(wchar_t ch)168   size_t InsertAtFront(wchar_t ch) { return Insert(0, ch); }
InsertAtBack(wchar_t ch)169   size_t InsertAtBack(wchar_t ch) { return Insert(GetLength(), ch); }
170   size_t Delete(size_t index, size_t count = 1);
171 
172   void MakeLower();
173   void MakeUpper();
174 
175   void Trim();
176   void Trim(wchar_t target);
177   void Trim(WideStringView targets);
178 
179   void TrimLeft();
180   void TrimLeft(wchar_t target);
181   void TrimLeft(WideStringView targets);
182 
183   void TrimRight();
184   void TrimRight(wchar_t target);
185   void TrimRight(WideStringView targets);
186 
187   void Reserve(size_t len);
188 
189   // Note: any modification of the string (including ReleaseBuffer()) may
190   // invalidate the span, which must not outlive its buffer.
191   pdfium::span<wchar_t> GetBuffer(size_t nMinBufLength);
192   void ReleaseBuffer(size_t nNewLength);
193 
194   int GetInteger() const;
195 
196   absl::optional<size_t> Find(WideStringView subStr, size_t start = 0) const;
197   absl::optional<size_t> Find(wchar_t ch, size_t start = 0) const;
198   absl::optional<size_t> ReverseFind(wchar_t ch) const;
199 
200   bool Contains(WideStringView lpszSub, size_t start = 0) const {
201     return Find(lpszSub, start).has_value();
202   }
203 
204   bool Contains(char ch, size_t start = 0) const {
205     return Find(ch, start).has_value();
206   }
207 
208   size_t Replace(WideStringView pOld, WideStringView pNew);
209   size_t Remove(wchar_t ch);
210 
IsASCII()211   bool IsASCII() const { return AsStringView().IsASCII(); }
EqualsASCII(ByteStringView that)212   bool EqualsASCII(ByteStringView that) const {
213     return AsStringView().EqualsASCII(that);
214   }
EqualsASCIINoCase(ByteStringView that)215   bool EqualsASCIINoCase(ByteStringView that) const {
216     return AsStringView().EqualsASCIINoCase(that);
217   }
218 
219   ByteString ToASCII() const;
220   ByteString ToLatin1() const;
221   ByteString ToDefANSI() const;
222   ByteString ToUTF8() const;
223 
224   // This method will add \0\0 to the end of the string to represent the
225   // wide string terminator. These values are in the string, not just the data,
226   // so GetLength() will include them.
227   ByteString ToUTF16LE() const;
228 
229   // Replace the characters &<>'" with HTML entities.
230   WideString EncodeEntities() const;
231 
232  protected:
233   using StringData = StringDataTemplate<wchar_t>;
234 
235   void ReallocBeforeWrite(size_t nNewLength);
236   void AllocBeforeWrite(size_t nNewLength);
237   void AllocCopy(WideString& dest, size_t nCopyLen, size_t nCopyIndex) const;
238   void AssignCopy(const wchar_t* pSrcData, size_t nSrcLen);
239   void Concat(const wchar_t* pSrcData, size_t nSrcLen);
240   intptr_t ReferenceCountForTesting() const;
241 
242   RetainPtr<StringData> m_pData;
243 
244   friend class WideString_Assign_Test;
245   friend class WideString_ConcatInPlace_Test;
246   friend class WideString_Construct_Test;
247   friend class StringPool_WideString_Test;
248 };
249 
250 inline WideString operator+(WideStringView str1, WideStringView str2) {
251   return WideString(str1, str2);
252 }
253 inline WideString operator+(WideStringView str1, const wchar_t* str2) {
254   return WideString(str1, str2);
255 }
256 inline WideString operator+(const wchar_t* str1, WideStringView str2) {
257   return WideString(str1, str2);
258 }
259 inline WideString operator+(WideStringView str1, wchar_t ch) {
260   return WideString(str1, WideStringView(ch));
261 }
262 inline WideString operator+(wchar_t ch, WideStringView str2) {
263   return WideString(WideStringView(ch), str2);
264 }
265 inline WideString operator+(const WideString& str1, const WideString& str2) {
266   return WideString(str1.AsStringView(), str2.AsStringView());
267 }
268 inline WideString operator+(const WideString& str1, wchar_t ch) {
269   return WideString(str1.AsStringView(), WideStringView(ch));
270 }
271 inline WideString operator+(wchar_t ch, const WideString& str2) {
272   return WideString(WideStringView(ch), str2.AsStringView());
273 }
274 inline WideString operator+(const WideString& str1, const wchar_t* str2) {
275   return WideString(str1.AsStringView(), str2);
276 }
277 inline WideString operator+(const wchar_t* str1, const WideString& str2) {
278   return WideString(str1, str2.AsStringView());
279 }
280 inline WideString operator+(const WideString& str1, WideStringView str2) {
281   return WideString(str1.AsStringView(), str2);
282 }
283 inline WideString operator+(WideStringView str1, const WideString& str2) {
284   return WideString(str1, str2.AsStringView());
285 }
286 inline bool operator==(const wchar_t* lhs, const WideString& rhs) {
287   return rhs == lhs;
288 }
289 inline bool operator==(WideStringView lhs, const WideString& rhs) {
290   return rhs == lhs;
291 }
292 inline bool operator!=(const wchar_t* lhs, const WideString& rhs) {
293   return rhs != lhs;
294 }
295 inline bool operator!=(WideStringView lhs, const WideString& rhs) {
296   return rhs != lhs;
297 }
298 inline bool operator<(const wchar_t* lhs, const WideString& rhs) {
299   return rhs.Compare(lhs) > 0;
300 }
301 
302 std::wostream& operator<<(std::wostream& os, const WideString& str);
303 std::ostream& operator<<(std::ostream& os, const WideString& str);
304 std::wostream& operator<<(std::wostream& os, WideStringView str);
305 std::ostream& operator<<(std::ostream& os, WideStringView str);
306 
307 // This is declared here for use in gtest-based tests but is defined in a test
308 // support target. This should not be used in production code. Just use
309 // operator<< from above instead.
310 // In some cases, gtest will automatically use operator<< as well, but in this
311 // case, it needs PrintTo() because WideString looks like a container to gtest.
312 void PrintTo(const WideString& str, std::ostream* os);
313 
314 }  // namespace fxcrt
315 
316 using WideString = fxcrt::WideString;
317 
318 uint32_t FX_HashCode_GetW(WideStringView str);
319 uint32_t FX_HashCode_GetLoweredW(WideStringView str);
320 
321 namespace std {
322 
323 template <>
324 struct hash<WideString> {
325   size_t operator()(const WideString& str) const {
326     return FX_HashCode_GetW(str.AsStringView());
327   }
328 };
329 
330 }  // namespace std
331 
332 extern template struct std::hash<WideString>;
333 
334 #endif  // CORE_FXCRT_WIDESTRING_H_
335