xref: /aosp_15_r20/external/cronet/base/strings/string_util_unittest.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/string_util.h"
6 
7 #include <math.h>
8 #include <stdarg.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 
12 #include <algorithm>
13 #include <string>
14 #include <string_view>
15 #include <type_traits>
16 
17 #include "base/bits.h"
18 #include "base/strings/string_piece.h"
19 #include "base/strings/utf_string_conversions.h"
20 #include "build/build_config.h"
21 #include "testing/gmock/include/gmock/gmock.h"
22 #include "testing/gtest/include/gtest/gtest.h"
23 
24 using ::testing::ElementsAre;
25 
26 namespace base {
27 
28 namespace {
29 
30 const struct trim_case {
31   const wchar_t* input;
32   const TrimPositions positions;
33   const wchar_t* output;
34   const TrimPositions return_value;
35 } trim_cases[] = {
36     {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
37     {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
38     {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
39     {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
40     {L"", TRIM_ALL, L"", TRIM_NONE},
41     {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
42     {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
43     {L"  ", TRIM_ALL, L"", TRIM_ALL},
44     {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
45     {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
46 };
47 
48 const struct trim_case_ascii {
49   const char* input;
50   const TrimPositions positions;
51   const char* output;
52   const TrimPositions return_value;
53 } trim_cases_ascii[] = {
54     {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
55     {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
56     {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
57     {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
58     {"", TRIM_ALL, "", TRIM_NONE},
59     {"  ", TRIM_LEADING, "", TRIM_LEADING},
60     {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
61     {"  ", TRIM_ALL, "", TRIM_ALL},
62     {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
63 };
64 
65 // Helper used to test TruncateUTF8ToByteSize.
Truncated(const std::string & input,const size_t byte_size,std::string * output)66 bool Truncated(const std::string& input,
67                const size_t byte_size,
68                std::string* output) {
69     size_t prev = input.length();
70     TruncateUTF8ToByteSize(input, byte_size, output);
71     return prev != output->length();
72 }
73 
74 using TestFunction = bool (*)(StringPiece str);
75 
76 // Helper used to test IsStringUTF8[AllowingNoncharacters].
TestStructurallyValidUtf8(TestFunction fn)77 void TestStructurallyValidUtf8(TestFunction fn) {
78   EXPECT_TRUE(fn("abc"));
79   EXPECT_TRUE(fn("\xC2\x81"));
80   EXPECT_TRUE(fn("\xE1\x80\xBF"));
81   EXPECT_TRUE(fn("\xF1\x80\xA0\xBF"));
82   EXPECT_TRUE(fn("\xF1\x80\xA0\xBF"));
83   EXPECT_TRUE(fn("a\xC2\x81\xE1\x80\xBF\xF1\x80\xA0\xBF"));
84 
85   // U+FEFF used as UTF-8 BOM.
86   // clang-format off
87   EXPECT_TRUE(fn("\xEF\xBB\xBF" "abc"));
88   // clang-format on
89 
90   // Embedded nulls in canonical UTF-8 representation.
91   using std::string_literals::operator""s;
92   const std::string kEmbeddedNull = "embedded\0null"s;
93   EXPECT_TRUE(fn(kEmbeddedNull));
94 }
95 
96 // Helper used to test IsStringUTF8[AllowingNoncharacters].
TestStructurallyInvalidUtf8(TestFunction fn)97 void TestStructurallyInvalidUtf8(TestFunction fn) {
98   // Invalid encoding of U+1FFFE (0x8F instead of 0x9F)
99   EXPECT_FALSE(fn("\xF0\x8F\xBF\xBE"));
100 
101   // Surrogate code points
102   EXPECT_FALSE(fn("\xED\xA0\x80\xED\xBF\xBF"));
103   EXPECT_FALSE(fn("\xED\xA0\x8F"));
104   EXPECT_FALSE(fn("\xED\xBF\xBF"));
105 
106   // Overlong sequences
107   EXPECT_FALSE(fn("\xC0\x80"));                  // U+0000
108   EXPECT_FALSE(fn("\xC1\x80\xC1\x81"));          // "AB"
109   EXPECT_FALSE(fn("\xE0\x80\x80"));              // U+0000
110   EXPECT_FALSE(fn("\xE0\x82\x80"));              // U+0080
111   EXPECT_FALSE(fn("\xE0\x9F\xBF"));              // U+07FF
112   EXPECT_FALSE(fn("\xF0\x80\x80\x8D"));          // U+000D
113   EXPECT_FALSE(fn("\xF0\x80\x82\x91"));          // U+0091
114   EXPECT_FALSE(fn("\xF0\x80\xA0\x80"));          // U+0800
115   EXPECT_FALSE(fn("\xF0\x8F\xBB\xBF"));          // U+FEFF (BOM)
116   EXPECT_FALSE(fn("\xF8\x80\x80\x80\xBF"));      // U+003F
117   EXPECT_FALSE(fn("\xFC\x80\x80\x80\xA0\xA5"));  // U+00A5
118 
119   // Beyond U+10FFFF (the upper limit of Unicode codespace)
120   EXPECT_FALSE(fn("\xF4\x90\x80\x80"));          // U+110000
121   EXPECT_FALSE(fn("\xF8\xA0\xBF\x80\xBF"));      // 5 bytes
122   EXPECT_FALSE(fn("\xFC\x9C\xBF\x80\xBF\x80"));  // 6 bytes
123 
124   // BOM in UTF-16(BE|LE)
125   EXPECT_FALSE(fn("\xFE\xFF"));
126   EXPECT_FALSE(fn("\xFF\xFE"));
127 
128   // Strings in legacy encodings. We can certainly make up strings
129   // in a legacy encoding that are valid in UTF-8, but in real data,
130   // most of them are invalid as UTF-8.
131 
132   // cafe with U+00E9 in ISO-8859-1
133   EXPECT_FALSE(fn("caf\xE9"));
134   // U+AC00, U+AC001 in EUC-KR
135   EXPECT_FALSE(fn("\xB0\xA1\xB0\xA2"));
136   // U+4F60 U+597D in Big5
137   EXPECT_FALSE(fn("\xA7\x41\xA6\x6E"));
138   // "abc" with U+201[CD] in windows-125[0-8]
139   // clang-format off
140   EXPECT_FALSE(fn("\x93" "abc\x94"));
141   // clang-format on
142   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
143   EXPECT_FALSE(fn("\xD9\xEE\xE4\xEE"));
144   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
145   EXPECT_FALSE(fn("\xE3\xE5\xE9\xDC"));
146 
147   // BOM in UTF-32(BE|LE)
148   using std::string_literals::operator""s;
149   const std::string kUtf32BeBom = "\x00\x00\xFE\xFF"s;
150   EXPECT_FALSE(fn(kUtf32BeBom));
151   const std::string kUtf32LeBom = "\xFF\xFE\x00\x00"s;
152   EXPECT_FALSE(fn(kUtf32LeBom));
153 }
154 
155 // Helper used to test IsStringUTF8[AllowingNoncharacters].
TestNoncharacters(TestFunction fn,bool expected_result)156 void TestNoncharacters(TestFunction fn, bool expected_result) {
157   EXPECT_EQ(fn("\xEF\xB7\x90"), expected_result);      // U+FDD0
158   EXPECT_EQ(fn("\xEF\xB7\x9F"), expected_result);      // U+FDDF
159   EXPECT_EQ(fn("\xEF\xB7\xAF"), expected_result);      // U+FDEF
160   EXPECT_EQ(fn("\xEF\xBF\xBE"), expected_result);      // U+FFFE
161   EXPECT_EQ(fn("\xEF\xBF\xBF"), expected_result);      // U+FFFF
162   EXPECT_EQ(fn("\xF0\x9F\xBF\xBE"), expected_result);  // U+01FFFE
163   EXPECT_EQ(fn("\xF0\x9F\xBF\xBF"), expected_result);  // U+01FFFF
164   EXPECT_EQ(fn("\xF0\xAF\xBF\xBE"), expected_result);  // U+02FFFE
165   EXPECT_EQ(fn("\xF0\xAF\xBF\xBF"), expected_result);  // U+02FFFF
166   EXPECT_EQ(fn("\xF0\xBF\xBF\xBE"), expected_result);  // U+03FFFE
167   EXPECT_EQ(fn("\xF0\xBF\xBF\xBF"), expected_result);  // U+03FFFF
168   EXPECT_EQ(fn("\xF1\x8F\xBF\xBE"), expected_result);  // U+04FFFE
169   EXPECT_EQ(fn("\xF1\x8F\xBF\xBF"), expected_result);  // U+04FFFF
170   EXPECT_EQ(fn("\xF1\x9F\xBF\xBE"), expected_result);  // U+05FFFE
171   EXPECT_EQ(fn("\xF1\x9F\xBF\xBF"), expected_result);  // U+05FFFF
172   EXPECT_EQ(fn("\xF1\xAF\xBF\xBE"), expected_result);  // U+06FFFE
173   EXPECT_EQ(fn("\xF1\xAF\xBF\xBF"), expected_result);  // U+06FFFF
174   EXPECT_EQ(fn("\xF1\xBF\xBF\xBE"), expected_result);  // U+07FFFE
175   EXPECT_EQ(fn("\xF1\xBF\xBF\xBF"), expected_result);  // U+07FFFF
176   EXPECT_EQ(fn("\xF2\x8F\xBF\xBE"), expected_result);  // U+08FFFE
177   EXPECT_EQ(fn("\xF2\x8F\xBF\xBF"), expected_result);  // U+08FFFF
178   EXPECT_EQ(fn("\xF2\x9F\xBF\xBE"), expected_result);  // U+09FFFE
179   EXPECT_EQ(fn("\xF2\x9F\xBF\xBF"), expected_result);  // U+09FFFF
180   EXPECT_EQ(fn("\xF2\xAF\xBF\xBE"), expected_result);  // U+0AFFFE
181   EXPECT_EQ(fn("\xF2\xAF\xBF\xBF"), expected_result);  // U+0AFFFF
182   EXPECT_EQ(fn("\xF2\xBF\xBF\xBE"), expected_result);  // U+0BFFFE
183   EXPECT_EQ(fn("\xF2\xBF\xBF\xBF"), expected_result);  // U+0BFFFF
184   EXPECT_EQ(fn("\xF3\x8F\xBF\xBE"), expected_result);  // U+0CFFFE
185   EXPECT_EQ(fn("\xF3\x8F\xBF\xBF"), expected_result);  // U+0CFFFF
186   EXPECT_EQ(fn("\xF3\x9F\xBF\xBE"), expected_result);  // U+0DFFFE
187   EXPECT_EQ(fn("\xF3\x9F\xBF\xBF"), expected_result);  // U+0DFFFF
188   EXPECT_EQ(fn("\xF3\xAF\xBF\xBE"), expected_result);  // U+0EFFFE
189   EXPECT_EQ(fn("\xF3\xAF\xBF\xBF"), expected_result);  // U+0EFFFF
190   EXPECT_EQ(fn("\xF3\xBF\xBF\xBE"), expected_result);  // U+0FFFFE
191   EXPECT_EQ(fn("\xF3\xBF\xBF\xBF"), expected_result);  // U+0FFFFF
192   EXPECT_EQ(fn("\xF4\x8F\xBF\xBE"), expected_result);  // U+10FFFE
193   EXPECT_EQ(fn("\xF4\x8F\xBF\xBF"), expected_result);  // U+10FFFF
194 }
195 
TEST(StringUtilTest,TruncateUTF8ToByteSize)196 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
197   std::string output;
198 
199   // Empty strings and invalid byte_size arguments
200   EXPECT_FALSE(Truncated(std::string(), 0, &output));
201   EXPECT_EQ(output, "");
202   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
203   EXPECT_EQ(output, "");
204   EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
205   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
206 
207   // Testing the truncation of valid UTF8 correctly
208   EXPECT_TRUE(Truncated("abc", 2, &output));
209   EXPECT_EQ(output, "ab");
210   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
211   EXPECT_EQ(output.compare("\xc2\x81"), 0);
212   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
213   EXPECT_EQ(output.compare("\xc2\x81"), 0);
214   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
215   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
216 
217   {
218     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
219     const std::string array_string(array, std::size(array));
220     EXPECT_TRUE(Truncated(array_string, 4, &output));
221     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
222   }
223 
224   {
225     const char array[] = "\x00\xc2\x81\xc2\x81";
226     const std::string array_string(array, std::size(array));
227     EXPECT_TRUE(Truncated(array_string, 4, &output));
228     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
229   }
230 
231   // Testing invalid UTF8
232   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
233   EXPECT_EQ(output.compare(""), 0);
234   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
235   EXPECT_EQ(output.compare(""), 0);
236   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
237   EXPECT_EQ(output.compare(""), 0);
238 
239   // Testing invalid UTF8 mixed with valid UTF8
240   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
241   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
242   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
243   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
244   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
245               10, &output));
246   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
247   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
248               10, &output));
249   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
250   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
251   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
252 
253   // Overlong sequences
254   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
255   EXPECT_EQ(output.compare(""), 0);
256   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
257   EXPECT_EQ(output.compare(""), 0);
258   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
259   EXPECT_EQ(output.compare(""), 0);
260   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
261   EXPECT_EQ(output.compare(""), 0);
262   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
263   EXPECT_EQ(output.compare(""), 0);
264   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
265   EXPECT_EQ(output.compare(""), 0);
266   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
267   EXPECT_EQ(output.compare(""), 0);
268   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
269   EXPECT_EQ(output.compare(""), 0);
270   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
271   EXPECT_EQ(output.compare(""), 0);
272   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
273   EXPECT_EQ(output.compare(""), 0);
274   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
275   EXPECT_EQ(output.compare(""), 0);
276 
277   // Beyond U+10FFFF (the upper limit of Unicode codespace)
278   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
279   EXPECT_EQ(output.compare(""), 0);
280   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
281   EXPECT_EQ(output.compare(""), 0);
282   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
283   EXPECT_EQ(output.compare(""), 0);
284 
285   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
286   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
287   EXPECT_EQ(output.compare(""), 0);
288   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
289   EXPECT_EQ(output.compare(""), 0);
290 
291   {
292     const char array[] = "\x00\x00\xfe\xff";
293     const std::string array_string(array, std::size(array));
294     EXPECT_TRUE(Truncated(array_string, 4, &output));
295     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
296   }
297 
298   // Variants on the previous test
299   {
300     const char array[] = "\xff\xfe\x00\x00";
301     const std::string array_string(array, 4);
302     EXPECT_FALSE(Truncated(array_string, 4, &output));
303     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
304   }
305   {
306     const char array[] = "\xff\x00\x00\xfe";
307     const std::string array_string(array, std::size(array));
308     EXPECT_TRUE(Truncated(array_string, 4, &output));
309     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
310   }
311 
312   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
313   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
314   EXPECT_EQ(output.compare(""), 0);
315   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
316   EXPECT_EQ(output.compare(""), 0);
317   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
318   EXPECT_EQ(output.compare(""), 0);
319   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
320   EXPECT_EQ(output.compare(""), 0);
321   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
322   EXPECT_EQ(output.compare(""), 0);
323 
324   // Strings in legacy encodings that are valid in UTF-8, but
325   // are invalid as UTF-8 in real data.
326   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
327   EXPECT_EQ(output.compare("caf"), 0);
328   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
329   EXPECT_EQ(output.compare(""), 0);
330   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
331   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
332   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
333               &output));
334   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
335 
336   // Testing using the same string as input and output.
337   EXPECT_FALSE(Truncated(output, 4, &output));
338   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
339   EXPECT_TRUE(Truncated(output, 3, &output));
340   EXPECT_EQ(output.compare("\xa7\x41"), 0);
341 
342   // "abc" with U+201[CD] in windows-125[0-8]
343   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
344   EXPECT_EQ(output.compare("\x93" "abc"), 0);
345 
346   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
347   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
348   EXPECT_EQ(output.compare(""), 0);
349 
350   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
351   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
352   EXPECT_EQ(output.compare(""), 0);
353 }
354 
355 #if defined(WCHAR_T_IS_16_BIT)
// For each kind of char16_t-based argument, checks at compile time that the
// wchar_t accessor returns the expected pointer type and at run time that the
// returned pointer equals the address of the original storage.
TEST(StringUtilTest, as_wcstr) {
  // Writable array.
  char16_t mutable_array[10] = {};
  static_assert(
      std::is_same_v<decltype(as_writable_wcstr(mutable_array)), wchar_t*>);
  EXPECT_EQ(static_cast<void*>(mutable_array),
            as_writable_wcstr(mutable_array));

  // Writable std::u16string.
  std::u16string mutable_string(10, '\0');
  static_assert(
      std::is_same_v<decltype(as_writable_wcstr(mutable_string)), wchar_t*>);
  EXPECT_EQ(static_cast<const void*>(mutable_string.data()),
            as_writable_wcstr(mutable_string));

  // Read-only array.
  const char16_t const_array[10] = {};
  static_assert(
      std::is_same_v<decltype(as_wcstr(const_array)), const wchar_t*>);
  EXPECT_EQ(static_cast<const void*>(const_array), as_wcstr(const_array));

  // Read-only std::u16string.
  const std::u16string const_string(10, '\0');
  static_assert(
      std::is_same_v<decltype(as_wcstr(const_string)), const wchar_t*>);
  EXPECT_EQ(static_cast<const void*>(const_string.data()),
            as_wcstr(const_string));

  // StringPiece16 view over the read-only array.
  StringPiece16 view = const_array;
  static_assert(std::is_same_v<decltype(as_wcstr(view)), const wchar_t*>);
  EXPECT_EQ(static_cast<const void*>(view.data()), as_wcstr(view));
}
380 
// For each kind of wchar_t-based argument, checks at compile time that the
// char16_t accessor returns the expected pointer type and at run time that the
// returned pointer equals the address of the original storage.
TEST(StringUtilTest, as_u16cstr) {
  // Writable array.
  wchar_t mutable_array[10] = {};
  static_assert(
      std::is_same_v<decltype(as_writable_u16cstr(mutable_array)), char16_t*>);
  EXPECT_EQ(static_cast<void*>(mutable_array),
            as_writable_u16cstr(mutable_array));

  // Writable std::wstring.
  std::wstring mutable_string(10, '\0');
  static_assert(
      std::is_same_v<decltype(as_writable_u16cstr(mutable_string)),
                     char16_t*>);
  EXPECT_EQ(static_cast<const void*>(mutable_string.data()),
            as_writable_u16cstr(mutable_string));

  // Read-only array.
  const wchar_t const_array[10] = {};
  static_assert(
      std::is_same_v<decltype(as_u16cstr(const_array)), const char16_t*>);
  EXPECT_EQ(static_cast<const void*>(const_array), as_u16cstr(const_array));

  // Read-only std::wstring.
  const std::wstring const_string(10, '\0');
  static_assert(
      std::is_same_v<decltype(as_u16cstr(const_string)), const char16_t*>);
  EXPECT_EQ(static_cast<const void*>(const_string.data()),
            as_u16cstr(const_string));

  // std::wstring_view over the read-only array.
  std::wstring_view view = const_array;
  static_assert(std::is_same_v<decltype(as_u16cstr(view)), const char16_t*>);
  EXPECT_EQ(static_cast<const void*>(view.data()), as_u16cstr(view));
}
408 #endif  // defined(WCHAR_T_IS_16_BIT)
409 
// Runs the |trim_cases| table through TrimWhitespace() and the
// |trim_cases_ascii| table through TrimWhitespaceASCII(), and additionally
// checks TrimWhitespace() when input and output are the same string.
TEST(StringUtilTest, TrimWhitespace) {
  // Declared outside the loop on purpose: the contents left by one case carry
  // over into the next call.
  std::u16string trimmed;
  for (const trim_case& scenario : trim_cases) {
    EXPECT_EQ(scenario.return_value,
              TrimWhitespace(WideToUTF16(scenario.input), scenario.positions,
                             &trimmed));
    EXPECT_EQ(WideToUTF16(scenario.output), trimmed);
  }

  // Test that TrimWhitespace() can take the same string for input and output
  trimmed = u"  This is a test \r\n";
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(u"This is a test", trimmed);

  // Once more, but with a string of whitespace
  trimmed = u"  \r\n";
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(std::u16string(), trimmed);

  // Narrow-string variant, again reusing one output string across cases.
  std::string trimmed_ascii;
  for (const trim_case_ascii& scenario : trim_cases_ascii) {
    EXPECT_EQ(scenario.return_value,
              TrimWhitespaceASCII(scenario.input, scenario.positions,
                                  &trimmed_ascii));
    EXPECT_EQ(scenario.output, trimmed_ascii);
  }
}
436 
// One wide-string CollapseWhitespace() scenario: collapsing |input| with the
// given |trim| flag is expected to produce |output|.
struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
};

const collapse_case collapse_cases[] = {
    // |trim| == false.
    {L" Google Video ", false, L"Google Video"},
    {L"Google Video", false, L"Google Video"},
    {L"", false, L""},
    {L"  ", false, L""},
    {L"\t\rTest String\n", false, L"Test String"},
    {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    {L"    Test     \n  \t String    ", false, L"Test String"},
    {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    {L"   Test String", false, L"Test String"},
    {L"Test String    ", false, L"Test String"},
    {L"Test String", false, L"Test String"},

    // |trim| == true.
    {L"", true, L""},
    {L"\n", true, L""},
    {L"  \r  ", true, L""},
    {L"\nFoo", true, L"Foo"},
    {L"\r  Foo  ", true, L"Foo"},
    {L" Foo bar ", true, L"Foo bar"},
    {L"  \tFoo  bar  \n", true, L"Foo bar"},
    {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
462 
// Runs every |collapse_cases| entry through CollapseWhitespace() after
// converting the wide literals to UTF-16.
TEST(StringUtilTest, CollapseWhitespace) {
  for (const collapse_case& scenario : collapse_cases) {
    const std::u16string expected = WideToUTF16(scenario.output);
    EXPECT_EQ(expected,
              CollapseWhitespace(WideToUTF16(scenario.input), scenario.trim));
  }
}
469 
// One narrow-string CollapseWhitespaceASCII() scenario: collapsing |input|
// with the given |trim| flag is expected to produce |output|.
struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
};

const collapse_case_ascii collapse_cases_ascii[] = {
    // |trim| == false.
    {" Google Video ", false, "Google Video"},
    {"Google Video", false, "Google Video"},
    {"", false, ""},
    {"  ", false, ""},
    {"\t\rTest String\n", false, "Test String"},
    {"    Test     \n  \t String    ", false, "Test String"},
    {"   Test String", false, "Test String"},
    {"Test String    ", false, "Test String"},
    {"Test String", false, "Test String"},

    // |trim| == true.
    {"", true, ""},
    {"\n", true, ""},
    {"  \r  ", true, ""},
    {"\nFoo", true, "Foo"},
    {"\r  Foo  ", true, "Foo"},
    {" Foo bar ", true, "Foo bar"},
    // \u00A0 is whitespace, but not _ASCII_ whitespace, so it should not be
    // collapsed by CollapseWhitespaceASCII().
    {"Foo\u00A0bar", true, "Foo\u00A0bar"},
    {"  \tFoo  bar  \n", true, "Foo bar"},
    {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
496 
// Runs every |collapse_cases_ascii| entry through CollapseWhitespaceASCII().
TEST(StringUtilTest, CollapseWhitespaceASCII) {
  for (const collapse_case_ascii& scenario : collapse_cases_ascii) {
    EXPECT_EQ(scenario.output,
              CollapseWhitespaceASCII(scenario.input, scenario.trim));
  }
}
502 
// Runs the shared UTF-8 helper suites against both predicates. The two differ
// only in the result expected for noncharacters; SCOPED_TRACE labels any
// failure with the predicate that was under test.
TEST(StringUtilTest, IsStringUTF8) {
  {
    SCOPED_TRACE("IsStringUTF8");
    TestStructurallyValidUtf8(IsStringUTF8);
    TestStructurallyInvalidUtf8(IsStringUTF8);
    // Noncharacters are expected to be rejected.
    TestNoncharacters(IsStringUTF8, false);
  }

  {
    SCOPED_TRACE("IsStringUTF8AllowingNoncharacters");
    TestStructurallyValidUtf8(IsStringUTF8AllowingNoncharacters);
    TestStructurallyInvalidUtf8(IsStringUTF8AllowingNoncharacters);
    // Noncharacters are expected to be accepted.
    TestNoncharacters(IsStringUTF8AllowingNoncharacters, true);
  }
}
518 
TEST(StringUtilTest,IsStringASCII)519 TEST(StringUtilTest, IsStringASCII) {
520   static char char_ascii[] =
521       "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
522   static char16_t char16_ascii[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8',
523                                     '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', '0',
524                                     '1', '2', '3', '4', '5', '6', '7', '8', '9',
525                                     '0', 'A', 'B', 'C', 'D', 'E', 'F', 0};
526   static std::wstring wchar_ascii(
527       L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");
528 
529   // Test a variety of the fragment start positions and lengths in order to make
530   // sure that bit masking in IsStringASCII works correctly.
531   // Also, test that a non-ASCII character will be detected regardless of its
532   // position inside the string.
533   {
534     const size_t string_length = std::size(char_ascii) - 1;
535     for (size_t offset = 0; offset < 8; ++offset) {
536       for (size_t len = 0, max_len = string_length - offset; len < max_len;
537            ++len) {
538         EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
539         for (size_t char_pos = offset; char_pos < len; ++char_pos) {
540           char_ascii[char_pos] |= '\x80';
541           EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
542           char_ascii[char_pos] &= ~'\x80';
543         }
544       }
545     }
546   }
547 
548   {
549     const size_t string_length = std::size(char16_ascii) - 1;
550     for (size_t offset = 0; offset < 4; ++offset) {
551       for (size_t len = 0, max_len = string_length - offset; len < max_len;
552            ++len) {
553         EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
554         for (size_t char_pos = offset; char_pos < len; ++char_pos) {
555           char16_ascii[char_pos] |= 0x80;
556           EXPECT_FALSE(
557               IsStringASCII(StringPiece16(char16_ascii + offset, len)));
558           char16_ascii[char_pos] &= ~0x80;
559           // Also test when the upper half is non-zero.
560           char16_ascii[char_pos] |= 0x100;
561           EXPECT_FALSE(
562               IsStringASCII(StringPiece16(char16_ascii + offset, len)));
563           char16_ascii[char_pos] &= ~0x100;
564         }
565       }
566     }
567   }
568 
569 #if defined(WCHAR_T_IS_32_BIT)
570   {
571     const size_t string_length = wchar_ascii.length();
572     for (size_t len = 0; len < string_length; ++len) {
573       EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));
574       for (size_t char_pos = 0; char_pos < len; ++char_pos) {
575         wchar_ascii[char_pos] |= 0x80;
576         EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
577         wchar_ascii[char_pos] &= ~0x80;
578         wchar_ascii[char_pos] |= 0x100;
579         EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
580         wchar_ascii[char_pos] &= ~0x100;
581         wchar_ascii[char_pos] |= 0x10000;
582         EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
583         wchar_ascii[char_pos] &= ~0x10000;
584       }
585     }
586   }
587 #endif  // WCHAR_T_IS_32_BIT
588 }
589 
// Round-trips ASCII text between narrow and UTF-16 strings with
// ASCIIToUTF16() / UTF16ToASCII(), including empty strings and a string with
// an embedded NUL.
TEST(StringUtilTest, ConvertASCII) {
  // The same text as a narrow and as a wide literal.
  static const struct {
    const char* narrow;
    const wchar_t* wide;
  } kCases[] = {
      {"Google Video", L"Google Video"},
      {"Hello, world\n", L"Hello, world\n"},
      {"0123ABCDwxyz \a\b\t\r\n!+,.~", L"0123ABCDwxyz \a\b\t\r\n!+,.~"},
  };

  for (const auto& test_case : kCases) {
    EXPECT_TRUE(IsStringASCII(test_case.narrow));
    std::u16string utf16 = ASCIIToUTF16(test_case.narrow);
    EXPECT_EQ(WideToUTF16(test_case.wide), utf16);

    std::string ascii = UTF16ToASCII(WideToUTF16(test_case.wide));
    EXPECT_EQ(test_case.narrow, ascii);
  }

  EXPECT_FALSE(IsStringASCII("Google \x80Video"));

  // Convert empty strings.
  std::u16string empty16;
  std::string empty;
  EXPECT_EQ(empty, UTF16ToASCII(empty16));
  EXPECT_EQ(empty16, ASCIIToUTF16(empty));

  // Convert strings with an embedded NUL character. The length excludes only
  // the literal's terminating NUL and is kept as size_t so it can be compared
  // with string lengths without narrowing or casts.
  const char chars_with_nul[] = "test\0string";
  const size_t length_with_nul = std::size(chars_with_nul) - 1;
  std::string string_with_nul(chars_with_nul, length_with_nul);
  std::u16string string16_with_nul = ASCIIToUTF16(string_with_nul);
  EXPECT_EQ(length_with_nul, string16_with_nul.length());
  std::string narrow_with_nul = UTF16ToASCII(string16_with_nul);
  EXPECT_EQ(length_with_nul, narrow_with_nul.length());
  EXPECT_EQ(string_with_nul, narrow_with_nul);
}
632 
// Checks ToLowerASCII() on single characters and on whole strings, for both
// narrow and UTF-16 arguments.
TEST(StringUtilTest, ToLowerASCII) {
  // Narrow characters: an upper-case letter, a lower-case letter and a digit.
  EXPECT_EQ('c', ToLowerASCII('C'));
  EXPECT_EQ('c', ToLowerASCII('c'));
  EXPECT_EQ('2', ToLowerASCII('2'));

  // The same three inputs as char16_t.
  EXPECT_EQ(u'c', ToLowerASCII(u'C'));
  EXPECT_EQ(u'c', ToLowerASCII(u'c'));
  EXPECT_EQ(u'2', ToLowerASCII(u'2'));

  // Whole strings.
  EXPECT_EQ("cc2", ToLowerASCII("Cc2"));
  EXPECT_EQ(u"cc2", ToLowerASCII(u"Cc2"));

  // Non-ASCII characters are unmodified. U+00C4 is LATIN CAPITAL LETTER A WITH
  // DIAERESIS.
  EXPECT_EQ('\xc4', ToLowerASCII('\xc4'));
  EXPECT_EQ(u'\x00c4', ToLowerASCII(u'\x00c4'));
}
650 
// Checks ToUpperASCII() on single characters and on whole strings, for both
// narrow and UTF-16 arguments.
TEST(StringUtilTest, ToUpperASCII) {
  // Narrow characters: an upper-case letter, a lower-case letter and a digit.
  EXPECT_EQ('C', ToUpperASCII('C'));
  EXPECT_EQ('C', ToUpperASCII('c'));
  EXPECT_EQ('2', ToUpperASCII('2'));

  // The same three inputs as char16_t.
  EXPECT_EQ(u'C', ToUpperASCII(u'C'));
  EXPECT_EQ(u'C', ToUpperASCII(u'c'));
  EXPECT_EQ(u'2', ToUpperASCII(u'2'));

  // Whole strings.
  EXPECT_EQ("CC2", ToUpperASCII("Cc2"));
  EXPECT_EQ(u"CC2", ToUpperASCII(u"Cc2"));

  // Non-ASCII characters are unmodified. U+00E4 is LATIN SMALL LETTER A WITH
  // DIAERESIS.
  EXPECT_EQ('\xe4', ToUpperASCII('\xe4'));
  EXPECT_EQ(u'\x00e4', ToUpperASCII(u'\x00e4'));
}
668 
// Checks the text FormatBytesUnlocalized() produces for a table of byte
// counts.
TEST(StringUtilTest, FormatBytesUnlocalized) {
  // Binary multiples used to spell out the inputs below.
  constexpr int64_t kKiB = 1024;
  constexpr int64_t kMiB = 1024 * kKiB;
  constexpr int64_t kGiB = 1024 * kMiB;

  static const struct {
    int64_t bytes;
    const char* expected;
  } cases[] = {
      // In this table a single post-decimal digit is expected whenever fewer
      // than three pre-decimal digits are displayed ("99.0 GB" but "100 GB"),
      // except for plain byte counts, which are shown without one.
      {0, "0 B"},
      {512, "512 B"},
      {kMiB, "1.0 MB"},
      {kGiB, "1.0 GB"},
      {10 * kGiB, "10.0 GB"},
      {99 * kGiB, "99.0 GB"},
      {105 * kGiB, "105 GB"},
      {105 * kGiB + 500 * kMiB, "105 GB"},
      // All bits set except the leftmost one.
      {~(bits::LeftmostBit<int64_t>()), "8192 PB"},

      // Inputs that are not exact multiples of the displayed unit.
      {99 * kKiB + 103, "99.1 kB"},
      {kMiB + 103, "1.0 MB"},
      {kMiB + 205 * kKiB, "1.2 MB"},
      {kGiB + 927 * kMiB, "1.9 GB"},
      {10 * kGiB, "10.0 GB"},
      {100 * kGiB, "100 GB"},
  };

  for (const auto& test_case : cases) {
    EXPECT_EQ(ASCIIToUTF16(test_case.expected),
              FormatBytesUnlocalized(test_case.bytes));
  }
}
TEST(StringUtilTest,ReplaceSubstringsAfterOffset)702 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
703   static const struct {
704     StringPiece str;
705     size_t start_offset;
706     StringPiece find_this;
707     StringPiece replace_with;
708     StringPiece expected;
709   } cases[] = {
710       {"aaa", 0, "", "b", "aaa"},
711       {"aaa", 1, "", "b", "aaa"},
712       {"aaa", 0, "a", "b", "bbb"},
713       {"aaa", 0, "aa", "b", "ba"},
714       {"aaa", 0, "aa", "bbb", "bbba"},
715       {"aaaaa", 0, "aa", "b", "bba"},
716       {"ababaaababa", 0, "aba", "", "baaba"},
717       {"ababaaababa", 0, "aba", "_", "_baa_ba"},
718       {"ababaaababa", 0, "aba", "__", "__baa__ba"},
719       {"ababaaababa", 0, "aba", "___", "___baa___ba"},
720       {"ababaaababa", 0, "aba", "____", "____baa____ba"},
721       {"ababaaababa", 0, "aba", "_____", "_____baa_____ba"},
722       {"abb", 0, "ab", "a", "ab"},
723       {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
724       {"Not found", 0, "x", "0", "Not found"},
725       {"Not found again", 5, "x", "0", "Not found again"},
726       {" Making it much longer ", 0, " ", "Four score and seven years ago",
727        "Four score and seven years agoMakingFour score and seven years agoit"
728        "Four score and seven years agomuchFour score and seven years agolonger"
729        "Four score and seven years ago"},
730       {" Making it much much much much shorter ", 0,
731        "Making it much much much much shorter", "", "  "},
732       {"so much much much much much very much much much shorter", 0, "much ",
733        "", "so very shorter"},
734       {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
735       {"Replace me only me once", 9, "me ", "", "Replace me only once"},
736       {"abababab", 2, "ab", "c", "abccc"},
737       {"abababab", 1, "ab", "c", "abccc"},
738       {"abababab", 1, "aba", "c", "abcbab"},
739   };
740 
741   // std::u16string variant
742   for (const auto& scenario : cases) {
743     std::u16string str = ASCIIToUTF16(scenario.str);
744     ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
745                                  ASCIIToUTF16(scenario.find_this),
746                                  ASCIIToUTF16(scenario.replace_with));
747     EXPECT_EQ(ASCIIToUTF16(scenario.expected), str);
748   }
749 
750   // std::string with insufficient capacity: expansion must realloc the buffer.
751   for (const auto& scenario : cases) {
752     std::string str(scenario.str);
753     str.shrink_to_fit();  // This is nonbinding, but it's the best we've got.
754     ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
755                                  scenario.find_this, scenario.replace_with);
756     EXPECT_EQ(scenario.expected, str);
757   }
758 
759   // std::string with ample capacity: should be possible to grow in-place.
760   for (const auto& scenario : cases) {
761     std::string str(scenario.str);
762     str.reserve(std::max(scenario.str.length(), scenario.expected.length()) *
763                 2);
764 
765     ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
766                                  scenario.find_this, scenario.replace_with);
767     EXPECT_EQ(scenario.expected, str);
768   }
769 }
770 
// Checks that ReplaceFirstSubstringAfterOffset() rewrites only the first match
// found at or after the given offset in a std::u16string.
TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
  struct Scenario {
    const char* input;
    std::u16string::size_type offset;
    const char* needle;
    const char* replacement;
    const char* want;
  };
  static const Scenario kScenarios[] = {
      {"aaa", 0, "a", "b", "baa"},
      {"abb", 0, "ab", "a", "ab"},
      {"Removing some substrings inging", 0, "ing", "",
       "Remov some substrings inging"},
      {"Not found", 0, "x", "0", "Not found"},
      {"Not found again", 5, "x", "0", "Not found again"},
      {" Making it much longer ", 0, " ", "Four score and seven years ago",
       "Four score and seven years agoMaking it much longer "},
      {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
      {"Replace me only me once", 4, "me ", "", "Replace only me once"},
      {"abababab", 2, "ab", "c", "abcabab"},
  };

  for (const Scenario& s : kScenarios) {
    std::u16string text = ASCIIToUTF16(s.input);
    const std::u16string needle16 = ASCIIToUTF16(s.needle);
    const std::u16string replacement16 = ASCIIToUTF16(s.replacement);
    ReplaceFirstSubstringAfterOffset(&text, s.offset, needle16, replacement16);
    EXPECT_EQ(ASCIIToUTF16(s.want), text);
  }
}
800 
// Checks HexDigitToInt() for every hexadecimal digit, in both letter cases.
TEST(StringUtilTest, HexDigitToInt) {
  static const struct {
    char digit;
    int value;
  } kDigits[] = {
      {'0', 0},  {'1', 1},  {'2', 2},  {'3', 3},  {'4', 4},  {'5', 5},
      {'6', 6},  {'7', 7},  {'8', 8},  {'9', 9},  {'A', 10}, {'B', 11},
      {'C', 12}, {'D', 13}, {'E', 14}, {'F', 15},
      // Lower-case letters must decode to the same values.
      {'a', 10}, {'b', 11}, {'c', 12}, {'d', 13}, {'e', 14}, {'f', 15},
  };

  for (const auto& entry : kDigits) {
    EXPECT_EQ(entry.value, HexDigitToInt(entry.digit)) << entry.digit;
  }
}
827 
// Checks JoinString() on a growing std::vector<std::string>, including empty
// parts and a separator given as a string literal.
TEST(StringUtilTest, JoinString) {
  const std::string kCommaSpace = ", ";
  std::vector<std::string> pieces;

  // Nothing to join.
  EXPECT_EQ(std::string(), JoinString(pieces, kCommaSpace));

  // A lone empty part joins to the empty string.
  pieces.emplace_back();
  EXPECT_EQ(std::string(), JoinString(pieces, kCommaSpace));
  pieces.clear();

  pieces.emplace_back("a");
  EXPECT_EQ("a", JoinString(pieces, kCommaSpace));

  pieces.emplace_back("b");
  pieces.emplace_back("c");
  EXPECT_EQ("a, b, c", JoinString(pieces, kCommaSpace));

  // A trailing empty part still gets a separator in front of it.
  pieces.emplace_back();
  EXPECT_EQ("a, b, c, ", JoinString(pieces, kCommaSpace));

  pieces.emplace_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
}
849 
// Checks JoinString() on a growing std::vector<std::u16string>, including
// empty parts and a separator given as a UTF-16 literal.
TEST(StringUtilTest, JoinString16) {
  const std::u16string kCommaSpace = u", ";
  std::vector<std::u16string> pieces;

  // Nothing to join.
  EXPECT_EQ(std::u16string(), JoinString(pieces, kCommaSpace));

  // A lone empty part joins to the empty string.
  pieces.emplace_back();
  EXPECT_EQ(std::u16string(), JoinString(pieces, kCommaSpace));
  pieces.clear();

  pieces.emplace_back(u"a");
  EXPECT_EQ(u"a", JoinString(pieces, kCommaSpace));

  pieces.emplace_back(u"b");
  pieces.emplace_back(u"c");
  EXPECT_EQ(u"a, b, c", JoinString(pieces, kCommaSpace));

  // A trailing empty part still gets a separator in front of it.
  pieces.emplace_back(u"");
  EXPECT_EQ(u"a, b, c, ", JoinString(pieces, kCommaSpace));

  pieces.emplace_back(u" ");
  EXPECT_EQ(u"a|b|c|| ", JoinString(pieces, u"|"));
}
871 
// Checks JoinString() on a growing std::vector<StringPiece>, including
// default-constructed (empty) pieces.
TEST(StringUtilTest, JoinStringPiece) {
  const std::string kCommaSpace = ", ";
  std::vector<StringPiece> pieces;

  // Nothing to join.
  EXPECT_EQ(std::string(), JoinString(pieces, kCommaSpace));

  // An empty first part (https://crbug.com/698073).
  pieces.emplace_back();
  EXPECT_EQ(std::string(), JoinString(pieces, kCommaSpace));
  pieces.clear();

  pieces.emplace_back("a");
  EXPECT_EQ("a", JoinString(pieces, kCommaSpace));

  pieces.emplace_back("b");
  pieces.emplace_back("c");
  EXPECT_EQ("a, b, c", JoinString(pieces, kCommaSpace));

  // A trailing empty piece still gets a separator in front of it.
  pieces.emplace_back();
  EXPECT_EQ("a, b, c, ", JoinString(pieces, kCommaSpace));

  pieces.emplace_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
}
894 
// Checks JoinString() on a growing std::vector<StringPiece16>. The backing
// std::u16strings are declared up front so the pieces pushed into the vector
// refer to storage that lives for the whole test.
TEST(StringUtilTest, JoinStringPiece16) {
  const std::u16string kCommaSpace = u", ";
  const std::u16string kLetterA = u"a";
  const std::u16string kLetterB = u"b";
  const std::u16string kLetterC = u"c";
  const std::u16string kBlank = u" ";
  std::vector<StringPiece16> pieces;

  // Nothing to join.
  EXPECT_EQ(std::u16string(), JoinString(pieces, kCommaSpace));

  // An empty first part (https://crbug.com/698073).
  pieces.emplace_back();
  EXPECT_EQ(std::u16string(), JoinString(pieces, kCommaSpace));
  pieces.clear();

  pieces.emplace_back(kLetterA);
  EXPECT_EQ(u"a", JoinString(pieces, kCommaSpace));

  pieces.emplace_back(kLetterB);
  pieces.emplace_back(kLetterC);
  EXPECT_EQ(u"a, b, c", JoinString(pieces, kCommaSpace));

  // A trailing empty piece still gets a separator in front of it.
  pieces.emplace_back();
  EXPECT_EQ(u"a, b, c, ", JoinString(pieces, kCommaSpace));

  pieces.emplace_back(kBlank);
  EXPECT_EQ(u"a|b|c|| ", JoinString(pieces, u"|"));
}
921 
// Checks JoinString() when the parts are given as a braced initializer list of
// string literals, std::strings, or StringPieces.
TEST(StringUtilTest, JoinStringInitializerList) {
  const std::string kCommaSpace = ", ";
  const std::string kNothing;

  // An empty list.
  EXPECT_EQ(kNothing, JoinString({}, kCommaSpace));

  // An empty first part (https://crbug.com/698073).
  EXPECT_EQ(kNothing, JoinString({StringPiece()}, kCommaSpace));

  // String literals.
  EXPECT_EQ("a", JoinString({"a"}, kCommaSpace));
  EXPECT_EQ("a, b, c", JoinString({"a", "b", "c"}, kCommaSpace));
  EXPECT_EQ("a, b, c, ",
            JoinString({"a", "b", "c", StringPiece()}, kCommaSpace));
  EXPECT_EQ("a|b|c|| ", JoinString({"a", "b", "c", StringPiece(), " "}, "|"));

  // std::string parts.
  const std::string kFirst = "a";
  const std::string kSecond = "b";
  EXPECT_EQ("a, b", JoinString({kFirst, kSecond}, kCommaSpace));

  // StringPiece parts referring to the strings above.
  const StringPiece kFirstPiece = kFirst;
  const StringPiece kSecondPiece = kSecond;
  EXPECT_EQ("a, b", JoinString({kFirstPiece, kSecondPiece}, kCommaSpace));
}
945 
// Checks JoinString() when UTF-16 parts are given as a braced initializer list
// of std::u16strings or StringPiece16s.
TEST(StringUtilTest, JoinStringInitializerList16) {
  const std::u16string kCommaSpace = u", ";
  const std::u16string kNothing;

  // An empty list.
  EXPECT_EQ(kNothing, JoinString({}, kCommaSpace));

  // An empty first part (https://crbug.com/698073).
  EXPECT_EQ(kNothing, JoinString({StringPiece16()}, kCommaSpace));

  // std::u16string parts.
  const std::u16string kFirst = u"a";
  const std::u16string kSecond = u"b";
  const std::u16string kThird = u"c";
  const std::u16string kBlank = u" ";

  EXPECT_EQ(u"a", JoinString({kFirst}, kCommaSpace));
  EXPECT_EQ(u"a, b, c", JoinString({kFirst, kSecond, kThird}, kCommaSpace));
  EXPECT_EQ(u"a, b, c, ",
            JoinString({kFirst, kSecond, kThird, StringPiece16()},
                       kCommaSpace));
  EXPECT_EQ(u"a|b|c|| ",
            JoinString({kFirst, kSecond, kThird, StringPiece16(), kBlank},
                       u"|"));

  // StringPiece16 parts referring to the strings above.
  const StringPiece16 kFirstPiece = kFirst;
  const StringPiece16 kSecondPiece = kSecond;
  EXPECT_EQ(u"a, b", JoinString({kFirstPiece, kSecondPiece}, kCommaSpace));
}
971 
// Checks StartsWith() for 8-bit and UTF-16 inputs under both comparison modes,
// including prefixes longer than the input and empty operands.
TEST(StringUtilTest, StartsWith) {
  constexpr base::CompareCase kExact = base::CompareCase::SENSITIVE;
  constexpr base::CompareCase kFolded = base::CompareCase::INSENSITIVE_ASCII;

  // 8-bit strings.
  EXPECT_TRUE(StartsWith("javascript:url", "javascript", kExact));
  EXPECT_FALSE(StartsWith("JavaScript:url", "javascript", kExact));
  EXPECT_TRUE(StartsWith("javascript:url", "javascript", kFolded));
  EXPECT_TRUE(StartsWith("JavaScript:url", "javascript", kFolded));
  // The prefix is longer than the input.
  EXPECT_FALSE(StartsWith("java", "javascript", kExact));
  EXPECT_FALSE(StartsWith("java", "javascript", kFolded));
  // Empty input.
  EXPECT_FALSE(StartsWith(std::string(), "javascript", kFolded));
  EXPECT_FALSE(StartsWith(std::string(), "javascript", kExact));
  // Empty prefix.
  EXPECT_TRUE(StartsWith("java", std::string(), kFolded));
  EXPECT_TRUE(StartsWith("java", std::string(), kExact));

  // UTF-16 strings.
  EXPECT_TRUE(StartsWith(u"javascript:url", u"javascript", kExact));
  EXPECT_FALSE(StartsWith(u"JavaScript:url", u"javascript", kExact));
  EXPECT_TRUE(StartsWith(u"javascript:url", u"javascript", kFolded));
  EXPECT_TRUE(StartsWith(u"JavaScript:url", u"javascript", kFolded));
  // The prefix is longer than the input.
  EXPECT_FALSE(StartsWith(u"java", u"javascript", kExact));
  EXPECT_FALSE(StartsWith(u"java", u"javascript", kFolded));
  // Empty input.
  EXPECT_FALSE(StartsWith(std::u16string(), u"javascript", kFolded));
  EXPECT_FALSE(StartsWith(std::u16string(), u"javascript", kExact));
  // Empty prefix.
  EXPECT_TRUE(StartsWith(u"java", std::u16string(), kFolded));
  EXPECT_TRUE(StartsWith(u"java", std::u16string(), kExact));
}
1013 
// Checks EndsWith() for UTF-16 inputs under both comparison modes, including
// suffixes longer than the input, non-terminal matches, and empty operands.
TEST(StringUtilTest, EndsWith) {
  constexpr base::CompareCase kExact = base::CompareCase::SENSITIVE;
  constexpr base::CompareCase kFolded = base::CompareCase::INSENSITIVE_ASCII;

  EXPECT_TRUE(EndsWith(u"Foo.plugin", u".plugin", kExact));
  EXPECT_FALSE(EndsWith(u"Foo.Plugin", u".plugin", kExact));
  EXPECT_TRUE(EndsWith(u"Foo.plugin", u".plugin", kFolded));
  EXPECT_TRUE(EndsWith(u"Foo.Plugin", u".plugin", kFolded));

  // The suffix is longer than the input.
  EXPECT_FALSE(EndsWith(u".plug", u".plugin", kExact));
  EXPECT_FALSE(EndsWith(u".plug", u".plugin", kFolded));

  // The candidate suffix occurs, but not at the end.
  EXPECT_FALSE(EndsWith(u"Foo.plugin Bar", u".plugin", kExact));
  EXPECT_FALSE(EndsWith(u"Foo.plugin Bar", u".plugin", kFolded));

  // Empty input.
  EXPECT_FALSE(EndsWith(std::u16string(), u".plugin", kFolded));
  EXPECT_FALSE(EndsWith(std::u16string(), u".plugin", kExact));

  // Empty suffix.
  EXPECT_TRUE(EndsWith(u"Foo.plugin", std::u16string(), kFolded));
  EXPECT_TRUE(EndsWith(u"Foo.plugin", std::u16string(), kExact));

  // Input and suffix are identical.
  EXPECT_TRUE(EndsWith(u".plugin", u".plugin", kFolded));
  EXPECT_TRUE(EndsWith(u".plugin", u".plugin", kExact));

  // Both operands empty.
  EXPECT_TRUE(EndsWith(std::u16string(), std::u16string(), kFolded));
  EXPECT_TRUE(EndsWith(std::u16string(), std::u16string(), kExact));
}
1046 
// Checks the offsets reported by ReplaceStringPlaceholders() when two
// placeholders appear in ascending and then in swapped order. The expectations
// show that offsets are indexed by placeholder number ($1 first), not by
// position in the output.
TEST(StringUtilTest, GetStringFWithOffsets) {
  const std::vector<std::u16string> subst = {u"1", u"2"};
  std::vector<size_t> offsets;

  EXPECT_EQ(u"Hello, 1. Your number is 2.",
            ReplaceStringPlaceholders(u"Hello, $1. Your number is $2.", subst,
                                      &offsets));
  // Fatal on purpose: indexing |offsets| below would read out of bounds if
  // the size were wrong.
  ASSERT_EQ(2U, offsets.size());
  EXPECT_EQ(7U, offsets[0]);
  EXPECT_EQ(25U, offsets[1]);
  offsets.clear();

  EXPECT_EQ(u"Hello, 2. Your number is 1.",
            ReplaceStringPlaceholders(u"Hello, $2. Your number is $1.", subst,
                                      &offsets));
  ASSERT_EQ(2U, offsets.size());
  EXPECT_EQ(25U, offsets[0]);
  EXPECT_EQ(7U, offsets[1]);
  offsets.clear();
}
1065 
// Checks ReplaceStringPlaceholders() when the format string references more
// placeholders ($1..$6) than there are substitutions (three). The expected
// output shows the unmatched placeholders expanding to nothing.
TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
  const std::vector<std::u16string> replacements = {u"9a", u"8b", u"7c"};

  const std::u16string result = ReplaceStringPlaceholders(
      u"$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i", replacements, nullptr);

  EXPECT_EQ(u"9aa,8bb,7cc,d,e,f,9ag,8bh,7ci", result);
}
1079 
// Checks ReplaceStringPlaceholders() on UTF-16 input with all nine
// placeholders ($1..$9), each used exactly once.
TEST(StringUtilTest, ReplaceStringPlaceholders) {
  const std::vector<std::u16string> replacements = {
      u"9a", u"8b", u"7c", u"6d", u"5e", u"4f", u"3g", u"2h", u"1i",
  };

  const std::u16string result = ReplaceStringPlaceholders(
      u"$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", replacements, nullptr);

  EXPECT_EQ(u"9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", result);
}
1097 
// Some substitutions here are shorter than their two-character placeholder,
// but overall the string gets longer. Checks both the output and the offsets
// reported for it.
TEST(StringUtilTest, ReplaceStringPlaceholdersNetExpansionWithContraction) {
  const std::vector<std::u16string> subst = {
      u"9a____", u"B", u"7c___", u"d", u"5e____",
      u"F",      u"3g___", u"h", u"1i_____",
  };

  const std::u16string original = u"$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i";
  const std::u16string expected =
      u"9a____a,Bb,7c___c,dd,5e____e,Ff,3g___g,hh,1i_____i";

  EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, nullptr));

  std::vector<size_t> offsets;
  EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, &offsets));
  const std::vector<size_t> expected_offsets = {0,  8,  11, 18, 21,
                                                29, 32, 39, 42};
  EXPECT_EQ(subst.size(), offsets.size());
  EXPECT_EQ(expected_offsets, offsets);

  // Fixture sanity check: each expected offset must point at the matching
  // substitution inside |expected|. The loop is bounded by the fixture, not by
  // |offsets|, so a wrong offset count from the function under test cannot
  // cause an out-of-bounds read of |expected_offsets| or |subst|.
  ASSERT_EQ(subst.size(), expected_offsets.size());
  for (size_t i = 0; i < subst.size(); ++i) {
    EXPECT_EQ(expected.substr(expected_offsets[i], subst[i].length()),
              subst[i]);
  }
}
1128 
// One substitution here is longer than its placeholder, but overall the string
// gets shorter. The placeholders are also referenced in a permuted order.
TEST(StringUtilTest, ReplaceStringPlaceholdersNetContractionWithExpansion) {
  const std::vector<std::u16string> replacements = {
      u"z", u"y", u"XYZW", u"x", u"w",
  };

  const std::u16string result =
      ReplaceStringPlaceholders(u"$3_$4$2$1$5", replacements, nullptr);

  EXPECT_EQ(u"XYZW_xyzw", result);
}
1145 
// The expected output shows that only one digit after '$' is consumed: "$16"
// expands as "$1" followed by a literal '6'.
TEST(StringUtilTest, ReplaceStringPlaceholdersOneDigit) {
  const std::vector<std::u16string> replacements = {u"1a"};

  const std::u16string result =
      ReplaceStringPlaceholders(u" $16 ", replacements, nullptr);

  EXPECT_EQ(u" 1a6 ", result);
}
1153 
// The expected output shows that "$-" and "$A", which are not followed by a
// digit, are removed, while the valid "$1" is still substituted.
TEST(StringUtilTest, ReplaceStringPlaceholdersInvalidPlaceholder) {
  const std::vector<std::u16string> replacements = {u"1a"};

  const std::u16string result =
      ReplaceStringPlaceholders(u"+$-+$A+$1+", replacements, nullptr);

  EXPECT_EQ(u"+++1a+", result);
}
1161 
// Checks the std::string flavour of ReplaceStringPlaceholders() with all nine
// placeholders ($1..$9), each used exactly once.
TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
  const std::vector<std::string> replacements = {
      "9a", "8b", "7c", "6d", "5e", "4f", "3g", "2h", "1i",
  };

  const std::string result = ReplaceStringPlaceholders(
      "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", replacements, nullptr);

  EXPECT_EQ("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", result);
}
1180 
// Checks output and offsets when one placeholder is referenced twice and some
// substitutions are never referenced at all.
TEST(StringUtilTest, StdStringReplaceStringPlaceholdersMultipleMatches) {
  const std::vector<std::string> replacements = {
      "4",   // Referenced twice.
      "?",   // Unreferenced.
      "!",   // Unreferenced.
      "16",  // Referenced once.
  };
  const std::string kFormat = "$1 * $1 == $4";
  const std::string kWant = "4 * 4 == 16";

  // Without offset collection.
  EXPECT_EQ(kWant, ReplaceStringPlaceholders(kFormat, replacements, nullptr));

  // With offset collection: one entry per placeholder occurrence.
  std::vector<size_t> reported;
  EXPECT_EQ(kWant, ReplaceStringPlaceholders(kFormat, replacements, &reported));
  const std::vector<size_t> kWantOffsets = {0, 4, 9};
  EXPECT_EQ(kWantOffsets, reported);
}
1196 
// In each expectation below, a run of N '$' characters followed by a digit
// yields N-1 literal '$' characters and the digit, with no substitution made.
TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
  const std::vector<std::string> replacements = {"a", "b", "c"};

  const std::string result =
      ReplaceStringPlaceholders("$$1 $$$2 $$$$3", replacements, nullptr);

  EXPECT_EQ(result, "$1 $$2 $$$3");
}
1205 
TEST(StringUtilTest,LcpyTest)1206 TEST(StringUtilTest, LcpyTest) {
1207   // Test the normal case where we fit in our buffer.
1208   {
1209     char dst[10];
1210     char16_t u16dst[10];
1211     wchar_t wdst[10];
1212     EXPECT_EQ(7U, strlcpy(dst, "abcdefg", std::size(dst)));
1213     EXPECT_EQ(0, memcmp(dst, "abcdefg", sizeof(dst[0]) * 8));
1214     EXPECT_EQ(7U, u16cstrlcpy(u16dst, u"abcdefg", std::size(u16dst)));
1215     EXPECT_EQ(0, memcmp(u16dst, u"abcdefg", sizeof(u16dst[0]) * 8));
1216     EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", std::size(wdst)));
1217     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wdst[0]) * 8));
1218   }
1219 
1220   // Test dst_size == 0, nothing should be written to |dst| and we should
1221   // have the equivalent of strlen(src).
1222   {
1223     char dst[2] = {1, 2};
1224     char16_t u16dst[2] = {1, 2};
1225     wchar_t wdst[2] = {1, 2};
1226     EXPECT_EQ(7U, strlcpy(dst, "abcdefg", 0));
1227     EXPECT_EQ(1, dst[0]);
1228     EXPECT_EQ(2, dst[1]);
1229     EXPECT_EQ(7U, u16cstrlcpy(u16dst, u"abcdefg", 0));
1230     EXPECT_EQ(char16_t{1}, u16dst[0]);
1231     EXPECT_EQ(char16_t{2}, u16dst[1]);
1232     EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", 0));
1233     EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]);
1234     EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]);
1235   }
1236 
1237   // Test the case were we _just_ competely fit including the null.
1238   {
1239     char dst[8];
1240     char16_t u16dst[8];
1241     wchar_t wdst[8];
1242     EXPECT_EQ(7U, strlcpy(dst, "abcdefg", std::size(dst)));
1243     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1244     EXPECT_EQ(7U, u16cstrlcpy(u16dst, u"abcdefg", std::size(u16dst)));
1245     EXPECT_EQ(0, memcmp(u16dst, u"abcdefg", sizeof(u16dst)));
1246     EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", std::size(wdst)));
1247     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wdst)));
1248   }
1249 
1250   // Test the case were we we are one smaller, so we can't fit the null.
1251   {
1252     char dst[7];
1253     char16_t u16dst[7];
1254     wchar_t wdst[7];
1255     EXPECT_EQ(7U, strlcpy(dst, "abcdefg", std::size(dst)));
1256     EXPECT_EQ(0, memcmp(dst, "abcdef", sizeof(dst[0]) * 7));
1257     EXPECT_EQ(7U, u16cstrlcpy(u16dst, u"abcdefg", std::size(u16dst)));
1258     EXPECT_EQ(0, memcmp(u16dst, u"abcdef", sizeof(u16dst[0]) * 7));
1259     EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", std::size(wdst)));
1260     EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wdst[0]) * 7));
1261   }
1262 
1263   // Test the case were we are just too small.
1264   {
1265     char dst[3];
1266     char16_t u16dst[3];
1267     wchar_t wdst[3];
1268     EXPECT_EQ(7U, strlcpy(dst, "abcdefg", std::size(dst)));
1269     EXPECT_EQ(0, memcmp(dst, "ab", sizeof(dst)));
1270     EXPECT_EQ(7U, u16cstrlcpy(u16dst, u"abcdefg", std::size(u16dst)));
1271     EXPECT_EQ(0, memcmp(u16dst, u"ab", sizeof(u16dst)));
1272     EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", std::size(wdst)));
1273     EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wdst)));
1274   }
1275 }
1276 
// Checks IsWprintfFormatPortable() against a table of wide format strings,
// each paired with the verdict the function is expected to return.
TEST(StringUtilTest, WprintfFormatPortabilityTest) {
  struct FormatCase {
    const wchar_t* format;
    bool want_portable;
  };
  static const FormatCase kFormats[] = {
      {L"%ls", true},
      {L"%s", false},
      {L"%S", false},
      {L"%lS", false},
      {L"Hello, %s", false},
      {L"%lc", true},
      {L"%c", false},
      {L"%C", false},
      {L"%lC", false},
      {L"%ls %s", false},
      {L"%s %ls", false},
      {L"%s %ls %s", false},
      {L"%f", true},
      {L"%f %F", false},
      {L"%d %D", false},
      {L"%o %O", false},
      {L"%u %U", false},
      {L"%f %d %o %u", true},
      {L"%-8d (%02.1f%)", true},
      {L"% 10s", false},
      {L"% 10ls", true},
  };

  for (const FormatCase& entry : kFormats) {
    EXPECT_EQ(entry.want_portable, IsWprintfFormatPortable(entry.format));
  }
}
1307 
// Checks MakeStringPiece(), MakeStringPiece16() and MakeWStringView(), at
// compile time from raw pointer pairs and at run time from string iterators.
TEST(StringUtilTest, MakeBasicStringPieceTest) {
  // 8-bit characters.
  {
    constexpr char kFoo[] = "Foo";
    static_assert(MakeStringPiece(kFoo, kFoo + 3) == kFoo);
    static_assert(MakeStringPiece(kFoo, kFoo + 3).data() == kFoo);
    static_assert(MakeStringPiece(kFoo, kFoo + 3).size() == 3);
    // Empty ranges: at the terminating null, and one past the end of the
    // array.
    static_assert(MakeStringPiece(kFoo + 3, kFoo + 3).empty());
    static_assert(MakeStringPiece(kFoo + 4, kFoo + 4).empty());

    const std::string owned = kFoo;
    const auto whole = MakeStringPiece(owned.begin(), owned.end());
    EXPECT_EQ(whole, owned);
    EXPECT_EQ(whole.data(), owned.data());
    EXPECT_EQ(whole.size(), owned.size());
    EXPECT_TRUE(MakeStringPiece(owned.end(), owned.end()).empty());
  }

  // UTF-16 characters.
  {
    constexpr char16_t kBar[] = u"Bar";
    static_assert(MakeStringPiece16(kBar, kBar + 3) == kBar);
    static_assert(MakeStringPiece16(kBar, kBar + 3).data() == kBar);
    static_assert(MakeStringPiece16(kBar, kBar + 3).size() == 3);
    static_assert(MakeStringPiece16(kBar + 3, kBar + 3).empty());
    static_assert(MakeStringPiece16(kBar + 4, kBar + 4).empty());

    const std::u16string owned = kBar;
    const auto whole = MakeStringPiece16(owned.begin(), owned.end());
    EXPECT_EQ(whole, owned);
    EXPECT_EQ(whole.data(), owned.data());
    EXPECT_EQ(whole.size(), owned.size());
    EXPECT_TRUE(MakeStringPiece16(owned.end(), owned.end()).empty());
  }

  // Wide characters.
  {
    constexpr wchar_t kBaz[] = L"Baz";
    static_assert(MakeWStringView(kBaz, kBaz + 3) == kBaz);
    static_assert(MakeWStringView(kBaz, kBaz + 3).data() == kBaz);
    static_assert(MakeWStringView(kBaz, kBaz + 3).size() == 3);
    static_assert(MakeWStringView(kBaz + 3, kBaz + 3).empty());
    static_assert(MakeWStringView(kBaz + 4, kBaz + 4).empty());

    const std::wstring owned = kBaz;
    const auto whole = MakeWStringView(owned.begin(), owned.end());
    EXPECT_EQ(whole, owned);
    EXPECT_EQ(whole.data(), owned.data());
    EXPECT_EQ(whole.size(), owned.size());
    EXPECT_TRUE(MakeWStringView(owned.end(), owned.end()).empty());
  }
}
1348 
// Checks RemoveChars() when input and output are the same string: the return
// value reports whether anything was removed.
TEST(StringUtilTest, RemoveChars) {
  const char kUnwanted[] = "-/+*";
  std::string text = "A-+bc/d!*";

  // Some characters are removed.
  const bool removed_some = RemoveChars(text, kUnwanted, &text);
  EXPECT_TRUE(removed_some);
  EXPECT_EQ("Abcd!", text);

  // A second pass finds nothing left to remove.
  const bool removed_again = RemoveChars(text, kUnwanted, &text);
  EXPECT_FALSE(removed_again);
  EXPECT_EQ("Abcd!", text);

  // The empty string.
  text.clear();
  const bool removed_from_empty = RemoveChars(text, kUnwanted, &text);
  EXPECT_FALSE(removed_from_empty);
  EXPECT_EQ(std::string(), text);
}
1364 
TEST(StringUtilTest,ReplaceChars)1365 TEST(StringUtilTest, ReplaceChars) {
1366   struct TestData {
1367     const char* input;
1368     const char* replace_chars;
1369     const char* replace_with;
1370     const char* output;
1371     bool result;
1372   } cases[] = {
1373       {"", "", "", "", false},
1374       {"t", "t", "t", "t", true},
1375       {"a", "b", "c", "a", false},
1376       {"b", "b", "c", "c", true},
1377       {"bob", "b", "p", "pop", true},
1378       {"bob", "o", "i", "bib", true},
1379       {"test", "", "", "test", false},
1380       {"test", "", "!", "test", false},
1381       {"test", "z", "!", "test", false},
1382       {"test", "e", "!", "t!st", true},
1383       {"test", "e", "!?", "t!?st", true},
1384       {"test", "ez", "!", "t!st", true},
1385       {"test", "zed", "!?", "t!?st", true},
1386       {"test", "t", "!?", "!?es!?", true},
1387       {"test", "et", "!>", "!>!>s!>", true},
1388       {"test", "zest", "!", "!!!!", true},
1389       {"test", "szt", "!", "!e!!", true},
1390       {"test", "t", "test", "testestest", true},
1391       {"tetst", "t", "test", "testeteststest", true},
1392       {"ttttttt", "t", "-", "-------", true},
1393       {"aAaAaAAaAAa", "A", "", "aaaaa", true},
1394       {"xxxxxxxxxx", "x", "", "", true},
1395       {"xxxxxxxxxx", "x", "x", "xxxxxxxxxx", true},
1396       {"xxxxxxxxxx", "x", "y-", "y-y-y-y-y-y-y-y-y-y-", true},
1397       {"xxxxxxxxxx", "x", "xy", "xyxyxyxyxyxyxyxyxyxy", true},
1398       {"xxxxxxxxxx", "x", "zyx", "zyxzyxzyxzyxzyxzyxzyxzyxzyxzyx", true},
1399       {"xaxxaxxxaxxxax", "x", "xy", "xyaxyxyaxyxyxyaxyxyxyaxy", true},
1400       {"-xaxxaxxxaxxxax-", "x", "xy", "-xyaxyxyaxyxyxyaxyxyxyaxy-", true},
1401   };
1402 
1403   for (const TestData& scenario : cases) {
1404     // Test with separate output and input vars.
1405     std::string output;
1406     bool result = ReplaceChars(scenario.input, scenario.replace_chars,
1407                                scenario.replace_with, &output);
1408     EXPECT_EQ(scenario.result, result) << scenario.input;
1409     EXPECT_EQ(scenario.output, output);
1410   }
1411 
1412   for (const TestData& scenario : cases) {
1413     // Test with an input/output var of limited capacity.
1414     std::string input_output = scenario.input;
1415     input_output.shrink_to_fit();
1416     bool result = ReplaceChars(input_output, scenario.replace_chars,
1417                                scenario.replace_with, &input_output);
1418     EXPECT_EQ(scenario.result, result) << scenario.input;
1419     EXPECT_EQ(scenario.output, input_output);
1420   }
1421 
1422   for (const TestData& scenario : cases) {
1423     // Test with an input/output var of ample capacity; should
1424     // not realloc.
1425     std::string input_output = scenario.input;
1426     input_output.reserve(strlen(scenario.output) * 2);
1427     const void* original_buffer = input_output.data();
1428     bool result = ReplaceChars(input_output, scenario.replace_chars,
1429                                scenario.replace_with, &input_output);
1430     EXPECT_EQ(scenario.result, result) << scenario.input;
1431     EXPECT_EQ(scenario.output, input_output);
1432     EXPECT_EQ(original_buffer, input_output.data());
1433   }
1434 }
1435 
// Checks ContainsOnlyChars() with an empty character set, a small digit set,
// and the ASCII and UTF-16 whitespace sets.
TEST(StringUtilTest, ContainsOnlyChars) {
  const std::string kEmpty;
  const std::u16string kEmpty16;

  // With an empty character set, only the empty string qualifies.
  EXPECT_TRUE(ContainsOnlyChars(kEmpty, kEmpty));
  EXPECT_FALSE(ContainsOnlyChars("Hello", kEmpty));

  // Digit sets; the order of the allowed characters is irrelevant.
  EXPECT_TRUE(ContainsOnlyChars(kEmpty, "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
  EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
  EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));

  // ASCII whitespace.
  EXPECT_TRUE(ContainsOnlyChars(kEmpty, kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars("\t \r \n  ", kWhitespaceASCII));
  EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
  EXPECT_FALSE(ContainsOnlyChars("\thello\r \n  ", kWhitespaceASCII));

  // UTF-16 whitespace.
  EXPECT_TRUE(ContainsOnlyChars(kEmpty16, kWhitespaceUTF16));
  EXPECT_TRUE(ContainsOnlyChars(u" ", kWhitespaceUTF16));
  EXPECT_TRUE(ContainsOnlyChars(u"\t", kWhitespaceUTF16));
  EXPECT_TRUE(ContainsOnlyChars(u"\t \r \n  ", kWhitespaceUTF16));
  EXPECT_FALSE(ContainsOnlyChars(u"a", kWhitespaceUTF16));
  EXPECT_FALSE(ContainsOnlyChars(u"\thello\r \n  ", kWhitespaceUTF16));
}
1462 
// Checks CompareCaseInsensitiveASCII() at run time via a table, then repeats
// the same comparisons in static_asserts to cover constant evaluation.
TEST(StringUtilTest, CompareCaseInsensitiveASCII) {
  static const struct {
    StringPiece lhs;
    StringPiece rhs;
    int want;
  } kComparisons[] = {
      {"", "", 0},
      {"Asdf", "aSDf", 0},

      // Differing lengths.
      {"Asdf", "aSDfA", -1},
      {"AsdfA", "aSDf", 1},

      // Differing values.
      {"AsdfA", "aSDfb", -1},
      {"Asdfb", "aSDfA", 1},

      // Non-ASCII bytes are permitted, but are compared case-sensitively.
      {"aaa \xc3\xa4", "AAA \xc3\xa4", 0},
      {"AAA \xc3\x84", "aaa \xc3\xa4", -1},
      {"aaa \xc3\xa4", "AAA \xc3\x84", 1},

      // ASCII bytes should sort before non-ASCII ones.
      {"a", "\xc3\xa4", -1},
      {"\xc3\xa4", "a", 1},
  };

  for (const auto& comparison : kComparisons) {
    EXPECT_EQ(comparison.want,
              CompareCaseInsensitiveASCII(comparison.lhs, comparison.rhs));
  }

  // The same comparisons, evaluated at compile time.
  static_assert(0 == CompareCaseInsensitiveASCII("", ""));
  static_assert(0 == CompareCaseInsensitiveASCII("Asdf", "aSDf"));
  static_assert(-1 == CompareCaseInsensitiveASCII("Asdf", "aSDfA"));
  static_assert(1 == CompareCaseInsensitiveASCII("AsdfA", "aSDf"));
  static_assert(-1 == CompareCaseInsensitiveASCII("AsdfA", "aSDfb"));
  static_assert(1 == CompareCaseInsensitiveASCII("Asdfb", "aSDfA"));
  static_assert(
      0 == CompareCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\xa4"));
  static_assert(
      -1 == CompareCaseInsensitiveASCII("AAA \xc3\x84", "aaa \xc3\xa4"));
  static_assert(
      1 == CompareCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\x84"));
  static_assert(-1 == CompareCaseInsensitiveASCII("a", "\xc3\xa4"));
  static_assert(1 == CompareCaseInsensitiveASCII("\xc3\xa4", "a"));
}
1500 
TEST(StringUtilTest,EqualsCaseInsensitiveASCII)1501 TEST(StringUtilTest, EqualsCaseInsensitiveASCII) {
1502   EXPECT_TRUE(EqualsCaseInsensitiveASCII("", ""));
1503   EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", "aSDF"));
1504   EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", "aSDF"));
1505   EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", "aSDFz"));
1506 
1507   EXPECT_TRUE(EqualsCaseInsensitiveASCII(u"", u""));
1508   EXPECT_TRUE(EqualsCaseInsensitiveASCII(u"Asdf", u"aSDF"));
1509   EXPECT_FALSE(EqualsCaseInsensitiveASCII(u"bsdf", u"aSDF"));
1510   EXPECT_FALSE(EqualsCaseInsensitiveASCII(u"Asdf", u"aSDFz"));
1511 
1512   EXPECT_TRUE(EqualsCaseInsensitiveASCII(u"", ""));
1513   EXPECT_TRUE(EqualsCaseInsensitiveASCII(u"Asdf", "aSDF"));
1514   EXPECT_FALSE(EqualsCaseInsensitiveASCII(u"bsdf", "aSDF"));
1515   EXPECT_FALSE(EqualsCaseInsensitiveASCII(u"Asdf", "aSDFz"));
1516 
1517   EXPECT_TRUE(EqualsCaseInsensitiveASCII("", u""));
1518   EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", u"aSDF"));
1519   EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", u"aSDF"));
1520   EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", u"aSDFz"));
1521 
1522   // Non-ASCII bytes are permitted, but they will be compared case-sensitively.
1523   EXPECT_TRUE(EqualsCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\xa4"));
1524   EXPECT_FALSE(EqualsCaseInsensitiveASCII("aaa \xc3\x84", "AAA \xc3\xa4"));
1525 
1526   // The `std::wstring_view` overloads are only defined on Windows.
1527 #if BUILDFLAG(IS_WIN)
1528   EXPECT_TRUE(EqualsCaseInsensitiveASCII(L"", L""));
1529   EXPECT_TRUE(EqualsCaseInsensitiveASCII(L"Asdf", L"aSDF"));
1530   EXPECT_FALSE(EqualsCaseInsensitiveASCII(L"bsdf", L"aSDF"));
1531   EXPECT_FALSE(EqualsCaseInsensitiveASCII(L"Asdf", L"aSDFz"));
1532 
1533   EXPECT_TRUE(EqualsCaseInsensitiveASCII(L"", ""));
1534   EXPECT_TRUE(EqualsCaseInsensitiveASCII(L"Asdf", "aSDF"));
1535   EXPECT_FALSE(EqualsCaseInsensitiveASCII(L"bsdf", "aSDF"));
1536   EXPECT_FALSE(EqualsCaseInsensitiveASCII(L"Asdf", "aSDFz"));
1537 
1538   EXPECT_TRUE(EqualsCaseInsensitiveASCII("", L""));
1539   EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", L"aSDF"));
1540   EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", L"aSDF"));
1541   EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", L"aSDFz"));
1542 #endif
1543 }
1544 
// Spot-checks IsUnicodeWhitespace on individual wide characters: a handful
// that must be rejected, followed by ASCII and non-ASCII whitespace that must
// be accepted.
TEST(StringUtilTest, IsUnicodeWhitespace) {
  // NOT unicode white space.
  EXPECT_FALSE(IsUnicodeWhitespace(L'\0'));
  EXPECT_FALSE(IsUnicodeWhitespace(L'A'));
  EXPECT_FALSE(IsUnicodeWhitespace(L'0'));
  EXPECT_FALSE(IsUnicodeWhitespace(L'.'));
  EXPECT_FALSE(IsUnicodeWhitespace(L';'));
  EXPECT_FALSE(IsUnicodeWhitespace(L'\x4100'));

  // Actual unicode whitespace.
  EXPECT_TRUE(IsUnicodeWhitespace(L' '));
  EXPECT_TRUE(IsUnicodeWhitespace(L'\xa0'));
  EXPECT_TRUE(IsUnicodeWhitespace(L'\x3000'));
  EXPECT_TRUE(IsUnicodeWhitespace(L'\t'));
  EXPECT_TRUE(IsUnicodeWhitespace(L'\r'));
  EXPECT_TRUE(IsUnicodeWhitespace(L'\v'));
  EXPECT_TRUE(IsUnicodeWhitespace(L'\f'));
  EXPECT_TRUE(IsUnicodeWhitespace(L'\n'));
}
1564 
1565 class WriteIntoTest : public testing::Test {
1566  protected:
WritesCorrectly(size_t num_chars)1567   static void WritesCorrectly(size_t num_chars) {
1568     std::string buffer;
1569     char kOriginal[] = "supercali";
1570     strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1571     // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1572     // string at the first \0.
1573     EXPECT_EQ(
1574         std::string(kOriginal, std::min(num_chars, std::size(kOriginal) - 1)),
1575         std::string(buffer.c_str()));
1576     EXPECT_EQ(num_chars, buffer.size());
1577   }
1578 };
1579 
// Covers WriteInto() for small and large sizes, for a zero-length result, and
// for a string that was copy-constructed from another one.
TEST_F(WriteIntoTest, WriteInto) {
  // Validate that WriteInto reserves enough space and
  // sizes a string correctly.
  WritesCorrectly(1);
  WritesCorrectly(2);
  WritesCorrectly(5000);

  // Validate that WriteInto handles 0-length strings. A length of 1 is
  // requested and nothing is copied; the string must end up empty.
  std::string empty;
  const char kOriginal[] = "original";
  strncpy(WriteInto(&empty, 1), kOriginal, 0);
  EXPECT_STREQ("", empty.c_str());
  EXPECT_EQ(0u, empty.size());

  // Validate that WriteInto doesn't modify other strings
  // when using a Copy-on-Write implementation.
  const char kLive[] = "live";
  const char kDead[] = "dead";
  const std::string live = kLive;
  std::string dead = live;
  strncpy(WriteInto(&dead, 5), kDead, 4);
  // |dead| now holds the new text, while |live| keeps its original value.
  EXPECT_EQ(kDead, dead);
  EXPECT_EQ(4u, dead.size());
  EXPECT_EQ(kLive, live);
  EXPECT_EQ(4u, live.size());
}
1606 
1607 }  // namespace
1608 
1609 }  // namespace base
1610