1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/strings/string_util.h"
6
7 #include <math.h>
8 #include <stdarg.h>
9 #include <stddef.h>
10 #include <stdint.h>
11
12 #include <algorithm>
13 #include <string>
14 #include <string_view>
15 #include <type_traits>
16
17 #include "base/bits.h"
18 #include "base/strings/string_piece.h"
19 #include "base/strings/utf_string_conversions.h"
20 #include "build/build_config.h"
21 #include "testing/gmock/include/gmock/gmock.h"
22 #include "testing/gtest/include/gtest/gtest.h"
23
24 using ::testing::ElementsAre;
25
26 namespace base {
27
28 namespace {
29
30 const struct trim_case {
31 const wchar_t* input;
32 const TrimPositions positions;
33 const wchar_t* output;
34 const TrimPositions return_value;
35 } trim_cases[] = {
36 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
37 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
38 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
39 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
40 {L"", TRIM_ALL, L"", TRIM_NONE},
41 {L" ", TRIM_LEADING, L"", TRIM_LEADING},
42 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING},
43 {L" ", TRIM_ALL, L"", TRIM_ALL},
44 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
45 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
46 };
47
48 const struct trim_case_ascii {
49 const char* input;
50 const TrimPositions positions;
51 const char* output;
52 const TrimPositions return_value;
53 } trim_cases_ascii[] = {
54 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
55 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
56 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
57 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
58 {"", TRIM_ALL, "", TRIM_NONE},
59 {" ", TRIM_LEADING, "", TRIM_LEADING},
60 {" ", TRIM_TRAILING, "", TRIM_TRAILING},
61 {" ", TRIM_ALL, "", TRIM_ALL},
62 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
63 };
64
65 // Helper used to test TruncateUTF8ToByteSize.
Truncated(const std::string & input,const size_t byte_size,std::string * output)66 bool Truncated(const std::string& input,
67 const size_t byte_size,
68 std::string* output) {
69 size_t prev = input.length();
70 TruncateUTF8ToByteSize(input, byte_size, output);
71 return prev != output->length();
72 }
73
// Pointer to a predicate that takes a StringPiece and returns bool. The UTF-8
// helper functions in this file take the predicate under test in this form.
using TestFunction = bool (*)(StringPiece str);
75
76 // Helper used to test IsStringUTF8[AllowingNoncharacters].
TestStructurallyValidUtf8(TestFunction fn)77 void TestStructurallyValidUtf8(TestFunction fn) {
78 EXPECT_TRUE(fn("abc"));
79 EXPECT_TRUE(fn("\xC2\x81"));
80 EXPECT_TRUE(fn("\xE1\x80\xBF"));
81 EXPECT_TRUE(fn("\xF1\x80\xA0\xBF"));
82 EXPECT_TRUE(fn("\xF1\x80\xA0\xBF"));
83 EXPECT_TRUE(fn("a\xC2\x81\xE1\x80\xBF\xF1\x80\xA0\xBF"));
84
85 // U+FEFF used as UTF-8 BOM.
86 // clang-format off
87 EXPECT_TRUE(fn("\xEF\xBB\xBF" "abc"));
88 // clang-format on
89
90 // Embedded nulls in canonical UTF-8 representation.
91 using std::string_literals::operator""s;
92 const std::string kEmbeddedNull = "embedded\0null"s;
93 EXPECT_TRUE(fn(kEmbeddedNull));
94 }
95
96 // Helper used to test IsStringUTF8[AllowingNoncharacters].
TestStructurallyInvalidUtf8(TestFunction fn)97 void TestStructurallyInvalidUtf8(TestFunction fn) {
98 // Invalid encoding of U+1FFFE (0x8F instead of 0x9F)
99 EXPECT_FALSE(fn("\xF0\x8F\xBF\xBE"));
100
101 // Surrogate code points
102 EXPECT_FALSE(fn("\xED\xA0\x80\xED\xBF\xBF"));
103 EXPECT_FALSE(fn("\xED\xA0\x8F"));
104 EXPECT_FALSE(fn("\xED\xBF\xBF"));
105
106 // Overlong sequences
107 EXPECT_FALSE(fn("\xC0\x80")); // U+0000
108 EXPECT_FALSE(fn("\xC1\x80\xC1\x81")); // "AB"
109 EXPECT_FALSE(fn("\xE0\x80\x80")); // U+0000
110 EXPECT_FALSE(fn("\xE0\x82\x80")); // U+0080
111 EXPECT_FALSE(fn("\xE0\x9F\xBF")); // U+07FF
112 EXPECT_FALSE(fn("\xF0\x80\x80\x8D")); // U+000D
113 EXPECT_FALSE(fn("\xF0\x80\x82\x91")); // U+0091
114 EXPECT_FALSE(fn("\xF0\x80\xA0\x80")); // U+0800
115 EXPECT_FALSE(fn("\xF0\x8F\xBB\xBF")); // U+FEFF (BOM)
116 EXPECT_FALSE(fn("\xF8\x80\x80\x80\xBF")); // U+003F
117 EXPECT_FALSE(fn("\xFC\x80\x80\x80\xA0\xA5")); // U+00A5
118
119 // Beyond U+10FFFF (the upper limit of Unicode codespace)
120 EXPECT_FALSE(fn("\xF4\x90\x80\x80")); // U+110000
121 EXPECT_FALSE(fn("\xF8\xA0\xBF\x80\xBF")); // 5 bytes
122 EXPECT_FALSE(fn("\xFC\x9C\xBF\x80\xBF\x80")); // 6 bytes
123
124 // BOM in UTF-16(BE|LE)
125 EXPECT_FALSE(fn("\xFE\xFF"));
126 EXPECT_FALSE(fn("\xFF\xFE"));
127
128 // Strings in legacy encodings. We can certainly make up strings
129 // in a legacy encoding that are valid in UTF-8, but in real data,
130 // most of them are invalid as UTF-8.
131
132 // cafe with U+00E9 in ISO-8859-1
133 EXPECT_FALSE(fn("caf\xE9"));
134 // U+AC00, U+AC001 in EUC-KR
135 EXPECT_FALSE(fn("\xB0\xA1\xB0\xA2"));
136 // U+4F60 U+597D in Big5
137 EXPECT_FALSE(fn("\xA7\x41\xA6\x6E"));
138 // "abc" with U+201[CD] in windows-125[0-8]
139 // clang-format off
140 EXPECT_FALSE(fn("\x93" "abc\x94"));
141 // clang-format on
142 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
143 EXPECT_FALSE(fn("\xD9\xEE\xE4\xEE"));
144 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
145 EXPECT_FALSE(fn("\xE3\xE5\xE9\xDC"));
146
147 // BOM in UTF-32(BE|LE)
148 using std::string_literals::operator""s;
149 const std::string kUtf32BeBom = "\x00\x00\xFE\xFF"s;
150 EXPECT_FALSE(fn(kUtf32BeBom));
151 const std::string kUtf32LeBom = "\xFF\xFE\x00\x00"s;
152 EXPECT_FALSE(fn(kUtf32LeBom));
153 }
154
155 // Helper used to test IsStringUTF8[AllowingNoncharacters].
TestNoncharacters(TestFunction fn,bool expected_result)156 void TestNoncharacters(TestFunction fn, bool expected_result) {
157 EXPECT_EQ(fn("\xEF\xB7\x90"), expected_result); // U+FDD0
158 EXPECT_EQ(fn("\xEF\xB7\x9F"), expected_result); // U+FDDF
159 EXPECT_EQ(fn("\xEF\xB7\xAF"), expected_result); // U+FDEF
160 EXPECT_EQ(fn("\xEF\xBF\xBE"), expected_result); // U+FFFE
161 EXPECT_EQ(fn("\xEF\xBF\xBF"), expected_result); // U+FFFF
162 EXPECT_EQ(fn("\xF0\x9F\xBF\xBE"), expected_result); // U+01FFFE
163 EXPECT_EQ(fn("\xF0\x9F\xBF\xBF"), expected_result); // U+01FFFF
164 EXPECT_EQ(fn("\xF0\xAF\xBF\xBE"), expected_result); // U+02FFFE
165 EXPECT_EQ(fn("\xF0\xAF\xBF\xBF"), expected_result); // U+02FFFF
166 EXPECT_EQ(fn("\xF0\xBF\xBF\xBE"), expected_result); // U+03FFFE
167 EXPECT_EQ(fn("\xF0\xBF\xBF\xBF"), expected_result); // U+03FFFF
168 EXPECT_EQ(fn("\xF1\x8F\xBF\xBE"), expected_result); // U+04FFFE
169 EXPECT_EQ(fn("\xF1\x8F\xBF\xBF"), expected_result); // U+04FFFF
170 EXPECT_EQ(fn("\xF1\x9F\xBF\xBE"), expected_result); // U+05FFFE
171 EXPECT_EQ(fn("\xF1\x9F\xBF\xBF"), expected_result); // U+05FFFF
172 EXPECT_EQ(fn("\xF1\xAF\xBF\xBE"), expected_result); // U+06FFFE
173 EXPECT_EQ(fn("\xF1\xAF\xBF\xBF"), expected_result); // U+06FFFF
174 EXPECT_EQ(fn("\xF1\xBF\xBF\xBE"), expected_result); // U+07FFFE
175 EXPECT_EQ(fn("\xF1\xBF\xBF\xBF"), expected_result); // U+07FFFF
176 EXPECT_EQ(fn("\xF2\x8F\xBF\xBE"), expected_result); // U+08FFFE
177 EXPECT_EQ(fn("\xF2\x8F\xBF\xBF"), expected_result); // U+08FFFF
178 EXPECT_EQ(fn("\xF2\x9F\xBF\xBE"), expected_result); // U+09FFFE
179 EXPECT_EQ(fn("\xF2\x9F\xBF\xBF"), expected_result); // U+09FFFF
180 EXPECT_EQ(fn("\xF2\xAF\xBF\xBE"), expected_result); // U+0AFFFE
181 EXPECT_EQ(fn("\xF2\xAF\xBF\xBF"), expected_result); // U+0AFFFF
182 EXPECT_EQ(fn("\xF2\xBF\xBF\xBE"), expected_result); // U+0BFFFE
183 EXPECT_EQ(fn("\xF2\xBF\xBF\xBF"), expected_result); // U+0BFFFF
184 EXPECT_EQ(fn("\xF3\x8F\xBF\xBE"), expected_result); // U+0CFFFE
185 EXPECT_EQ(fn("\xF3\x8F\xBF\xBF"), expected_result); // U+0CFFFF
186 EXPECT_EQ(fn("\xF3\x9F\xBF\xBE"), expected_result); // U+0DFFFE
187 EXPECT_EQ(fn("\xF3\x9F\xBF\xBF"), expected_result); // U+0DFFFF
188 EXPECT_EQ(fn("\xF3\xAF\xBF\xBE"), expected_result); // U+0EFFFE
189 EXPECT_EQ(fn("\xF3\xAF\xBF\xBF"), expected_result); // U+0EFFFF
190 EXPECT_EQ(fn("\xF3\xBF\xBF\xBE"), expected_result); // U+0FFFFE
191 EXPECT_EQ(fn("\xF3\xBF\xBF\xBF"), expected_result); // U+0FFFFF
192 EXPECT_EQ(fn("\xF4\x8F\xBF\xBE"), expected_result); // U+10FFFE
193 EXPECT_EQ(fn("\xF4\x8F\xBF\xBF"), expected_result); // U+10FFFF
194 }
195
// Exercises TruncateUTF8ToByteSize() through the Truncated() helper, which
// returns true when the output length differs from the input length. Each
// call is normally followed by a check of the exact bytes left in |output|.
// |output| is reused for the whole test, and near the end it is deliberately
// passed as both input and output, so the statement order matters.
TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  std::string output;

  // Empty strings and invalid byte_size arguments
  EXPECT_FALSE(Truncated(std::string(), 0, &output));
  EXPECT_EQ(output, "");
  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  EXPECT_EQ(output, "");
  // A byte_size larger than the input (including the maximum size_t value)
  // leaves the length unchanged.
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));

  // Testing the truncation of valid UTF8 correctly
  EXPECT_TRUE(Truncated("abc", 2, &output));
  EXPECT_EQ(output, "ab");
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  EXPECT_EQ(output.compare("\xc2\x81"), 0);
  // A limit of 3 falls in the middle of the second two-byte character; the
  // expected output holds only the first character.
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  EXPECT_EQ(output.compare("\xc2\x81"), 0);
  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);

  // Inputs with embedded NUL bytes. Note that std::size(array) also counts
  // the literal's terminating NUL, so |array_string| is one byte longer than
  // the escapes written in the literal.
  {
    const char array[] = "\x00\x00\xc2\x81\xc2\x81";
    const std::string array_string(array, std::size(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
  }

  {
    const char array[] = "\x00\xc2\x81\xc2\x81";
    const std::string array_string(array, std::size(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
  }

  // Testing invalid UTF8 (encoded surrogate code points). The expected output
  // is empty in each case.
  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Testing invalid UTF8 mixed with valid UTF8
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
  EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
  EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
  EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
                         10, &output));
  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
  // The literals below are split around "a" so that it is not parsed as part
  // of the preceding hex escape.
  EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
                        10, &output));
  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
  // Input that starts with the UTF-8 encoding of U+FEFF.
  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
  EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);

  // Overlong sequences
  EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Beyond U+10FFFF (the upper limit of Unicode codespace)
  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
  EXPECT_EQ(output.compare(""), 0);

  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
  EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
  EXPECT_EQ(output.compare(""), 0);

  {
    // UTF-32 BE BOM; std::size(array) again includes the terminating NUL.
    const char array[] = "\x00\x00\xfe\xff";
    const std::string array_string(array, std::size(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
  }

  // Variants on the previous test
  {
    // Unlike the neighbouring blocks, this one passes an explicit length of 4,
    // so the terminating NUL is not part of |array_string|.
    const char array[] = "\xff\xfe\x00\x00";
    const std::string array_string(array, 4);
    EXPECT_FALSE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
  }
  {
    const char array[] = "\xff\x00\x00\xfe";
    const std::string array_string(array, std::size(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
  }

  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Byte strings written in legacy (non-UTF-8) encodings. The expectations
  // below pin the exact bytes that are kept for each of them.
  EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
  EXPECT_EQ(output.compare("caf"), 0);
  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
                        &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);

  // Testing using the same string as input and output. This relies on
  // |output| still holding "\xa7\x41\xa6\x6e" from the check just above.
  EXPECT_FALSE(Truncated(output, 4, &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  EXPECT_TRUE(Truncated(output, 3, &output));
  EXPECT_EQ(output.compare("\xa7\x41"), 0);

  // "abc" with U+201[CD] in windows-125[0-8]
  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
  EXPECT_EQ(output.compare("\x93" "abc"), 0);

  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
  EXPECT_EQ(output.compare(""), 0);

  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
}
354
355 #if defined(WCHAR_T_IS_16_BIT)
// Verifies, for char16_t-based inputs, that as_writable_wcstr() yields
// wchar_t* and as_wcstr() yields const wchar_t*, and that the returned
// pointer has the same address as the input's storage.
TEST(StringUtilTest, as_wcstr) {
  // Writable raw array.
  char16_t mutable_array[10] = {};
  static_assert(
      std::is_same_v<wchar_t*, decltype(as_writable_wcstr(mutable_array))>);
  EXPECT_EQ(static_cast<void*>(mutable_array),
            as_writable_wcstr(mutable_array));

  // Writable std::u16string.
  std::u16string mutable_string(10, '\0');
  static_assert(
      std::is_same_v<wchar_t*, decltype(as_writable_wcstr(mutable_string))>);
  EXPECT_EQ(static_cast<const void*>(mutable_string.data()),
            as_writable_wcstr(mutable_string));

  // Read-only raw array.
  const char16_t const_array[10] = {};
  static_assert(
      std::is_same_v<const wchar_t*, decltype(as_wcstr(const_array))>);
  EXPECT_EQ(static_cast<const void*>(const_array), as_wcstr(const_array));

  // Read-only std::u16string.
  const std::u16string const_string(10, '\0');
  static_assert(
      std::is_same_v<const wchar_t*, decltype(as_wcstr(const_string))>);
  EXPECT_EQ(static_cast<const void*>(const_string.data()),
            as_wcstr(const_string));

  // StringPiece16 viewing the read-only array.
  StringPiece16 view = const_array;
  static_assert(std::is_same_v<const wchar_t*, decltype(as_wcstr(view))>);
  EXPECT_EQ(static_cast<const void*>(view.data()), as_wcstr(view));
}
380
// Verifies, for wchar_t-based inputs, that as_writable_u16cstr() yields
// char16_t* and as_u16cstr() yields const char16_t*, and that the returned
// pointer has the same address as the input's storage.
TEST(StringUtilTest, as_u16cstr) {
  // Writable raw array.
  wchar_t mutable_array[10] = {};
  static_assert(
      std::is_same_v<char16_t*, decltype(as_writable_u16cstr(mutable_array))>);
  EXPECT_EQ(static_cast<void*>(mutable_array),
            as_writable_u16cstr(mutable_array));

  // Writable std::wstring.
  std::wstring mutable_string(10, '\0');
  static_assert(
      std::is_same_v<char16_t*,
                     decltype(as_writable_u16cstr(mutable_string))>);
  EXPECT_EQ(static_cast<const void*>(mutable_string.data()),
            as_writable_u16cstr(mutable_string));

  // Read-only raw array.
  const wchar_t const_array[10] = {};
  static_assert(
      std::is_same_v<const char16_t*, decltype(as_u16cstr(const_array))>);
  EXPECT_EQ(static_cast<const void*>(const_array), as_u16cstr(const_array));

  // Read-only std::wstring.
  const std::wstring const_string(10, '\0');
  static_assert(
      std::is_same_v<const char16_t*, decltype(as_u16cstr(const_string))>);
  EXPECT_EQ(static_cast<const void*>(const_string.data()),
            as_u16cstr(const_string));

  // std::wstring_view viewing the read-only array.
  std::wstring_view view = const_array;
  static_assert(std::is_same_v<const char16_t*, decltype(as_u16cstr(view))>);
  EXPECT_EQ(static_cast<const void*>(view.data()), as_u16cstr(view));
}
408 #endif // defined(WCHAR_T_IS_16_BIT)
409
// Runs every entry of trim_cases through TrimWhitespace() and every entry of
// trim_cases_ascii through TrimWhitespaceASCII(), checking both the returned
// TrimPositions value and the trimmed text. Also checks that TrimWhitespace()
// works when input and output are the same string.
TEST(StringUtilTest, TrimWhitespace) {
  // Declared outside the loop on purpose: each scenario starts with whatever
  // the previous scenario left in |trimmed|.
  std::u16string trimmed;
  for (const trim_case& scenario : trim_cases) {
    const auto reported = TrimWhitespace(WideToUTF16(scenario.input),
                                         scenario.positions, &trimmed);
    EXPECT_EQ(scenario.return_value, reported);
    EXPECT_EQ(WideToUTF16(scenario.output), trimmed);
  }

  // The same string may be passed as both input and output.
  trimmed = u" This is a test \r\n";
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(u"This is a test", trimmed);

  // In-place again, this time on a string made only of whitespace.
  trimmed = u" \r\n";
  EXPECT_EQ(TRIM_ALL, TrimWhitespace(trimmed, TRIM_ALL, &trimmed));
  EXPECT_EQ(std::u16string(), trimmed);

  // Narrow-string variant.
  std::string trimmed_ascii;
  for (const trim_case_ascii& scenario : trim_cases_ascii) {
    const auto reported = TrimWhitespaceASCII(
        scenario.input, scenario.positions, &trimmed_ascii);
    EXPECT_EQ(scenario.return_value, reported);
    EXPECT_EQ(scenario.output, trimmed_ascii);
  }
}
436
// One wide-character scenario for the CollapseWhitespace test: |input| and
// |trim| are the arguments passed to CollapseWhitespace(), and |output| is
// the expected result.
struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
};

static const collapse_case collapse_cases[] = {
    // Scenarios with |trim| == false.
    {L" Google Video ", false, L"Google Video"},
    {L"Google Video", false, L"Google Video"},
    {L"", false, L""},
    {L" ", false, L""},
    {L"\t\rTest String\n", false, L"Test String"},
    {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    {L" Test \n \t String ", false, L"Test String"},
    {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    {L" Test String", false, L"Test String"},
    {L"Test String ", false, L"Test String"},
    {L"Test String", false, L"Test String"},
    // Scenarios with |trim| == true.
    {L"", true, L""},
    {L"\n", true, L""},
    {L" \r ", true, L""},
    {L"\nFoo", true, L"Foo"},
    {L"\r Foo ", true, L"Foo"},
    {L" Foo bar ", true, L"Foo bar"},
    {L" \tFoo bar \n", true, L"Foo bar"},
    {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
462
// Feeds every entry of collapse_cases to CollapseWhitespace() (after
// converting the wide literals to UTF-16) and compares against the expected
// output.
TEST(StringUtilTest, CollapseWhitespace) {
  for (const collapse_case& scenario : collapse_cases) {
    const auto collapsed =
        CollapseWhitespace(WideToUTF16(scenario.input), scenario.trim);
    EXPECT_EQ(WideToUTF16(scenario.output), collapsed);
  }
}
469
// One narrow-string scenario for the CollapseWhitespaceASCII test: |input|
// and |trim| are the arguments passed to CollapseWhitespaceASCII(), and
// |output| is the expected result.
struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
};

static const collapse_case_ascii collapse_cases_ascii[] = {
    // Scenarios with |trim| == false.
    {" Google Video ", false, "Google Video"},
    {"Google Video", false, "Google Video"},
    {"", false, ""},
    {" ", false, ""},
    {"\t\rTest String\n", false, "Test String"},
    {" Test \n \t String ", false, "Test String"},
    {" Test String", false, "Test String"},
    {"Test String ", false, "Test String"},
    {"Test String", false, "Test String"},
    // Scenarios with |trim| == true.
    {"", true, ""},
    {"\n", true, ""},
    {" \r ", true, ""},
    {"\nFoo", true, "Foo"},
    {"\r Foo ", true, "Foo"},
    {" Foo bar ", true, "Foo bar"},
    // U+00A0 counts as whitespace, but it is not ASCII whitespace, so
    // CollapseWhitespaceASCII() is expected to leave it untouched.
    {"Foo\u00A0bar", true, "Foo\u00A0bar"},
    {" \tFoo bar \n", true, "Foo bar"},
    {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
496
// Feeds every entry of collapse_cases_ascii to CollapseWhitespaceASCII() and
// compares against the expected output.
TEST(StringUtilTest, CollapseWhitespaceASCII) {
  for (const collapse_case_ascii& scenario : collapse_cases_ascii) {
    const auto collapsed =
        CollapseWhitespaceASCII(scenario.input, scenario.trim);
    EXPECT_EQ(scenario.output, collapsed);
  }
}
502
// Runs the shared UTF-8 helper suites against both validation functions. The
// two are expected to agree on structurally valid and invalid input and to
// differ only on noncharacters: IsStringUTF8() is expected to reject them,
// IsStringUTF8AllowingNoncharacters() to accept them.
TEST(StringUtilTest, IsStringUTF8) {
  {
    SCOPED_TRACE("IsStringUTF8");
    const TestFunction strict = &IsStringUTF8;
    TestStructurallyValidUtf8(strict);
    TestStructurallyInvalidUtf8(strict);
    TestNoncharacters(strict, /*expected_result=*/false);
  }

  {
    SCOPED_TRACE("IsStringUTF8AllowingNoncharacters");
    const TestFunction lenient = &IsStringUTF8AllowingNoncharacters;
    TestStructurallyValidUtf8(lenient);
    TestStructurallyInvalidUtf8(lenient);
    TestNoncharacters(lenient, /*expected_result=*/true);
  }
}
518
// Checks IsStringASCII() on char, char16_t and (where wchar_t is 32 bits)
// wchar_t input. Each section first verifies that an all-ASCII slice is
// accepted, then corrupts one character of the slice at a time, verifies the
// slice is rejected, and restores the character.
TEST(StringUtilTest, IsStringASCII) {
  // Mutable buffers: the loops below temporarily flip bits in them.
  static char char_ascii[] =
      "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
  static char16_t char16_ascii[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8',
                                    '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', '0',
                                    '1', '2', '3', '4', '5', '6', '7', '8', '9',
                                    '0', 'A', 'B', 'C', 'D', 'E', 'F', 0};
  static std::wstring wchar_ascii(
      L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");

  // Test a variety of the fragment start positions and lengths in order to make
  // sure that bit masking in IsStringASCII works correctly.
  // Also, test that a non-ASCII character will be detected regardless of its
  // position inside the string.
  {
    const size_t string_length = std::size(char_ascii) - 1;
    for (size_t offset = 0; offset < 8; ++offset) {
      for (size_t len = 0, max_len = string_length - offset; len < max_len;
           ++len) {
        EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
        // The slice under test is [offset, offset + len), so every index in
        // that range must be visited to cover all positions in the slice.
        for (size_t char_pos = offset; char_pos < offset + len; ++char_pos) {
          char_ascii[char_pos] |= '\x80';
          EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
          char_ascii[char_pos] &= ~'\x80';
        }
      }
    }
  }

  {
    const size_t string_length = std::size(char16_ascii) - 1;
    for (size_t offset = 0; offset < 4; ++offset) {
      for (size_t len = 0, max_len = string_length - offset; len < max_len;
           ++len) {
        EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
        // As above: visit every index of the slice [offset, offset + len).
        for (size_t char_pos = offset; char_pos < offset + len; ++char_pos) {
          char16_ascii[char_pos] |= 0x80;
          EXPECT_FALSE(
              IsStringASCII(StringPiece16(char16_ascii + offset, len)));
          char16_ascii[char_pos] &= ~0x80;
          // Also test when the upper half is non-zero.
          char16_ascii[char_pos] |= 0x100;
          EXPECT_FALSE(
              IsStringASCII(StringPiece16(char16_ascii + offset, len)));
          char16_ascii[char_pos] &= ~0x100;
        }
      }
    }
  }

#if defined(WCHAR_T_IS_32_BIT)
  {
    const size_t string_length = wchar_ascii.length();
    for (size_t len = 0; len < string_length; ++len) {
      EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));
      for (size_t char_pos = 0; char_pos < len; ++char_pos) {
        // Bit 7 set: outside ASCII but still within one byte.
        wchar_ascii[char_pos] |= 0x80;
        EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x80;
        // A bit in the second byte.
        wchar_ascii[char_pos] |= 0x100;
        EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x100;
        // A bit above the low 16 bits.
        wchar_ascii[char_pos] |= 0x10000;
        EXPECT_FALSE(IsStringASCII(wchar_ascii.substr(0, len)));
        wchar_ascii[char_pos] &= ~0x10000;
      }
    }
  }
#endif  // WCHAR_T_IS_32_BIT
}
589
// Checks ASCIIToUTF16() and UTF16ToASCII() against parallel tables of narrow
// and wide literals, on empty strings, and on a string with an embedded NUL.
// Also spot-checks IsStringASCII() on the narrow inputs.
TEST(StringUtilTest, ConvertASCII) {
  static const char* const char_cases[] = {
    "Google Video",
    "Hello, world\n",
    "0123ABCDwxyz \a\b\t\r\n!+,.~"
  };

  static const wchar_t* const wchar_cases[] = {
    L"Google Video",
    L"Hello, world\n",
    L"0123ABCDwxyz \a\b\t\r\n!+,.~"
  };

  // The loop below indexes both tables with the same index.
  static_assert(std::size(char_cases) == std::size(wchar_cases),
                "char_cases and wchar_cases must have the same length");

  for (size_t i = 0; i < std::size(char_cases); ++i) {
    EXPECT_TRUE(IsStringASCII(char_cases[i]));

    // Narrow -> UTF-16 must match the wide literal.
    const std::u16string expected_utf16 = WideToUTF16(wchar_cases[i]);
    const std::u16string utf16 = ASCIIToUTF16(char_cases[i]);
    EXPECT_EQ(expected_utf16, utf16);

    // UTF-16 -> narrow must match the narrow literal.
    const std::string ascii = UTF16ToASCII(expected_utf16);
    EXPECT_EQ(char_cases[i], ascii);
  }

  EXPECT_FALSE(IsStringASCII("Google \x80Video"));

  // Convert empty strings.
  std::u16string empty16;
  std::string empty;
  EXPECT_EQ(empty, UTF16ToASCII(empty16));
  EXPECT_EQ(empty16, ASCIIToUTF16(empty));

  // Convert strings with an embedded NUL character.
  const char chars_with_nul[] = "test\0string";
  // Number of characters excluding the literal's terminating NUL. Kept as
  // size_t so it compares against length() without narrowing or casts.
  const size_t length_with_nul = std::size(chars_with_nul) - 1;
  std::string string_with_nul(chars_with_nul, length_with_nul);
  std::u16string string16_with_nul = ASCIIToUTF16(string_with_nul);
  EXPECT_EQ(length_with_nul, string16_with_nul.length());
  std::string narrow_with_nul = UTF16ToASCII(string16_with_nul);
  EXPECT_EQ(length_with_nul, narrow_with_nul.length());
  EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
}
632
// Checks ToLowerASCII() on single characters and on whole strings, for both
// narrow and UTF-16 input.
TEST(StringUtilTest, ToLowerASCII) {
  // Single narrow characters: upper case, lower case, and a digit.
  EXPECT_EQ('c', ToLowerASCII('C'));
  EXPECT_EQ('c', ToLowerASCII('c'));
  EXPECT_EQ('2', ToLowerASCII('2'));

  // The same three inputs as char16_t.
  EXPECT_EQ(u'c', ToLowerASCII(u'C'));
  EXPECT_EQ(u'c', ToLowerASCII(u'c'));
  EXPECT_EQ(u'2', ToLowerASCII(u'2'));

  // Whole strings.
  const auto lowered_narrow = ToLowerASCII("Cc2");
  EXPECT_EQ("cc2", lowered_narrow);
  const auto lowered_utf16 = ToLowerASCII(u"Cc2");
  EXPECT_EQ(u"cc2", lowered_utf16);

  // Characters outside ASCII must pass through unchanged. U+00C4 is LATIN
  // CAPITAL LETTER A WITH DIAERESIS.
  EXPECT_EQ('\xc4', ToLowerASCII('\xc4'));
  EXPECT_EQ(u'\x00c4', ToLowerASCII(u'\x00c4'));
}
650
// Checks ToUpperASCII() on single characters and on whole strings, for both
// narrow and UTF-16 input.
TEST(StringUtilTest, ToUpperASCII) {
  // Single narrow characters: upper case, lower case, and a digit.
  EXPECT_EQ('C', ToUpperASCII('C'));
  EXPECT_EQ('C', ToUpperASCII('c'));
  EXPECT_EQ('2', ToUpperASCII('2'));

  // The same three inputs as char16_t.
  EXPECT_EQ(u'C', ToUpperASCII(u'C'));
  EXPECT_EQ(u'C', ToUpperASCII(u'c'));
  EXPECT_EQ(u'2', ToUpperASCII(u'2'));

  // Whole strings.
  const auto raised_narrow = ToUpperASCII("Cc2");
  EXPECT_EQ("CC2", raised_narrow);
  const auto raised_utf16 = ToUpperASCII(u"Cc2");
  EXPECT_EQ(u"CC2", raised_utf16);

  // Characters outside ASCII must pass through unchanged. U+00E4 is LATIN
  // SMALL LETTER A WITH DIAERESIS.
  EXPECT_EQ('\xe4', ToUpperASCII('\xe4'));
  EXPECT_EQ(u'\x00e4', ToUpperASCII(u'\x00e4'));
}
668
// Checks the text FormatBytesUnlocalized() produces for a table of byte
// counts.
TEST(StringUtilTest, FormatBytesUnlocalized) {
  // Binary multiples used to spell out the byte counts below.
  constexpr int64_t kKiB = 1024;
  constexpr int64_t kMiB = 1024 * kKiB;
  constexpr int64_t kGiB = 1024 * kMiB;

  static const struct {
    int64_t bytes;
    const char* expected;
  } cases[] = {
      // Expected behavior: one post-decimal digit is shown when there are
      // under two pre-decimal digits, except where that makes no sense (zero
      // or plain bytes). Because the unit switches once the 1000 mark is
      // crossed, sizes are consistently displayed with around three digits.
      {0, "0 B"},
      {512, "512 B"},
      {kMiB, "1.0 MB"},
      {kGiB, "1.0 GB"},
      {10 * kGiB, "10.0 GB"},
      {99 * kGiB, "99.0 GB"},
      {105 * kGiB, "105 GB"},
      {105 * kGiB + 500 * kMiB, "105 GB"},
      // Every bit set except the leftmost one, i.e. the largest int64_t.
      {~(bits::LeftmostBit<int64_t>()), "8192 PB"},

      // Values that are not exact multiples of a unit.
      {99 * kKiB + 103, "99.1 kB"},
      {kMiB + 103, "1.0 MB"},
      {kMiB + 205 * kKiB, "1.2 MB"},
      {kGiB + 927 * kMiB, "1.9 GB"},
      {10 * kGiB, "10.0 GB"},
      {100 * kGiB, "100 GB"},
  };

  for (const auto& scenario : cases) {
    const auto formatted = FormatBytesUnlocalized(scenario.bytes);
    EXPECT_EQ(ASCIIToUTF16(scenario.expected), formatted);
  }
}
// Checks ReplaceSubstringsAfterOffset() against a table of scenarios. Each
// scenario replaces |find_this| with |replace_with| in |str|, starting the
// search at |start_offset|, and compares the result with |expected|. The
// table is run three times: on std::u16string, on a std::string whose
// capacity was shrunk, and on a std::string with generous reserved capacity.
TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
  static const struct {
    StringPiece str;
    size_t start_offset;
    StringPiece find_this;
    StringPiece replace_with;
    StringPiece expected;
  } cases[] = {
      // An empty search string leaves the input untouched.
      {"aaa", 0, "", "b", "aaa"},
      {"aaa", 1, "", "b", "aaa"},
      {"aaa", 0, "a", "b", "bbb"},
      {"aaa", 0, "aa", "b", "ba"},
      {"aaa", 0, "aa", "bbb", "bbba"},
      {"aaaaa", 0, "aa", "b", "bba"},
      // Replacements of growing length, from shorter than the match to longer.
      {"ababaaababa", 0, "aba", "", "baaba"},
      {"ababaaababa", 0, "aba", "_", "_baa_ba"},
      {"ababaaababa", 0, "aba", "__", "__baa__ba"},
      {"ababaaababa", 0, "aba", "___", "___baa___ba"},
      {"ababaaababa", 0, "aba", "____", "____baa____ba"},
      {"ababaaababa", 0, "aba", "_____", "_____baa_____ba"},
      {"abb", 0, "ab", "a", "ab"},
      {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
      {"Not found", 0, "x", "0", "Not found"},
      {"Not found again", 5, "x", "0", "Not found again"},
      {" Making it much longer ", 0, " ", "Four score and seven years ago",
       "Four score and seven years agoMakingFour score and seven years agoit"
       "Four score and seven years agomuchFour score and seven years agolonger"
       "Four score and seven years ago"},
      // Deleting the phrase leaves the space before it and the space after
      // it, i.e. two spaces.
      {" Making it much much much much shorter ", 0,
       "Making it much much much much shorter", "", "  "},
      {"so much much much much much very much much much shorter", 0, "much ",
       "", "so very shorter"},
      // A start offset past the end of the input.
      {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
      // Matches that begin before |start_offset| are expected to be skipped.
      {"Replace me only me once", 9, "me ", "", "Replace me only once"},
      {"abababab", 2, "ab", "c", "abccc"},
      {"abababab", 1, "ab", "c", "abccc"},
      {"abababab", 1, "aba", "c", "abcbab"},
  };

  // std::u16string variant
  for (const auto& scenario : cases) {
    std::u16string str = ASCIIToUTF16(scenario.str);
    ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
                                 ASCIIToUTF16(scenario.find_this),
                                 ASCIIToUTF16(scenario.replace_with));
    EXPECT_EQ(ASCIIToUTF16(scenario.expected), str);
  }

  // std::string with insufficient capacity: expansion must realloc the buffer.
  for (const auto& scenario : cases) {
    std::string str(scenario.str);
    str.shrink_to_fit();  // This is nonbinding, but it's the best we've got.
    ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
                                 scenario.find_this, scenario.replace_with);
    EXPECT_EQ(scenario.expected, str);
  }

  // std::string with ample capacity: should be possible to grow in-place.
  for (const auto& scenario : cases) {
    std::string str(scenario.str);
    str.reserve(std::max(scenario.str.length(), scenario.expected.length()) *
                2);

    ReplaceSubstringsAfterOffset(&str, scenario.start_offset,
                                 scenario.find_this, scenario.replace_with);
    EXPECT_EQ(scenario.expected, str);
  }
}
770
// Checks ReplaceFirstSubstringAfterOffset() on std::u16string input. Each
// scenario replaces |find_this| with |replace_with| in |str|, starting the
// search at |start_offset|, and compares the result with |expected|; in every
// scenario at most one occurrence is expected to change.
TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
  static const struct {
    const char* str;
    std::u16string::size_type start_offset;
    const char* find_this;
    const char* replace_with;
    const char* expected;
  } cases[] = {
      {"aaa", 0, "a", "b", "baa"},
      {"abb", 0, "ab", "a", "ab"},
      {"Removing some substrings inging", 0, "ing", "",
       "Remov some substrings inging"},
      // No occurrence at all.
      {"Not found", 0, "x", "0", "Not found"},
      {"Not found again", 5, "x", "0", "Not found again"},
      {" Making it much longer ", 0, " ", "Four score and seven years ago",
       "Four score and seven years agoMaking it much longer "},
      // A start offset past the end of the input.
      {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
      {"Replace me only me once", 4, "me ", "", "Replace only me once"},
      {"abababab", 2, "ab", "c", "abcabab"},
  };

  for (const auto& scenario : cases) {
    const std::u16string needle = ASCIIToUTF16(scenario.find_this);
    const std::u16string replacement = ASCIIToUTF16(scenario.replace_with);

    std::u16string subject = ASCIIToUTF16(scenario.str);
    ReplaceFirstSubstringAfterOffset(&subject, scenario.start_offset, needle,
                                     replacement);
    EXPECT_EQ(ASCIIToUTF16(scenario.expected), subject);
  }
}
800
// Verifies HexDigitToInt() for every hexadecimal digit, in both letter cases.
TEST(StringUtilTest, HexDigitToInt) {
  // The index of each character is the value it is expected to decode to.
  constexpr char kUpperDigits[] = "0123456789ABCDEF";
  for (int value = 0; value < 16; ++value) {
    EXPECT_EQ(value, HexDigitToInt(kUpperDigits[value]))
        << "digit '" << kUpperDigits[value] << "'";
  }

  // Verify the lower case as well: 'a' decodes to 10, ..., 'f' to 15.
  constexpr char kLowerLetters[] = "abcdef";
  for (int index = 0; index < 6; ++index) {
    EXPECT_EQ(10 + index, HexDigitToInt(kLowerLetters[index]))
        << "digit '" << kLowerLetters[index] << "'";
  }
}
827
// JoinString() over a vector of std::string, grown step by step so that the
// empty, single-part, and multi-part results are each checked.
TEST(StringUtilTest, JoinString) {
  const std::string kCommaSpace(", ");
  std::vector<std::string> pieces;

  // Nothing to join.
  EXPECT_EQ(std::string(), JoinString(pieces, kCommaSpace));

  // A single empty part.
  pieces.emplace_back();
  EXPECT_EQ(std::string(), JoinString(pieces, kCommaSpace));
  pieces.clear();

  // A single non-empty part: no separator appears in the result.
  pieces.emplace_back("a");
  EXPECT_EQ("a", JoinString(pieces, kCommaSpace));

  pieces.emplace_back("b");
  pieces.emplace_back("c");
  EXPECT_EQ("a, b, c", JoinString(pieces, kCommaSpace));

  // A trailing empty part still gets its separator.
  pieces.emplace_back();
  EXPECT_EQ("a, b, c, ", JoinString(pieces, kCommaSpace));

  // Same parts plus a blank, joined with a different separator.
  pieces.emplace_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
}
849
// UTF-16 counterpart of the JoinString test: joins a vector of std::u16string
// that is grown step by step.
TEST(StringUtilTest, JoinString16) {
  const std::u16string kCommaSpace(u", ");
  std::vector<std::u16string> pieces;

  // Nothing to join.
  EXPECT_EQ(std::u16string(), JoinString(pieces, kCommaSpace));

  // A single empty part.
  pieces.emplace_back();
  EXPECT_EQ(std::u16string(), JoinString(pieces, kCommaSpace));
  pieces.clear();

  // A single non-empty part: no separator appears in the result.
  pieces.emplace_back(u"a");
  EXPECT_EQ(u"a", JoinString(pieces, kCommaSpace));

  pieces.emplace_back(u"b");
  pieces.emplace_back(u"c");
  EXPECT_EQ(u"a, b, c", JoinString(pieces, kCommaSpace));

  // A trailing empty part still gets its separator.
  pieces.emplace_back(u"");
  EXPECT_EQ(u"a, b, c, ", JoinString(pieces, kCommaSpace));

  // Same parts plus a blank, joined with a different separator.
  pieces.emplace_back(u" ");
  EXPECT_EQ(u"a|b|c|| ", JoinString(pieces, u"|"));
}
871
// JoinString() over a vector of StringPiece, grown step by step.
TEST(StringUtilTest, JoinStringPiece) {
  const std::string kCommaSpace(", ");
  std::vector<StringPiece> pieces;

  // Nothing to join.
  EXPECT_EQ(std::string(), JoinString(pieces, kCommaSpace));

  // Test empty first part (https://crbug.com/698073).
  pieces.emplace_back();
  EXPECT_EQ(std::string(), JoinString(pieces, kCommaSpace));
  pieces.clear();

  // A single non-empty part: no separator appears in the result.
  pieces.emplace_back("a");
  EXPECT_EQ("a", JoinString(pieces, kCommaSpace));

  pieces.emplace_back("b");
  pieces.emplace_back("c");
  EXPECT_EQ("a, b, c", JoinString(pieces, kCommaSpace));

  // A trailing default-constructed piece still gets its separator.
  pieces.emplace_back();
  EXPECT_EQ("a, b, c, ", JoinString(pieces, kCommaSpace));

  // Same parts plus a blank, joined with a different separator.
  pieces.emplace_back(" ");
  EXPECT_EQ("a|b|c|| ", JoinString(pieces, "|"));
}
894
// JoinString() over a vector of StringPiece16, grown step by step.
TEST(StringUtilTest, JoinStringPiece16) {
  // Backing storage for the pieces pushed below; the pieces only view it.
  const std::u16string kA = u"a";
  const std::u16string kB = u"b";
  const std::u16string kC = u"c";
  const std::u16string kSpace = u" ";

  const std::u16string kCommaSpace(u", ");
  std::vector<StringPiece16> pieces;

  // Nothing to join.
  EXPECT_EQ(std::u16string(), JoinString(pieces, kCommaSpace));

  // Test empty first part (https://crbug.com/698073).
  pieces.emplace_back();
  EXPECT_EQ(std::u16string(), JoinString(pieces, kCommaSpace));
  pieces.clear();

  // A single non-empty part: no separator appears in the result.
  pieces.push_back(kA);
  EXPECT_EQ(u"a", JoinString(pieces, kCommaSpace));

  pieces.push_back(kB);
  pieces.push_back(kC);
  EXPECT_EQ(u"a, b, c", JoinString(pieces, kCommaSpace));

  // A trailing default-constructed piece still gets its separator.
  pieces.emplace_back();
  EXPECT_EQ(u"a, b, c, ", JoinString(pieces, kCommaSpace));

  // Same parts plus a blank, joined with a different separator.
  pieces.push_back(kSpace);
  EXPECT_EQ(u"a|b|c|| ", JoinString(pieces, u"|"));
}
921
// JoinString() called with braced initializer lists whose elements are string
// literals, std::strings, or StringPieces.
TEST(StringUtilTest, JoinStringInitializerList) {
  const std::string kCommaSpace(", ");
  const std::string kFirst = "a";
  const std::string kSecond = "b";
  const StringPiece kFirstPiece = kFirst;
  const StringPiece kSecondPiece = kSecond;

  // Empty list.
  EXPECT_EQ(std::string(), JoinString({}, kCommaSpace));

  // Test empty first part (https://crbug.com/698073).
  EXPECT_EQ(std::string(), JoinString({StringPiece()}, kCommaSpace));

  // With const char*s.
  EXPECT_EQ("a", JoinString({"a"}, kCommaSpace));
  EXPECT_EQ("a, b, c", JoinString({"a", "b", "c"}, kCommaSpace));
  EXPECT_EQ("a, b, c, ",
            JoinString({"a", "b", "c", StringPiece()}, kCommaSpace));
  EXPECT_EQ("a|b|c|| ", JoinString({"a", "b", "c", StringPiece(), " "}, "|"));

  // With std::strings.
  EXPECT_EQ("a, b", JoinString({kFirst, kSecond}, kCommaSpace));

  // With StringPieces.
  EXPECT_EQ("a, b", JoinString({kFirstPiece, kSecondPiece}, kCommaSpace));
}
945
// UTF-16 counterpart of JoinStringInitializerList: braced lists of
// std::u16strings and StringPiece16s.
TEST(StringUtilTest, JoinStringInitializerList16) {
  const std::u16string kCommaSpace(u", ");
  const std::u16string kA = u"a";
  const std::u16string kB = u"b";
  const std::u16string kC = u"c";
  const std::u16string kSpace = u" ";
  const StringPiece16 kPieceA = kA;
  const StringPiece16 kPieceB = kB;

  // Empty list.
  EXPECT_EQ(std::u16string(), JoinString({}, kCommaSpace));

  // Test empty first part (https://crbug.com/698073).
  EXPECT_EQ(std::u16string(), JoinString({StringPiece16()}, kCommaSpace));

  // With string16s.
  EXPECT_EQ(u"a", JoinString({kA}, kCommaSpace));
  EXPECT_EQ(u"a, b, c", JoinString({kA, kB, kC}, kCommaSpace));
  EXPECT_EQ(u"a, b, c, ",
            JoinString({kA, kB, kC, StringPiece16()}, kCommaSpace));
  EXPECT_EQ(u"a|b|c|| ",
            JoinString({kA, kB, kC, StringPiece16(), kSpace}, u"|"));

  // With StringPiece16s.
  EXPECT_EQ(u"a, b", JoinString({kPieceA, kPieceB}, kCommaSpace));
}
971
// Checks StartsWith() for 8-bit and UTF-16 inputs under both comparison
// modes, including empty strings on either side.
TEST(StringUtilTest, StartsWith) {
  constexpr CompareCase kExact = CompareCase::SENSITIVE;
  constexpr CompareCase kIgnoreAsciiCase = CompareCase::INSENSITIVE_ASCII;

  // 8-bit strings.
  EXPECT_TRUE(StartsWith("javascript:url", "javascript", kExact));
  EXPECT_FALSE(StartsWith("JavaScript:url", "javascript", kExact));
  EXPECT_TRUE(StartsWith("javascript:url", "javascript", kIgnoreAsciiCase));
  EXPECT_TRUE(StartsWith("JavaScript:url", "javascript", kIgnoreAsciiCase));
  // The candidate prefix is longer than the string.
  EXPECT_FALSE(StartsWith("java", "javascript", kExact));
  EXPECT_FALSE(StartsWith("java", "javascript", kIgnoreAsciiCase));
  // Empty string, non-empty prefix.
  EXPECT_FALSE(StartsWith(std::string(), "javascript", kIgnoreAsciiCase));
  EXPECT_FALSE(StartsWith(std::string(), "javascript", kExact));
  // Non-empty string, empty prefix.
  EXPECT_TRUE(StartsWith("java", std::string(), kIgnoreAsciiCase));
  EXPECT_TRUE(StartsWith("java", std::string(), kExact));

  // UTF-16 strings, same scenarios in the same order.
  EXPECT_TRUE(StartsWith(u"javascript:url", u"javascript", kExact));
  EXPECT_FALSE(StartsWith(u"JavaScript:url", u"javascript", kExact));
  EXPECT_TRUE(StartsWith(u"javascript:url", u"javascript", kIgnoreAsciiCase));
  EXPECT_TRUE(StartsWith(u"JavaScript:url", u"javascript", kIgnoreAsciiCase));
  EXPECT_FALSE(StartsWith(u"java", u"javascript", kExact));
  EXPECT_FALSE(StartsWith(u"java", u"javascript", kIgnoreAsciiCase));
  EXPECT_FALSE(StartsWith(std::u16string(), u"javascript", kIgnoreAsciiCase));
  EXPECT_FALSE(StartsWith(std::u16string(), u"javascript", kExact));
  EXPECT_TRUE(StartsWith(u"java", std::u16string(), kIgnoreAsciiCase));
  EXPECT_TRUE(StartsWith(u"java", std::u16string(), kExact));
}
1013
// Checks EndsWith() on UTF-16 inputs under both comparison modes, including
// empty strings on either side and a suffix equal to the whole string.
TEST(StringUtilTest, EndsWith) {
  constexpr CompareCase kExact = CompareCase::SENSITIVE;
  constexpr CompareCase kIgnoreAsciiCase = CompareCase::INSENSITIVE_ASCII;

  EXPECT_TRUE(EndsWith(u"Foo.plugin", u".plugin", kExact));
  EXPECT_FALSE(EndsWith(u"Foo.Plugin", u".plugin", kExact));
  EXPECT_TRUE(EndsWith(u"Foo.plugin", u".plugin", kIgnoreAsciiCase));
  EXPECT_TRUE(EndsWith(u"Foo.Plugin", u".plugin", kIgnoreAsciiCase));

  // The candidate suffix is longer than the string.
  EXPECT_FALSE(EndsWith(u".plug", u".plugin", kExact));
  EXPECT_FALSE(EndsWith(u".plug", u".plugin", kIgnoreAsciiCase));

  // The suffix text occurs, but not at the end.
  EXPECT_FALSE(EndsWith(u"Foo.plugin Bar", u".plugin", kExact));
  EXPECT_FALSE(EndsWith(u"Foo.plugin Bar", u".plugin", kIgnoreAsciiCase));

  // Empty string, non-empty suffix.
  EXPECT_FALSE(EndsWith(std::u16string(), u".plugin", kIgnoreAsciiCase));
  EXPECT_FALSE(EndsWith(std::u16string(), u".plugin", kExact));

  // Non-empty string, empty suffix.
  EXPECT_TRUE(EndsWith(u"Foo.plugin", std::u16string(), kIgnoreAsciiCase));
  EXPECT_TRUE(EndsWith(u"Foo.plugin", std::u16string(), kExact));

  // The suffix is the entire string.
  EXPECT_TRUE(EndsWith(u".plugin", u".plugin", kIgnoreAsciiCase));
  EXPECT_TRUE(EndsWith(u".plugin", u".plugin", kExact));

  // Both sides empty.
  EXPECT_TRUE(EndsWith(std::u16string(), std::u16string(), kIgnoreAsciiCase));
  EXPECT_TRUE(EndsWith(std::u16string(), std::u16string(), kExact));
}
1046
// Verifies both the formatted result and the offsets reported by
// ReplaceStringPlaceholders() when the placeholders appear in ascending and
// in swapped order.
TEST(StringUtilTest, GetStringFWithOffsets) {
  const std::vector<std::u16string> subst = {u"1", u"2"};
  std::vector<size_t> offsets;

  // Placeholders in ascending order.
  EXPECT_EQ(u"Hello, 1. Your number is 2.",
            ReplaceStringPlaceholders(u"Hello, $1. Your number is $2.", subst,
                                      &offsets));
  // A matcher checks size and contents together, so a result of the wrong
  // length fails cleanly instead of being indexed out of bounds.
  EXPECT_THAT(offsets, ElementsAre(7U, 25U));
  offsets.clear();

  // Placeholders swapped: the expected offsets follow the substitution index
  // ($1 first, then $2), not the position in the output.
  EXPECT_EQ(u"Hello, 2. Your number is 1.",
            ReplaceStringPlaceholders(u"Hello, $2. Your number is $1.", subst,
                                      &offsets));
  EXPECT_THAT(offsets, ElementsAre(25U, 7U));
}
1065
// Supplies three substitutions to a format string that references $1..$6.
// The expected output shows $4, $5 and $6 contributing no text.
TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
  const std::vector<std::u16string> replacements = {u"9a", u"8b", u"7c"};

  const std::u16string result = ReplaceStringPlaceholders(
      u"$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i", replacements, nullptr);

  EXPECT_EQ(u"9aa,8bb,7cc,d,e,f,9ag,8bh,7ci", result);
}
1079
// Substitutes all nine single-digit placeholders ($1..$9) in a UTF-16 format
// string.
TEST(StringUtilTest, ReplaceStringPlaceholders) {
  const std::vector<std::u16string> replacements = {
      u"9a", u"8b", u"7c", u"6d", u"5e", u"4f", u"3g", u"2h", u"1i",
  };

  const std::u16string result = ReplaceStringPlaceholders(
      u"$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", replacements, nullptr);

  EXPECT_EQ(u"9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", result);
}
1097
// In this test, some of the substitutions are shorter than the placeholders,
// but overall the string gets longer. Both the result and the reported
// offsets are verified.
TEST(StringUtilTest, ReplaceStringPlaceholdersNetExpansionWithContraction) {
  const std::vector<std::u16string> subst = {
      u"9a____", u"B", u"7c___", u"d", u"5e____",
      u"F",      u"3g___", u"h", u"1i_____",
  };

  const std::u16string original = u"$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i";
  const std::u16string expected =
      u"9a____a,Bb,7c___c,dd,5e____e,Ff,3g___g,hh,1i_____i";
  const std::vector<size_t> expected_offsets = {0,  8,  11, 18, 21,
                                                29, 32, 39, 42};

  // Without offset reporting.
  EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, nullptr));

  // With offset reporting.
  std::vector<size_t> offsets;
  EXPECT_EQ(expected, ReplaceStringPlaceholders(original, subst, &offsets));
  EXPECT_EQ(offsets.size(), subst.size());
  EXPECT_EQ(expected_offsets, offsets);

  // Sanity-check the fixture itself: every expected offset must point at the
  // matching substitution inside |expected|. The loop is bounded by the test's
  // own data (not by |offsets|), so a wrongly sized result from the function
  // under test cannot cause an out-of-range access here.
  ASSERT_EQ(expected_offsets.size(), subst.size());
  for (size_t i = 0; i < subst.size(); ++i) {
    EXPECT_EQ(expected.substr(expected_offsets[i], subst[i].length()),
              subst[i]);
  }
}
1128
// Here one substitution is longer than its placeholder while the rest are
// shorter, so the string shrinks overall. The placeholders are also
// referenced out of numerical order.
TEST(StringUtilTest, ReplaceStringPlaceholdersNetContractionWithExpansion) {
  const std::vector<std::u16string> replacements = {
      u"z", u"y", u"XYZW", u"x", u"w",
  };

  const std::u16string result =
      ReplaceStringPlaceholders(u"$3_$4$2$1$5", replacements, nullptr);

  EXPECT_EQ(u"XYZW_xyzw", result);
}
1145
// "$16" is expected to expand as placeholder $1 followed by a literal '6'.
TEST(StringUtilTest, ReplaceStringPlaceholdersOneDigit) {
  const std::vector<std::u16string> replacements = {u"1a"};

  const std::u16string result =
      ReplaceStringPlaceholders(u" $16 ", replacements, nullptr);

  EXPECT_EQ(u" 1a6 ", result);
}
1153
// A '$' followed by a non-digit ("$-", "$A") is expected to leave nothing in
// the output, while the valid "$1" is still substituted.
TEST(StringUtilTest, ReplaceStringPlaceholdersInvalidPlaceholder) {
  const std::vector<std::u16string> replacements = {u"1a"};

  const std::u16string result =
      ReplaceStringPlaceholders(u"+$-+$A+$1+", replacements, nullptr);

  EXPECT_EQ(u"+++1a+", result);
}
1161
// 8-bit counterpart of the ReplaceStringPlaceholders test: substitutes all
// nine single-digit placeholders in a std::string format.
TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
  const std::vector<std::string> replacements = {
      "9a", "8b", "7c", "6d", "5e", "4f", "3g", "2h", "1i",
  };

  const std::string result = ReplaceStringPlaceholders(
      "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", replacements, nullptr);

  EXPECT_EQ("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", result);
}
1180
// One placeholder is referenced twice and two substitutions are never
// referenced; one offset is expected per placeholder occurrence.
TEST(StringUtilTest, StdStringReplaceStringPlaceholdersMultipleMatches) {
  const std::vector<std::string> replacements = {
      "4",   // Referenced twice.
      "?",   // Unreferenced.
      "!",   // Unreferenced.
      "16",  // Referenced once.
  };
  const std::string format = "$1 * $1 == $4";
  const std::string want = "4 * 4 == 16";
  const std::vector<size_t> want_offsets = {0, 4, 9};

  // Without offset reporting.
  EXPECT_EQ(want, ReplaceStringPlaceholders(format, replacements, nullptr));

  // With offset reporting.
  std::vector<size_t> got_offsets;
  EXPECT_EQ(want,
            ReplaceStringPlaceholders(format, replacements, &got_offsets));
  EXPECT_EQ(want_offsets, got_offsets);
}
1196
// Runs of two or more '$' before a digit: the expected output keeps the digit
// unsubstituted and contains one '$' fewer than the input run.
TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
  const std::vector<std::string> replacements = {"a", "b", "c"};

  const std::string result =
      ReplaceStringPlaceholders("$$1 $$$2 $$$$3", replacements, nullptr);

  EXPECT_EQ(result, "$1 $$2 $$$3");
}
1205
// Exercises strlcpy(), u16cstrlcpy() and wcslcpy() against destination buffers
// of various sizes. Every call copies the 7-character string "abcdefg" and is
// expected to return 7, whatever the destination size.
TEST(StringUtilTest, LcpyTest) {
  constexpr size_t kSourceLength = 7;

  // Normal case: the destination has room to spare.
  {
    char narrow[10];
    char16_t utf16[10];
    wchar_t wide[10];

    EXPECT_EQ(kSourceLength, strlcpy(narrow, "abcdefg", std::size(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdefg", sizeof(narrow[0]) * 8));

    EXPECT_EQ(kSourceLength,
              u16cstrlcpy(utf16, u"abcdefg", std::size(utf16)));
    EXPECT_EQ(0, memcmp(utf16, u"abcdefg", sizeof(utf16[0]) * 8));

    EXPECT_EQ(kSourceLength, wcslcpy(wide, L"abcdefg", std::size(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdefg", sizeof(wide[0]) * 8));
  }

  // dst_size == 0: nothing may be written to the destination, and the return
  // value is still the length of the source.
  {
    char narrow[2] = {1, 2};
    char16_t utf16[2] = {1, 2};
    wchar_t wide[2] = {1, 2};

    EXPECT_EQ(kSourceLength, strlcpy(narrow, "abcdefg", 0));
    EXPECT_EQ(1, narrow[0]);
    EXPECT_EQ(2, narrow[1]);

    EXPECT_EQ(kSourceLength, u16cstrlcpy(utf16, u"abcdefg", 0));
    EXPECT_EQ(char16_t{1}, utf16[0]);
    EXPECT_EQ(char16_t{2}, utf16[1]);

    EXPECT_EQ(kSourceLength, wcslcpy(wide, L"abcdefg", 0));
    EXPECT_EQ(static_cast<wchar_t>(1), wide[0]);
    EXPECT_EQ(static_cast<wchar_t>(2), wide[1]);
  }

  // The source, including its terminating null, fits exactly.
  {
    char narrow[8];
    char16_t utf16[8];
    wchar_t wide[8];

    EXPECT_EQ(kSourceLength, strlcpy(narrow, "abcdefg", std::size(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdefg", 8));

    EXPECT_EQ(kSourceLength,
              u16cstrlcpy(utf16, u"abcdefg", std::size(utf16)));
    EXPECT_EQ(0, memcmp(utf16, u"abcdefg", sizeof(utf16)));

    EXPECT_EQ(kSourceLength, wcslcpy(wide, L"abcdefg", std::size(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdefg", sizeof(wide)));
  }

  // The destination is one element too small for the terminating null, so
  // the last source character is expected to be dropped.
  {
    char narrow[7];
    char16_t utf16[7];
    wchar_t wide[7];

    EXPECT_EQ(kSourceLength, strlcpy(narrow, "abcdefg", std::size(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "abcdef", sizeof(narrow[0]) * 7));

    EXPECT_EQ(kSourceLength,
              u16cstrlcpy(utf16, u"abcdefg", std::size(utf16)));
    EXPECT_EQ(0, memcmp(utf16, u"abcdef", sizeof(utf16[0]) * 7));

    EXPECT_EQ(kSourceLength, wcslcpy(wide, L"abcdefg", std::size(wide)));
    EXPECT_EQ(0, memcmp(wide, L"abcdef", sizeof(wide[0]) * 7));
  }

  // The destination is far too small: only "ab" plus the null is expected.
  {
    char narrow[3];
    char16_t utf16[3];
    wchar_t wide[3];

    EXPECT_EQ(kSourceLength, strlcpy(narrow, "abcdefg", std::size(narrow)));
    EXPECT_EQ(0, memcmp(narrow, "ab", sizeof(narrow)));

    EXPECT_EQ(kSourceLength,
              u16cstrlcpy(utf16, u"abcdefg", std::size(utf16)));
    EXPECT_EQ(0, memcmp(utf16, u"ab", sizeof(utf16)));

    EXPECT_EQ(kSourceLength, wcslcpy(wide, L"abcdefg", std::size(wide)));
    EXPECT_EQ(0, memcmp(wide, L"ab", sizeof(wide)));
  }
}
1276
// Table-driven check of IsWprintfFormatPortable(): each row pairs a wide
// format string with the verdict expected for it.
TEST(StringUtilTest, WprintfFormatPortabilityTest) {
  struct FormatCase {
    const wchar_t* input;
    bool portable;
  };
  static const FormatCase kFormats[] = {
      {L"%ls", true},
      {L"%s", false},
      {L"%S", false},
      {L"%lS", false},
      {L"Hello, %s", false},
      {L"%lc", true},
      {L"%c", false},
      {L"%C", false},
      {L"%lC", false},
      {L"%ls %s", false},
      {L"%s %ls", false},
      {L"%s %ls %s", false},
      {L"%f", true},
      {L"%f %F", false},
      {L"%d %D", false},
      {L"%o %O", false},
      {L"%u %U", false},
      {L"%f %d %o %u", true},
      {L"%-8d (%02.1f%)", true},
      {L"% 10s", false},
      {L"% 10ls", true},
  };

  for (const FormatCase& format : kFormats) {
    EXPECT_EQ(format.portable, IsWprintfFormatPortable(format.input));
  }
}
1307
// Checks MakeStringPiece(), MakeStringPiece16() and MakeWStringView() both at
// compile time (pointer pairs into constexpr arrays) and at run time (iterator
// pairs into std::basic_string objects).
TEST(StringUtilTest, MakeBasicStringPieceTest) {
  // --- char ---
  constexpr char kFoo[] = "Foo";
  static_assert(MakeStringPiece(kFoo, kFoo + 3) == kFoo);
  static_assert(MakeStringPiece(kFoo, kFoo + 3).data() == kFoo);
  static_assert(MakeStringPiece(kFoo, kFoo + 3).size() == 3);
  // Empty ranges at the terminating null and one past the end of the array.
  static_assert(MakeStringPiece(kFoo + 3, kFoo + 3).empty());
  static_assert(MakeStringPiece(kFoo + 4, kFoo + 4).empty());

  std::string narrow = kFoo;
  EXPECT_EQ(MakeStringPiece(narrow.begin(), narrow.end()), narrow);
  EXPECT_EQ(MakeStringPiece(narrow.begin(), narrow.end()).data(),
            narrow.data());
  EXPECT_EQ(MakeStringPiece(narrow.begin(), narrow.end()).size(),
            narrow.size());
  EXPECT_TRUE(MakeStringPiece(narrow.end(), narrow.end()).empty());

  // --- char16_t ---
  constexpr char16_t kBar[] = u"Bar";
  static_assert(MakeStringPiece16(kBar, kBar + 3) == kBar);
  static_assert(MakeStringPiece16(kBar, kBar + 3).data() == kBar);
  static_assert(MakeStringPiece16(kBar, kBar + 3).size() == 3);
  static_assert(MakeStringPiece16(kBar + 3, kBar + 3).empty());
  static_assert(MakeStringPiece16(kBar + 4, kBar + 4).empty());

  std::u16string utf16 = kBar;
  EXPECT_EQ(MakeStringPiece16(utf16.begin(), utf16.end()), utf16);
  EXPECT_EQ(MakeStringPiece16(utf16.begin(), utf16.end()).data(),
            utf16.data());
  EXPECT_EQ(MakeStringPiece16(utf16.begin(), utf16.end()).size(),
            utf16.size());
  EXPECT_TRUE(MakeStringPiece16(utf16.end(), utf16.end()).empty());

  // --- wchar_t ---
  constexpr wchar_t kBaz[] = L"Baz";
  static_assert(MakeWStringView(kBaz, kBaz + 3) == kBaz);
  static_assert(MakeWStringView(kBaz, kBaz + 3).data() == kBaz);
  static_assert(MakeWStringView(kBaz, kBaz + 3).size() == 3);
  static_assert(MakeWStringView(kBaz + 3, kBaz + 3).empty());
  static_assert(MakeWStringView(kBaz + 4, kBaz + 4).empty());

  std::wstring wide = kBaz;
  EXPECT_EQ(MakeWStringView(wide.begin(), wide.end()), wide);
  EXPECT_EQ(MakeWStringView(wide.begin(), wide.end()).data(), wide.data());
  EXPECT_EQ(MakeWStringView(wide.begin(), wide.end()).size(), wide.size());
  EXPECT_TRUE(MakeWStringView(wide.end(), wide.end()).empty());
}
1348
// RemoveChars() with the same string used as input and output: checks the
// return value and the resulting text when characters are removed, when
// nothing matches, and when the input is empty.
TEST(StringUtilTest, RemoveChars) {
  const char kUnwanted[] = "-/+*";
  std::string text = "A-+bc/d!*";

  // Some characters match and are stripped.
  const bool removed_some = RemoveChars(text, kUnwanted, &text);
  EXPECT_TRUE(removed_some);
  EXPECT_EQ("Abcd!", text);

  // No characters match kUnwanted.
  const bool removed_again = RemoveChars(text, kUnwanted, &text);
  EXPECT_FALSE(removed_again);
  EXPECT_EQ("Abcd!", text);

  // Empty string.
  text.clear();
  const bool removed_from_empty = RemoveChars(text, kUnwanted, &text);
  EXPECT_FALSE(removed_from_empty);
  EXPECT_EQ(std::string(), text);
}
1364
// Table-driven check of ReplaceChars(). Every row is run three ways: into a
// separate output string, in place on a shrunk string, and in place on a
// string with capacity reserved up front.
TEST(StringUtilTest, ReplaceChars) {
  struct TestData {
    const char* input;
    const char* replace_chars;
    const char* replace_with;
    const char* output;
    bool result;
  };
  const TestData kCases[] = {
      {"", "", "", "", false},
      {"t", "t", "t", "t", true},
      {"a", "b", "c", "a", false},
      {"b", "b", "c", "c", true},
      {"bob", "b", "p", "pop", true},
      {"bob", "o", "i", "bib", true},
      {"test", "", "", "test", false},
      {"test", "", "!", "test", false},
      {"test", "z", "!", "test", false},
      {"test", "e", "!", "t!st", true},
      {"test", "e", "!?", "t!?st", true},
      {"test", "ez", "!", "t!st", true},
      {"test", "zed", "!?", "t!?st", true},
      {"test", "t", "!?", "!?es!?", true},
      {"test", "et", "!>", "!>!>s!>", true},
      {"test", "zest", "!", "!!!!", true},
      {"test", "szt", "!", "!e!!", true},
      {"test", "t", "test", "testestest", true},
      {"tetst", "t", "test", "testeteststest", true},
      {"ttttttt", "t", "-", "-------", true},
      {"aAaAaAAaAAa", "A", "", "aaaaa", true},
      {"xxxxxxxxxx", "x", "", "", true},
      {"xxxxxxxxxx", "x", "x", "xxxxxxxxxx", true},
      {"xxxxxxxxxx", "x", "y-", "y-y-y-y-y-y-y-y-y-y-", true},
      {"xxxxxxxxxx", "x", "xy", "xyxyxyxyxyxyxyxyxyxy", true},
      {"xxxxxxxxxx", "x", "zyx", "zyxzyxzyxzyxzyxzyxzyxzyxzyxzyx", true},
      {"xaxxaxxxaxxxax", "x", "xy", "xyaxyxyaxyxyxyaxyxyxyaxy", true},
      {"-xaxxaxxxaxxxax-", "x", "xy", "-xyaxyxyaxyxyxyaxyxyxyaxy-", true},
  };

  // Pass 1: distinct input and output variables.
  for (const TestData& row : kCases) {
    std::string replaced;
    const bool changed = ReplaceChars(row.input, row.replace_chars,
                                      row.replace_with, &replaced);
    EXPECT_EQ(row.result, changed) << row.input;
    EXPECT_EQ(row.output, replaced);
  }

  // Pass 2: one variable serves as both input and output, with its capacity
  // trimmed beforehand.
  for (const TestData& row : kCases) {
    std::string in_place = row.input;
    in_place.shrink_to_fit();
    const bool changed = ReplaceChars(in_place, row.replace_chars,
                                      row.replace_with, &in_place);
    EXPECT_EQ(row.result, changed) << row.input;
    EXPECT_EQ(row.output, in_place);
  }

  // Pass 3: one variable serves as both input and output, with ample capacity
  // reserved; the buffer address must be the same after the call.
  for (const TestData& row : kCases) {
    std::string in_place = row.input;
    in_place.reserve(strlen(row.output) * 2);
    const void* const buffer_before = in_place.data();
    const bool changed = ReplaceChars(in_place, row.replace_chars,
                                      row.replace_with, &in_place);
    EXPECT_EQ(row.result, changed) << row.input;
    EXPECT_EQ(row.output, in_place);
    EXPECT_EQ(buffer_before, in_place.data());
  }
}
1435
// ContainsOnlyChars() with an empty character set, a digit set, and the
// ASCII / UTF-16 whitespace sets.
TEST(StringUtilTest, ContainsOnlyChars) {
  const std::string kEmpty;
  const std::u16string kEmpty16;

  // With an empty set of allowed characters, only the empty string passes.
  EXPECT_TRUE(ContainsOnlyChars(kEmpty, kEmpty));
  EXPECT_FALSE(ContainsOnlyChars("Hello", kEmpty));

  // Digit set; the order of the allowed characters is irrelevant.
  EXPECT_TRUE(ContainsOnlyChars(kEmpty, "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
  EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
  EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
  EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));

  // ASCII whitespace set.
  EXPECT_TRUE(ContainsOnlyChars(kEmpty, kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
  EXPECT_TRUE(ContainsOnlyChars("\t \r \n ", kWhitespaceASCII));
  EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
  EXPECT_FALSE(ContainsOnlyChars("\thello\r \n ", kWhitespaceASCII));

  // UTF-16 whitespace set.
  EXPECT_TRUE(ContainsOnlyChars(kEmpty16, kWhitespaceUTF16));
  EXPECT_TRUE(ContainsOnlyChars(u" ", kWhitespaceUTF16));
  EXPECT_TRUE(ContainsOnlyChars(u"\t", kWhitespaceUTF16));
  EXPECT_TRUE(ContainsOnlyChars(u"\t \r \n ", kWhitespaceUTF16));
  EXPECT_FALSE(ContainsOnlyChars(u"a", kWhitespaceUTF16));
  EXPECT_FALSE(ContainsOnlyChars(u"\thello\r \n ", kWhitespaceUTF16));
}
1462
// Checks CompareCaseInsensitiveASCII() at run time and, with the same inputs,
// in constant expressions via static_assert.
TEST(StringUtilTest, CompareCaseInsensitiveASCII) {
  // Equal strings.
  EXPECT_EQ(0, CompareCaseInsensitiveASCII("", ""));
  EXPECT_EQ(0, CompareCaseInsensitiveASCII("Asdf", "aSDf"));
  static_assert(CompareCaseInsensitiveASCII("", "") == 0);
  static_assert(CompareCaseInsensitiveASCII("Asdf", "aSDf") == 0);

  // Differing lengths.
  EXPECT_EQ(-1, CompareCaseInsensitiveASCII("Asdf", "aSDfA"));
  EXPECT_EQ(1, CompareCaseInsensitiveASCII("AsdfA", "aSDf"));
  static_assert(CompareCaseInsensitiveASCII("Asdf", "aSDfA") == -1);
  static_assert(CompareCaseInsensitiveASCII("AsdfA", "aSDf") == 1);

  // Differing values.
  EXPECT_EQ(-1, CompareCaseInsensitiveASCII("AsdfA", "aSDfb"));
  EXPECT_EQ(1, CompareCaseInsensitiveASCII("Asdfb", "aSDfA"));
  static_assert(CompareCaseInsensitiveASCII("AsdfA", "aSDfb") == -1);
  static_assert(CompareCaseInsensitiveASCII("Asdfb", "aSDfA") == 1);

  // Non-ASCII bytes are permitted, but they will be compared case-sensitively.
  EXPECT_EQ(0, CompareCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\xa4"));
  EXPECT_EQ(-1, CompareCaseInsensitiveASCII("AAA \xc3\x84", "aaa \xc3\xa4"));
  EXPECT_EQ(1, CompareCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\x84"));
  static_assert(CompareCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\xa4") ==
                0);
  static_assert(CompareCaseInsensitiveASCII("AAA \xc3\x84", "aaa \xc3\xa4") ==
                -1);
  static_assert(CompareCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\x84") ==
                1);

  // ASCII bytes should sort before non-ASCII ones.
  EXPECT_EQ(-1, CompareCaseInsensitiveASCII("a", "\xc3\xa4"));
  EXPECT_EQ(1, CompareCaseInsensitiveASCII("\xc3\xa4", "a"));
  static_assert(CompareCaseInsensitiveASCII("a", "\xc3\xa4") == -1);
  static_assert(CompareCaseInsensitiveASCII("\xc3\xa4", "a") == 1);
}
1500
TEST(StringUtilTest,EqualsCaseInsensitiveASCII)1501 TEST(StringUtilTest, EqualsCaseInsensitiveASCII) {
1502 EXPECT_TRUE(EqualsCaseInsensitiveASCII("", ""));
1503 EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", "aSDF"));
1504 EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", "aSDF"));
1505 EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", "aSDFz"));
1506
1507 EXPECT_TRUE(EqualsCaseInsensitiveASCII(u"", u""));
1508 EXPECT_TRUE(EqualsCaseInsensitiveASCII(u"Asdf", u"aSDF"));
1509 EXPECT_FALSE(EqualsCaseInsensitiveASCII(u"bsdf", u"aSDF"));
1510 EXPECT_FALSE(EqualsCaseInsensitiveASCII(u"Asdf", u"aSDFz"));
1511
1512 EXPECT_TRUE(EqualsCaseInsensitiveASCII(u"", ""));
1513 EXPECT_TRUE(EqualsCaseInsensitiveASCII(u"Asdf", "aSDF"));
1514 EXPECT_FALSE(EqualsCaseInsensitiveASCII(u"bsdf", "aSDF"));
1515 EXPECT_FALSE(EqualsCaseInsensitiveASCII(u"Asdf", "aSDFz"));
1516
1517 EXPECT_TRUE(EqualsCaseInsensitiveASCII("", u""));
1518 EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", u"aSDF"));
1519 EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", u"aSDF"));
1520 EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", u"aSDFz"));
1521
1522 // Non-ASCII bytes are permitted, but they will be compared case-sensitively.
1523 EXPECT_TRUE(EqualsCaseInsensitiveASCII("aaa \xc3\xa4", "AAA \xc3\xa4"));
1524 EXPECT_FALSE(EqualsCaseInsensitiveASCII("aaa \xc3\x84", "AAA \xc3\xa4"));
1525
1526 // The `std::wstring_view` overloads are only defined on Windows.
1527 #if BUILDFLAG(IS_WIN)
1528 EXPECT_TRUE(EqualsCaseInsensitiveASCII(L"", L""));
1529 EXPECT_TRUE(EqualsCaseInsensitiveASCII(L"Asdf", L"aSDF"));
1530 EXPECT_FALSE(EqualsCaseInsensitiveASCII(L"bsdf", L"aSDF"));
1531 EXPECT_FALSE(EqualsCaseInsensitiveASCII(L"Asdf", L"aSDFz"));
1532
1533 EXPECT_TRUE(EqualsCaseInsensitiveASCII(L"", ""));
1534 EXPECT_TRUE(EqualsCaseInsensitiveASCII(L"Asdf", "aSDF"));
1535 EXPECT_FALSE(EqualsCaseInsensitiveASCII(L"bsdf", "aSDF"));
1536 EXPECT_FALSE(EqualsCaseInsensitiveASCII(L"Asdf", "aSDFz"));
1537
1538 EXPECT_TRUE(EqualsCaseInsensitiveASCII("", L""));
1539 EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", L"aSDF"));
1540 EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", L"aSDF"));
1541 EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", L"aSDFz"));
1542 #endif
1543 }
1544
// Checks IsUnicodeWhitespace() against a list of characters that must be
// rejected and a list that must be accepted.
TEST(StringUtilTest, IsUnicodeWhitespace) {
  // NOT unicode white space.
  static constexpr wchar_t kNotWhitespace[] = {
      L'\0', L'A', L'0', L'.', L';', L'\x4100',
  };
  for (const wchar_t ch : kNotWhitespace) {
    EXPECT_FALSE(IsUnicodeWhitespace(ch))
        << "code unit " << static_cast<unsigned>(ch);
  }

  // Actual unicode whitespace.
  static constexpr wchar_t kWhitespace[] = {
      L' ', L'\xa0', L'\x3000', L'\t', L'\r', L'\v', L'\f', L'\n',
  };
  for (const wchar_t ch : kWhitespace) {
    EXPECT_TRUE(IsUnicodeWhitespace(ch))
        << "code unit " << static_cast<unsigned>(ch);
  }
}
1564
1565 class WriteIntoTest : public testing::Test {
1566 protected:
WritesCorrectly(size_t num_chars)1567 static void WritesCorrectly(size_t num_chars) {
1568 std::string buffer;
1569 char kOriginal[] = "supercali";
1570 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1571 // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1572 // string at the first \0.
1573 EXPECT_EQ(
1574 std::string(kOriginal, std::min(num_chars, std::size(kOriginal) - 1)),
1575 std::string(buffer.c_str()));
1576 EXPECT_EQ(num_chars, buffer.size());
1577 }
1578 };
1579
// Exercises WriteInto() for several lengths, for a zero-length result, and
// for a string that was copied from another one.
TEST_F(WriteIntoTest, WriteInto) {
  // Validate that WriteInto reserves enough space and sizes a string
  // correctly.
  static constexpr size_t kLengths[] = {1, 2, 5000};
  for (const size_t length : kLengths) {
    WritesCorrectly(length);
  }

  // Validate that WriteInto handles 0-length strings.
  {
    const char kSource[] = "original";
    std::string target;
    strncpy(WriteInto(&target, 1), kSource, 0);
    EXPECT_STREQ("", target.c_str());
    EXPECT_EQ(0u, target.size());
  }

  // Validate that WriteInto doesn't modify other strings when using a
  // Copy-on-Write implementation.
  {
    const char kLive[] = "live";
    const char kDead[] = "dead";
    const std::string untouched = kLive;
    std::string overwritten = untouched;
    strncpy(WriteInto(&overwritten, 5), kDead, 4);

    EXPECT_EQ(kDead, overwritten);
    EXPECT_EQ(4u, overwritten.size());
    EXPECT_EQ(kLive, untouched);
    EXPECT_EQ(4u, untouched.size());
  }
}
1606
1607 } // namespace
1608
1609 } // namespace base
1610