xref: /aosp_15_r20/external/cronet/base/strings/string_util.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/string_util.h"
6 
7 #include <errno.h>
8 #include <math.h>
9 #include <stdarg.h>
10 #include <stdint.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <time.h>
15 #include <wchar.h>
16 
17 #include <limits>
18 #include <optional>
19 #include <string_view>
20 #include <type_traits>
21 #include <vector>
22 
23 #include "base/check_op.h"
24 #include "base/no_destructor.h"
25 #include "base/ranges/algorithm.h"
26 #include "base/strings/string_util_impl_helpers.h"
27 #include "base/strings/string_util_internal.h"
28 #include "base/strings/utf_string_conversion_utils.h"
29 #include "base/strings/utf_string_conversions.h"
30 #include "base/third_party/icu/icu_utf.h"
31 #include "build/build_config.h"
32 
33 namespace base {
34 
// Scans |format| and reports whether every conversion specification in it is
// considered portable by this file's rules:
//   * 's' and 'c' are rejected unless an 'l' was seen earlier in the same
//     specification;
//   * 'S', 'C', 'F', 'D', 'O' and 'U' are always rejected.
// A format string that ends in the middle of a specification is reported as
// portable, since nothing unportable was actually found in it.
bool IsWprintfFormatPortable(const wchar_t* format) {
  for (const wchar_t* cursor = format; *cursor != L'\0'; ++cursor) {
    if (*cursor != L'%') {
      continue;
    }

    // Walk the specification that follows the '%' until a conversion
    // character terminates it.
    bool saw_l_modifier = false;
    for (;;) {
      ++cursor;
      const wchar_t ch = *cursor;
      if (ch == L'\0') {
        // Truncated specification: equally broken everywhere.
        return true;
      }

      if (ch == L'l') {
        // Only 'l' can rescue a later 's' or 'c'.
        saw_l_modifier = true;
      } else {
        switch (ch) {
          case L's':
          case L'c':
            if (!saw_l_modifier) {
              return false;
            }
            break;
          case L'S':
          case L'C':
          case L'F':
          case L'D':
          case L'O':
          case L'U':
            return false;
          default:
            break;
        }
      }

      if (wcschr(L"diouxXeEfgGaAcspn%", ch)) {
        // End of this specification; resume scanning the plain text.
        break;
      }
    }
  }

  return true;
}
69 
ToLowerASCII(StringPiece str)70 std::string ToLowerASCII(StringPiece str) {
71   return internal::ToLowerASCIIImpl(str);
72 }
73 
ToLowerASCII(StringPiece16 str)74 std::u16string ToLowerASCII(StringPiece16 str) {
75   return internal::ToLowerASCIIImpl(str);
76 }
77 
ToUpperASCII(StringPiece str)78 std::string ToUpperASCII(StringPiece str) {
79   return internal::ToUpperASCIIImpl(str);
80 }
81 
ToUpperASCII(StringPiece16 str)82 std::u16string ToUpperASCII(StringPiece16 str) {
83   return internal::ToUpperASCIIImpl(str);
84 }
85 
EmptyString()86 const std::string& EmptyString() {
87   static const base::NoDestructor<std::string> s;
88   return *s;
89 }
90 
EmptyString16()91 const std::u16string& EmptyString16() {
92   static const base::NoDestructor<std::u16string> s16;
93   return *s16;
94 }
95 
ReplaceChars(StringPiece16 input,StringPiece16 replace_chars,StringPiece16 replace_with,std::u16string * output)96 bool ReplaceChars(StringPiece16 input,
97                   StringPiece16 replace_chars,
98                   StringPiece16 replace_with,
99                   std::u16string* output) {
100   return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
101 }
102 
ReplaceChars(StringPiece input,StringPiece replace_chars,StringPiece replace_with,std::string * output)103 bool ReplaceChars(StringPiece input,
104                   StringPiece replace_chars,
105                   StringPiece replace_with,
106                   std::string* output) {
107   return internal::ReplaceCharsT(input, replace_chars, replace_with, output);
108 }
109 
RemoveChars(StringPiece16 input,StringPiece16 remove_chars,std::u16string * output)110 bool RemoveChars(StringPiece16 input,
111                  StringPiece16 remove_chars,
112                  std::u16string* output) {
113   return internal::ReplaceCharsT(input, remove_chars, StringPiece16(), output);
114 }
115 
RemoveChars(StringPiece input,StringPiece remove_chars,std::string * output)116 bool RemoveChars(StringPiece input,
117                  StringPiece remove_chars,
118                  std::string* output) {
119   return internal::ReplaceCharsT(input, remove_chars, StringPiece(), output);
120 }
121 
TrimString(StringPiece16 input,StringPiece16 trim_chars,std::u16string * output)122 bool TrimString(StringPiece16 input,
123                 StringPiece16 trim_chars,
124                 std::u16string* output) {
125   return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
126          TRIM_NONE;
127 }
128 
TrimString(StringPiece input,StringPiece trim_chars,std::string * output)129 bool TrimString(StringPiece input,
130                 StringPiece trim_chars,
131                 std::string* output) {
132   return internal::TrimStringT(input, trim_chars, TRIM_ALL, output) !=
133          TRIM_NONE;
134 }
135 
TrimString(StringPiece16 input,StringPiece16 trim_chars,TrimPositions positions)136 StringPiece16 TrimString(StringPiece16 input,
137                          StringPiece16 trim_chars,
138                          TrimPositions positions) {
139   return internal::TrimStringPieceT(input, trim_chars, positions);
140 }
141 
TrimString(StringPiece input,StringPiece trim_chars,TrimPositions positions)142 StringPiece TrimString(StringPiece input,
143                        StringPiece trim_chars,
144                        TrimPositions positions) {
145   return internal::TrimStringPieceT(input, trim_chars, positions);
146 }
147 
TruncateUTF8ToByteSize(const std::string & input,const size_t byte_size,std::string * output)148 void TruncateUTF8ToByteSize(const std::string& input,
149                             const size_t byte_size,
150                             std::string* output) {
151   DCHECK(output);
152   if (byte_size > input.length()) {
153     *output = input;
154     return;
155   }
156   DCHECK_LE(byte_size,
157             static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
158   // Note: This cast is necessary because CBU8_NEXT uses int32_ts.
159   int32_t truncation_length = static_cast<int32_t>(byte_size);
160   int32_t char_index = truncation_length - 1;
161   const char* data = input.data();
162 
163   // Using CBU8, we will move backwards from the truncation point
164   // to the beginning of the string looking for a valid UTF8
165   // character.  Once a full UTF8 character is found, we will
166   // truncate the string to the end of that character.
167   while (char_index >= 0) {
168     int32_t prev = char_index;
169     base_icu::UChar32 code_point = 0;
170     CBU8_NEXT(reinterpret_cast<const uint8_t*>(data), char_index,
171               truncation_length, code_point);
172     if (!IsValidCharacter(code_point)) {
173       char_index = prev - 1;
174     } else {
175       break;
176     }
177   }
178 
179   if (char_index >= 0 )
180     *output = input.substr(0, static_cast<size_t>(char_index));
181   else
182     output->clear();
183 }
184 
TrimWhitespace(StringPiece16 input,TrimPositions positions,std::u16string * output)185 TrimPositions TrimWhitespace(StringPiece16 input,
186                              TrimPositions positions,
187                              std::u16string* output) {
188   return internal::TrimStringT(input, StringPiece16(kWhitespaceUTF16),
189                                positions, output);
190 }
191 
TrimWhitespace(StringPiece16 input,TrimPositions positions)192 StringPiece16 TrimWhitespace(StringPiece16 input,
193                              TrimPositions positions) {
194   return internal::TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16),
195                                     positions);
196 }
197 
TrimWhitespaceASCII(StringPiece input,TrimPositions positions,std::string * output)198 TrimPositions TrimWhitespaceASCII(StringPiece input,
199                                   TrimPositions positions,
200                                   std::string* output) {
201   return internal::TrimStringT(input, StringPiece(kWhitespaceASCII), positions,
202                                output);
203 }
204 
TrimWhitespaceASCII(StringPiece input,TrimPositions positions)205 StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) {
206   return internal::TrimStringPieceT(input, StringPiece(kWhitespaceASCII),
207                                     positions);
208 }
209 
CollapseWhitespace(StringPiece16 text,bool trim_sequences_with_line_breaks)210 std::u16string CollapseWhitespace(StringPiece16 text,
211                                   bool trim_sequences_with_line_breaks) {
212   return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
213 }
214 
CollapseWhitespaceASCII(StringPiece text,bool trim_sequences_with_line_breaks)215 std::string CollapseWhitespaceASCII(StringPiece text,
216                                     bool trim_sequences_with_line_breaks) {
217   return internal::CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
218 }
219 
ContainsOnlyChars(StringPiece input,StringPiece characters)220 bool ContainsOnlyChars(StringPiece input, StringPiece characters) {
221   return input.find_first_not_of(characters) == StringPiece::npos;
222 }
223 
ContainsOnlyChars(StringPiece16 input,StringPiece16 characters)224 bool ContainsOnlyChars(StringPiece16 input, StringPiece16 characters) {
225   return input.find_first_not_of(characters) == StringPiece16::npos;
226 }
227 
228 
IsStringASCII(StringPiece str)229 bool IsStringASCII(StringPiece str) {
230   return internal::DoIsStringASCII(str.data(), str.length());
231 }
232 
IsStringASCII(StringPiece16 str)233 bool IsStringASCII(StringPiece16 str) {
234   return internal::DoIsStringASCII(str.data(), str.length());
235 }
236 
#if defined(WCHAR_T_IS_32_BIT)
// Wide-string overload, compiled only when WCHAR_T_IS_32_BIT is defined:
// passes the data pointer and length of |str| to internal::DoIsStringASCII.
bool IsStringASCII(std::wstring_view str) {
  const auto* chars = str.data();
  const size_t count = str.length();
  return internal::DoIsStringASCII(chars, count);
}
#endif
242 
IsStringUTF8(StringPiece str)243 bool IsStringUTF8(StringPiece str) {
244   return internal::DoIsStringUTF8<IsValidCharacter>(str);
245 }
246 
IsStringUTF8AllowingNoncharacters(StringPiece str)247 bool IsStringUTF8AllowingNoncharacters(StringPiece str) {
248   return internal::DoIsStringUTF8<IsValidCodepoint>(str);
249 }
250 
EqualsASCII(StringPiece16 str,StringPiece ascii)251 bool EqualsASCII(StringPiece16 str, StringPiece ascii) {
252   return ranges::equal(ascii, str);
253 }
254 
StartsWith(StringPiece str,StringPiece search_for,CompareCase case_sensitivity)255 bool StartsWith(StringPiece str,
256                 StringPiece search_for,
257                 CompareCase case_sensitivity) {
258   return internal::StartsWithT(str, search_for, case_sensitivity);
259 }
260 
StartsWith(StringPiece16 str,StringPiece16 search_for,CompareCase case_sensitivity)261 bool StartsWith(StringPiece16 str,
262                 StringPiece16 search_for,
263                 CompareCase case_sensitivity) {
264   return internal::StartsWithT(str, search_for, case_sensitivity);
265 }
266 
EndsWith(StringPiece str,StringPiece search_for,CompareCase case_sensitivity)267 bool EndsWith(StringPiece str,
268               StringPiece search_for,
269               CompareCase case_sensitivity) {
270   return internal::EndsWithT(str, search_for, case_sensitivity);
271 }
272 
EndsWith(StringPiece16 str,StringPiece16 search_for,CompareCase case_sensitivity)273 bool EndsWith(StringPiece16 str,
274               StringPiece16 search_for,
275               CompareCase case_sensitivity) {
276   return internal::EndsWithT(str, search_for, case_sensitivity);
277 }
278 
HexDigitToInt(char c)279 char HexDigitToInt(char c) {
280   DCHECK(IsHexDigit(c));
281   if (c >= '0' && c <= '9')
282     return static_cast<char>(c - '0');
283   return (c >= 'A' && c <= 'F') ? static_cast<char>(c - 'A' + 10)
284                                 : static_cast<char>(c - 'a' + 10);
285 }
286 
// Unit suffixes (each with a leading space), ordered from smallest to
// largest; indexed by the number of divisions by 1024 performed in
// FormatBytesUnlocalized().
static const char* const kByteStringsUnlocalized[] = {
    " B", " kB", " MB", " GB", " TB", " PB",
};
295 
FormatBytesUnlocalized(int64_t bytes)296 std::u16string FormatBytesUnlocalized(int64_t bytes) {
297   double unit_amount = static_cast<double>(bytes);
298   size_t dimension = 0;
299   const int kKilo = 1024;
300   while (unit_amount >= kKilo &&
301          dimension < std::size(kByteStringsUnlocalized) - 1) {
302     unit_amount /= kKilo;
303     dimension++;
304   }
305 
306   char buf[64];
307   if (bytes != 0 && dimension > 0 && unit_amount < 100) {
308     base::snprintf(buf, std::size(buf), "%.1lf%s", unit_amount,
309                    kByteStringsUnlocalized[dimension]);
310   } else {
311     base::snprintf(buf, std::size(buf), "%.0lf%s", unit_amount,
312                    kByteStringsUnlocalized[dimension]);
313   }
314 
315   return ASCIIToUTF16(buf);
316 }
317 
ReplaceFirstSubstringAfterOffset(std::u16string * str,size_t start_offset,StringPiece16 find_this,StringPiece16 replace_with)318 void ReplaceFirstSubstringAfterOffset(std::u16string* str,
319                                       size_t start_offset,
320                                       StringPiece16 find_this,
321                                       StringPiece16 replace_with) {
322   internal::DoReplaceMatchesAfterOffset(
323       str, start_offset, internal::MakeSubstringMatcher(find_this),
324       replace_with, internal::ReplaceType::REPLACE_FIRST);
325 }
326 
ReplaceFirstSubstringAfterOffset(std::string * str,size_t start_offset,StringPiece find_this,StringPiece replace_with)327 void ReplaceFirstSubstringAfterOffset(std::string* str,
328                                       size_t start_offset,
329                                       StringPiece find_this,
330                                       StringPiece replace_with) {
331   internal::DoReplaceMatchesAfterOffset(
332       str, start_offset, internal::MakeSubstringMatcher(find_this),
333       replace_with, internal::ReplaceType::REPLACE_FIRST);
334 }
335 
ReplaceSubstringsAfterOffset(std::u16string * str,size_t start_offset,StringPiece16 find_this,StringPiece16 replace_with)336 void ReplaceSubstringsAfterOffset(std::u16string* str,
337                                   size_t start_offset,
338                                   StringPiece16 find_this,
339                                   StringPiece16 replace_with) {
340   internal::DoReplaceMatchesAfterOffset(
341       str, start_offset, internal::MakeSubstringMatcher(find_this),
342       replace_with, internal::ReplaceType::REPLACE_ALL);
343 }
344 
ReplaceSubstringsAfterOffset(std::string * str,size_t start_offset,StringPiece find_this,StringPiece replace_with)345 void ReplaceSubstringsAfterOffset(std::string* str,
346                                   size_t start_offset,
347                                   StringPiece find_this,
348                                   StringPiece replace_with) {
349   internal::DoReplaceMatchesAfterOffset(
350       str, start_offset, internal::MakeSubstringMatcher(find_this),
351       replace_with, internal::ReplaceType::REPLACE_ALL);
352 }
353 
WriteInto(std::string * str,size_t length_with_null)354 char* WriteInto(std::string* str, size_t length_with_null) {
355   return internal::WriteIntoT(str, length_with_null);
356 }
357 
WriteInto(std::u16string * str,size_t length_with_null)358 char16_t* WriteInto(std::u16string* str, size_t length_with_null) {
359   return internal::WriteIntoT(str, length_with_null);
360 }
361 
JoinString(span<const std::string> parts,StringPiece separator)362 std::string JoinString(span<const std::string> parts, StringPiece separator) {
363   return internal::JoinStringT(parts, separator);
364 }
365 
JoinString(span<const std::u16string> parts,StringPiece16 separator)366 std::u16string JoinString(span<const std::u16string> parts,
367                           StringPiece16 separator) {
368   return internal::JoinStringT(parts, separator);
369 }
370 
JoinString(span<const StringPiece> parts,StringPiece separator)371 std::string JoinString(span<const StringPiece> parts, StringPiece separator) {
372   return internal::JoinStringT(parts, separator);
373 }
374 
JoinString(span<const StringPiece16> parts,StringPiece16 separator)375 std::u16string JoinString(span<const StringPiece16> parts,
376                           StringPiece16 separator) {
377   return internal::JoinStringT(parts, separator);
378 }
379 
JoinString(std::initializer_list<StringPiece> parts,StringPiece separator)380 std::string JoinString(std::initializer_list<StringPiece> parts,
381                        StringPiece separator) {
382   return internal::JoinStringT(parts, separator);
383 }
384 
JoinString(std::initializer_list<StringPiece16> parts,StringPiece16 separator)385 std::u16string JoinString(std::initializer_list<StringPiece16> parts,
386                           StringPiece16 separator) {
387   return internal::JoinStringT(parts, separator);
388 }
389 
ReplaceStringPlaceholders(StringPiece16 format_string,const std::vector<std::u16string> & subst,std::vector<size_t> * offsets)390 std::u16string ReplaceStringPlaceholders(
391     StringPiece16 format_string,
392     const std::vector<std::u16string>& subst,
393     std::vector<size_t>* offsets) {
394   std::optional<std::u16string> replacement =
395       internal::DoReplaceStringPlaceholders(
396           format_string, subst,
397           /*placeholder_prefix*/ u'$',
398           /*should_escape_multiple_placeholder_prefixes*/ true,
399           /*is_strict_mode*/ false, offsets);
400 
401   DCHECK(replacement);
402   return replacement.value();
403 }
404 
ReplaceStringPlaceholders(StringPiece format_string,const std::vector<std::string> & subst,std::vector<size_t> * offsets)405 std::string ReplaceStringPlaceholders(StringPiece format_string,
406                                       const std::vector<std::string>& subst,
407                                       std::vector<size_t>* offsets) {
408   std::optional<std::string> replacement =
409       internal::DoReplaceStringPlaceholders(
410           format_string, subst,
411           /*placeholder_prefix*/ '$',
412           /*should_escape_multiple_placeholder_prefixes*/ true,
413           /*is_strict_mode*/ false, offsets);
414 
415   DCHECK(replacement);
416   return replacement.value();
417 }
418 
ReplaceStringPlaceholders(const std::u16string & format_string,const std::u16string & a,size_t * offset)419 std::u16string ReplaceStringPlaceholders(const std::u16string& format_string,
420                                          const std::u16string& a,
421                                          size_t* offset) {
422   std::vector<size_t> offsets;
423   std::u16string result =
424       ReplaceStringPlaceholders(format_string, {a}, &offsets);
425 
426   DCHECK_EQ(1U, offsets.size());
427   if (offset)
428     *offset = offsets[0];
429   return result;
430 }
431 
strlcpy(char * dst,const char * src,size_t dst_size)432 size_t strlcpy(char* dst, const char* src, size_t dst_size) {
433   return internal::lcpyT(dst, src, dst_size);
434 }
435 
u16cstrlcpy(char16_t * dst,const char16_t * src,size_t dst_size)436 size_t u16cstrlcpy(char16_t* dst, const char16_t* src, size_t dst_size) {
437   return internal::lcpyT(dst, src, dst_size);
438 }
439 
wcslcpy(wchar_t * dst,const wchar_t * src,size_t dst_size)440 size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
441   return internal::lcpyT(dst, src, dst_size);
442 }
443 
444 }  // namespace base
445