xref: /aosp_15_r20/external/cronet/base/strings/sys_string_conversions_posix.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/sys_string_conversions.h"
6 
7 #include <stddef.h>
8 #include <string.h>
9 #include <wchar.h>
10 
11 #include "base/strings/string_piece.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "build/build_config.h"
14 
15 namespace base {
16 
SysWideToUTF8(const std::wstring & wide)17 std::string SysWideToUTF8(const std::wstring& wide) {
18   // In theory this should be using the system-provided conversion rather
19   // than our ICU, but this will do for now.
20   return WideToUTF8(wide);
21 }
SysUTF8ToWide(StringPiece utf8)22 std::wstring SysUTF8ToWide(StringPiece utf8) {
23   // In theory this should be using the system-provided conversion rather
24   // than our ICU, but this will do for now.
25   std::wstring out;
26   UTF8ToWide(utf8.data(), utf8.size(), &out);
27   return out;
28 }
29 
30 #if defined(SYSTEM_NATIVE_UTF8) || BUILDFLAG(IS_ANDROID)
// TODO(port): Consider dropping the IS_ANDROID condition once we have
// wcrtomb() support and a better understanding of what calls these routines.
33 
SysWideToNativeMB(const std::wstring & wide)34 std::string SysWideToNativeMB(const std::wstring& wide) {
35   return WideToUTF8(wide);
36 }
37 
SysNativeMBToWide(StringPiece native_mb)38 std::wstring SysNativeMBToWide(StringPiece native_mb) {
39   return SysUTF8ToWide(native_mb);
40 }
41 
42 #else
43 
// Converts |wide| to a multi-byte string using wcrtomb(), one wide character
// at a time. Embedded null characters are carried over as single zero bytes.
// Returns an empty string if any character fails to convert, or if the input
// yields no output bytes.
std::string SysWideToNativeMB(const std::wstring& wide) {
  // Value wcrtomb() returns when a character cannot be converted.
  constexpr size_t kConversionError = static_cast<size_t>(-1);

  // First pass: measure. Nothing is kept from this pass except the total
  // number of bytes the converted string will occupy.
  mbstate_t shift_state;
  memset(&shift_state, 0, sizeof(shift_state));
  size_t byte_count = 0;
  for (size_t pos = 0; pos < wide.size(); ++pos) {
    const wchar_t wc = wide[pos];
    if (wc == L'\0') {
      // Bypass wcrtomb() for nulls to avoid its special handling of them; an
      // embedded null simply takes up one output byte.
      ++byte_count;
      continue;
    }

    // wcrtomb() does not compute an output length when given a NULL
    // destination, so convert into a scratch buffer that is thrown away.
    char scratch[16];
    const size_t written = wcrtomb(scratch, wc, &shift_state);
    if (written == kConversionError)
      return std::string();
    // A zero result is accounted for as a single byte, same as a null.
    byte_count += (written == 0) ? 1 : written;
  }

  if (byte_count == 0)
    return std::string();

  // Second pass: convert for real, straight into zero-filled storage of
  // exactly the measured size.
  std::string result(byte_count, '\0');
  memset(&shift_state, 0, sizeof(shift_state));  // Start from a clean state.
  size_t offset = 0;  // Index of the next byte to write in |result|.
  for (const wchar_t wc : wide) {
    if (wc == L'\0') {
      // The byte at |offset| is already zero; just step over it.
      ++offset;
      continue;
    }

    const size_t written = wcrtomb(&result[offset], wc, &shift_state);
    if (written == kConversionError)
      return std::string();
    offset += (written == 0) ? 1 : written;
  }

  return result;
}
100 
SysNativeMBToWide(StringPiece native_mb)101 std::wstring SysNativeMBToWide(StringPiece native_mb) {
102   mbstate_t ps;
103 
104   // Calculate the number of wide characters.  We walk through the string
105   // without writing the output, counting the number of wide characters.
106   size_t num_out_chars = 0;
107   memset(&ps, 0, sizeof(ps));
108   for (size_t i = 0; i < native_mb.size(); ) {
109     const char* src = native_mb.data() + i;
110     size_t res = mbrtowc(nullptr, src, native_mb.size() - i, &ps);
111     switch (res) {
112       // Handle any errors and return an empty string.
113       case static_cast<size_t>(-2):
114       case static_cast<size_t>(-1):
115         return std::wstring();
116       case 0:
117         // We hit an embedded null byte, keep going.
118         i += 1;
119         [[fallthrough]];
120       default:
121         i += res;
122         ++num_out_chars;
123         break;
124     }
125   }
126 
127   if (num_out_chars == 0)
128     return std::wstring();
129 
130   std::wstring out;
131   out.resize(num_out_chars);
132 
133   memset(&ps, 0, sizeof(ps));  // Clear the shift state.
134   // We walk the input string again, with |i| tracking the index of the
135   // multi-byte input, and |j| tracking the wide output.
136   for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
137     const char* src = native_mb.data() + i;
138     wchar_t* dst = &out[j];
139     size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
140     switch (res) {
141       // Handle any errors and return an empty string.
142       case static_cast<size_t>(-2):
143       case static_cast<size_t>(-1):
144         return std::wstring();
145       case 0:
146         i += 1;  // Skip null byte.
147         break;
148       default:
149         i += res;
150         break;
151     }
152   }
153 
154   return out;
155 }
156 
157 #endif  // defined(SYSTEM_NATIVE_UTF8) || BUILDFLAG(IS_ANDROID)
158 
159 }  // namespace base
160