1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
#include "base/strings/sys_string_conversions.h"

#include <limits.h>
#include <stddef.h>
#include <string.h>
#include <wchar.h>

#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
14
15 namespace base {
16
SysWideToUTF8(const std::wstring & wide)17 std::string SysWideToUTF8(const std::wstring& wide) {
18 // In theory this should be using the system-provided conversion rather
19 // than our ICU, but this will do for now.
20 return WideToUTF8(wide);
21 }
SysUTF8ToWide(StringPiece utf8)22 std::wstring SysUTF8ToWide(StringPiece utf8) {
23 // In theory this should be using the system-provided conversion rather
24 // than our ICU, but this will do for now.
25 std::wstring out;
26 UTF8ToWide(utf8.data(), utf8.size(), &out);
27 return out;
28 }
29
30 #if defined(SYSTEM_NATIVE_UTF8) || BUILDFLAG(IS_ANDROID)
// TODO(port): Consider reverting the IS_ANDROID special case when we have
// wcrtomb() support and a better understanding of what calls these routines.
33
SysWideToNativeMB(const std::wstring & wide)34 std::string SysWideToNativeMB(const std::wstring& wide) {
35 return WideToUTF8(wide);
36 }
37
SysNativeMBToWide(StringPiece native_mb)38 std::wstring SysNativeMBToWide(StringPiece native_mb) {
39 return SysUTF8ToWide(native_mb);
40 }
41
42 #else
43
// Converts |wide| to the multi-byte encoding of the current C locale using
// wcrtomb().
//
// Embedded null characters are copied through as single null bytes rather
// than terminating the conversion. If any character cannot be represented in
// the locale's encoding, the whole conversion fails and an empty string is
// returned (no partial output).
std::string SysWideToNativeMB(const std::wstring& wide) {
  std::string out;
  // Every wide character produces at least one output byte, so this is a
  // lower bound that avoids most reallocations for ASCII-heavy input.
  out.reserve(wide.size());

  mbstate_t ps{};  // A zero-valued mbstate_t is the initial conversion state.
  for (const wchar_t src : wide) {
    if (src == L'\0') {
      // Don't hand nulls to wcrtomb(): it special-cases them (resetting the
      // shift state). Emit the null byte directly instead.
      out.push_back('\0');
      continue;
    }

    // wcrtomb() writes at most MB_CUR_MAX bytes, which never exceeds
    // MB_LEN_MAX. A scratch buffer is needed because the output length is not
    // known until the character has been converted.
    char buf[MB_LEN_MAX];
    const size_t res = wcrtomb(buf, src, &ps);
    if (res == static_cast<size_t>(-1)) {
      // Unrepresentable character: report failure as an empty string.
      return std::string();
    }
    out.append(buf, res);
  }

  return out;
}
100
SysNativeMBToWide(StringPiece native_mb)101 std::wstring SysNativeMBToWide(StringPiece native_mb) {
102 mbstate_t ps;
103
104 // Calculate the number of wide characters. We walk through the string
105 // without writing the output, counting the number of wide characters.
106 size_t num_out_chars = 0;
107 memset(&ps, 0, sizeof(ps));
108 for (size_t i = 0; i < native_mb.size(); ) {
109 const char* src = native_mb.data() + i;
110 size_t res = mbrtowc(nullptr, src, native_mb.size() - i, &ps);
111 switch (res) {
112 // Handle any errors and return an empty string.
113 case static_cast<size_t>(-2):
114 case static_cast<size_t>(-1):
115 return std::wstring();
116 case 0:
117 // We hit an embedded null byte, keep going.
118 i += 1;
119 [[fallthrough]];
120 default:
121 i += res;
122 ++num_out_chars;
123 break;
124 }
125 }
126
127 if (num_out_chars == 0)
128 return std::wstring();
129
130 std::wstring out;
131 out.resize(num_out_chars);
132
133 memset(&ps, 0, sizeof(ps)); // Clear the shift state.
134 // We walk the input string again, with |i| tracking the index of the
135 // multi-byte input, and |j| tracking the wide output.
136 for (size_t i = 0, j = 0; i < native_mb.size(); ++j) {
137 const char* src = native_mb.data() + i;
138 wchar_t* dst = &out[j];
139 size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps);
140 switch (res) {
141 // Handle any errors and return an empty string.
142 case static_cast<size_t>(-2):
143 case static_cast<size_t>(-1):
144 return std::wstring();
145 case 0:
146 i += 1; // Skip null byte.
147 break;
148 default:
149 i += res;
150 break;
151 }
152 }
153
154 return out;
155 }
156
157 #endif // defined(SYSTEM_NATIVE_UTF8) || BUILDFLAG(IS_ANDROID)
158
159 } // namespace base
160