// xref: /aosp_15_r20/system/libbase/utf8.cpp (revision 8f0ba417480079999ba552f1087ae592091b9d02)
/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <windows.h>

#include "android-base/utf8.h"

#include <fcntl.h>
#include <limits.h>
#include <stdio.h>

#include <algorithm>
#include <string>

#include "android-base/logging.h"

namespace android {
namespace base {

32*8f0ba417SAndroid Build Coastguard Worker // Helper to set errno based on GetLastError() after WideCharToMultiByte()/MultiByteToWideChar().
SetErrnoFromLastError()33*8f0ba417SAndroid Build Coastguard Worker static void SetErrnoFromLastError() {
34*8f0ba417SAndroid Build Coastguard Worker   switch (GetLastError()) {
35*8f0ba417SAndroid Build Coastguard Worker     case ERROR_NO_UNICODE_TRANSLATION:
36*8f0ba417SAndroid Build Coastguard Worker       errno = EILSEQ;
37*8f0ba417SAndroid Build Coastguard Worker       break;
38*8f0ba417SAndroid Build Coastguard Worker     default:
39*8f0ba417SAndroid Build Coastguard Worker       errno = EINVAL;
40*8f0ba417SAndroid Build Coastguard Worker       break;
41*8f0ba417SAndroid Build Coastguard Worker   }
42*8f0ba417SAndroid Build Coastguard Worker }
43*8f0ba417SAndroid Build Coastguard Worker 
WideToUTF8(const wchar_t * utf16,const size_t size,std::string * utf8)44*8f0ba417SAndroid Build Coastguard Worker bool WideToUTF8(const wchar_t* utf16, const size_t size, std::string* utf8) {
45*8f0ba417SAndroid Build Coastguard Worker   utf8->clear();
46*8f0ba417SAndroid Build Coastguard Worker 
47*8f0ba417SAndroid Build Coastguard Worker   if (size == 0) {
48*8f0ba417SAndroid Build Coastguard Worker     return true;
49*8f0ba417SAndroid Build Coastguard Worker   }
50*8f0ba417SAndroid Build Coastguard Worker 
51*8f0ba417SAndroid Build Coastguard Worker   // TODO: Consider using std::wstring_convert once libcxx is supported on
52*8f0ba417SAndroid Build Coastguard Worker   // Windows.
53*8f0ba417SAndroid Build Coastguard Worker 
54*8f0ba417SAndroid Build Coastguard Worker   // Only Vista or later has this flag that causes WideCharToMultiByte() to
55*8f0ba417SAndroid Build Coastguard Worker   // return an error on invalid characters.
56*8f0ba417SAndroid Build Coastguard Worker   const DWORD flags =
57*8f0ba417SAndroid Build Coastguard Worker #if (WINVER >= 0x0600)
58*8f0ba417SAndroid Build Coastguard Worker     WC_ERR_INVALID_CHARS;
59*8f0ba417SAndroid Build Coastguard Worker #else
60*8f0ba417SAndroid Build Coastguard Worker     0;
61*8f0ba417SAndroid Build Coastguard Worker #endif
62*8f0ba417SAndroid Build Coastguard Worker 
63*8f0ba417SAndroid Build Coastguard Worker   const int chars_required = WideCharToMultiByte(CP_UTF8, flags, utf16, size,
64*8f0ba417SAndroid Build Coastguard Worker                                                  NULL, 0, NULL, NULL);
65*8f0ba417SAndroid Build Coastguard Worker   if (chars_required <= 0) {
66*8f0ba417SAndroid Build Coastguard Worker     SetErrnoFromLastError();
67*8f0ba417SAndroid Build Coastguard Worker     return false;
68*8f0ba417SAndroid Build Coastguard Worker   }
69*8f0ba417SAndroid Build Coastguard Worker 
70*8f0ba417SAndroid Build Coastguard Worker   // This could potentially throw a std::bad_alloc exception.
71*8f0ba417SAndroid Build Coastguard Worker   utf8->resize(chars_required);
72*8f0ba417SAndroid Build Coastguard Worker 
73*8f0ba417SAndroid Build Coastguard Worker   const int result = WideCharToMultiByte(CP_UTF8, flags, utf16, size,
74*8f0ba417SAndroid Build Coastguard Worker                                          &(*utf8)[0], chars_required, NULL,
75*8f0ba417SAndroid Build Coastguard Worker                                          NULL);
76*8f0ba417SAndroid Build Coastguard Worker   if (result != chars_required) {
77*8f0ba417SAndroid Build Coastguard Worker     SetErrnoFromLastError();
78*8f0ba417SAndroid Build Coastguard Worker     CHECK_LE(result, chars_required) << "WideCharToMultiByte wrote " << result
79*8f0ba417SAndroid Build Coastguard Worker         << " chars to buffer of " << chars_required << " chars";
80*8f0ba417SAndroid Build Coastguard Worker     utf8->clear();
81*8f0ba417SAndroid Build Coastguard Worker     return false;
82*8f0ba417SAndroid Build Coastguard Worker   }
83*8f0ba417SAndroid Build Coastguard Worker 
84*8f0ba417SAndroid Build Coastguard Worker   return true;
85*8f0ba417SAndroid Build Coastguard Worker }
86*8f0ba417SAndroid Build Coastguard Worker 
WideToUTF8(const wchar_t * utf16,std::string * utf8)87*8f0ba417SAndroid Build Coastguard Worker bool WideToUTF8(const wchar_t* utf16, std::string* utf8) {
88*8f0ba417SAndroid Build Coastguard Worker   // Compute string length of NULL-terminated string with wcslen().
89*8f0ba417SAndroid Build Coastguard Worker   return WideToUTF8(utf16, wcslen(utf16), utf8);
90*8f0ba417SAndroid Build Coastguard Worker }
91*8f0ba417SAndroid Build Coastguard Worker 
WideToUTF8(const std::wstring & utf16,std::string * utf8)92*8f0ba417SAndroid Build Coastguard Worker bool WideToUTF8(const std::wstring& utf16, std::string* utf8) {
93*8f0ba417SAndroid Build Coastguard Worker   // Use the stored length of the string which allows embedded NULL characters
94*8f0ba417SAndroid Build Coastguard Worker   // to be converted.
95*8f0ba417SAndroid Build Coastguard Worker   return WideToUTF8(utf16.c_str(), utf16.length(), utf8);
96*8f0ba417SAndroid Build Coastguard Worker }
97*8f0ba417SAndroid Build Coastguard Worker 
98*8f0ba417SAndroid Build Coastguard Worker // Internal helper function that takes MultiByteToWideChar() flags.
UTF8ToWideWithFlags(const char * utf8,const size_t size,std::wstring * utf16,const DWORD flags)99*8f0ba417SAndroid Build Coastguard Worker static bool UTF8ToWideWithFlags(const char* utf8, const size_t size, std::wstring* utf16,
100*8f0ba417SAndroid Build Coastguard Worker                                 const DWORD flags) {
101*8f0ba417SAndroid Build Coastguard Worker   utf16->clear();
102*8f0ba417SAndroid Build Coastguard Worker 
103*8f0ba417SAndroid Build Coastguard Worker   if (size == 0) {
104*8f0ba417SAndroid Build Coastguard Worker     return true;
105*8f0ba417SAndroid Build Coastguard Worker   }
106*8f0ba417SAndroid Build Coastguard Worker 
107*8f0ba417SAndroid Build Coastguard Worker   // TODO: Consider using std::wstring_convert once libcxx is supported on
108*8f0ba417SAndroid Build Coastguard Worker   // Windows.
109*8f0ba417SAndroid Build Coastguard Worker   const int chars_required = MultiByteToWideChar(CP_UTF8, flags, utf8, size,
110*8f0ba417SAndroid Build Coastguard Worker                                                  NULL, 0);
111*8f0ba417SAndroid Build Coastguard Worker   if (chars_required <= 0) {
112*8f0ba417SAndroid Build Coastguard Worker     SetErrnoFromLastError();
113*8f0ba417SAndroid Build Coastguard Worker     return false;
114*8f0ba417SAndroid Build Coastguard Worker   }
115*8f0ba417SAndroid Build Coastguard Worker 
116*8f0ba417SAndroid Build Coastguard Worker   // This could potentially throw a std::bad_alloc exception.
117*8f0ba417SAndroid Build Coastguard Worker   utf16->resize(chars_required);
118*8f0ba417SAndroid Build Coastguard Worker 
119*8f0ba417SAndroid Build Coastguard Worker   const int result = MultiByteToWideChar(CP_UTF8, flags, utf8, size,
120*8f0ba417SAndroid Build Coastguard Worker                                          &(*utf16)[0], chars_required);
121*8f0ba417SAndroid Build Coastguard Worker   if (result != chars_required) {
122*8f0ba417SAndroid Build Coastguard Worker     SetErrnoFromLastError();
123*8f0ba417SAndroid Build Coastguard Worker     CHECK_LE(result, chars_required) << "MultiByteToWideChar wrote " << result
124*8f0ba417SAndroid Build Coastguard Worker         << " chars to buffer of " << chars_required << " chars";
125*8f0ba417SAndroid Build Coastguard Worker     utf16->clear();
126*8f0ba417SAndroid Build Coastguard Worker     return false;
127*8f0ba417SAndroid Build Coastguard Worker   }
128*8f0ba417SAndroid Build Coastguard Worker 
129*8f0ba417SAndroid Build Coastguard Worker   return true;
130*8f0ba417SAndroid Build Coastguard Worker }
131*8f0ba417SAndroid Build Coastguard Worker 
UTF8ToWide(const char * utf8,const size_t size,std::wstring * utf16)132*8f0ba417SAndroid Build Coastguard Worker bool UTF8ToWide(const char* utf8, const size_t size, std::wstring* utf16) {
133*8f0ba417SAndroid Build Coastguard Worker   // If strictly interpreting as UTF-8 succeeds, return success.
134*8f0ba417SAndroid Build Coastguard Worker   if (UTF8ToWideWithFlags(utf8, size, utf16, MB_ERR_INVALID_CHARS)) {
135*8f0ba417SAndroid Build Coastguard Worker     return true;
136*8f0ba417SAndroid Build Coastguard Worker   }
137*8f0ba417SAndroid Build Coastguard Worker 
138*8f0ba417SAndroid Build Coastguard Worker   const int saved_errno = errno;
139*8f0ba417SAndroid Build Coastguard Worker 
140*8f0ba417SAndroid Build Coastguard Worker   // Fallback to non-strict interpretation, allowing invalid characters and
141*8f0ba417SAndroid Build Coastguard Worker   // converting as best as possible, and return false to signify a problem.
142*8f0ba417SAndroid Build Coastguard Worker   (void)UTF8ToWideWithFlags(utf8, size, utf16, 0);
143*8f0ba417SAndroid Build Coastguard Worker   errno = saved_errno;
144*8f0ba417SAndroid Build Coastguard Worker   return false;
145*8f0ba417SAndroid Build Coastguard Worker }
146*8f0ba417SAndroid Build Coastguard Worker 
UTF8ToWide(const char * utf8,std::wstring * utf16)147*8f0ba417SAndroid Build Coastguard Worker bool UTF8ToWide(const char* utf8, std::wstring* utf16) {
148*8f0ba417SAndroid Build Coastguard Worker   // Compute string length of NULL-terminated string with strlen().
149*8f0ba417SAndroid Build Coastguard Worker   return UTF8ToWide(utf8, strlen(utf8), utf16);
150*8f0ba417SAndroid Build Coastguard Worker }
151*8f0ba417SAndroid Build Coastguard Worker 
UTF8ToWide(const std::string & utf8,std::wstring * utf16)152*8f0ba417SAndroid Build Coastguard Worker bool UTF8ToWide(const std::string& utf8, std::wstring* utf16) {
153*8f0ba417SAndroid Build Coastguard Worker   // Use the stored length of the string which allows embedded NULL characters
154*8f0ba417SAndroid Build Coastguard Worker   // to be converted.
155*8f0ba417SAndroid Build Coastguard Worker   return UTF8ToWide(utf8.c_str(), utf8.length(), utf16);
156*8f0ba417SAndroid Build Coastguard Worker }
157*8f0ba417SAndroid Build Coastguard Worker 
// Returns true if |c| is an ASCII letter ('a'-'z' or 'A'-'Z'), i.e. a
// character that can name a drive in a path such as "C:\".
static bool isDriveLetter(wchar_t c) {
  const bool is_lower = (c >= L'a' && c <= L'z');
  const bool is_upper = (c >= L'A' && c <= L'Z');
  return is_lower || is_upper;
}

UTF8PathToWindowsLongPath(const char * utf8,std::wstring * utf16)162*8f0ba417SAndroid Build Coastguard Worker bool UTF8PathToWindowsLongPath(const char* utf8, std::wstring* utf16) {
163*8f0ba417SAndroid Build Coastguard Worker   if (!UTF8ToWide(utf8, utf16)) {
164*8f0ba417SAndroid Build Coastguard Worker     return false;
165*8f0ba417SAndroid Build Coastguard Worker   }
166*8f0ba417SAndroid Build Coastguard Worker   // Note: Although most Win32 File I/O API are limited to MAX_PATH (260
167*8f0ba417SAndroid Build Coastguard Worker   //       characters), the CreateDirectory API is limited to 248 characters.
168*8f0ba417SAndroid Build Coastguard Worker   if (utf16->length() >= 248) {
169*8f0ba417SAndroid Build Coastguard Worker     // If path is of the form "x:\" or "x:/"
170*8f0ba417SAndroid Build Coastguard Worker     if (isDriveLetter((*utf16)[0]) && (*utf16)[1] == L':' &&
171*8f0ba417SAndroid Build Coastguard Worker         ((*utf16)[2] == L'\\' || (*utf16)[2] == L'/')) {
172*8f0ba417SAndroid Build Coastguard Worker       // Append long path prefix, and make sure there are no unix-style
173*8f0ba417SAndroid Build Coastguard Worker       // separators to ensure a fully compliant Win32 long path string.
174*8f0ba417SAndroid Build Coastguard Worker       utf16->insert(0, LR"(\\?\)");
175*8f0ba417SAndroid Build Coastguard Worker       std::replace(utf16->begin(), utf16->end(), L'/', L'\\');
176*8f0ba417SAndroid Build Coastguard Worker     }
177*8f0ba417SAndroid Build Coastguard Worker   }
178*8f0ba417SAndroid Build Coastguard Worker   return true;
179*8f0ba417SAndroid Build Coastguard Worker }
180*8f0ba417SAndroid Build Coastguard Worker 
// Versions of standard library APIs that support UTF-8 strings.
namespace utf8 {

fopen(const char * name,const char * mode)184*8f0ba417SAndroid Build Coastguard Worker FILE* fopen(const char* name, const char* mode) {
185*8f0ba417SAndroid Build Coastguard Worker   std::wstring name_utf16;
186*8f0ba417SAndroid Build Coastguard Worker   if (!UTF8PathToWindowsLongPath(name, &name_utf16)) {
187*8f0ba417SAndroid Build Coastguard Worker     return nullptr;
188*8f0ba417SAndroid Build Coastguard Worker   }
189*8f0ba417SAndroid Build Coastguard Worker 
190*8f0ba417SAndroid Build Coastguard Worker   std::wstring mode_utf16;
191*8f0ba417SAndroid Build Coastguard Worker   if (!UTF8ToWide(mode, &mode_utf16)) {
192*8f0ba417SAndroid Build Coastguard Worker     return nullptr;
193*8f0ba417SAndroid Build Coastguard Worker   }
194*8f0ba417SAndroid Build Coastguard Worker 
195*8f0ba417SAndroid Build Coastguard Worker   return _wfopen(name_utf16.c_str(), mode_utf16.c_str());
196*8f0ba417SAndroid Build Coastguard Worker }
197*8f0ba417SAndroid Build Coastguard Worker 
mkdir(const char * name,mode_t)198*8f0ba417SAndroid Build Coastguard Worker int mkdir(const char* name, mode_t) {
199*8f0ba417SAndroid Build Coastguard Worker   std::wstring name_utf16;
200*8f0ba417SAndroid Build Coastguard Worker   if (!UTF8PathToWindowsLongPath(name, &name_utf16)) {
201*8f0ba417SAndroid Build Coastguard Worker     return -1;
202*8f0ba417SAndroid Build Coastguard Worker   }
203*8f0ba417SAndroid Build Coastguard Worker 
204*8f0ba417SAndroid Build Coastguard Worker   return _wmkdir(name_utf16.c_str());
205*8f0ba417SAndroid Build Coastguard Worker }
206*8f0ba417SAndroid Build Coastguard Worker 
open(const char * name,int flags,...)207*8f0ba417SAndroid Build Coastguard Worker int open(const char* name, int flags, ...) {
208*8f0ba417SAndroid Build Coastguard Worker   std::wstring name_utf16;
209*8f0ba417SAndroid Build Coastguard Worker   if (!UTF8PathToWindowsLongPath(name, &name_utf16)) {
210*8f0ba417SAndroid Build Coastguard Worker     return -1;
211*8f0ba417SAndroid Build Coastguard Worker   }
212*8f0ba417SAndroid Build Coastguard Worker 
213*8f0ba417SAndroid Build Coastguard Worker   int mode = 0;
214*8f0ba417SAndroid Build Coastguard Worker   if ((flags & O_CREAT) != 0) {
215*8f0ba417SAndroid Build Coastguard Worker     va_list args;
216*8f0ba417SAndroid Build Coastguard Worker     va_start(args, flags);
217*8f0ba417SAndroid Build Coastguard Worker     mode = va_arg(args, int);
218*8f0ba417SAndroid Build Coastguard Worker     va_end(args);
219*8f0ba417SAndroid Build Coastguard Worker   }
220*8f0ba417SAndroid Build Coastguard Worker 
221*8f0ba417SAndroid Build Coastguard Worker   return _wopen(name_utf16.c_str(), flags, mode);
222*8f0ba417SAndroid Build Coastguard Worker }
223*8f0ba417SAndroid Build Coastguard Worker 
unlink(const char * name)224*8f0ba417SAndroid Build Coastguard Worker int unlink(const char* name) {
225*8f0ba417SAndroid Build Coastguard Worker   std::wstring name_utf16;
226*8f0ba417SAndroid Build Coastguard Worker   if (!UTF8PathToWindowsLongPath(name, &name_utf16)) {
227*8f0ba417SAndroid Build Coastguard Worker     return -1;
228*8f0ba417SAndroid Build Coastguard Worker   }
229*8f0ba417SAndroid Build Coastguard Worker 
230*8f0ba417SAndroid Build Coastguard Worker   return _wunlink(name_utf16.c_str());
231*8f0ba417SAndroid Build Coastguard Worker }
232*8f0ba417SAndroid Build Coastguard Worker 
}  // namespace utf8
}  // namespace base
}  // namespace android