xref: /aosp_15_r20/external/pdfium/core/fxcrt/fx_codepage.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2018 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fxcrt/fx_codepage.h"
8 
9 #include <algorithm>
10 #include <iterator>
11 #include <utility>
12 
13 #include "build/build_config.h"
14 #include "third_party/base/numerics/safe_math.h"
15 
16 #if BUILDFLAG(IS_WIN)
17 #include <windows.h>
18 #endif
19 
20 namespace {
21 
// Unicode values for the Thai charset table (128 entries). The trailing
// comment on each row is the array index of that row's first element.
// NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking bytes
// that have no mapping -- confirm against the callers of kFX_CharsetUnicodes.
const uint16_t kFX_MSDOSThaiUnicodes[128] = {
    0x20AC, 0x0000, 0x0000, 0x0000, 0x0000, 0x2026, 0x0000, 0x0000,  // 0x00
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 0x08
    0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,  // 0x10
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 0x18
    0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,  // 0x20
    0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,  // 0x28
    0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,  // 0x30
    0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,  // 0x38
    0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,  // 0x40
    0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,  // 0x48
    0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,  // 0x50
    0x0E38, 0x0E39, 0x0E3A, 0x0000, 0x0000, 0x0000, 0x0000, 0x0E3F,  // 0x58
    0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,  // 0x60
    0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,  // 0x68
    0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,  // 0x70
    0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0x0000, 0x0000, 0x0000, 0x0000,  // 0x78
};
39 
// Unicode values for the Windows Eastern European charset table (128
// entries). The trailing comment on each row is the array index of that row's
// first element.
// NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking bytes
// that have no mapping -- confirm against the callers of kFX_CharsetUnicodes.
const uint16_t kFX_MSWinEasternEuropeanUnicodes[128] = {
    0x20AC, 0x0000, 0x201A, 0x0000, 0x201E, 0x2026, 0x2020, 0x2021,  // 0x00
    0x0000, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179,  // 0x08
    0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,  // 0x10
    0x0000, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A,  // 0x18
    0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7,  // 0x20
    0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B,  // 0x28
    0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7,  // 0x30
    0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C,  // 0x38
    0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,  // 0x40
    0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,  // 0x48
    0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,  // 0x50
    0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,  // 0x58
    0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,  // 0x60
    0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,  // 0x68
    0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,  // 0x70
    0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9,  // 0x78
};
57 
// Unicode values for the Windows Cyrillic charset table (128 entries). The
// trailing comment on each row is the array index of that row's first element.
// NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking bytes
// that have no mapping -- confirm against the callers of kFX_CharsetUnicodes.
const uint16_t kFX_MSWinCyrillicUnicodes[128] = {
    0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,  // 0x00
    0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,  // 0x08
    0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,  // 0x10
    0x0000, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,  // 0x18
    0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,  // 0x20
    0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,  // 0x28
    0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,  // 0x30
    0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,  // 0x38
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,  // 0x40
    0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,  // 0x48
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,  // 0x50
    0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,  // 0x58
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,  // 0x60
    0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,  // 0x68
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,  // 0x70
    0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,  // 0x78
};
75 
// Unicode values for the Windows Greek charset table (128 entries). The
// trailing comment on each row is the array index of that row's first element.
// NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking bytes
// that have no mapping -- confirm against the callers of kFX_CharsetUnicodes.
const uint16_t kFX_MSWinGreekUnicodes[128] = {
    0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,  // 0x00
    0x0000, 0x2030, 0x0000, 0x2039, 0x0000, 0x0000, 0x0000, 0x0000,  // 0x08
    0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,  // 0x10
    0x0000, 0x2122, 0x0000, 0x203A, 0x0000, 0x0000, 0x0000, 0x0000,  // 0x18
    0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,  // 0x20
    0x00A8, 0x00A9, 0x0000, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015,  // 0x28
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7,  // 0x30
    0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,  // 0x38
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,  // 0x40
    0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,  // 0x48
    0x03A0, 0x03A1, 0x0000, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,  // 0x50
    0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,  // 0x58
    0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,  // 0x60
    0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,  // 0x68
    0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,  // 0x70
    0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0x0000,  // 0x78
};
93 
// Unicode values for the Windows Turkish charset table (128 entries). The
// trailing comment on each row is the array index of that row's first element.
// NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking bytes
// that have no mapping -- confirm against the callers of kFX_CharsetUnicodes.
const uint16_t kFX_MSWinTurkishUnicodes[128] = {
    0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,  // 0x00
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x0000, 0x0000,  // 0x08
    0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,  // 0x10
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x0000, 0x0178,  // 0x18
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,  // 0x20
    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,  // 0x28
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,  // 0x30
    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,  // 0x38
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,  // 0x40
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,  // 0x48
    0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,  // 0x50
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,  // 0x58
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,  // 0x60
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,  // 0x68
    0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,  // 0x70
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF,  // 0x78
};
111 
// Unicode values for the Windows Hebrew charset table (128 entries). The
// trailing comment on each row is the array index of that row's first element.
// NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking bytes
// that have no mapping -- confirm against the callers of kFX_CharsetUnicodes.
const uint16_t kFX_MSWinHebrewUnicodes[128] = {
    0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,  // 0x00
    0x02C6, 0x2030, 0x0000, 0x2039, 0x0000, 0x0000, 0x0000, 0x0000,  // 0x08
    0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,  // 0x10
    0x02DC, 0x2122, 0x0000, 0x203A, 0x0000, 0x0000, 0x0000, 0x0000,  // 0x18
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7,  // 0x20
    0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,  // 0x28
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,  // 0x30
    0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,  // 0x38
    0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,  // 0x40
    0x05B8, 0x05B9, 0x0000, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,  // 0x48
    0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3,  // 0x50
    0x05F4, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,  // 0x58
    0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,  // 0x60
    0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,  // 0x68
    0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,  // 0x70
    0x05E8, 0x05E9, 0x05EA, 0x0000, 0x0000, 0x200E, 0x200F, 0x0000,  // 0x78
};
129 
// Unicode values for the Windows Arabic charset table (128 entries). The
// trailing comment on each row is the array index of that row's first element.
// NOTE(review): presumably indexed by (byte - 0x80) -- confirm against the
// callers of kFX_CharsetUnicodes.
const uint16_t kFX_MSWinArabicUnicodes[128] = {
    0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,  // 0x00
    0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688,  // 0x08
    0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,  // 0x10
    0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA,  // 0x18
    0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,  // 0x20
    0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,  // 0x28
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,  // 0x30
    0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F,  // 0x38
    0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,  // 0x40
    0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,  // 0x48
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7,  // 0x50
    0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643,  // 0x58
    0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7,  // 0x60
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF,  // 0x68
    0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7,  // 0x70
    0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2,  // 0x78
};
147 
// Unicode values for the Windows Baltic charset table (128 entries). The
// trailing comment on each row is the array index of that row's first element.
// NOTE(review): presumably indexed by (byte - 0x80), with 0x0000 marking bytes
// that have no mapping -- confirm against the callers of kFX_CharsetUnicodes.
const uint16_t kFX_MSWinBalticUnicodes[128] = {
    0x20AC, 0x0000, 0x201A, 0x0000, 0x201E, 0x2026, 0x2020, 0x2021,  // 0x00
    0x0000, 0x2030, 0x0000, 0x2039, 0x0000, 0x00A8, 0x02C7, 0x00B8,  // 0x08
    0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,  // 0x10
    0x0000, 0x2122, 0x0000, 0x203A, 0x0000, 0x00AF, 0x02DB, 0x0000,  // 0x18
    0x00A0, 0x0000, 0x00A2, 0x00A3, 0x00A4, 0x0000, 0x00A6, 0x00A7,  // 0x20
    0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,  // 0x28
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,  // 0x30
    0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,  // 0x38
    0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,  // 0x40
    0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,  // 0x48
    0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,  // 0x50
    0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,  // 0x58
    0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,  // 0x60
    0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,  // 0x68
    0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,  // 0x70
    0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9,  // 0x78
};
165 
166 struct FX_CHARSET_MAP {
167   FX_Charset charset;
168   FX_CodePage codepage;
169 };
170 
171 const FX_CHARSET_MAP kFXCharset2CodePageTable[] = {
172     {FX_Charset::kANSI, FX_CodePage::kMSWin_WesternEuropean},
173     {FX_Charset::kDefault, FX_CodePage::kDefANSI},
174     {FX_Charset::kSymbol, FX_CodePage::kSymbol},
175     {FX_Charset::kMAC_Roman, FX_CodePage::kMAC_Roman},
176     {FX_Charset::kMAC_ShiftJIS, FX_CodePage::kMAC_ShiftJIS},
177     {FX_Charset::kMAC_Korean, FX_CodePage::kMAC_Korean},
178     {FX_Charset::kMAC_ChineseSimplified, FX_CodePage::kMAC_ChineseSimplified},
179     {FX_Charset::kMAC_ChineseTraditional, FX_CodePage::kMAC_ChineseTraditional},
180     {FX_Charset::kMAC_Hebrew, FX_CodePage::kMAC_Hebrew},
181     {FX_Charset::kMAC_Arabic, FX_CodePage::kMAC_Arabic},
182     {FX_Charset::kMAC_Greek, FX_CodePage::kMAC_Greek},
183     {FX_Charset::kMAC_Turkish, FX_CodePage::kMAC_Turkish},
184     {FX_Charset::kMAC_Thai, FX_CodePage::kMAC_Thai},
185     {FX_Charset::kMAC_EasternEuropean, FX_CodePage::kMAC_EasternEuropean},
186     {FX_Charset::kMAC_Cyrillic, FX_CodePage::kMAC_Cyrillic},
187     {FX_Charset::kShiftJIS, FX_CodePage::kShiftJIS},
188     {FX_Charset::kHangul, FX_CodePage::kHangul},
189     {FX_Charset::kJohab, FX_CodePage::kJohab},
190     {FX_Charset::kChineseSimplified, FX_CodePage::kChineseSimplified},
191     {FX_Charset::kChineseTraditional, FX_CodePage::kChineseTraditional},
192     {FX_Charset::kMSWin_Greek, FX_CodePage::kMSWin_Greek},
193     {FX_Charset::kMSWin_Turkish, FX_CodePage::kMSWin_Turkish},
194     {FX_Charset::kMSWin_Vietnamese, FX_CodePage::kMSWin_Vietnamese},
195     {FX_Charset::kMSWin_Hebrew, FX_CodePage::kMSWin_Hebrew},
196     {FX_Charset::kMSWin_Arabic, FX_CodePage::kMSWin_Arabic},
197     {FX_Charset::kMSWin_Baltic, FX_CodePage::kMSWin_Baltic},
198     {FX_Charset::kMSWin_Cyrillic, FX_CodePage::kMSWin_Cyrillic},
199     {FX_Charset::kThai, FX_CodePage::kMSDOS_Thai},
200     {FX_Charset::kMSWin_EasternEuropean, FX_CodePage::kMSWin_EasternEuropean},
201     {FX_Charset::kUS, FX_CodePage::kMSDOS_US},
202     {FX_Charset::kOEM, FX_CodePage::kMSDOS_WesternEuropean},
203 };
204 
205 }  // namespace
206 
// Pairs each charset that has a Unicode table in this file with that table.
// The array bound is spelled out as 8, matching the eight rows listed here.
const FX_CharsetUnicodes kFX_CharsetUnicodes[8] = {
    {FX_Charset::kThai, kFX_MSDOSThaiUnicodes},
    {FX_Charset::kMSWin_EasternEuropean, kFX_MSWinEasternEuropeanUnicodes},
    {FX_Charset::kMSWin_Cyrillic, kFX_MSWinCyrillicUnicodes},
    {FX_Charset::kMSWin_Greek, kFX_MSWinGreekUnicodes},
    {FX_Charset::kMSWin_Turkish, kFX_MSWinTurkishUnicodes},
    {FX_Charset::kMSWin_Hebrew, kFX_MSWinHebrewUnicodes},
    {FX_Charset::kMSWin_Arabic, kFX_MSWinArabicUnicodes},
    {FX_Charset::kMSWin_Baltic, kFX_MSWinBalticUnicodes},
};
217 
FX_GetACP()218 FX_CodePage FX_GetACP() {
219 #if BUILDFLAG(IS_WIN)
220   return static_cast<FX_CodePage>(GetACP());
221 #else
222   return FX_CodePage::kDefANSI;
223 #endif
224 }
225 
FX_GetCodePageFromCharset(FX_Charset charset)226 FX_CodePage FX_GetCodePageFromCharset(FX_Charset charset) {
227   auto* result = std::lower_bound(
228       std::begin(kFXCharset2CodePageTable), std::end(kFXCharset2CodePageTable),
229       charset, [](const FX_CHARSET_MAP& iter, const FX_Charset& charset) {
230         return iter.charset < charset;
231       });
232   if (result != std::end(kFXCharset2CodePageTable) &&
233       result->charset == charset) {
234     return result->codepage;
235   }
236   return FX_CodePage::kFailure;
237 }
238 
FX_GetCharsetFromCodePage(FX_CodePage codepage)239 FX_Charset FX_GetCharsetFromCodePage(FX_CodePage codepage) {
240   for (const auto& it : kFXCharset2CodePageTable) {
241     if (it.codepage == codepage)
242       return it.charset;
243   }
244   return FX_Charset::kANSI;
245 }
246 
FX_GetCharsetFromInt(int value)247 FX_Charset FX_GetCharsetFromInt(int value) {
248   switch (value) {
249     case static_cast<int>(FX_Charset::kANSI):
250     case static_cast<int>(FX_Charset::kDefault):
251     case static_cast<int>(FX_Charset::kSymbol):
252     case static_cast<int>(FX_Charset::kMAC_Roman):
253     case static_cast<int>(FX_Charset::kMAC_ShiftJIS):
254     case static_cast<int>(FX_Charset::kMAC_Korean):
255     case static_cast<int>(FX_Charset::kMAC_ChineseSimplified):
256     case static_cast<int>(FX_Charset::kMAC_ChineseTraditional):
257     case static_cast<int>(FX_Charset::kMAC_Hebrew):
258     case static_cast<int>(FX_Charset::kMAC_Arabic):
259     case static_cast<int>(FX_Charset::kMAC_Greek):
260     case static_cast<int>(FX_Charset::kMAC_Turkish):
261     case static_cast<int>(FX_Charset::kMAC_Thai):
262     case static_cast<int>(FX_Charset::kMAC_EasternEuropean):
263     case static_cast<int>(FX_Charset::kMAC_Cyrillic):
264     case static_cast<int>(FX_Charset::kShiftJIS):
265     case static_cast<int>(FX_Charset::kHangul):
266     case static_cast<int>(FX_Charset::kJohab):
267     case static_cast<int>(FX_Charset::kChineseSimplified):
268     case static_cast<int>(FX_Charset::kChineseTraditional):
269     case static_cast<int>(FX_Charset::kMSWin_Greek):
270     case static_cast<int>(FX_Charset::kMSWin_Turkish):
271     case static_cast<int>(FX_Charset::kMSWin_Vietnamese):
272     case static_cast<int>(FX_Charset::kMSWin_Hebrew):
273     case static_cast<int>(FX_Charset::kMSWin_Arabic):
274     case static_cast<int>(FX_Charset::kMSWin_Baltic):
275     case static_cast<int>(FX_Charset::kMSWin_Cyrillic):
276     case static_cast<int>(FX_Charset::kThai):
277     case static_cast<int>(FX_Charset::kMSWin_EasternEuropean):
278     case static_cast<int>(FX_Charset::kUS):
279     case static_cast<int>(FX_Charset::kOEM):
280       return static_cast<FX_Charset>(value);
281     default:
282       return FX_Charset::kANSI;
283   }
284 }
285 
FX_CharSetIsCJK(FX_Charset uCharset)286 bool FX_CharSetIsCJK(FX_Charset uCharset) {
287   return (uCharset == FX_Charset::kChineseSimplified) ||
288          (uCharset == FX_Charset::kChineseTraditional) ||
289          (uCharset == FX_Charset::kHangul) ||
290          (uCharset == FX_Charset::kShiftJIS);
291 }
292 
FX_WideCharToMultiByte(FX_CodePage codepage,WideStringView wstr,pdfium::span<char> buf)293 size_t FX_WideCharToMultiByte(FX_CodePage codepage,
294                               WideStringView wstr,
295                               pdfium::span<char> buf) {
296 #if BUILDFLAG(IS_WIN)
297   int input_len = pdfium::base::checked_cast<int>(wstr.GetLength());
298   int output_len = pdfium::base::checked_cast<int>(buf.size());
299   return WideCharToMultiByte(static_cast<UINT>(codepage), 0,
300                              wstr.unterminated_c_str(), input_len, buf.data(),
301                              output_len, nullptr, nullptr);
302 #else
303   size_t len = 0;
304   for (size_t i = 0; i < wstr.GetLength(); i++) {
305     if (wstr[i] < 0x100) {
306       if (len < buf.size())
307         buf[len] = static_cast<char>(wstr[i]);
308       len++;
309     }
310   }
311   return len;
312 #endif
313 }
314 
FX_MultiByteToWideChar(FX_CodePage codepage,ByteStringView bstr,pdfium::span<wchar_t> buf)315 size_t FX_MultiByteToWideChar(FX_CodePage codepage,
316                               ByteStringView bstr,
317                               pdfium::span<wchar_t> buf) {
318 #if BUILDFLAG(IS_WIN)
319   const int input_len = pdfium::base::checked_cast<int>(bstr.GetLength());
320   const int output_len = pdfium::base::checked_cast<int>(buf.size());
321   return MultiByteToWideChar(static_cast<UINT>(codepage), 0,
322                              bstr.unterminated_c_str(), input_len, buf.data(),
323                              output_len);
324 #else
325   size_t wlen = 0;
326   for (size_t i = 0; i < bstr.GetLength(); i++) {
327     if (wlen < buf.size())
328       buf[wlen] = reinterpret_cast<uint8_t>(bstr[i]);
329     wlen++;
330   }
331   return wlen;
332 #endif
333 }
334