xref: /aosp_15_r20/external/pdfium/core/fxcrt/fx_codepage.h (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FXCRT_FX_CODEPAGE_H_
8 #define CORE_FXCRT_FX_CODEPAGE_H_
9 
10 #include <stdint.h>
11 
12 // Prove consistency with incomplete forward definitions.
13 #include "core/fxcrt/fx_codepage_forward.h"
14 #include "core/fxcrt/fx_string.h"
15 #include "core/fxcrt/unowned_ptr_exclusion.h"
16 #include "third_party/base/containers/span.h"
17 
// Identifies a text encoding ("code page") by a 16-bit number.
//
// The underlying type is fixed to uint16_t and has to agree with the
// incomplete declaration pulled in from fx_codepage_forward.h.
//
// NOTE(review): the numeric values look like Windows code page identifiers
// (437, 1252, 65001, ...); confirm before persisting or exchanging them with
// platform APIs.
enum class FX_CodePage : uint16_t {
  // Generic entries.
  kDefANSI = 0,
  kSymbol = 42,

  // MS-DOS code pages (plus ASMO 708 Arabic, which sorts among them).
  kMSDOS_US = 437,
  kArabic_ASMO708 = 708,
  kMSDOS_Greek1 = 737,
  kMSDOS_Baltic = 775,
  kMSDOS_WesternEuropean = 850,
  kMSDOS_EasternEuropean = 852,
  kMSDOS_Cyrillic = 855,
  kMSDOS_Turkish = 857,
  kMSDOS_Portuguese = 860,
  kMSDOS_Icelandic = 861,
  kMSDOS_Hebrew = 862,
  kMSDOS_FrenchCanadian = 863,
  kMSDOS_Arabic = 864,
  kMSDOS_Norwegian = 865,
  kMSDOS_Russian = 866,
  kMSDOS_Greek2 = 869,
  kMSDOS_Thai = 874,

  // Japanese, Chinese and Korean code pages.
  kShiftJIS = 932,
  kChineseSimplified = 936,
  kHangul = 949,
  kChineseTraditional = 950,

  // UTF-16, little- and big-endian.
  kUTF16LE = 1200,
  kUTF16BE = 1201,

  // Microsoft Windows code pages.
  kMSWin_EasternEuropean = 1250,
  kMSWin_Cyrillic = 1251,
  kMSWin_WesternEuropean = 1252,
  kMSWin_Greek = 1253,
  kMSWin_Turkish = 1254,
  kMSWin_Hebrew = 1255,
  kMSWin_Arabic = 1256,
  kMSWin_Baltic = 1257,
  kMSWin_Vietnamese = 1258,

  kJohab = 1361,

  // Macintosh code pages.
  kMAC_Roman = 10000,
  kMAC_ShiftJIS = 10001,
  kMAC_ChineseTraditional = 10002,
  kMAC_Korean = 10003,
  kMAC_Arabic = 10004,
  kMAC_Hebrew = 10005,
  kMAC_Greek = 10006,
  kMAC_Cyrillic = 10007,
  kMAC_ChineseSimplified = 10008,
  kMAC_Thai = 10021,
  kMAC_EasternEuropean = 10029,
  kMAC_Turkish = 10081,

  kUTF8 = 65001,

  // Largest value representable in uint16_t.
  // NOTE(review): by its name this is a "no valid code page" sentinel;
  // confirm with the functions that return FX_CodePage.
  kFailure = 65535,
};
69 
// Identifies a font character set by an 8-bit number.
//
// The underlying type is fixed to uint8_t and has to agree with the
// incomplete declaration pulled in from fx_codepage_forward.h.
//
// NOTE(review): the numeric values look like the Windows GDI *_CHARSET
// constants (0 = ANSI, 128 = Shift-JIS, 255 = OEM, ...); confirm before
// passing them to or reading them from platform font APIs.
enum class FX_Charset : uint8_t {
  // Generic entries.
  kANSI = 0,
  kDefault = 1,
  kSymbol = 2,

  // Macintosh character sets (value 82 is not assigned here).
  kMAC_Roman = 77,
  kMAC_ShiftJIS = 78,
  kMAC_Korean = 79,
  kMAC_ChineseSimplified = 80,
  kMAC_ChineseTraditional = 81,
  kMAC_Hebrew = 83,
  kMAC_Arabic = 84,
  kMAC_Greek = 85,
  kMAC_Turkish = 86,
  kMAC_Thai = 87,
  kMAC_EasternEuropean = 88,
  kMAC_Cyrillic = 89,

  // Japanese, Korean and Chinese character sets.
  kShiftJIS = 128,
  kHangul = 129,
  kJohab = 130,
  kChineseSimplified = 134,
  kChineseTraditional = 136,

  // Microsoft Windows character sets (Thai sorts among them by value).
  kMSWin_Greek = 161,
  kMSWin_Turkish = 162,
  kMSWin_Vietnamese = 163,
  kMSWin_Hebrew = 177,
  kMSWin_Arabic = 178,
  kMSWin_Baltic = 186,
  kMSWin_Cyrillic = 204,
  kThai = 222,
  kMSWin_EasternEuropean = 238,

  kUS = 254,
  // Largest value representable in uint8_t.
  kOEM = 255,
};
103 
104 // Hi-bytes to unicode codepoint mapping for various code pages.
105 struct FX_CharsetUnicodes {
106   FX_Charset m_Charset;
107   UNOWNED_PTR_EXCLUSION const uint16_t* m_pUnicodes;  // POD struct.
108 };
109 
110 extern const FX_CharsetUnicodes kFX_CharsetUnicodes[8];
111 
112 FX_CodePage FX_GetACP();
113 FX_CodePage FX_GetCodePageFromCharset(FX_Charset charset);
114 FX_Charset FX_GetCharsetFromCodePage(FX_CodePage codepage);
115 FX_Charset FX_GetCharsetFromInt(int value);
116 bool FX_CharSetIsCJK(FX_Charset uCharset);
117 size_t FX_WideCharToMultiByte(FX_CodePage codepage,
118                               WideStringView wstr,
119                               pdfium::span<char> buf);
120 size_t FX_MultiByteToWideChar(FX_CodePage codepage,
121                               ByteStringView bstr,
122                               pdfium::span<wchar_t> buf);
123 
124 #endif  // CORE_FXCRT_FX_CODEPAGE_H_
125