1 // Copyright 2014 The PDFium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FXCRT_FX_CODEPAGE_H_ 8 #define CORE_FXCRT_FX_CODEPAGE_H_ 9 10 #include <stdint.h> 11 12 // Prove consistency with incomplete forward definitions. 13 #include "core/fxcrt/fx_codepage_forward.h" 14 #include "core/fxcrt/fx_string.h" 15 #include "core/fxcrt/unowned_ptr_exclusion.h" 16 #include "third_party/base/containers/span.h" 17 18 enum class FX_CodePage : uint16_t { 19 kDefANSI = 0, 20 kSymbol = 42, 21 kMSDOS_US = 437, 22 kArabic_ASMO708 = 708, 23 kMSDOS_Greek1 = 737, 24 kMSDOS_Baltic = 775, 25 kMSDOS_WesternEuropean = 850, 26 kMSDOS_EasternEuropean = 852, 27 kMSDOS_Cyrillic = 855, 28 kMSDOS_Turkish = 857, 29 kMSDOS_Portuguese = 860, 30 kMSDOS_Icelandic = 861, 31 kMSDOS_Hebrew = 862, 32 kMSDOS_FrenchCanadian = 863, 33 kMSDOS_Arabic = 864, 34 kMSDOS_Norwegian = 865, 35 kMSDOS_Russian = 866, 36 kMSDOS_Greek2 = 869, 37 kMSDOS_Thai = 874, 38 kShiftJIS = 932, 39 kChineseSimplified = 936, 40 kHangul = 949, 41 kChineseTraditional = 950, 42 kUTF16LE = 1200, 43 kUTF16BE = 1201, 44 kMSWin_EasternEuropean = 1250, 45 kMSWin_Cyrillic = 1251, 46 kMSWin_WesternEuropean = 1252, 47 kMSWin_Greek = 1253, 48 kMSWin_Turkish = 1254, 49 kMSWin_Hebrew = 1255, 50 kMSWin_Arabic = 1256, 51 kMSWin_Baltic = 1257, 52 kMSWin_Vietnamese = 1258, 53 kJohab = 1361, 54 kMAC_Roman = 10000, 55 kMAC_ShiftJIS = 10001, 56 kMAC_ChineseTraditional = 10002, 57 kMAC_Korean = 10003, 58 kMAC_Arabic = 10004, 59 kMAC_Hebrew = 10005, 60 kMAC_Greek = 10006, 61 kMAC_Cyrillic = 10007, 62 kMAC_ChineseSimplified = 10008, 63 kMAC_Thai = 10021, 64 kMAC_EasternEuropean = 10029, 65 kMAC_Turkish = 10081, 66 kUTF8 = 65001, 67 kFailure = 65535, 68 }; 69 70 enum class FX_Charset : uint8_t { 71 kANSI = 0, 72 kDefault = 1, 73 kSymbol = 2, 74 kMAC_Roman = 77, 75 kMAC_ShiftJIS = 78, 76 kMAC_Korean = 79, 77 kMAC_ChineseSimplified = 80, 78 kMAC_ChineseTraditional = 81, 79 kMAC_Hebrew = 83, 80 kMAC_Arabic = 84, 81 kMAC_Greek = 85, 82 kMAC_Turkish = 86, 83 kMAC_Thai = 87, 84 kMAC_EasternEuropean = 88, 85 kMAC_Cyrillic = 89, 86 kShiftJIS = 128, 87 kHangul = 129, 88 kJohab = 130, 89 kChineseSimplified = 134, 90 kChineseTraditional = 136, 91 kMSWin_Greek = 161, 92 kMSWin_Turkish = 162, 93 kMSWin_Vietnamese = 163, 94 kMSWin_Hebrew = 177, 95 kMSWin_Arabic = 178, 96 kMSWin_Baltic = 186, 97 kMSWin_Cyrillic = 204, 98 kThai = 222, 99 kMSWin_EasternEuropean = 238, 100 kUS = 254, 101 kOEM = 255, 102 }; 103 104 // Hi-bytes to unicode codepoint mapping for various code pages. 105 struct FX_CharsetUnicodes { 106 FX_Charset m_Charset; 107 UNOWNED_PTR_EXCLUSION const uint16_t* m_pUnicodes; // POD struct. 108 }; 109 110 extern const FX_CharsetUnicodes kFX_CharsetUnicodes[8]; 111 112 FX_CodePage FX_GetACP(); 113 FX_CodePage FX_GetCodePageFromCharset(FX_Charset charset); 114 FX_Charset FX_GetCharsetFromCodePage(FX_CodePage codepage); 115 FX_Charset FX_GetCharsetFromInt(int value); 116 bool FX_CharSetIsCJK(FX_Charset uCharset); 117 size_t FX_WideCharToMultiByte(FX_CodePage codepage, 118 WideStringView wstr, 119 pdfium::span<char> buf); 120 size_t FX_MultiByteToWideChar(FX_CodePage codepage, 121 ByteStringView bstr, 122 pdfium::span<wchar_t> buf); 123 124 #endif // CORE_FXCRT_FX_CODEPAGE_H_ 125