// xref: /aosp_15_r20/external/pdfium/core/fpdfapi/font/cpdf_cmapparser.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
// Copyright 2014 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/font/cpdf_cmapparser.h"
8 
9 #include <ctype.h>
10 
11 #include <iterator>
12 
13 #include "core/fpdfapi/cmaps/fpdf_cmaps.h"
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_simple_parser.h"
17 #include "core/fxcrt/fx_extension.h"
18 #include "core/fxcrt/fx_safe_types.h"
19 #include "core/fxge/freetype/fx_freetype.h"
20 #include "third_party/base/check.h"
21 
22 namespace {
23 
CMap_GetString(ByteStringView word)24 ByteStringView CMap_GetString(ByteStringView word) {
25   if (word.GetLength() <= 2)
26     return ByteStringView();
27   return word.Last(word.GetLength() - 2);
28 }
29 
30 }  // namespace
31 
CPDF_CMapParser(CPDF_CMap * pCMap)32 CPDF_CMapParser::CPDF_CMapParser(CPDF_CMap* pCMap) : m_pCMap(pCMap) {}
33 
~CPDF_CMapParser()34 CPDF_CMapParser::~CPDF_CMapParser() {
35   m_pCMap->SetAdditionalMappings(std::move(m_AdditionalCharcodeToCIDMappings));
36   m_pCMap->SetMixedFourByteLeadingRanges(std::move(m_Ranges));
37 }
38 
ParseWord(ByteStringView word)39 void CPDF_CMapParser::ParseWord(ByteStringView word) {
40   DCHECK(!word.IsEmpty());
41 
42   if (word == "begincidchar") {
43     m_Status = kProcessingCidChar;
44     m_CodeSeq = 0;
45   } else if (word == "begincidrange") {
46     m_Status = kProcessingCidRange;
47     m_CodeSeq = 0;
48   } else if (word == "endcidrange" || word == "endcidchar") {
49     m_Status = kStart;
50   } else if (word == "/WMode") {
51     m_Status = kProcessingWMode;
52   } else if (word == "/Registry") {
53     m_Status = kProcessingRegistry;
54   } else if (word == "/Ordering") {
55     m_Status = kProcessingOrdering;
56   } else if (word == "/Supplement") {
57     m_Status = kProcessingSupplement;
58   } else if (word == "begincodespacerange") {
59     m_Status = kProcessingCodeSpaceRange;
60     m_CodeSeq = 0;
61   } else if (word == "usecmap") {
62   } else if (m_Status == kProcessingCidChar) {
63     HandleCid(word);
64   } else if (m_Status == kProcessingCidRange) {
65     HandleCid(word);
66   } else if (m_Status == kProcessingRegistry) {
67     m_Status = kStart;
68   } else if (m_Status == kProcessingOrdering) {
69     m_pCMap->SetCharset(CharsetFromOrdering(CMap_GetString(word)));
70     m_Status = kStart;
71   } else if (m_Status == kProcessingSupplement) {
72     m_Status = kStart;
73   } else if (m_Status == kProcessingWMode) {
74     m_pCMap->SetVertical(GetCode(word) != 0);
75     m_Status = kStart;
76   } else if (m_Status == kProcessingCodeSpaceRange) {
77     HandleCodeSpaceRange(word);
78   }
79   m_LastWord = word;
80 }
81 
HandleCid(ByteStringView word)82 void CPDF_CMapParser::HandleCid(ByteStringView word) {
83   DCHECK(m_Status == kProcessingCidChar || m_Status == kProcessingCidRange);
84   bool bChar = m_Status == kProcessingCidChar;
85 
86   m_CodePoints[m_CodeSeq] = GetCode(word);
87   m_CodeSeq++;
88   int nRequiredCodePoints = bChar ? 2 : 3;
89   if (m_CodeSeq < nRequiredCodePoints)
90     return;
91 
92   uint32_t StartCode = m_CodePoints[0];
93   uint32_t EndCode;
94   uint16_t StartCID;
95   if (bChar) {
96     EndCode = StartCode;
97     StartCID = static_cast<uint16_t>(m_CodePoints[1]);
98   } else {
99     EndCode = m_CodePoints[1];
100     StartCID = static_cast<uint16_t>(m_CodePoints[2]);
101   }
102   if (EndCode < CPDF_CMap::kDirectMapTableSize) {
103     for (uint32_t code = StartCode; code <= EndCode; code++) {
104       m_pCMap->SetDirectCharcodeToCIDTable(
105           code, static_cast<uint16_t>(StartCID + code - StartCode));
106     }
107   } else {
108     m_AdditionalCharcodeToCIDMappings.push_back({StartCode, EndCode, StartCID});
109   }
110   m_CodeSeq = 0;
111 }
112 
HandleCodeSpaceRange(ByteStringView word)113 void CPDF_CMapParser::HandleCodeSpaceRange(ByteStringView word) {
114   if (word != "endcodespacerange") {
115     if (word.IsEmpty() || word[0] != '<')
116       return;
117 
118     if (m_CodeSeq % 2) {
119       absl::optional<CPDF_CMap::CodeRange> range =
120           GetCodeRange(m_LastWord.AsStringView(), word);
121       if (range.has_value())
122         m_PendingRanges.push_back(range.value());
123     }
124     m_CodeSeq++;
125     return;
126   }
127 
128   size_t nSegs = m_Ranges.size() + m_PendingRanges.size();
129   if (nSegs == 1) {
130     const auto& first_range =
131         !m_Ranges.empty() ? m_Ranges[0] : m_PendingRanges[0];
132     m_pCMap->SetCodingScheme(first_range.m_CharSize == 2 ? CPDF_CMap::TwoBytes
133                                                          : CPDF_CMap::OneByte);
134   } else if (nSegs > 1) {
135     m_pCMap->SetCodingScheme(CPDF_CMap::MixedFourBytes);
136     m_Ranges.reserve(nSegs);
137     std::move(m_PendingRanges.begin(), m_PendingRanges.end(),
138               std::back_inserter(m_Ranges));
139     m_PendingRanges.clear();
140   }
141   m_Status = kStart;
142 }
143 
144 // static
GetCode(ByteStringView word)145 uint32_t CPDF_CMapParser::GetCode(ByteStringView word) {
146   if (word.IsEmpty())
147     return 0;
148 
149   FX_SAFE_UINT32 num = 0;
150   if (word[0] == '<') {
151     for (size_t i = 1; i < word.GetLength() && isxdigit(word[i]); ++i) {
152       num = num * 16 + FXSYS_HexCharToInt(word[i]);
153       if (!num.IsValid())
154         return 0;
155     }
156     return num.ValueOrDie();
157   }
158 
159   for (size_t i = 0; i < word.GetLength() && isdigit(word[i]); ++i) {
160     num = num * 10 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(word[i]));
161     if (!num.IsValid())
162       return 0;
163   }
164   return num.ValueOrDie();
165 }
166 
167 // static
GetCodeRange(ByteStringView first,ByteStringView second)168 absl::optional<CPDF_CMap::CodeRange> CPDF_CMapParser::GetCodeRange(
169     ByteStringView first,
170     ByteStringView second) {
171   if (first.IsEmpty() || first[0] != '<')
172     return absl::nullopt;
173 
174   size_t i;
175   for (i = 1; i < first.GetLength(); ++i) {
176     if (first[i] == '>')
177       break;
178   }
179   size_t char_size = (i - 1) / 2;
180   if (char_size > 4)
181     return absl::nullopt;
182 
183   CPDF_CMap::CodeRange range;
184   range.m_CharSize = char_size;
185   for (i = 0; i < range.m_CharSize; ++i) {
186     uint8_t digit1 = first[i * 2 + 1];
187     uint8_t digit2 = first[i * 2 + 2];
188     range.m_Lower[i] =
189         FXSYS_HexCharToInt(digit1) * 16 + FXSYS_HexCharToInt(digit2);
190   }
191 
192   size_t size = second.GetLength();
193   for (i = 0; i < range.m_CharSize; ++i) {
194     size_t i1 = i * 2 + 1;
195     size_t i2 = i1 + 1;
196     uint8_t digit1 = i1 < size ? second[i1] : '0';
197     uint8_t digit2 = i2 < size ? second[i2] : '0';
198     range.m_Upper[i] =
199         FXSYS_HexCharToInt(digit1) * 16 + FXSYS_HexCharToInt(digit2);
200   }
201   return range;
202 }
203 
204 // static
CharsetFromOrdering(ByteStringView ordering)205 CIDSet CPDF_CMapParser::CharsetFromOrdering(ByteStringView ordering) {
206   static const char* const kCharsetNames[CIDSET_NUM_SETS] = {
207       nullptr, "GB1", "CNS1", "Japan1", "Korea1", "UCS"};
208   static_assert(std::size(kCharsetNames) == CIDSET_NUM_SETS,
209                 "Too many CID sets");
210 
211   for (size_t charset = 1; charset < std::size(kCharsetNames); ++charset) {
212     if (ordering == kCharsetNames[charset])
213       return static_cast<CIDSet>(charset);
214   }
215   return CIDSET_UNKNOWN;
216 }
217