xref: /aosp_15_r20/external/pdfium/core/fpdfapi/parser/fpdf_parser_decode.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
8 
9 #include <ctype.h>
10 #include <limits.h>
11 #include <stddef.h>
12 
13 #include <algorithm>
14 #include <utility>
15 
16 #include "build/build_config.h"
17 #include "constants/stream_dict_common.h"
18 #include "core/fpdfapi/parser/cpdf_array.h"
19 #include "core/fpdfapi/parser/cpdf_dictionary.h"
20 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
21 #include "core/fxcodec/fax/faxmodule.h"
22 #include "core/fxcodec/flate/flatemodule.h"
23 #include "core/fxcodec/scanlinedecoder.h"
24 #include "core/fxcrt/fx_extension.h"
25 #include "core/fxcrt/fx_safe_types.h"
26 #include "core/fxcrt/span_util.h"
27 #include "core/fxcrt/utf16.h"
28 #include "third_party/base/check.h"
29 #include "third_party/base/containers/contains.h"
30 
31 namespace {
32 
// Size limit in bytes (20 MiB) applied to the output computed by
// RunLengthDecode(); a computed size at or above this value is rejected.
constexpr uint32_t kMaxStreamSize = 20u * 1024u * 1024u;
34 
// Builds one 16-bit code unit from two consecutive bytes, with bytes[0] as the
// most significant byte. |bytes| must point to at least two readable bytes.
uint16_t GetUnicodeFromBigEndianBytes(const uint8_t* bytes) {
  const int high_byte = bytes[0];
  const int low_byte = bytes[1];
  return (high_byte << 8) | low_byte;
}
38 
// Builds one 16-bit code unit from two consecutive bytes, with bytes[1] as the
// most significant byte. |bytes| must point to at least two readable bytes.
uint16_t GetUnicodeFromLittleEndianBytes(const uint8_t* bytes) {
  const int low_byte = bytes[0];
  const int high_byte = bytes[1];
  return (high_byte << 8) | low_byte;
}
42 
CheckFlateDecodeParams(int Colors,int BitsPerComponent,int Columns)43 bool CheckFlateDecodeParams(int Colors, int BitsPerComponent, int Columns) {
44   if (Colors < 0 || BitsPerComponent < 0 || Columns < 0)
45     return false;
46 
47   FX_SAFE_INT32 check = Columns;
48   check *= Colors;
49   check *= BitsPerComponent;
50   if (!check.IsValid())
51     return false;
52 
53   return check.ValueOrDie() <= INT_MAX - 7;
54 }
55 
// Returns byte |i| of |res|, counting from the most significant byte:
// i == 0 yields bits 31..24 and i == 3 yields bits 7..0.
uint8_t GetA85Result(uint32_t res, size_t i) {
  const size_t shift_bits = (3 - i) * 8;
  return static_cast<uint8_t>(res >> shift_bits);
}
59 
60 }  // namespace
61 
// Lookup table indexed by a single-byte character code. PDF_DecodeText() emits
// the entry for each input byte, and PDF_EncodeText() searches the table to map
// a character back to its byte. Most entries equal their own index; the
// exceptions are 0x18-0x1f, 0x7f, 0x80-0xa0 and 0xad. Each row below holds
// eight entries and is labelled with the index of its first entry.
const uint16_t kPDFDocEncoding[256] = {
    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,  // 0x00
    0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,  // 0x08
    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,  // 0x10
    0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc,  // 0x18
    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,  // 0x20
    0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,  // 0x28
    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,  // 0x30
    0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,  // 0x38
    0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,  // 0x40
    0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,  // 0x48
    0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,  // 0x50
    0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,  // 0x58
    0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,  // 0x60
    0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,  // 0x68
    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,  // 0x70
    0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x0000,  // 0x78
    0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044,  // 0x80
    0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,  // 0x88
    0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160,  // 0x90
    0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, 0x0000,  // 0x98
    0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,  // 0xa0
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x0000, 0x00ae, 0x00af,  // 0xa8
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,  // 0xb0
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,  // 0xb8
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,  // 0xc0
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,  // 0xc8
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,  // 0xd0
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,  // 0xd8
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,  // 0xe0
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,  // 0xe8
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,  // 0xf0
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,  // 0xf8
};
92 
ValidateDecoderPipeline(const CPDF_Array * pDecoders)93 bool ValidateDecoderPipeline(const CPDF_Array* pDecoders) {
94   size_t count = pDecoders->size();
95   if (count == 0)
96     return true;
97 
98   for (size_t i = 0; i < count; ++i) {
99     RetainPtr<const CPDF_Object> object = pDecoders->GetDirectObjectAt(i);
100     if (!object || !object->IsName()) {
101       return false;
102     }
103   }
104 
105   if (count == 1)
106     return true;
107 
108   // TODO(thestig): Consolidate all the places that use these filter names.
109   static const char kValidDecoders[][16] = {
110       "FlateDecode",    "Fl",  "LZWDecode",       "LZW", "ASCII85Decode", "A85",
111       "ASCIIHexDecode", "AHx", "RunLengthDecode", "RL"};
112   for (size_t i = 0; i < count - 1; ++i) {
113     if (!pdfium::Contains(kValidDecoders, pDecoders->GetByteStringAt(i)))
114       return false;
115   }
116   return true;
117 }
118 
A85Decode(pdfium::span<const uint8_t> src_span,std::unique_ptr<uint8_t,FxFreeDeleter> * dest_buf,uint32_t * dest_size)119 uint32_t A85Decode(pdfium::span<const uint8_t> src_span,
120                    std::unique_ptr<uint8_t, FxFreeDeleter>* dest_buf,
121                    uint32_t* dest_size) {
122   *dest_size = 0;
123   if (src_span.empty()) {
124     dest_buf->reset();
125     return 0;
126   }
127 
128   // Count legal characters and zeros.
129   uint32_t zcount = 0;
130   uint32_t pos = 0;
131   while (pos < src_span.size()) {
132     uint8_t ch = src_span[pos];
133     if (ch == 'z') {
134       zcount++;
135     } else if ((ch < '!' || ch > 'u') && !PDFCharIsLineEnding(ch) &&
136                ch != ' ' && ch != '\t') {
137       break;
138     }
139     pos++;
140   }
141   // No content to decode.
142   if (pos == 0)
143     return 0;
144 
145   // Count the space needed to contain non-zero characters. The encoding ratio
146   // of Ascii85 is 4:5.
147   uint32_t space_for_non_zeroes = (pos - zcount) / 5 * 4 + 4;
148   FX_SAFE_UINT32 size = zcount;
149   size *= 4;
150   size += space_for_non_zeroes;
151   if (!size.IsValid())
152     return FX_INVALID_OFFSET;
153 
154   dest_buf->reset(FX_Alloc(uint8_t, size.ValueOrDie()));
155   uint8_t* dest_buf_ptr = dest_buf->get();
156   size_t state = 0;
157   uint32_t res = 0;
158   pos = 0;
159   while (pos < src_span.size()) {
160     uint8_t ch = src_span[pos++];
161     if (PDFCharIsLineEnding(ch) || ch == ' ' || ch == '\t')
162       continue;
163 
164     if (ch == 'z') {
165       memset(dest_buf_ptr + *dest_size, 0, 4);
166       state = 0;
167       res = 0;
168       *dest_size += 4;
169       continue;
170     }
171 
172     // Check for the end or illegal character.
173     if (ch < '!' || ch > 'u')
174       break;
175 
176     res = res * 85 + ch - 33;
177     if (state < 4) {
178       ++state;
179       continue;
180     }
181 
182     for (size_t i = 0; i < 4; ++i) {
183       dest_buf_ptr[(*dest_size)++] = GetA85Result(res, i);
184     }
185     state = 0;
186     res = 0;
187   }
188   // Handle partial group.
189   if (state) {
190     for (size_t i = state; i < 5; ++i)
191       res = res * 85 + 84;
192     for (size_t i = 0; i < state - 1; ++i)
193       dest_buf_ptr[(*dest_size)++] = GetA85Result(res, i);
194   }
195   if (pos < src_span.size() && src_span[pos] == '>')
196     ++pos;
197   return pos;
198 }
199 
HexDecode(pdfium::span<const uint8_t> src_span,std::unique_ptr<uint8_t,FxFreeDeleter> * dest_buf,uint32_t * dest_size)200 uint32_t HexDecode(pdfium::span<const uint8_t> src_span,
201                    std::unique_ptr<uint8_t, FxFreeDeleter>* dest_buf,
202                    uint32_t* dest_size) {
203   *dest_size = 0;
204   if (src_span.empty()) {
205     dest_buf->reset();
206     return 0;
207   }
208 
209   uint32_t i = 0;
210   // Find the end of data.
211   while (i < src_span.size() && src_span[i] != '>')
212     ++i;
213 
214   dest_buf->reset(FX_Alloc(uint8_t, i / 2 + 1));
215   uint8_t* dest_buf_ptr = dest_buf->get();
216   bool bFirst = true;
217   for (i = 0; i < src_span.size(); ++i) {
218     uint8_t ch = src_span[i];
219     if (PDFCharIsLineEnding(ch) || ch == ' ' || ch == '\t')
220       continue;
221 
222     if (ch == '>') {
223       ++i;
224       break;
225     }
226     if (!isxdigit(ch))
227       continue;
228 
229     int digit = FXSYS_HexCharToInt(ch);
230     if (bFirst)
231       dest_buf_ptr[*dest_size] = digit * 16;
232     else
233       dest_buf_ptr[(*dest_size)++] += digit;
234     bFirst = !bFirst;
235   }
236   if (!bFirst)
237     ++(*dest_size);
238   return i;
239 }
240 
RunLengthDecode(pdfium::span<const uint8_t> src_span,std::unique_ptr<uint8_t,FxFreeDeleter> * dest_buf,uint32_t * dest_size)241 uint32_t RunLengthDecode(pdfium::span<const uint8_t> src_span,
242                          std::unique_ptr<uint8_t, FxFreeDeleter>* dest_buf,
243                          uint32_t* dest_size) {
244   size_t i = 0;
245   *dest_size = 0;
246   while (i < src_span.size()) {
247     if (src_span[i] == 128)
248       break;
249 
250     uint32_t old = *dest_size;
251     if (src_span[i] < 128) {
252       *dest_size += src_span[i] + 1;
253       if (*dest_size < old)
254         return FX_INVALID_OFFSET;
255       i += src_span[i] + 2;
256     } else {
257       *dest_size += 257 - src_span[i];
258       if (*dest_size < old)
259         return FX_INVALID_OFFSET;
260       i += 2;
261     }
262   }
263   if (*dest_size >= kMaxStreamSize)
264     return FX_INVALID_OFFSET;
265 
266   dest_buf->reset(FX_Alloc(uint8_t, *dest_size));
267   pdfium::span<uint8_t> dest_span(dest_buf->get(), *dest_size);
268   i = 0;
269   int dest_count = 0;
270   while (i < src_span.size()) {
271     if (src_span[i] == 128)
272       break;
273 
274     if (src_span[i] < 128) {
275       uint32_t copy_len = src_span[i] + 1;
276       uint32_t buf_left = src_span.size() - i - 1;
277       if (buf_left < copy_len) {
278         uint32_t delta = copy_len - buf_left;
279         copy_len = buf_left;
280         fxcrt::spanclr(dest_span.subspan(dest_count + copy_len, delta));
281       }
282       auto copy_span = src_span.subspan(i + 1, copy_len);
283       fxcrt::spancpy(dest_span.subspan(dest_count), copy_span);
284       dest_count += src_span[i] + 1;
285       i += src_span[i] + 2;
286     } else {
287       const uint8_t fill = i < src_span.size() - 1 ? src_span[i + 1] : 0;
288       const size_t fill_size = 257 - src_span[i];
289       fxcrt::spanset(dest_span.subspan(dest_count, fill_size), fill);
290       dest_count += fill_size;
291       i += 2;
292     }
293   }
294   return std::min(i + 1, src_span.size());
295 }
296 
CreateFaxDecoder(pdfium::span<const uint8_t> src_span,int width,int height,const CPDF_Dictionary * pParams)297 std::unique_ptr<ScanlineDecoder> CreateFaxDecoder(
298     pdfium::span<const uint8_t> src_span,
299     int width,
300     int height,
301     const CPDF_Dictionary* pParams) {
302   int K = 0;
303   bool EndOfLine = false;
304   bool ByteAlign = false;
305   bool BlackIs1 = false;
306   int Columns = 1728;
307   int Rows = 0;
308   if (pParams) {
309     K = pParams->GetIntegerFor("K");
310     EndOfLine = !!pParams->GetIntegerFor("EndOfLine");
311     ByteAlign = !!pParams->GetIntegerFor("EncodedByteAlign");
312     BlackIs1 = !!pParams->GetIntegerFor("BlackIs1");
313     Columns = pParams->GetIntegerFor("Columns", 1728);
314     Rows = pParams->GetIntegerFor("Rows");
315     if (Rows > USHRT_MAX)
316       Rows = 0;
317   }
318   return FaxModule::CreateDecoder(src_span, width, height, K, EndOfLine,
319                                   ByteAlign, BlackIs1, Columns, Rows);
320 }
321 
CreateFlateDecoder(pdfium::span<const uint8_t> src_span,int width,int height,int nComps,int bpc,const CPDF_Dictionary * pParams)322 std::unique_ptr<ScanlineDecoder> CreateFlateDecoder(
323     pdfium::span<const uint8_t> src_span,
324     int width,
325     int height,
326     int nComps,
327     int bpc,
328     const CPDF_Dictionary* pParams) {
329   int predictor = 0;
330   int Colors = 0;
331   int BitsPerComponent = 0;
332   int Columns = 0;
333   if (pParams) {
334     predictor = pParams->GetIntegerFor("Predictor");
335     Colors = pParams->GetIntegerFor("Colors", 1);
336     BitsPerComponent = pParams->GetIntegerFor("BitsPerComponent", 8);
337     Columns = pParams->GetIntegerFor("Columns", 1);
338     if (!CheckFlateDecodeParams(Colors, BitsPerComponent, Columns))
339       return nullptr;
340   }
341   return FlateModule::CreateDecoder(src_span, width, height, nComps, bpc,
342                                     predictor, Colors, BitsPerComponent,
343                                     Columns);
344 }
345 
FlateOrLZWDecode(bool bLZW,pdfium::span<const uint8_t> src_span,const CPDF_Dictionary * pParams,uint32_t estimated_size,std::unique_ptr<uint8_t,FxFreeDeleter> * dest_buf,uint32_t * dest_size)346 uint32_t FlateOrLZWDecode(bool bLZW,
347                           pdfium::span<const uint8_t> src_span,
348                           const CPDF_Dictionary* pParams,
349                           uint32_t estimated_size,
350                           std::unique_ptr<uint8_t, FxFreeDeleter>* dest_buf,
351                           uint32_t* dest_size) {
352   int predictor = 0;
353   int Colors = 0;
354   int BitsPerComponent = 0;
355   int Columns = 0;
356   bool bEarlyChange = true;
357   if (pParams) {
358     predictor = pParams->GetIntegerFor("Predictor");
359     bEarlyChange = !!pParams->GetIntegerFor("EarlyChange", 1);
360     Colors = pParams->GetIntegerFor("Colors", 1);
361     BitsPerComponent = pParams->GetIntegerFor("BitsPerComponent", 8);
362     Columns = pParams->GetIntegerFor("Columns", 1);
363     if (!CheckFlateDecodeParams(Colors, BitsPerComponent, Columns))
364       return FX_INVALID_OFFSET;
365   }
366   return FlateModule::FlateOrLZWDecode(bLZW, src_span, bEarlyChange, predictor,
367                                        Colors, BitsPerComponent, Columns,
368                                        estimated_size, dest_buf, dest_size);
369 }
370 
GetDecoderArray(RetainPtr<const CPDF_Dictionary> pDict)371 absl::optional<DecoderArray> GetDecoderArray(
372     RetainPtr<const CPDF_Dictionary> pDict) {
373   RetainPtr<const CPDF_Object> pFilter = pDict->GetDirectObjectFor("Filter");
374   if (!pFilter)
375     return DecoderArray();
376 
377   if (!pFilter->IsArray() && !pFilter->IsName())
378     return absl::nullopt;
379 
380   RetainPtr<const CPDF_Object> pParams =
381       pDict->GetDirectObjectFor(pdfium::stream::kDecodeParms);
382 
383   DecoderArray decoder_array;
384   if (const CPDF_Array* pDecoders = pFilter->AsArray()) {
385     if (!ValidateDecoderPipeline(pDecoders))
386       return absl::nullopt;
387 
388     RetainPtr<const CPDF_Array> pParamsArray = ToArray(pParams);
389     for (size_t i = 0; i < pDecoders->size(); ++i) {
390       decoder_array.emplace_back(
391           pDecoders->GetByteStringAt(i),
392           pParamsArray ? pParamsArray->GetDictAt(i) : nullptr);
393     }
394   } else {
395     DCHECK(pFilter->IsName());
396     decoder_array.emplace_back(pFilter->GetString(),
397                                pParams ? pParams->GetDict() : nullptr);
398   }
399 
400   return decoder_array;
401 }
402 
PDF_DataDecode(pdfium::span<const uint8_t> src_span,uint32_t last_estimated_size,bool bImageAcc,const DecoderArray & decoder_array,std::unique_ptr<uint8_t,FxFreeDeleter> * dest_buf,uint32_t * dest_size,ByteString * ImageEncoding,RetainPtr<const CPDF_Dictionary> * pImageParams)403 bool PDF_DataDecode(pdfium::span<const uint8_t> src_span,
404                     uint32_t last_estimated_size,
405                     bool bImageAcc,
406                     const DecoderArray& decoder_array,
407                     std::unique_ptr<uint8_t, FxFreeDeleter>* dest_buf,
408                     uint32_t* dest_size,
409                     ByteString* ImageEncoding,
410                     RetainPtr<const CPDF_Dictionary>* pImageParams) {
411   std::unique_ptr<uint8_t, FxFreeDeleter> result;
412   // May be changed to point to |result| in the for-loop below. So put it below
413   // |result| and let it get destroyed first.
414   pdfium::span<const uint8_t> last_span = src_span;
415   size_t nSize = decoder_array.size();
416   for (size_t i = 0; i < nSize; ++i) {
417     int estimated_size = i == nSize - 1 ? last_estimated_size : 0;
418     ByteString decoder = decoder_array[i].first;
419     RetainPtr<const CPDF_Dictionary> pParam =
420         ToDictionary(decoder_array[i].second);
421     std::unique_ptr<uint8_t, FxFreeDeleter> new_buf;
422     uint32_t new_size = 0xFFFFFFFF;
423     uint32_t offset = FX_INVALID_OFFSET;
424     if (decoder == "Crypt")
425       continue;
426     if (decoder == "FlateDecode" || decoder == "Fl") {
427       if (bImageAcc && i == nSize - 1) {
428         *ImageEncoding = "FlateDecode";
429         *dest_buf = std::move(result);
430         *dest_size = last_span.size();
431         *pImageParams = std::move(pParam);
432         return true;
433       }
434       offset = FlateOrLZWDecode(false, last_span, pParam, estimated_size,
435                                 &new_buf, &new_size);
436     } else if (decoder == "LZWDecode" || decoder == "LZW") {
437       offset = FlateOrLZWDecode(true, last_span, pParam, estimated_size,
438                                 &new_buf, &new_size);
439     } else if (decoder == "ASCII85Decode" || decoder == "A85") {
440       offset = A85Decode(last_span, &new_buf, &new_size);
441     } else if (decoder == "ASCIIHexDecode" || decoder == "AHx") {
442       offset = HexDecode(last_span, &new_buf, &new_size);
443     } else if (decoder == "RunLengthDecode" || decoder == "RL") {
444       if (bImageAcc && i == nSize - 1) {
445         *ImageEncoding = "RunLengthDecode";
446         *dest_buf = std::move(result);
447         *dest_size = last_span.size();
448         *pImageParams = std::move(pParam);
449         return true;
450       }
451       offset = RunLengthDecode(last_span, &new_buf, &new_size);
452     } else {
453       // If we get here, assume it's an image decoder.
454       if (decoder == "DCT")
455         decoder = "DCTDecode";
456       else if (decoder == "CCF")
457         decoder = "CCITTFaxDecode";
458       *ImageEncoding = std::move(decoder);
459       *pImageParams = std::move(pParam);
460       *dest_buf = std::move(result);
461       *dest_size = last_span.size();
462       return true;
463     }
464     if (offset == FX_INVALID_OFFSET)
465       return false;
466 
467     last_span = {new_buf.get(), new_size};
468     result = std::move(new_buf);
469   }
470   ImageEncoding->clear();
471   *pImageParams = nullptr;
472   *dest_buf = std::move(result);
473   *dest_size = last_span.size();
474   return true;
475 }
476 
PDF_DecodeText(pdfium::span<const uint8_t> span)477 WideString PDF_DecodeText(pdfium::span<const uint8_t> span) {
478   size_t dest_pos = 0;
479   WideString result;
480   if (span.size() >= 2 && ((span[0] == 0xfe && span[1] == 0xff) ||
481                            (span[0] == 0xff && span[1] == 0xfe))) {
482     size_t max_chars = (span.size() - 2) / 2;
483     if (!max_chars)
484       return result;
485 
486     pdfium::span<wchar_t> dest_buf = result.GetBuffer(max_chars);
487     uint16_t (*GetUnicodeFromBytes)(const uint8_t*) =
488         span[0] == 0xfe ? GetUnicodeFromBigEndianBytes
489                         : GetUnicodeFromLittleEndianBytes;
490     const uint8_t* unicode_str = &span[2];
491 
492 #if defined(WCHAR_T_IS_UTF32)
493     char16_t high_surrogate = 0;
494 #endif  // defined(WCHAR_T_IS_UTF32)
495     for (size_t i = 0; i < max_chars * 2; i += 2) {
496       uint16_t unicode = GetUnicodeFromBytes(unicode_str + i);
497 
498       // 0x001B is a begin/end marker for language metadata region that
499       // should not be in the decoded text.
500       if (unicode == 0x001B) {
501         i += 2;
502         for (; i < max_chars * 2; i += 2) {
503           unicode = GetUnicodeFromBytes(unicode_str + i);
504           if (unicode == 0x001B) {
505             i += 2;
506             if (i < max_chars * 2)
507               unicode = GetUnicodeFromBytes(unicode_str + i);
508             break;
509           }
510         }
511         if (i >= max_chars * 2)
512           break;
513       }
514 
515 #if defined(WCHAR_T_IS_UTF32)
516       // TODO(crbug.com/pdfium/2031): Always use UTF-16.
517       if (high_surrogate) {
518         char16_t previous_high_surrogate = high_surrogate;
519         high_surrogate = 0;
520 
521         if (pdfium::IsLowSurrogate(unicode)) {
522           dest_buf[dest_pos++] =
523               pdfium::SurrogatePair(previous_high_surrogate, unicode)
524                   .ToCodePoint();
525           continue;
526         }
527         dest_buf[dest_pos++] = previous_high_surrogate;
528       }
529 
530       if (pdfium::IsHighSurrogate(unicode)) {
531         high_surrogate = unicode;
532         continue;
533       }
534 #endif  // defined(WCHAR_T_IS_UTF32)
535       dest_buf[dest_pos++] = unicode;
536     }
537 
538 #if defined(WCHAR_T_IS_UTF32)
539     if (high_surrogate) {
540       dest_buf[dest_pos++] = high_surrogate;
541     }
542 #endif  // defined(WCHAR_T_IS_UTF32)
543   } else {
544     pdfium::span<wchar_t> dest_buf = result.GetBuffer(span.size());
545     for (size_t i = 0; i < span.size(); ++i)
546       dest_buf[i] = kPDFDocEncoding[span[i]];
547     dest_pos = span.size();
548   }
549   result.ReleaseBuffer(dest_pos);
550   return result;
551 }
552 
PDF_EncodeText(WideStringView str)553 ByteString PDF_EncodeText(WideStringView str) {
554   size_t i = 0;
555   size_t len = str.GetLength();
556   ByteString result;
557   {
558     pdfium::span<char> dest_buf = result.GetBuffer(len);
559     for (i = 0; i < len; ++i) {
560       int code;
561       for (code = 0; code < 256; ++code) {
562         if (kPDFDocEncoding[code] == str[i])
563           break;
564       }
565       if (code == 256)
566         break;
567 
568       dest_buf[i] = code;
569     }
570   }
571   result.ReleaseBuffer(i);
572   if (i == len)
573     return result;
574 
575   if (len > INT_MAX / 2 - 1) {
576     result.ReleaseBuffer(0);
577     return result;
578   }
579 
580   size_t dest_index = 0;
581   {
582 #if defined(WCHAR_T_IS_UTF32)
583     // 2 or 4 bytes required per UTF-32 code unit.
584     pdfium::span<uint8_t> dest_buf =
585         pdfium::as_writable_bytes(result.GetBuffer(len * 4 + 2));
586 #else
587     // 2 bytes required per UTF-16 code unit.
588     pdfium::span<uint8_t> dest_buf =
589         pdfium::as_writable_bytes(result.GetBuffer(len * 2 + 2));
590 #endif  // defined(WCHAR_T_IS_UTF32)
591 
592     dest_buf[dest_index++] = 0xfe;
593     dest_buf[dest_index++] = 0xff;
594     for (size_t j = 0; j < len; ++j) {
595 #if defined(WCHAR_T_IS_UTF32)
596       if (pdfium::IsSupplementary(str[j])) {
597         pdfium::SurrogatePair pair(str[j]);
598         dest_buf[dest_index++] = pair.high() >> 8;
599         dest_buf[dest_index++] = static_cast<uint8_t>(pair.high());
600         dest_buf[dest_index++] = pair.low() >> 8;
601         dest_buf[dest_index++] = static_cast<uint8_t>(pair.low());
602         continue;
603       }
604 #endif  // defined(WCHAR_T_IS_UTF32)
605       dest_buf[dest_index++] = str[j] >> 8;
606       dest_buf[dest_index++] = static_cast<uint8_t>(str[j]);
607     }
608   }
609   result.ReleaseBuffer(dest_index);
610   return result;
611 }
612 
PDF_EncodeString(ByteStringView src)613 ByteString PDF_EncodeString(ByteStringView src) {
614   ByteString result;
615   result.Reserve(src.GetLength() + 2);
616   result += '(';
617   for (size_t i = 0; i < src.GetLength(); ++i) {
618     uint8_t ch = src[i];
619     if (ch == 0x0a) {
620       result += "\\n";
621       continue;
622     }
623     if (ch == 0x0d) {
624       result += "\\r";
625       continue;
626     }
627     if (ch == ')' || ch == '\\' || ch == '(')
628       result += '\\';
629     result += static_cast<char>(ch);
630   }
631   result += ')';
632   return result;
633 }
634 
PDF_HexEncodeString(ByteStringView src)635 ByteString PDF_HexEncodeString(ByteStringView src) {
636   ByteString result;
637   result.Reserve(2 * src.GetLength() + 2);
638   result += '<';
639   for (size_t i = 0; i < src.GetLength(); ++i) {
640     char buf[2];
641     FXSYS_IntToTwoHexChars(src[i], buf);
642     result += buf[0];
643     result += buf[1];
644   }
645   result += '>';
646   return result;
647 }
648 
FlateEncode(pdfium::span<const uint8_t> src_span)649 DataVector<uint8_t> FlateEncode(pdfium::span<const uint8_t> src_span) {
650   return FlateModule::Encode(src_span);
651 }
652 
FlateDecode(pdfium::span<const uint8_t> src_span,std::unique_ptr<uint8_t,FxFreeDeleter> * dest_buf,uint32_t * dest_size)653 uint32_t FlateDecode(pdfium::span<const uint8_t> src_span,
654                      std::unique_ptr<uint8_t, FxFreeDeleter>* dest_buf,
655                      uint32_t* dest_size) {
656   return FlateModule::FlateOrLZWDecode(false, src_span, false, 0, 0, 0, 0, 0,
657                                        dest_buf, dest_size);
658 }
659