1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_parser.h"
8
9 #include <ctype.h>
10 #include <stdint.h>
11
12 #include <algorithm>
13 #include <utility>
14 #include <vector>
15
16 #include "core/fpdfapi/parser/cpdf_array.h"
17 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
18 #include "core/fpdfapi/parser/cpdf_dictionary.h"
19 #include "core/fpdfapi/parser/cpdf_document.h"
20 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
21 #include "core/fpdfapi/parser/cpdf_number.h"
22 #include "core/fpdfapi/parser/cpdf_object_stream.h"
23 #include "core/fpdfapi/parser/cpdf_read_validator.h"
24 #include "core/fpdfapi/parser/cpdf_reference.h"
25 #include "core/fpdfapi/parser/cpdf_security_handler.h"
26 #include "core/fpdfapi/parser/cpdf_stream.h"
27 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
28 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
29 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
30 #include "core/fxcrt/autorestorer.h"
31 #include "core/fxcrt/data_vector.h"
32 #include "core/fxcrt/fx_extension.h"
33 #include "core/fxcrt/fx_safe_types.h"
34 #include "core/fxcrt/scoped_set_insertion.h"
35 #include "third_party/base/check.h"
36 #include "third_party/base/check_op.h"
37 #include "third_party/base/containers/contains.h"
38 #include "third_party/base/containers/span.h"
39 #include "third_party/base/notreached.h"
40
41 namespace {
42
// Upper bound on the number of entries in the cross-reference table.
// Theoretical limits are higher, but this should be large enough in practice.
// The max size must always be exactly 1 more than the max object number.
constexpr int32_t kMaxXRefSize = CPDF_Parser::kMaxObjectNumber + 1;

// Length in bytes of a header line such as "%PDF-1.7\n".
constexpr FX_FILESIZE kPDFHeaderSize = 9;

// The minimum number of fields that a /W array in a cross-reference stream
// dictionary must contain.
constexpr size_t kMinFieldCount = 3;

// V4 trailers are inline, so this placeholder is used wherever a trailer
// object number is expected for them.
constexpr uint32_t kNoV4TrailerObjectNumber = 0;
57
// One (first object number, object count) pair describing a run of
// consecutive object numbers, as produced by GetCrossRefV5Indices().
struct CrossRefV5IndexEntry {
  // Object number of the first entry in the run.
  uint32_t start_obj_num;
  // Number of consecutive entries in the run.
  uint32_t obj_count;
};
62
GetObjectTypeFromCrossRefStreamType(uint32_t cross_ref_stream_type)63 CPDF_Parser::ObjectType GetObjectTypeFromCrossRefStreamType(
64 uint32_t cross_ref_stream_type) {
65 switch (cross_ref_stream_type) {
66 case 0:
67 return CPDF_Parser::ObjectType::kFree;
68 case 1:
69 return CPDF_Parser::ObjectType::kNotCompressed;
70 case 2:
71 return CPDF_Parser::ObjectType::kCompressed;
72 default:
73 return CPDF_Parser::ObjectType::kNull;
74 }
75 }
76
77 // Use the Get*XRefStreamEntry() functions below, instead of calling this
78 // directly.
GetVarInt(pdfium::span<const uint8_t> input)79 uint32_t GetVarInt(pdfium::span<const uint8_t> input) {
80 uint32_t result = 0;
81 for (uint8_t c : input)
82 result = result * 256 + c;
83 return result;
84 }
85
86 // The following 3 functions retrieve variable length entries from
87 // cross-reference streams, as described in ISO 32000-1:2008 table 18. There are
88 // only 3 fields for any given entry.
GetFirstXRefStreamEntry(pdfium::span<const uint8_t> entry_span,pdfium::span<const uint32_t> field_widths)89 uint32_t GetFirstXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
90 pdfium::span<const uint32_t> field_widths) {
91 return GetVarInt(entry_span.first(field_widths[0]));
92 }
93
GetSecondXRefStreamEntry(pdfium::span<const uint8_t> entry_span,pdfium::span<const uint32_t> field_widths)94 uint32_t GetSecondXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
95 pdfium::span<const uint32_t> field_widths) {
96 return GetVarInt(entry_span.subspan(field_widths[0], field_widths[1]));
97 }
98
GetThirdXRefStreamEntry(pdfium::span<const uint8_t> entry_span,pdfium::span<const uint32_t> field_widths)99 uint32_t GetThirdXRefStreamEntry(pdfium::span<const uint8_t> entry_span,
100 pdfium::span<const uint32_t> field_widths) {
101 return GetVarInt(
102 entry_span.subspan(field_widths[0] + field_widths[1], field_widths[2]));
103 }
104
GetCrossRefV5Indices(const CPDF_Array * array,uint32_t size)105 std::vector<CrossRefV5IndexEntry> GetCrossRefV5Indices(const CPDF_Array* array,
106 uint32_t size) {
107 std::vector<CrossRefV5IndexEntry> indices;
108 if (array) {
109 for (size_t i = 0; i < array->size() / 2; i++) {
110 RetainPtr<const CPDF_Number> pStartNumObj = array->GetNumberAt(i * 2);
111 if (!pStartNumObj)
112 continue;
113
114 RetainPtr<const CPDF_Number> pCountObj = array->GetNumberAt(i * 2 + 1);
115 if (!pCountObj)
116 continue;
117
118 int nStartNum = pStartNumObj->GetInteger();
119 int nCount = pCountObj->GetInteger();
120 if (nStartNum < 0 || nCount <= 0)
121 continue;
122
123 indices.push_back(
124 {static_cast<uint32_t>(nStartNum), static_cast<uint32_t>(nCount)});
125 }
126 }
127
128 if (indices.empty())
129 indices.push_back({0, size});
130 return indices;
131 }
132
GetFieldWidths(const CPDF_Array * array)133 std::vector<uint32_t> GetFieldWidths(const CPDF_Array* array) {
134 std::vector<uint32_t> results;
135 if (!array)
136 return results;
137
138 CPDF_ArrayLocker locker(array);
139 for (const auto& obj : locker)
140 results.push_back(obj->GetInteger());
141 return results;
142 }
143
// Minimal ParsedObjectsHolder whose TryInit() always reports success.
class ObjectsHolderStub final : public CPDF_Parser::ParsedObjectsHolder {
 public:
  ObjectsHolderStub() = default;
  ~ObjectsHolderStub() override = default;

  // Nothing to initialize, so this always succeeds.
  bool TryInit() override { return true; }
};
150
151 } // namespace
152
CPDF_Parser(ParsedObjectsHolder * holder)153 CPDF_Parser::CPDF_Parser(ParsedObjectsHolder* holder)
154 : m_pObjectsHolder(holder),
155 m_CrossRefTable(std::make_unique<CPDF_CrossRefTable>()) {
156 if (!holder) {
157 m_pOwnedObjectsHolder = std::make_unique<ObjectsHolderStub>();
158 m_pObjectsHolder = m_pOwnedObjectsHolder.get();
159 }
160 }
161
// Creates a parser without an external objects holder by delegating to the
// holder-taking constructor with a null holder.
CPDF_Parser::CPDF_Parser() : CPDF_Parser(nullptr) {}
163
// Defined out of line here rather than in the header.
CPDF_Parser::~CPDF_Parser() = default;
165
GetLastObjNum() const166 uint32_t CPDF_Parser::GetLastObjNum() const {
167 return m_CrossRefTable->objects_info().empty()
168 ? 0
169 : m_CrossRefTable->objects_info().rbegin()->first;
170 }
171
IsValidObjectNumber(uint32_t objnum) const172 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const {
173 return objnum <= GetLastObjNum();
174 }
175
GetObjectPositionOrZero(uint32_t objnum) const176 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const {
177 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
178 return (info && info->type == ObjectType::kNormal) ? info->pos : 0;
179 }
180
GetObjectType(uint32_t objnum) const181 CPDF_Parser::ObjectType CPDF_Parser::GetObjectType(uint32_t objnum) const {
182 DCHECK(IsValidObjectNumber(objnum));
183 const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
184 return info ? info->type : ObjectType::kFree;
185 }
186
IsObjectFreeOrNull(uint32_t objnum) const187 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const {
188 switch (GetObjectType(objnum)) {
189 case ObjectType::kFree:
190 case ObjectType::kNull:
191 return true;
192 case ObjectType::kNotCompressed:
193 case ObjectType::kCompressed:
194 return false;
195 }
196 NOTREACHED();
197 return false;
198 }
199
IsObjectFree(uint32_t objnum) const200 bool CPDF_Parser::IsObjectFree(uint32_t objnum) const {
201 return GetObjectType(objnum) == ObjectType::kFree;
202 }
203
InitSyntaxParser(RetainPtr<CPDF_ReadValidator> validator)204 bool CPDF_Parser::InitSyntaxParser(RetainPtr<CPDF_ReadValidator> validator) {
205 const absl::optional<FX_FILESIZE> header_offset = GetHeaderOffset(validator);
206 if (!header_offset.has_value())
207 return false;
208 if (validator->GetSize() < header_offset.value() + kPDFHeaderSize)
209 return false;
210
211 m_pSyntax = std::make_unique<CPDF_SyntaxParser>(std::move(validator),
212 header_offset.value());
213 return ParseFileVersion();
214 }
215
ParseFileVersion()216 bool CPDF_Parser::ParseFileVersion() {
217 m_FileVersion = 0;
218 uint8_t ch;
219 if (!m_pSyntax->GetCharAt(5, ch))
220 return false;
221
222 if (isdigit(ch))
223 m_FileVersion = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)) * 10;
224
225 if (!m_pSyntax->GetCharAt(7, ch))
226 return false;
227
228 if (isdigit(ch))
229 m_FileVersion += FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
230 return true;
231 }
232
StartParse(RetainPtr<IFX_SeekableReadStream> pFileAccess,const ByteString & password)233 CPDF_Parser::Error CPDF_Parser::StartParse(
234 RetainPtr<IFX_SeekableReadStream> pFileAccess,
235 const ByteString& password) {
236 if (!InitSyntaxParser(pdfium::MakeRetain<CPDF_ReadValidator>(
237 std::move(pFileAccess), nullptr)))
238 return FORMAT_ERROR;
239 SetPassword(password);
240 return StartParseInternal();
241 }
242
StartParseInternal()243 CPDF_Parser::Error CPDF_Parser::StartParseInternal() {
244 DCHECK(!m_bHasParsed);
245 DCHECK(!m_bXRefTableRebuilt);
246 m_bHasParsed = true;
247 m_bXRefStream = false;
248
249 m_LastXRefOffset = ParseStartXRef();
250 if (m_LastXRefOffset >= kPDFHeaderSize) {
251 if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
252 !LoadAllCrossRefV5(m_LastXRefOffset)) {
253 if (!RebuildCrossRef())
254 return FORMAT_ERROR;
255
256 m_bXRefTableRebuilt = true;
257 m_LastXRefOffset = 0;
258 }
259 } else {
260 if (!RebuildCrossRef())
261 return FORMAT_ERROR;
262
263 m_bXRefTableRebuilt = true;
264 }
265 Error eRet = SetEncryptHandler();
266 if (eRet != SUCCESS)
267 return eRet;
268
269 if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
270 if (m_bXRefTableRebuilt)
271 return FORMAT_ERROR;
272
273 ReleaseEncryptHandler();
274 if (!RebuildCrossRef())
275 return FORMAT_ERROR;
276
277 eRet = SetEncryptHandler();
278 if (eRet != SUCCESS)
279 return eRet;
280
281 m_pObjectsHolder->TryInit();
282 if (!GetRoot())
283 return FORMAT_ERROR;
284 }
285 if (GetRootObjNum() == CPDF_Object::kInvalidObjNum) {
286 ReleaseEncryptHandler();
287 if (!RebuildCrossRef() || GetRootObjNum() == CPDF_Object::kInvalidObjNum)
288 return FORMAT_ERROR;
289
290 eRet = SetEncryptHandler();
291 if (eRet != SUCCESS)
292 return eRet;
293 }
294 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
295 RetainPtr<const CPDF_Reference> pMetadata =
296 ToReference(GetRoot()->GetObjectFor("Metadata"));
297 if (pMetadata)
298 m_MetadataObjnum = pMetadata->GetRefObjNum();
299 }
300 return SUCCESS;
301 }
302
ParseStartXRef()303 FX_FILESIZE CPDF_Parser::ParseStartXRef() {
304 static constexpr char kStartXRefKeyword[] = "startxref";
305 m_pSyntax->SetPos(m_pSyntax->GetDocumentSize() - strlen(kStartXRefKeyword));
306 if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096))
307 return 0;
308
309 // Skip "startxref" keyword.
310 m_pSyntax->GetKeyword();
311
312 // Read XRef offset.
313 const CPDF_SyntaxParser::WordResult xref_offset_result =
314 m_pSyntax->GetNextWord();
315 if (!xref_offset_result.is_number || xref_offset_result.word.IsEmpty())
316 return 0;
317
318 const FX_SAFE_FILESIZE result = FXSYS_atoi64(xref_offset_result.word.c_str());
319 if (!result.IsValid() || result.ValueOrDie() >= m_pSyntax->GetDocumentSize())
320 return 0;
321
322 return result.ValueOrDie();
323 }
324
SetEncryptHandler()325 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
326 ReleaseEncryptHandler();
327 if (!GetTrailer())
328 return FORMAT_ERROR;
329
330 RetainPtr<const CPDF_Dictionary> pEncryptDict = GetEncryptDict();
331 if (!pEncryptDict)
332 return SUCCESS;
333
334 if (pEncryptDict->GetNameFor("Filter") != "Standard")
335 return HANDLER_ERROR;
336
337 auto pSecurityHandler = pdfium::MakeRetain<CPDF_SecurityHandler>();
338 if (!pSecurityHandler->OnInit(pEncryptDict, GetIDArray(), GetPassword()))
339 return PASSWORD_ERROR;
340
341 m_pSecurityHandler = std::move(pSecurityHandler);
342 return SUCCESS;
343 }
344
// Drops this parser's reference to the current security handler, if any.
void CPDF_Parser::ReleaseEncryptHandler() {
  m_pSecurityHandler.Reset();
}
348
349 // Ideally, all the cross reference entries should be verified.
350 // In reality, we rarely see well-formed cross references don't match
351 // with the objects. crbug/602650 showed a case where object numbers
352 // in the cross reference table are all off by one.
VerifyCrossRefV4()353 bool CPDF_Parser::VerifyCrossRefV4() {
354 for (const auto& it : m_CrossRefTable->objects_info()) {
355 if (it.second.pos <= 0)
356 continue;
357 // Find the first non-zero position.
358 FX_FILESIZE SavedPos = m_pSyntax->GetPos();
359 m_pSyntax->SetPos(it.second.pos);
360 CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
361 m_pSyntax->SetPos(SavedPos);
362 if (!word_result.is_number || word_result.word.IsEmpty() ||
363 FXSYS_atoui(word_result.word.c_str()) != it.first) {
364 // If the object number read doesn't match the one stored,
365 // something is wrong with the cross reference table.
366 return false;
367 }
368 break;
369 }
370 return true;
371 }
372
LoadAllCrossRefV4(FX_FILESIZE xref_offset)373 bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xref_offset) {
374 if (!LoadCrossRefV4(xref_offset, true))
375 return false;
376
377 RetainPtr<CPDF_Dictionary> trailer = LoadTrailerV4();
378 if (!trailer)
379 return false;
380
381 m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
382 const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
383 if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
384 m_CrossRefTable->SetObjectMapSize(xrefsize);
385
386 FX_FILESIZE xref_stm = GetTrailer()->GetDirectIntegerFor("XRefStm");
387 std::vector<FX_FILESIZE> xref_stream_list{xref_stm};
388 std::vector<FX_FILESIZE> xref_list{xref_offset};
389 std::set<FX_FILESIZE> seen_xref_offset{xref_offset};
390
391 // When the trailer doesn't have Prev entry or Prev entry value is not
392 // numerical, GetDirectInteger() returns 0. Loading will end.
393 xref_offset = GetTrailer()->GetDirectIntegerFor("Prev");
394 while (xref_offset > 0) {
395 // Check for circular references.
396 if (pdfium::Contains(seen_xref_offset, xref_offset))
397 return false;
398
399 seen_xref_offset.insert(xref_offset);
400 xref_list.insert(xref_list.begin(), xref_offset);
401
402 // SLOW ...
403 LoadCrossRefV4(xref_offset, true);
404
405 RetainPtr<CPDF_Dictionary> pDict(LoadTrailerV4());
406 if (!pDict)
407 return false;
408
409 xref_offset = pDict->GetDirectIntegerFor("Prev");
410 xref_stm = pDict->GetIntegerFor("XRefStm");
411 xref_stream_list.insert(xref_stream_list.begin(), xref_stm);
412
413 // SLOW ...
414 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
415 std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
416 kNoV4TrailerObjectNumber),
417 std::move(m_CrossRefTable));
418 }
419
420 for (size_t i = 0; i < xref_list.size(); ++i) {
421 if (xref_list[i] > 0 && !LoadCrossRefV4(xref_list[i], false))
422 return false;
423
424 if (xref_stream_list[i] > 0 && !LoadCrossRefV5(&xref_stream_list[i], false))
425 return false;
426
427 if (i == 0 && !VerifyCrossRefV4())
428 return false;
429 }
430 return true;
431 }
432
LoadLinearizedAllCrossRefV4(FX_FILESIZE main_xref_offset)433 bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE main_xref_offset) {
434 if (!LoadCrossRefV4(main_xref_offset, false))
435 return false;
436
437 RetainPtr<CPDF_Dictionary> main_trailer = LoadTrailerV4();
438 if (!main_trailer)
439 return false;
440
441 // GetTrailer() currently returns the first-page trailer.
442 if (GetTrailer()->GetDirectIntegerFor("Size") == 0)
443 return false;
444
445 // Read /XRefStm from the first-page trailer. No need to read /Prev for the
446 // first-page trailer, as the caller already did that and passed it in as
447 // |main_xref_offset|.
448 FX_FILESIZE xref_stm = GetTrailer()->GetDirectIntegerFor("XRefStm");
449 std::vector<FX_FILESIZE> xref_stream_list{xref_stm};
450 std::vector<FX_FILESIZE> xref_list{main_xref_offset};
451 std::set<FX_FILESIZE> seen_xref_offset{main_xref_offset};
452
453 // Merge the trailers.
454 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
455 std::make_unique<CPDF_CrossRefTable>(std::move(main_trailer),
456 kNoV4TrailerObjectNumber),
457 std::move(m_CrossRefTable));
458
459 // Now GetTrailer() returns the merged trailer, where /Prev is from the
460 // main-trailer.
461 FX_FILESIZE xref_offset = GetTrailer()->GetDirectIntegerFor("Prev");
462 while (xref_offset > 0) {
463 // Check for circular references.
464 if (pdfium::Contains(seen_xref_offset, xref_offset))
465 return false;
466
467 seen_xref_offset.insert(xref_offset);
468 xref_list.insert(xref_list.begin(), xref_offset);
469
470 // SLOW ...
471 LoadCrossRefV4(xref_offset, true);
472
473 RetainPtr<CPDF_Dictionary> pDict(LoadTrailerV4());
474 if (!pDict)
475 return false;
476
477 xref_offset = pDict->GetDirectIntegerFor("Prev");
478 xref_stm = pDict->GetIntegerFor("XRefStm");
479 xref_stream_list.insert(xref_stream_list.begin(), xref_stm);
480
481 // SLOW ...
482 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
483 std::make_unique<CPDF_CrossRefTable>(std::move(pDict),
484 kNoV4TrailerObjectNumber),
485 std::move(m_CrossRefTable));
486 }
487
488 if (xref_stream_list[0] > 0 && !LoadCrossRefV5(&xref_stream_list[0], false))
489 return false;
490
491 for (size_t i = 1; i < xref_list.size(); ++i) {
492 if (xref_list[i] > 0 && !LoadCrossRefV4(xref_list[i], false))
493 return false;
494
495 if (xref_stream_list[i] > 0 && !LoadCrossRefV5(&xref_stream_list[i], false))
496 return false;
497 }
498 return true;
499 }
500
ParseAndAppendCrossRefSubsectionData(uint32_t start_objnum,uint32_t count,std::vector<CrossRefObjData> * out_objects)501 bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
502 uint32_t start_objnum,
503 uint32_t count,
504 std::vector<CrossRefObjData>* out_objects) {
505 if (!count)
506 return true;
507
508 // Each entry shall be exactly 20 byte.
509 // A sample entry looks like:
510 // "0000000000 00007 f\r\n"
511 static constexpr int32_t kEntrySize = 20;
512
513 if (!out_objects) {
514 FX_SAFE_FILESIZE pos = count;
515 pos *= kEntrySize;
516 pos += m_pSyntax->GetPos();
517 if (!pos.IsValid())
518 return false;
519 m_pSyntax->SetPos(pos.ValueOrDie());
520 return true;
521 }
522 const size_t start_obj_index = out_objects->size();
523 FX_SAFE_SIZE_T new_size = start_obj_index;
524 new_size += count;
525 if (!new_size.IsValid())
526 return false;
527
528 if (new_size.ValueOrDie() > kMaxXRefSize)
529 return false;
530
531 const size_t max_entries_in_file = m_pSyntax->GetDocumentSize() / kEntrySize;
532 if (new_size.ValueOrDie() > max_entries_in_file)
533 return false;
534
535 out_objects->resize(new_size.ValueOrDie());
536
537 DataVector<char> buf(1024 * kEntrySize + 1);
538 buf.back() = '\0';
539
540 uint32_t entries_to_read = count;
541 while (entries_to_read > 0) {
542 const uint32_t entries_in_block = std::min(entries_to_read, 1024u);
543 const uint32_t bytes_to_read = entries_in_block * kEntrySize;
544 auto block_span = pdfium::make_span(buf).first(bytes_to_read);
545 if (!m_pSyntax->ReadBlock(pdfium::as_writable_bytes(block_span)))
546 return false;
547
548 for (uint32_t i = 0; i < entries_in_block; i++) {
549 uint32_t iObjectIndex = count - entries_to_read + i;
550 CrossRefObjData& obj_data =
551 (*out_objects)[start_obj_index + iObjectIndex];
552 const uint32_t objnum = start_objnum + iObjectIndex;
553 obj_data.obj_num = objnum;
554 ObjectInfo& info = obj_data.info;
555
556 const char* pEntry = &buf[i * kEntrySize];
557 if (pEntry[17] == 'f') {
558 info.pos = 0;
559 info.type = ObjectType::kFree;
560 } else {
561 const FX_SAFE_FILESIZE offset = FXSYS_atoi64(pEntry);
562 if (!offset.IsValid())
563 return false;
564
565 if (offset.ValueOrDie() == 0) {
566 for (int32_t c = 0; c < 10; c++) {
567 if (!isdigit(pEntry[c]))
568 return false;
569 }
570 }
571
572 info.pos = offset.ValueOrDie();
573
574 // TODO(art-snake): The info.gennum is uint16_t, but version may be
575 // greated than max<uint16_t>. Needs solve this issue.
576 const int32_t version = FXSYS_atoi(pEntry + 11);
577 info.gennum = version;
578 info.type = ObjectType::kNotCompressed;
579 }
580 }
581 entries_to_read -= entries_in_block;
582 }
583 return true;
584 }
585
ParseCrossRefV4(std::vector<CrossRefObjData> * out_objects)586 bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects) {
587 if (out_objects)
588 out_objects->clear();
589
590 if (m_pSyntax->GetKeyword() != "xref")
591 return false;
592 std::vector<CrossRefObjData> result_objects;
593 while (true) {
594 FX_FILESIZE saved_pos = m_pSyntax->GetPos();
595 CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
596 const ByteString& word = word_result.word;
597 if (word.IsEmpty())
598 return false;
599
600 if (!word_result.is_number) {
601 m_pSyntax->SetPos(saved_pos);
602 break;
603 }
604
605 uint32_t start_objnum = FXSYS_atoui(word.c_str());
606 if (start_objnum >= kMaxObjectNumber)
607 return false;
608
609 uint32_t count = m_pSyntax->GetDirectNum();
610 m_pSyntax->ToNextWord();
611
612 if (!ParseAndAppendCrossRefSubsectionData(
613 start_objnum, count, out_objects ? &result_objects : nullptr)) {
614 return false;
615 }
616 }
617 if (out_objects)
618 *out_objects = std::move(result_objects);
619 return true;
620 }
621
LoadCrossRefV4(FX_FILESIZE pos,bool bSkip)622 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, bool bSkip) {
623 m_pSyntax->SetPos(pos);
624 std::vector<CrossRefObjData> objects;
625 if (!ParseCrossRefV4(bSkip ? nullptr : &objects))
626 return false;
627
628 MergeCrossRefObjectsData(objects);
629 return true;
630 }
631
MergeCrossRefObjectsData(const std::vector<CrossRefObjData> & objects)632 void CPDF_Parser::MergeCrossRefObjectsData(
633 const std::vector<CrossRefObjData>& objects) {
634 for (const auto& obj : objects) {
635 switch (obj.info.type) {
636 case ObjectType::kFree:
637 if (obj.info.gennum > 0)
638 m_CrossRefTable->SetFree(obj.obj_num);
639 break;
640 case ObjectType::kNormal:
641 case ObjectType::kObjStream:
642 m_CrossRefTable->AddNormal(obj.obj_num, obj.info.gennum, obj.info.pos);
643 break;
644 case ObjectType::kCompressed:
645 m_CrossRefTable->AddCompressed(obj.obj_num, obj.info.archive.obj_num,
646 obj.info.archive.obj_index);
647 break;
648 }
649 }
650 }
651
LoadAllCrossRefV5(FX_FILESIZE xref_offset)652 bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xref_offset) {
653 if (!LoadCrossRefV5(&xref_offset, true))
654 return false;
655
656 std::set<FX_FILESIZE> seen_xref_offset;
657 while (xref_offset > 0) {
658 seen_xref_offset.insert(xref_offset);
659 if (!LoadCrossRefV5(&xref_offset, false))
660 return false;
661
662 // Check for circular references.
663 if (pdfium::Contains(seen_xref_offset, xref_offset))
664 return false;
665 }
666 m_ObjectStreamMap.clear();
667 m_bXRefStream = true;
668 return true;
669 }
670
RebuildCrossRef()671 bool CPDF_Parser::RebuildCrossRef() {
672 auto cross_ref_table = std::make_unique<CPDF_CrossRefTable>();
673
674 const uint32_t kBufferSize = 4096;
675 m_pSyntax->SetReadBufferSize(kBufferSize);
676 m_pSyntax->SetPos(0);
677
678 std::vector<std::pair<uint32_t, FX_FILESIZE>> numbers;
679 for (CPDF_SyntaxParser::WordResult result = m_pSyntax->GetNextWord();
680 !result.word.IsEmpty(); result = m_pSyntax->GetNextWord()) {
681 const ByteString& word = result.word;
682 if (result.is_number) {
683 numbers.emplace_back(FXSYS_atoui(word.c_str()),
684 m_pSyntax->GetPos() - word.GetLength());
685 if (numbers.size() > 2u)
686 numbers.erase(numbers.begin());
687 continue;
688 }
689
690 if (word == "(") {
691 m_pSyntax->ReadString();
692 } else if (word == "<") {
693 m_pSyntax->ReadHexString();
694 } else if (word == "trailer") {
695 RetainPtr<CPDF_Object> pTrailer = m_pSyntax->GetObjectBody(nullptr);
696 if (pTrailer) {
697 CPDF_Stream* stream_trailer = pTrailer->AsMutableStream();
698 // Grab the object number from `pTrailer` before potentially calling
699 // std::move(pTrailer) below.
700 const uint32_t trailer_object_number = pTrailer->GetObjNum();
701 RetainPtr<CPDF_Dictionary> trailer_dict =
702 stream_trailer ? stream_trailer->GetMutableDict()
703 : ToDictionary(std::move(pTrailer));
704 cross_ref_table = CPDF_CrossRefTable::MergeUp(
705 std::move(cross_ref_table),
706 std::make_unique<CPDF_CrossRefTable>(std::move(trailer_dict),
707 trailer_object_number));
708 }
709 } else if (word == "obj" && numbers.size() == 2u) {
710 const FX_FILESIZE obj_pos = numbers[0].second;
711 const uint32_t obj_num = numbers[0].first;
712 const uint32_t gen_num = numbers[1].first;
713
714 m_pSyntax->SetPos(obj_pos);
715 const RetainPtr<CPDF_Stream> pStream =
716 ToStream(m_pSyntax->GetIndirectObject(
717 nullptr, CPDF_SyntaxParser::ParseType::kStrict));
718
719 if (pStream && pStream->GetDict()->GetNameFor("Type") == "XRef") {
720 cross_ref_table = CPDF_CrossRefTable::MergeUp(
721 std::move(cross_ref_table),
722 std::make_unique<CPDF_CrossRefTable>(
723 ToDictionary(pStream->GetDict()->Clone()),
724 pStream->GetObjNum()));
725 }
726
727 if (obj_num < kMaxObjectNumber) {
728 cross_ref_table->AddNormal(obj_num, gen_num, obj_pos);
729 const auto object_stream =
730 CPDF_ObjectStream::Create(std::move(pStream));
731 if (object_stream) {
732 const auto& object_info = object_stream->object_info();
733 for (size_t i = 0; i < object_info.size(); ++i) {
734 const auto& info = object_info[i];
735 if (info.obj_num < kMaxObjectNumber)
736 cross_ref_table->AddCompressed(info.obj_num, obj_num, i);
737 }
738 }
739 }
740 }
741 numbers.clear();
742 }
743
744 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(std::move(m_CrossRefTable),
745 std::move(cross_ref_table));
746 // Resore default buffer size.
747 m_pSyntax->SetReadBufferSize(CPDF_Stream::kFileBufSize);
748
749 return GetTrailer() && !m_CrossRefTable->objects_info().empty();
750 }
751
LoadCrossRefV5(FX_FILESIZE * pos,bool bMainXRef)752 bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) {
753 RetainPtr<CPDF_Object> pObject(ParseIndirectObjectAt(*pos, 0));
754 if (!pObject || !pObject->GetObjNum())
755 return false;
756
757 RetainPtr<const CPDF_Stream> pStream(pObject->AsStream());
758 if (!pStream)
759 return false;
760
761 RetainPtr<const CPDF_Dictionary> pDict = pStream->GetDict();
762 int32_t prev = pDict->GetIntegerFor("Prev");
763 if (prev < 0)
764 return false;
765
766 int32_t size = pDict->GetIntegerFor("Size");
767 if (size < 0)
768 return false;
769
770 *pos = prev;
771
772 RetainPtr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
773 if (bMainXRef) {
774 m_CrossRefTable = std::make_unique<CPDF_CrossRefTable>(
775 std::move(pNewTrailer), pStream->GetObjNum());
776 m_CrossRefTable->SetObjectMapSize(size);
777 } else {
778 m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
779 std::make_unique<CPDF_CrossRefTable>(std::move(pNewTrailer),
780 pStream->GetObjNum()),
781 std::move(m_CrossRefTable));
782 }
783
784 std::vector<CrossRefV5IndexEntry> indices =
785 GetCrossRefV5Indices(pDict->GetArrayFor("Index").Get(), size);
786
787 std::vector<uint32_t> field_widths =
788 GetFieldWidths(pDict->GetArrayFor("W").Get());
789 if (field_widths.size() < kMinFieldCount)
790 return false;
791
792 FX_SAFE_UINT32 dwAccWidth;
793 for (uint32_t width : field_widths)
794 dwAccWidth += width;
795 if (!dwAccWidth.IsValid())
796 return false;
797
798 uint32_t total_width = dwAccWidth.ValueOrDie();
799 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pStream));
800 pAcc->LoadAllDataFiltered();
801
802 pdfium::span<const uint8_t> data_span = pAcc->GetSpan();
803 uint32_t segindex = 0;
804 for (const auto& index : indices) {
805 FX_SAFE_UINT32 seg_end = segindex;
806 seg_end += index.obj_count;
807 seg_end *= total_width;
808 if (!seg_end.IsValid() || seg_end.ValueOrDie() > data_span.size())
809 continue;
810
811 pdfium::span<const uint8_t> seg_span = data_span.subspan(
812 segindex * total_width, index.obj_count * total_width);
813 FX_SAFE_UINT32 safe_new_size = index.start_obj_num;
814 safe_new_size += index.obj_count;
815 if (!safe_new_size.IsValid()) {
816 continue;
817 }
818
819 // Until SetObjectMapSize() below has been called by a prior loop iteration,
820 // `current_size` is based on the /Size value parsed in LoadCrossRefV5().
821 // PDFs may not always have the correct /Size. In this case, other PDF
822 // implementations ignore the incorrect size, and PDFium also ignores
823 // incorrect size in trailers for V4 xrefs.
824 const uint32_t current_size =
825 m_CrossRefTable->objects_info().empty() ? 0 : GetLastObjNum() + 1;
826 // So allow `new_size` to be greater than `current_size`, but avoid going
827 // over `kMaxXRefSize`. This works just fine because the loop below checks
828 // against `kMaxObjectNumber`, and the two "max" constants are in sync.
829 const uint32_t new_size =
830 std::min<uint32_t>(safe_new_size.ValueOrDie(), kMaxXRefSize);
831 if (new_size > current_size) {
832 m_CrossRefTable->SetObjectMapSize(new_size);
833 }
834
835 for (uint32_t i = 0; i < index.obj_count; ++i) {
836 const uint32_t obj_num = index.start_obj_num + i;
837 if (obj_num >= kMaxObjectNumber) {
838 break;
839 }
840
841 ProcessCrossRefV5Entry(seg_span.subspan(i * total_width, total_width),
842 field_widths, obj_num);
843 }
844
845 segindex += index.obj_count;
846 }
847 return true;
848 }
849
ProcessCrossRefV5Entry(pdfium::span<const uint8_t> entry_span,pdfium::span<const uint32_t> field_widths,uint32_t obj_num)850 void CPDF_Parser::ProcessCrossRefV5Entry(
851 pdfium::span<const uint8_t> entry_span,
852 pdfium::span<const uint32_t> field_widths,
853 uint32_t obj_num) {
854 DCHECK_GE(field_widths.size(), kMinFieldCount);
855 ObjectType type;
856 if (field_widths[0]) {
857 const uint32_t cross_ref_stream_obj_type =
858 GetFirstXRefStreamEntry(entry_span, field_widths);
859 type = GetObjectTypeFromCrossRefStreamType(cross_ref_stream_obj_type);
860 if (type == ObjectType::kNull)
861 return;
862 } else {
863 // Per ISO 32000-1:2008 table 17, use the default value of 1 for the xref
864 // stream entry when it is not specified. The `type` assignment is the
865 // equivalent to calling GetObjectTypeFromCrossRefStreamType(1).
866 type = ObjectType::kNotCompressed;
867 }
868
869 const ObjectType existing_type = GetObjectType(obj_num);
870 if (existing_type == ObjectType::kNull) {
871 const uint32_t offset = GetSecondXRefStreamEntry(entry_span, field_widths);
872 if (pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(offset))
873 m_CrossRefTable->AddNormal(obj_num, 0, offset);
874 return;
875 }
876
877 if (existing_type != ObjectType::kFree)
878 return;
879
880 if (type == ObjectType::kFree) {
881 m_CrossRefTable->SetFree(obj_num);
882 return;
883 }
884
885 if (type == ObjectType::kNotCompressed) {
886 const uint32_t offset = GetSecondXRefStreamEntry(entry_span, field_widths);
887 if (pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(offset))
888 m_CrossRefTable->AddNormal(obj_num, 0, offset);
889 return;
890 }
891
892 DCHECK_EQ(type, ObjectType::kCompressed);
893 const uint32_t archive_obj_num =
894 GetSecondXRefStreamEntry(entry_span, field_widths);
895 if (!IsValidObjectNumber(archive_obj_num)) {
896 return;
897 }
898
899 const uint32_t archive_obj_index =
900 GetThirdXRefStreamEntry(entry_span, field_widths);
901 m_CrossRefTable->AddCompressed(obj_num, archive_obj_num, archive_obj_index);
902 }
903
GetIDArray() const904 RetainPtr<const CPDF_Array> CPDF_Parser::GetIDArray() const {
905 return GetTrailer() ? GetTrailer()->GetArrayFor("ID") : nullptr;
906 }
907
GetRoot() const908 RetainPtr<const CPDF_Dictionary> CPDF_Parser::GetRoot() const {
909 RetainPtr<CPDF_Object> obj =
910 m_pObjectsHolder->GetOrParseIndirectObject(GetRootObjNum());
911 return obj ? obj->GetDict() : nullptr;
912 }
913
GetEncryptDict() const914 RetainPtr<const CPDF_Dictionary> CPDF_Parser::GetEncryptDict() const {
915 if (!GetTrailer())
916 return nullptr;
917
918 RetainPtr<const CPDF_Object> pEncryptObj =
919 GetTrailer()->GetObjectFor("Encrypt");
920 if (!pEncryptObj)
921 return nullptr;
922
923 if (pEncryptObj->IsDictionary())
924 return pdfium::WrapRetain(pEncryptObj->AsDictionary());
925
926 if (pEncryptObj->IsReference()) {
927 return ToDictionary(m_pObjectsHolder->GetOrParseIndirectObject(
928 pEncryptObj->AsReference()->GetRefObjNum()));
929 }
930 return nullptr;
931 }
932
GetEncodedPassword() const933 ByteString CPDF_Parser::GetEncodedPassword() const {
934 return GetSecurityHandler()->GetEncodedPassword(GetPassword().AsStringView());
935 }
936
GetTrailer() const937 const CPDF_Dictionary* CPDF_Parser::GetTrailer() const {
938 return m_CrossRefTable->trailer();
939 }
940
GetMutableTrailerForTesting()941 CPDF_Dictionary* CPDF_Parser::GetMutableTrailerForTesting() {
942 return m_CrossRefTable->GetMutableTrailerForTesting();
943 }
944
GetTrailerObjectNumber() const945 uint32_t CPDF_Parser::GetTrailerObjectNumber() const {
946 return m_CrossRefTable->trailer_object_number();
947 }
948
GetCombinedTrailer() const949 RetainPtr<CPDF_Dictionary> CPDF_Parser::GetCombinedTrailer() const {
950 return m_CrossRefTable->trailer()
951 ? ToDictionary(m_CrossRefTable->trailer()->Clone())
952 : RetainPtr<CPDF_Dictionary>();
953 }
954
GetInfoObjNum() const955 uint32_t CPDF_Parser::GetInfoObjNum() const {
956 RetainPtr<const CPDF_Reference> pRef =
957 ToReference(m_CrossRefTable->trailer()
958 ? m_CrossRefTable->trailer()->GetObjectFor("Info")
959 : nullptr);
960 return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum;
961 }
962
GetRootObjNum() const963 uint32_t CPDF_Parser::GetRootObjNum() const {
964 RetainPtr<const CPDF_Reference> pRef =
965 ToReference(m_CrossRefTable->trailer()
966 ? m_CrossRefTable->trailer()->GetObjectFor("Root")
967 : nullptr);
968 return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum;
969 }
970
ParseIndirectObject(uint32_t objnum)971 RetainPtr<CPDF_Object> CPDF_Parser::ParseIndirectObject(uint32_t objnum) {
972 if (!IsValidObjectNumber(objnum))
973 return nullptr;
974
975 // Prevent circular parsing the same object.
976 if (pdfium::Contains(m_ParsingObjNums, objnum))
977 return nullptr;
978
979 ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
980 if (GetObjectType(objnum) == ObjectType::kNotCompressed) {
981 FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
982 if (pos <= 0)
983 return nullptr;
984 return ParseIndirectObjectAt(pos, objnum);
985 }
986 if (GetObjectType(objnum) != ObjectType::kCompressed)
987 return nullptr;
988
989 const ObjectInfo& info = *m_CrossRefTable->GetObjectInfo(objnum);
990 const CPDF_ObjectStream* pObjStream = GetObjectStream(info.archive.obj_num);
991 if (!pObjStream)
992 return nullptr;
993
994 return pObjStream->ParseObject(m_pObjectsHolder, objnum,
995 info.archive.obj_index);
996 }
997
GetObjectStream(uint32_t object_number)998 const CPDF_ObjectStream* CPDF_Parser::GetObjectStream(uint32_t object_number) {
999 // Prevent circular parsing the same object.
1000 if (pdfium::Contains(m_ParsingObjNums, object_number))
1001 return nullptr;
1002
1003 auto it = m_ObjectStreamMap.find(object_number);
1004 if (it != m_ObjectStreamMap.end())
1005 return it->second.get();
1006
1007 const auto* info = m_CrossRefTable->GetObjectInfo(object_number);
1008 if (!info || info->type != ObjectType::kObjStream)
1009 return nullptr;
1010
1011 const FX_FILESIZE object_pos = info->pos;
1012 if (object_pos <= 0)
1013 return nullptr;
1014
1015 // Keep track of `object_number` before doing more parsing.
1016 ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, object_number);
1017
1018 RetainPtr<CPDF_Object> object =
1019 ParseIndirectObjectAt(object_pos, object_number);
1020 if (!object)
1021 return nullptr;
1022
1023 std::unique_ptr<CPDF_ObjectStream> objs_stream =
1024 CPDF_ObjectStream::Create(ToStream(object));
1025 const CPDF_ObjectStream* result = objs_stream.get();
1026 m_ObjectStreamMap[object_number] = std::move(objs_stream);
1027
1028 return result;
1029 }
1030
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum)1031 RetainPtr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt(FX_FILESIZE pos,
1032 uint32_t objnum) {
1033 const FX_FILESIZE saved_pos = m_pSyntax->GetPos();
1034 m_pSyntax->SetPos(pos);
1035
1036 auto result = m_pSyntax->GetIndirectObject(
1037 m_pObjectsHolder, CPDF_SyntaxParser::ParseType::kLoose);
1038 m_pSyntax->SetPos(saved_pos);
1039 if (result && objnum && result->GetObjNum() != objnum)
1040 return nullptr;
1041
1042 const bool should_decrypt = m_pSecurityHandler &&
1043 m_pSecurityHandler->GetCryptoHandler() &&
1044 objnum != m_MetadataObjnum;
1045 if (should_decrypt &&
1046 !m_pSecurityHandler->GetCryptoHandler()->DecryptObjectTree(result)) {
1047 return nullptr;
1048 }
1049 return result;
1050 }
1051
GetDocumentSize() const1052 FX_FILESIZE CPDF_Parser::GetDocumentSize() const {
1053 return m_pSyntax->GetDocumentSize();
1054 }
1055
GetFirstPageNo() const1056 uint32_t CPDF_Parser::GetFirstPageNo() const {
1057 return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
1058 }
1059
// Replaces the parser's linearized header with `pLinearized`, taking ownership
// of it. As the name says, this is meant for tests.
void CPDF_Parser::SetLinearizedHeaderForTesting(
    std::unique_ptr<CPDF_LinearizedHeader> pLinearized) {
  m_pLinearized = std::move(pLinearized);
}
1064
LoadTrailerV4()1065 RetainPtr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() {
1066 if (m_pSyntax->GetKeyword() != "trailer")
1067 return nullptr;
1068
1069 return ToDictionary(m_pSyntax->GetObjectBody(m_pObjectsHolder));
1070 }
1071
GetPermissions() const1072 uint32_t CPDF_Parser::GetPermissions() const {
1073 return m_pSecurityHandler ? m_pSecurityHandler->GetPermissions() : 0xFFFFFFFF;
1074 }
1075
ParseLinearizedHeader()1076 std::unique_ptr<CPDF_LinearizedHeader> CPDF_Parser::ParseLinearizedHeader() {
1077 return CPDF_LinearizedHeader::Parse(m_pSyntax.get());
1078 }
1079
StartLinearizedParse(RetainPtr<CPDF_ReadValidator> validator,const ByteString & password)1080 CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
1081 RetainPtr<CPDF_ReadValidator> validator,
1082 const ByteString& password) {
1083 DCHECK(!m_bHasParsed);
1084 DCHECK(!m_bXRefTableRebuilt);
1085 SetPassword(password);
1086 m_bXRefStream = false;
1087 m_LastXRefOffset = 0;
1088
1089 if (!InitSyntaxParser(std::move(validator)))
1090 return FORMAT_ERROR;
1091
1092 m_pLinearized = ParseLinearizedHeader();
1093 if (!m_pLinearized)
1094 return StartParseInternal();
1095
1096 m_bHasParsed = true;
1097
1098 m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
1099 FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset;
1100 bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false);
1101 if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) {
1102 if (!RebuildCrossRef())
1103 return FORMAT_ERROR;
1104
1105 m_bXRefTableRebuilt = true;
1106 m_LastXRefOffset = 0;
1107 }
1108 if (bLoadV4) {
1109 RetainPtr<CPDF_Dictionary> trailer = LoadTrailerV4();
1110 if (!trailer)
1111 return SUCCESS;
1112
1113 m_CrossRefTable->SetTrailer(std::move(trailer), kNoV4TrailerObjectNumber);
1114 const int32_t xrefsize = GetTrailer()->GetDirectIntegerFor("Size");
1115 if (xrefsize > 0) {
1116 // Check if `xrefsize` is correct. If it is incorrect, give up and rebuild
1117 // the xref table.
1118 const uint32_t expected_last_obj_num = xrefsize - 1;
1119 if (GetLastObjNum() != expected_last_obj_num && !RebuildCrossRef()) {
1120 return FORMAT_ERROR;
1121 }
1122 }
1123 }
1124
1125 Error eRet = SetEncryptHandler();
1126 if (eRet != SUCCESS)
1127 return eRet;
1128
1129 if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
1130 if (m_bXRefTableRebuilt)
1131 return FORMAT_ERROR;
1132
1133 ReleaseEncryptHandler();
1134 if (!RebuildCrossRef())
1135 return FORMAT_ERROR;
1136
1137 eRet = SetEncryptHandler();
1138 if (eRet != SUCCESS)
1139 return eRet;
1140
1141 m_pObjectsHolder->TryInit();
1142 if (!GetRoot())
1143 return FORMAT_ERROR;
1144 }
1145
1146 if (GetRootObjNum() == CPDF_Object::kInvalidObjNum) {
1147 ReleaseEncryptHandler();
1148 if (!RebuildCrossRef() || GetRootObjNum() == CPDF_Object::kInvalidObjNum)
1149 return FORMAT_ERROR;
1150
1151 eRet = SetEncryptHandler();
1152 if (eRet != SUCCESS)
1153 return eRet;
1154 }
1155
1156 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1157 RetainPtr<const CPDF_Reference> pMetadata =
1158 ToReference(GetRoot()->GetObjectFor("Metadata"));
1159 if (pMetadata)
1160 m_MetadataObjnum = pMetadata->GetRefObjNum();
1161 }
1162 return SUCCESS;
1163 }
1164
LoadLinearizedAllCrossRefV5(FX_FILESIZE main_xref_offset)1165 bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE main_xref_offset) {
1166 FX_FILESIZE xref_offset = main_xref_offset;
1167 if (!LoadCrossRefV5(&xref_offset, false))
1168 return false;
1169
1170 std::set<FX_FILESIZE> seen_xref_offset;
1171 while (xref_offset) {
1172 seen_xref_offset.insert(xref_offset);
1173 if (!LoadCrossRefV5(&xref_offset, false))
1174 return false;
1175
1176 // Check for circular references.
1177 if (pdfium::Contains(seen_xref_offset, xref_offset))
1178 return false;
1179 }
1180 m_ObjectStreamMap.clear();
1181 m_bXRefStream = true;
1182 return true;
1183 }
1184
LoadLinearizedMainXRefTable()1185 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1186 const FX_SAFE_FILESIZE prev = GetTrailer()->GetIntegerFor("Prev");
1187 const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
1188 if (main_xref_offset < 0)
1189 return FORMAT_ERROR;
1190
1191 if (main_xref_offset == 0)
1192 return SUCCESS;
1193
1194 const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum);
1195 m_MetadataObjnum = 0;
1196 m_ObjectStreamMap.clear();
1197
1198 if (!LoadLinearizedAllCrossRefV4(main_xref_offset) &&
1199 !LoadLinearizedAllCrossRefV5(main_xref_offset)) {
1200 m_LastXRefOffset = 0;
1201 return FORMAT_ERROR;
1202 }
1203
1204 return SUCCESS;
1205 }
1206
// Replaces the parser's syntax parser with `parser`, taking ownership of it.
// As the name says, this is meant for tests.
void CPDF_Parser::SetSyntaxParserForTesting(
    std::unique_ptr<CPDF_SyntaxParser> parser) {
  m_pSyntax = std::move(parser);
}
1211
GetTrailerEnds()1212 std::vector<unsigned int> CPDF_Parser::GetTrailerEnds() {
1213 std::vector<unsigned int> trailer_ends;
1214 m_pSyntax->SetTrailerEnds(&trailer_ends);
1215
1216 // Traverse the document.
1217 m_pSyntax->SetPos(0);
1218 while (true) {
1219 CPDF_SyntaxParser::WordResult word_result = m_pSyntax->GetNextWord();
1220 if (word_result.is_number) {
1221 // The object number was read. Read the generation number.
1222 word_result = m_pSyntax->GetNextWord();
1223 if (!word_result.is_number)
1224 break;
1225
1226 word_result = m_pSyntax->GetNextWord();
1227 if (word_result.word != "obj")
1228 break;
1229
1230 m_pSyntax->GetObjectBody(nullptr);
1231
1232 word_result = m_pSyntax->GetNextWord();
1233 if (word_result.word != "endobj")
1234 break;
1235 } else if (word_result.word == "trailer") {
1236 m_pSyntax->GetObjectBody(nullptr);
1237 } else if (word_result.word == "startxref") {
1238 m_pSyntax->GetNextWord();
1239 } else if (word_result.word == "xref") {
1240 while (true) {
1241 word_result = m_pSyntax->GetNextWord();
1242 if (word_result.word.IsEmpty() || word_result.word == "startxref")
1243 break;
1244 }
1245 m_pSyntax->GetNextWord();
1246 } else {
1247 break;
1248 }
1249 }
1250
1251 // Stop recording trailer ends.
1252 m_pSyntax->SetTrailerEnds(nullptr);
1253 return trailer_ends;
1254 }
1255
WriteToArchive(IFX_ArchiveStream * archive,FX_FILESIZE src_size)1256 bool CPDF_Parser::WriteToArchive(IFX_ArchiveStream* archive,
1257 FX_FILESIZE src_size) {
1258 static constexpr FX_FILESIZE kBufferSize = 4096;
1259 DataVector<uint8_t> buffer(kBufferSize);
1260 m_pSyntax->SetPos(0);
1261 while (src_size) {
1262 const uint32_t block_size =
1263 static_cast<uint32_t>(std::min(kBufferSize, src_size));
1264 auto block_span = pdfium::make_span(buffer).first(block_size);
1265 if (!m_pSyntax->ReadBlock(block_span))
1266 return false;
1267 if (!archive->WriteBlock(pdfium::make_span(buffer).first(block_size)))
1268 return false;
1269 src_size -= block_size;
1270 }
1271 return true;
1272 }
1273