1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_data_avail.h"
8
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12
13 #include "core/fpdfapi/parser/cpdf_array.h"
14 #include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
18 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
19 #include "core/fpdfapi/parser/cpdf_name.h"
20 #include "core/fpdfapi/parser/cpdf_number.h"
21 #include "core/fpdfapi/parser/cpdf_page_object_avail.h"
22 #include "core/fpdfapi/parser/cpdf_read_validator.h"
23 #include "core/fpdfapi/parser/cpdf_reference.h"
24 #include "core/fpdfapi/parser/cpdf_stream.h"
25 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27 #include "core/fxcrt/autorestorer.h"
28 #include "core/fxcrt/fx_extension.h"
29 #include "core/fxcrt/fx_safe_types.h"
30 #include "core/fxcrt/stl_util.h"
31 #include "third_party/base/check.h"
32 #include "third_party/base/containers/contains.h"
33 #include "third_party/base/notreached.h"
34 #include "third_party/base/numerics/safe_conversions.h"
35
36 namespace {
37
GetResourceObject(RetainPtr<CPDF_Dictionary> pDict)38 RetainPtr<CPDF_Object> GetResourceObject(RetainPtr<CPDF_Dictionary> pDict) {
39 constexpr size_t kMaxHierarchyDepth = 64;
40 size_t depth = 0;
41
42 while (pDict) {
43 RetainPtr<CPDF_Object> result = pDict->GetMutableObjectFor("Resources");
44 if (result)
45 return result;
46 if (++depth > kMaxHierarchyDepth) {
47 // We have cycle in parents hierarchy.
48 return nullptr;
49 }
50 RetainPtr<CPDF_Object> parent = pDict->GetMutableObjectFor("Parent");
51 pDict = parent ? parent->GetMutableDict() : nullptr;
52 }
53 return nullptr;
54 }
55
56 class HintsScope {
57 public:
HintsScope(RetainPtr<CPDF_ReadValidator> validator,CPDF_DataAvail::DownloadHints * hints)58 HintsScope(RetainPtr<CPDF_ReadValidator> validator,
59 CPDF_DataAvail::DownloadHints* hints)
60 : validator_(std::move(validator)) {
61 DCHECK(validator_);
62 validator_->SetDownloadHints(hints);
63 }
64
~HintsScope()65 ~HintsScope() { validator_->SetDownloadHints(nullptr); }
66
67 private:
68 RetainPtr<CPDF_ReadValidator> validator_;
69 };
70
71 } // namespace
72
// Out-of-line defaulted destructor for CPDF_DataAvail::FileAvail.
CPDF_DataAvail::FileAvail::~FileAvail() = default;

// Out-of-line defaulted destructor for CPDF_DataAvail::DownloadHints.
CPDF_DataAvail::DownloadHints::~DownloadHints() = default;
76
// Wraps |pFileRead| and |pFileAvail| in a CPDF_ReadValidator and records the
// size reported by that validator as the file length.
// NOTE(review): m_dwFileLen is initialized from m_pFileRead, so this relies on
// m_pFileRead being declared before m_dwFileLen in the class - confirm in the
// header.
CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail,
                               RetainPtr<IFX_SeekableReadStream> pFileRead)
    : m_pFileRead(pdfium::MakeRetain<CPDF_ReadValidator>(std::move(pFileRead),
                                                         pFileAvail)),
      m_dwFileLen(m_pFileRead->GetSize()) {}
82
~CPDF_DataAvail()83 CPDF_DataAvail::~CPDF_DataAvail() {
84 m_pHintTables.reset();
85 if (m_pDocument)
86 m_pDocument->RemoveObserver(this);
87 }
88
// Observer callback: forgets the document pointer and drops the per-document
// availability state (form checker, queued page-tree objects, and the
// per-page object/resource checkers).
void CPDF_DataAvail::OnObservableDestroyed() {
  m_pDocument = nullptr;
  m_pFormAvail.reset();
  m_PagesArray.clear();
  m_PagesObjAvail.clear();
  m_PagesResourcesAvail.clear();
}
96
IsDocAvail(DownloadHints * pHints)97 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
98 DownloadHints* pHints) {
99 if (!m_dwFileLen)
100 return kDataError;
101
102 DCHECK(m_SeenPageObjList.empty());
103 AutoRestorer<std::set<uint32_t>> seen_objects_restorer(&m_SeenPageObjList);
104 const HintsScope hints_scope(GetValidator(), pHints);
105 while (!m_bDocAvail) {
106 if (!CheckDocStatus())
107 return kDataNotAvailable;
108 }
109
110 return kDataAvailable;
111 }
112
CheckDocStatus()113 bool CPDF_DataAvail::CheckDocStatus() {
114 switch (m_internalStatus) {
115 case InternalStatus::kHeader:
116 return CheckHeader();
117 case InternalStatus::kFirstPage:
118 return CheckFirstPage();
119 case InternalStatus::kHintTable:
120 return CheckHintTables();
121 case InternalStatus::kLoadAllCrossRef:
122 return CheckAndLoadAllXref();
123 case InternalStatus::kLoadAllFile:
124 return LoadAllFile();
125 case InternalStatus::kRoot:
126 return CheckRoot();
127 case InternalStatus::kInfo:
128 return CheckInfo();
129 case InternalStatus::kPageTree:
130 if (m_bTotalLoadPageTree)
131 return CheckPages();
132 return LoadDocPages();
133 case InternalStatus::kPage:
134 if (m_bTotalLoadPageTree)
135 return CheckPage();
136 m_internalStatus = InternalStatus::kPageLaterLoad;
137 return true;
138 case InternalStatus::kError:
139 return LoadAllFile();
140 case InternalStatus::kPageLaterLoad:
141 m_internalStatus = InternalStatus::kPage;
142 [[fallthrough]];
143 default:
144 m_bDocAvail = true;
145 return true;
146 }
147 }
148
CheckPageStatus()149 bool CPDF_DataAvail::CheckPageStatus() {
150 switch (m_internalStatus) {
151 case InternalStatus::kPageTree:
152 return CheckPages();
153 case InternalStatus::kPage:
154 return CheckPage();
155 case InternalStatus::kError:
156 return LoadAllFile();
157 default:
158 m_bPagesTreeLoad = true;
159 m_bPagesLoad = true;
160 return true;
161 }
162 }
163
LoadAllFile()164 bool CPDF_DataAvail::LoadAllFile() {
165 if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) {
166 m_internalStatus = InternalStatus::kDone;
167 return true;
168 }
169 return false;
170 }
171
CheckAndLoadAllXref()172 bool CPDF_DataAvail::CheckAndLoadAllXref() {
173 if (!m_pCrossRefAvail) {
174 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
175 const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
176 if (GetValidator()->has_read_problems())
177 return false;
178
179 if (last_xref_offset <= 0) {
180 m_internalStatus = InternalStatus::kError;
181 return false;
182 }
183
184 m_pCrossRefAvail = std::make_unique<CPDF_CrossRefAvail>(GetSyntaxParser(),
185 last_xref_offset);
186 }
187
188 switch (m_pCrossRefAvail->CheckAvail()) {
189 case kDataAvailable:
190 break;
191 case kDataNotAvailable:
192 return false;
193 case kDataError:
194 m_internalStatus = InternalStatus::kError;
195 return false;
196 }
197
198 if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) &&
199 !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) {
200 m_internalStatus = InternalStatus::kLoadAllFile;
201 return false;
202 }
203
204 m_internalStatus = InternalStatus::kRoot;
205 return true;
206 }
207
GetObject(uint32_t objnum,bool * pExistInFile)208 RetainPtr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
209 bool* pExistInFile) {
210 *pExistInFile = false;
211 CPDF_Parser* pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser;
212 if (!pParser)
213 return nullptr;
214
215 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
216 RetainPtr<CPDF_Object> pRet = pParser->ParseIndirectObject(objnum);
217 if (!pRet)
218 return nullptr;
219
220 *pExistInFile = true;
221 if (GetValidator()->has_read_problems())
222 return nullptr;
223
224 return pRet;
225 }
226
CheckInfo()227 bool CPDF_DataAvail::CheckInfo() {
228 const uint32_t dwInfoObjNum = m_parser.GetInfoObjNum();
229 if (dwInfoObjNum == CPDF_Object::kInvalidObjNum) {
230 m_internalStatus = InternalStatus::kPageTree;
231 return true;
232 }
233
234 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
235 m_parser.ParseIndirectObject(dwInfoObjNum);
236 if (GetValidator()->has_read_problems())
237 return false;
238
239 m_internalStatus = InternalStatus::kPageTree;
240 return true;
241 }
242
CheckRoot()243 bool CPDF_DataAvail::CheckRoot() {
244 const uint32_t dwRootObjNum = m_parser.GetRootObjNum();
245 if (dwRootObjNum == CPDF_Object::kInvalidObjNum) {
246 m_internalStatus = InternalStatus::kError;
247 return true;
248 }
249
250 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
251 m_pRoot = ToDictionary(m_parser.ParseIndirectObject(dwRootObjNum));
252 if (GetValidator()->has_read_problems())
253 return false;
254
255 if (!m_pRoot) {
256 m_internalStatus = InternalStatus::kError;
257 return false;
258 }
259
260 RetainPtr<const CPDF_Reference> pRef =
261 ToReference(m_pRoot->GetObjectFor("Pages"));
262 if (!pRef) {
263 m_internalStatus = InternalStatus::kError;
264 return false;
265 }
266
267 m_PagesObjNum = pRef->GetRefObjNum();
268 m_internalStatus = InternalStatus::kInfo;
269 return true;
270 }
271
PreparePageItem()272 bool CPDF_DataAvail::PreparePageItem() {
273 const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
274 if (!pRoot) {
275 m_internalStatus = InternalStatus::kError;
276 return false;
277 }
278
279 RetainPtr<const CPDF_Reference> pRef =
280 ToReference(pRoot->GetObjectFor("Pages"));
281 if (!pRef) {
282 m_internalStatus = InternalStatus::kError;
283 return false;
284 }
285
286 m_PagesObjNum = pRef->GetRefObjNum();
287 m_internalStatus = InternalStatus::kPageTree;
288 return true;
289 }
290
IsFirstCheck(uint32_t dwPage)291 bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
292 return m_pageMapCheckState.insert(dwPage).second;
293 }
294
ResetFirstCheck(uint32_t dwPage)295 void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
296 m_pageMapCheckState.erase(dwPage);
297 }
298
CheckPage()299 bool CPDF_DataAvail::CheckPage() {
300 std::vector<uint32_t> UnavailObjList;
301 for (uint32_t dwPageObjNum : m_PageObjList) {
302 bool bExists = false;
303 RetainPtr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
304 if (!pObj) {
305 if (bExists)
306 UnavailObjList.push_back(dwPageObjNum);
307 continue;
308 }
309
310 switch (pObj->GetType()) {
311 case CPDF_Object::kArray: {
312 CPDF_ArrayLocker locker(pObj->AsArray());
313 for (const auto& pArrayObj : locker) {
314 const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
315 if (pRef)
316 UnavailObjList.push_back(pRef->GetRefObjNum());
317 }
318 break;
319 }
320 case CPDF_Object::kDictionary:
321 if (pObj->GetDict()->GetNameFor("Type") == "Pages")
322 m_PagesArray.push_back(std::move(pObj));
323 break;
324 default:
325 break;
326 }
327 }
328 m_PageObjList.clear();
329 if (!UnavailObjList.empty()) {
330 m_PageObjList = std::move(UnavailObjList);
331 return false;
332 }
333 size_t iPages = m_PagesArray.size();
334 for (size_t i = 0; i < iPages; ++i) {
335 RetainPtr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
336 if (pPages && !GetPageKids(pPages.Get())) {
337 m_PagesArray.clear();
338 m_internalStatus = InternalStatus::kError;
339 return false;
340 }
341 }
342 m_PagesArray.clear();
343 if (m_PageObjList.empty())
344 m_internalStatus = InternalStatus::kDone;
345
346 return true;
347 }
348
GetPageKids(CPDF_Object * pPages)349 bool CPDF_DataAvail::GetPageKids(CPDF_Object* pPages) {
350 RetainPtr<const CPDF_Dictionary> pDict = pPages->GetDict();
351 if (!pDict)
352 return true;
353
354 RetainPtr<const CPDF_Object> pKids = pDict->GetObjectFor("Kids");
355 if (!pKids)
356 return true;
357
358 std::vector<uint32_t> object_numbers;
359 switch (pKids->GetType()) {
360 case CPDF_Object::kReference:
361 object_numbers.push_back(pKids->AsReference()->GetRefObjNum());
362 break;
363 case CPDF_Object::kArray: {
364 CPDF_ArrayLocker locker(pKids->AsArray());
365 for (const auto& pArrayObj : locker) {
366 const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
367 if (pRef)
368 object_numbers.push_back(pRef->GetRefObjNum());
369 }
370 break;
371 }
372 default:
373 m_internalStatus = InternalStatus::kError;
374 return false;
375 }
376
377 for (uint32_t num : object_numbers) {
378 bool inserted = m_SeenPageObjList.insert(num).second;
379 if (inserted)
380 m_PageObjList.push_back(num);
381 }
382 return true;
383 }
384
CheckPages()385 bool CPDF_DataAvail::CheckPages() {
386 bool bExists = false;
387 RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
388 if (!bExists) {
389 m_internalStatus = InternalStatus::kLoadAllFile;
390 return true;
391 }
392
393 if (!pPages) {
394 if (m_internalStatus == InternalStatus::kError) {
395 m_internalStatus = InternalStatus::kLoadAllFile;
396 return true;
397 }
398 return false;
399 }
400
401 if (!GetPageKids(pPages.Get())) {
402 m_internalStatus = InternalStatus::kError;
403 return false;
404 }
405
406 m_internalStatus = InternalStatus::kPage;
407 return true;
408 }
409
CheckHeader()410 bool CPDF_DataAvail::CheckHeader() {
411 switch (CheckHeaderAndLinearized()) {
412 case kDataAvailable:
413 m_internalStatus = m_pLinearized ? InternalStatus::kFirstPage
414 : InternalStatus::kLoadAllCrossRef;
415 return true;
416 case kDataNotAvailable:
417 return false;
418 case kDataError:
419 m_internalStatus = InternalStatus::kError;
420 return true;
421 }
422 }
423
CheckFirstPage()424 bool CPDF_DataAvail::CheckFirstPage() {
425 if (!m_pLinearized->GetFirstPageEndOffset() ||
426 !m_pLinearized->GetFileSize() ||
427 !m_pLinearized->GetMainXRefTableFirstEntryOffset()) {
428 m_internalStatus = InternalStatus::kError;
429 return false;
430 }
431
432 uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
433 dwEnd += 512;
434 if ((FX_FILESIZE)dwEnd > m_dwFileLen)
435 dwEnd = (uint32_t)m_dwFileLen;
436
437 const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen;
438 const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0;
439 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
440 data_size))
441 return false;
442
443 m_internalStatus = InternalStatus::kHintTable;
444 return true;
445 }
446
CheckHintTables()447 bool CPDF_DataAvail::CheckHintTables() {
448 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
449 m_pHintTables =
450 CPDF_HintTables::Parse(GetSyntaxParser(), m_pLinearized.get());
451
452 if (GetValidator()->read_error()) {
453 m_internalStatus = InternalStatus::kError;
454 return true;
455 }
456 if (GetValidator()->has_unavailable_data())
457 return false;
458
459 m_internalStatus = InternalStatus::kDone;
460 return true;
461 }
462
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum,CPDF_IndirectObjectHolder * pObjList) const463 RetainPtr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
464 FX_FILESIZE pos,
465 uint32_t objnum,
466 CPDF_IndirectObjectHolder* pObjList) const {
467 const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos();
468 GetSyntaxParser()->SetPos(pos);
469 RetainPtr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject(
470 pObjList, CPDF_SyntaxParser::ParseType::kLoose);
471 GetSyntaxParser()->SetPos(SavedPos);
472 return (result && (!objnum || result->GetObjNum() == objnum))
473 ? std::move(result)
474 : nullptr;
475 }
476
IsLinearizedPDF()477 CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
478 switch (CheckHeaderAndLinearized()) {
479 case kDataAvailable:
480 return m_pLinearized ? kLinearized : kNotLinearized;
481 case kDataNotAvailable:
482 return kLinearizationUnknown;
483 case kDataError:
484 return kNotLinearized;
485 }
486 }
487
CheckHeaderAndLinearized()488 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
489 if (m_bHeaderAvail)
490 return kDataAvailable;
491
492 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
493 const absl::optional<FX_FILESIZE> header_offset =
494 GetHeaderOffset(GetValidator());
495 if (GetValidator()->has_read_problems())
496 return kDataNotAvailable;
497
498 if (!header_offset.has_value())
499 return kDataError;
500
501 m_parser.m_pSyntax = std::make_unique<CPDF_SyntaxParser>(
502 GetValidator(), header_offset.value());
503 m_pLinearized = m_parser.ParseLinearizedHeader();
504 if (GetValidator()->has_read_problems())
505 return kDataNotAvailable;
506
507 m_bHeaderAvail = true;
508 return kDataAvailable;
509 }
510
CheckPage(uint32_t dwPage)511 bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
512 while (true) {
513 switch (m_internalStatus) {
514 case InternalStatus::kPageTree:
515 if (!LoadDocPages())
516 return false;
517 break;
518 case InternalStatus::kPage:
519 if (!LoadDocPage(dwPage))
520 return false;
521 break;
522 case InternalStatus::kError:
523 return LoadAllFile();
524 default:
525 m_bPagesTreeLoad = true;
526 m_bPagesLoad = true;
527 m_bCurPageDictLoadOK = true;
528 m_internalStatus = InternalStatus::kPage;
529 return true;
530 }
531 }
532 }
533
CheckArrayPageNode(uint32_t dwPageNo,PageNode * pPageNode)534 bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
535 PageNode* pPageNode) {
536 bool bExists = false;
537 RetainPtr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists);
538 if (!bExists) {
539 m_internalStatus = InternalStatus::kError;
540 return false;
541 }
542
543 if (!pPages)
544 return false;
545
546 const CPDF_Array* pArray = pPages->AsArray();
547 if (!pArray) {
548 m_internalStatus = InternalStatus::kError;
549 return false;
550 }
551
552 pPageNode->m_type = PageNode::Type::kPages;
553 for (size_t i = 0; i < pArray->size(); ++i) {
554 RetainPtr<const CPDF_Reference> pKid = ToReference(pArray->GetObjectAt(i));
555 if (!pKid)
556 continue;
557
558 auto pNode = std::make_unique<PageNode>();
559 pNode->m_dwPageNo = pKid->GetRefObjNum();
560 pPageNode->m_ChildNodes.push_back(std::move(pNode));
561 }
562 return true;
563 }
564
CheckUnknownPageNode(uint32_t dwPageNo,PageNode * pPageNode)565 bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
566 PageNode* pPageNode) {
567 bool bExists = false;
568 RetainPtr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists);
569 if (!bExists) {
570 m_internalStatus = InternalStatus::kError;
571 return false;
572 }
573
574 if (!pPage)
575 return false;
576
577 if (pPage->IsArray()) {
578 pPageNode->m_dwPageNo = dwPageNo;
579 pPageNode->m_type = PageNode::Type::kArray;
580 return true;
581 }
582
583 if (!pPage->IsDictionary()) {
584 m_internalStatus = InternalStatus::kError;
585 return false;
586 }
587
588 pPageNode->m_dwPageNo = dwPageNo;
589 RetainPtr<CPDF_Dictionary> pDict = pPage->GetMutableDict();
590 const ByteString type = pDict->GetNameFor("Type");
591 if (type == "Page") {
592 pPageNode->m_type = PageNode::Type::kPage;
593 return true;
594 }
595
596 if (type != "Pages") {
597 m_internalStatus = InternalStatus::kError;
598 return false;
599 }
600
601 pPageNode->m_type = PageNode::Type::kPages;
602 RetainPtr<CPDF_Object> pKids = pDict->GetMutableObjectFor("Kids");
603 if (!pKids) {
604 m_internalStatus = InternalStatus::kPage;
605 return true;
606 }
607
608 switch (pKids->GetType()) {
609 case CPDF_Object::kReference: {
610 const CPDF_Reference* pKid = pKids->AsReference();
611 auto pNode = std::make_unique<PageNode>();
612 pNode->m_dwPageNo = pKid->GetRefObjNum();
613 pPageNode->m_ChildNodes.push_back(std::move(pNode));
614 break;
615 }
616 case CPDF_Object::kArray: {
617 const CPDF_Array* pKidsArray = pKids->AsArray();
618 for (size_t i = 0; i < pKidsArray->size(); ++i) {
619 RetainPtr<const CPDF_Reference> pKid =
620 ToReference(pKidsArray->GetObjectAt(i));
621 if (!pKid)
622 continue;
623
624 auto pNode = std::make_unique<PageNode>();
625 pNode->m_dwPageNo = pKid->GetRefObjNum();
626 pPageNode->m_ChildNodes.push_back(std::move(pNode));
627 }
628 break;
629 }
630 default:
631 break;
632 }
633 return true;
634 }
635
CheckPageNode(const CPDF_DataAvail::PageNode & pageNode,int32_t iPage,int32_t & iCount,int level)636 bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
637 int32_t iPage,
638 int32_t& iCount,
639 int level) {
640 if (level >= kMaxPageRecursionDepth)
641 return false;
642
643 int32_t iSize = fxcrt::CollectionSize<int32_t>(pageNode.m_ChildNodes);
644 if (iSize <= 0 || iPage >= iSize) {
645 m_internalStatus = InternalStatus::kError;
646 return false;
647 }
648 for (int32_t i = 0; i < iSize; ++i) {
649 PageNode* pNode = pageNode.m_ChildNodes[i].get();
650 if (!pNode)
651 continue;
652
653 if (pNode->m_type == PageNode::Type::kUnknown) {
654 // Updates the type for the unknown page node.
655 if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode))
656 return false;
657 }
658 if (pNode->m_type == PageNode::Type::kArray) {
659 // Updates a more specific type for the array page node.
660 if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode))
661 return false;
662 }
663 switch (pNode->m_type) {
664 case PageNode::Type::kPage:
665 iCount++;
666 if (iPage == iCount && m_pDocument)
667 m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
668 break;
669 case PageNode::Type::kPages:
670 if (!CheckPageNode(*pNode, iPage, iCount, level + 1))
671 return false;
672 break;
673 case PageNode::Type::kUnknown:
674 case PageNode::Type::kArray:
675 // Already converted above, error if we get here.
676 return false;
677 }
678 if (iPage == iCount) {
679 m_internalStatus = InternalStatus::kDone;
680 return true;
681 }
682 }
683 return true;
684 }
685
LoadDocPage(uint32_t dwPage)686 bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
687 int iPage = pdfium::base::checked_cast<int>(dwPage);
688 if (m_pDocument->GetPageCount() <= iPage ||
689 m_pDocument->IsPageLoaded(iPage)) {
690 m_internalStatus = InternalStatus::kDone;
691 return true;
692 }
693 if (m_PageNode.m_type == PageNode::Type::kPage) {
694 m_internalStatus =
695 iPage == 0 ? InternalStatus::kDone : InternalStatus::kError;
696 return true;
697 }
698 int32_t iCount = -1;
699 return CheckPageNode(m_PageNode, iPage, iCount, 0);
700 }
701
CheckPageCount()702 bool CPDF_DataAvail::CheckPageCount() {
703 bool bExists = false;
704 RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
705 if (!bExists) {
706 m_internalStatus = InternalStatus::kError;
707 return false;
708 }
709 if (!pPages)
710 return false;
711
712 RetainPtr<const CPDF_Dictionary> pPagesDict = pPages->GetDict();
713 if (!pPagesDict) {
714 m_internalStatus = InternalStatus::kError;
715 return false;
716 }
717 if (!pPagesDict->KeyExist("Kids"))
718 return true;
719
720 return pPagesDict->GetIntegerFor("Count") > 0;
721 }
722
LoadDocPages()723 bool CPDF_DataAvail::LoadDocPages() {
724 if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode))
725 return false;
726
727 if (CheckPageCount()) {
728 m_internalStatus = InternalStatus::kPage;
729 return true;
730 }
731
732 m_bTotalLoadPageTree = true;
733 return false;
734 }
735
LoadPages()736 bool CPDF_DataAvail::LoadPages() {
737 while (!m_bPagesTreeLoad) {
738 if (!CheckPageStatus())
739 return false;
740 }
741
742 if (m_bPagesLoad)
743 return true;
744
745 m_pDocument->LoadPages();
746 return false;
747 }
748
CheckLinearizedData()749 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
750 if (m_bLinearedDataOK)
751 return kDataAvailable;
752 DCHECK(m_pLinearized);
753 if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument ||
754 !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) {
755 return kDataError;
756 }
757
758 if (!m_bMainXRefLoadTried) {
759 const FX_SAFE_FILESIZE prev =
760 m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev");
761 const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
762 if (main_xref_offset < 0)
763 return kDataError;
764
765 if (main_xref_offset == 0)
766 return kDataAvailable;
767
768 FX_SAFE_SIZE_T data_size = m_dwFileLen;
769 data_size -= main_xref_offset;
770 if (!data_size.IsValid())
771 return kDataError;
772
773 if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
774 main_xref_offset, data_size.ValueOrDie()))
775 return kDataNotAvailable;
776
777 CPDF_Parser::Error eRet =
778 m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
779 m_bMainXRefLoadTried = true;
780 if (eRet != CPDF_Parser::SUCCESS)
781 return kDataError;
782
783 if (!PreparePageItem())
784 return kDataNotAvailable;
785
786 m_bMainXRefLoadedOK = true;
787 m_bLinearedDataOK = true;
788 }
789
790 return m_bLinearedDataOK ? kDataAvailable : kDataNotAvailable;
791 }
792
IsPageAvail(uint32_t dwPage,DownloadHints * pHints)793 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
794 uint32_t dwPage,
795 DownloadHints* pHints) {
796 if (!m_pDocument)
797 return kDataError;
798
799 const int iPage = pdfium::base::checked_cast<int>(dwPage);
800 if (iPage >= m_pDocument->GetPageCount()) {
801 // This is XFA page.
802 return kDataAvailable;
803 }
804
805 if (IsFirstCheck(dwPage)) {
806 m_bCurPageDictLoadOK = false;
807 }
808
809 if (pdfium::Contains(m_pagesLoadState, dwPage))
810 return kDataAvailable;
811
812 const HintsScope hints_scope(GetValidator(), pHints);
813 if (m_pLinearized) {
814 if (dwPage == m_pLinearized->GetFirstPageNo()) {
815 RetainPtr<const CPDF_Dictionary> pPageDict =
816 m_pDocument->GetPageDictionary(iPage);
817 if (!pPageDict)
818 return kDataError;
819
820 auto page_num_obj =
821 std::make_pair(dwPage, std::make_unique<CPDF_PageObjectAvail>(
822 GetValidator(), m_pDocument, pPageDict));
823
824 CPDF_PageObjectAvail* page_obj_avail =
825 m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
826 // TODO(art-snake): Check resources.
827 return page_obj_avail->CheckAvail();
828 }
829
830 DocAvailStatus nResult = CheckLinearizedData();
831 if (nResult != kDataAvailable)
832 return nResult;
833
834 if (m_pHintTables) {
835 nResult = m_pHintTables->CheckPage(dwPage);
836 if (nResult != kDataAvailable)
837 return nResult;
838 if (GetPageDictionary(dwPage)) {
839 m_pagesLoadState.insert(dwPage);
840 return kDataAvailable;
841 }
842 }
843
844 if (!m_bMainXRefLoadedOK) {
845 if (!LoadAllFile())
846 return kDataNotAvailable;
847 m_pDocument->GetParser()->RebuildCrossRef();
848 ResetFirstCheck(dwPage);
849 return kDataAvailable;
850 }
851 if (m_bTotalLoadPageTree) {
852 if (!LoadPages())
853 return kDataNotAvailable;
854 } else {
855 if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
856 return kDataNotAvailable;
857 }
858 } else {
859 if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
860 return kDataNotAvailable;
861 }
862 }
863
864 if (CheckAcroForm() == kFormNotAvailable)
865 return kDataNotAvailable;
866
867 RetainPtr<CPDF_Dictionary> pPageDict =
868 m_pDocument->GetMutablePageDictionary(iPage);
869 if (!pPageDict)
870 return kDataError;
871
872 {
873 auto page_num_obj =
874 std::make_pair(dwPage, std::make_unique<CPDF_PageObjectAvail>(
875 GetValidator(), m_pDocument, pPageDict));
876 CPDF_PageObjectAvail* page_obj_avail =
877 m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
878 const DocAvailStatus status = page_obj_avail->CheckAvail();
879 if (status != kDataAvailable)
880 return status;
881 }
882
883 const DocAvailStatus resources_status = CheckResources(std::move(pPageDict));
884 if (resources_status != kDataAvailable)
885 return resources_status;
886
887 m_bCurPageDictLoadOK = false;
888 ResetFirstCheck(dwPage);
889 m_pagesLoadState.insert(dwPage);
890 return kDataAvailable;
891 }
892
CheckResources(RetainPtr<CPDF_Dictionary> page)893 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources(
894 RetainPtr<CPDF_Dictionary> page) {
895 DCHECK(page);
896 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
897 RetainPtr<CPDF_Object> resources = GetResourceObject(std::move(page));
898 if (GetValidator()->has_read_problems())
899 return kDataNotAvailable;
900
901 if (!resources)
902 return kDataAvailable;
903
904 CPDF_PageObjectAvail* resource_avail =
905 m_PagesResourcesAvail
906 .insert(std::make_pair(resources,
907 std::make_unique<CPDF_PageObjectAvail>(
908 GetValidator(), m_pDocument, resources)))
909 .first->second.get();
910 return resource_avail->CheckAvail();
911 }
912
// Returns a retained pointer to the read validator stored in m_pFileRead.
RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
  return m_pFileRead;
}
916
GetSyntaxParser() const917 CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
918 return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get()
919 : m_parser.m_pSyntax.get();
920 }
921
GetPageCount() const922 int CPDF_DataAvail::GetPageCount() const {
923 if (m_pLinearized)
924 return m_pLinearized->GetPageCount();
925 return m_pDocument ? m_pDocument->GetPageCount() : 0;
926 }
927
GetPageDictionary(int index) const928 RetainPtr<const CPDF_Dictionary> CPDF_DataAvail::GetPageDictionary(
929 int index) const {
930 if (!m_pDocument || index < 0 || index >= GetPageCount())
931 return nullptr;
932 RetainPtr<const CPDF_Dictionary> page = m_pDocument->GetPageDictionary(index);
933 if (page)
934 return page;
935 if (!m_pLinearized || !m_pHintTables)
936 return nullptr;
937
938 if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
939 return nullptr;
940 FX_FILESIZE szPageStartPos = 0;
941 FX_FILESIZE szPageLength = 0;
942 uint32_t dwObjNum = 0;
943 const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
944 &szPageLength, &dwObjNum);
945 if (!bPagePosGot || !dwObjNum)
946 return nullptr;
947 // We should say to the document, which object is the page.
948 m_pDocument->SetPageObjNum(index, dwObjNum);
949 // Page object already can be parsed in document.
950 if (!m_pDocument->GetIndirectObject(dwObjNum)) {
951 m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
952 dwObjNum, ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument));
953 }
954 if (!ValidatePage(index))
955 return nullptr;
956 return m_pDocument->GetPageDictionary(index);
957 }
958
// Reports the availability of the document's AcroForm data.
// |pHints| is installed on the validator for the duration of the call, and
// the result of CheckAcroForm() is returned.
CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
    DownloadHints* pHints) {
  const HintsScope hints_scope(GetValidator(), pHints);
  return CheckAcroForm();
}
964
CheckAcroForm()965 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() {
966 if (!m_pDocument)
967 return kFormAvailable;
968
969 if (m_pLinearized) {
970 DocAvailStatus nDocStatus = CheckLinearizedData();
971 if (nDocStatus == kDataError)
972 return kFormError;
973 if (nDocStatus == kDataNotAvailable)
974 return kFormNotAvailable;
975 }
976
977 if (!m_pFormAvail) {
978 const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
979 if (!pRoot)
980 return kFormAvailable;
981
982 RetainPtr<const CPDF_Object> pAcroForm = pRoot->GetObjectFor("AcroForm");
983 if (!pAcroForm)
984 return kFormNotExist;
985
986 m_pFormAvail = std::make_unique<CPDF_PageObjectAvail>(
987 GetValidator(), m_pDocument, std::move(pAcroForm));
988 }
989 switch (m_pFormAvail->CheckAvail()) {
990 case kDataError:
991 return kFormError;
992 case kDataNotAvailable:
993 return kFormNotAvailable;
994 case kDataAvailable:
995 return kFormAvailable;
996 }
997 }
998
ValidatePage(uint32_t dwPage) const999 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) const {
1000 int iPage = pdfium::base::checked_cast<int>(dwPage);
1001 RetainPtr<const CPDF_Dictionary> pPageDict =
1002 m_pDocument->GetPageDictionary(iPage);
1003 if (!pPageDict)
1004 return false;
1005
1006 CPDF_PageObjectAvail obj_avail(GetValidator(), m_pDocument,
1007 std::move(pPageDict));
1008 return obj_avail.CheckAvail() == kDataAvailable;
1009 }
1010
1011 std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>>
ParseDocument(std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,std::unique_ptr<CPDF_Document::PageDataIface> pPageData,const ByteString & password)1012 CPDF_DataAvail::ParseDocument(
1013 std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,
1014 std::unique_ptr<CPDF_Document::PageDataIface> pPageData,
1015 const ByteString& password) {
1016 if (m_pDocument) {
1017 // We already returned parsed document.
1018 return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1019 }
1020 auto document = std::make_unique<CPDF_Document>(std::move(pRenderData),
1021 std::move(pPageData));
1022 document->AddObserver(this);
1023
1024 CPDF_ReadValidator::ScopedSession read_session(GetValidator());
1025 CPDF_Parser::Error error =
1026 document->LoadLinearizedDoc(GetValidator(), password);
1027
1028 // Additional check, that all ok.
1029 if (GetValidator()->has_read_problems()) {
1030 NOTREACHED();
1031 return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1032 }
1033
1034 if (error != CPDF_Parser::SUCCESS)
1035 return std::make_pair(error, nullptr);
1036
1037 m_pDocument = document.get();
1038 return std::make_pair(CPDF_Parser::SUCCESS, std::move(document));
1039 }
1040
// Out-of-line defaulted constructor for CPDF_DataAvail::PageNode.
CPDF_DataAvail::PageNode::PageNode() = default;

// Out-of-line defaulted destructor for CPDF_DataAvail::PageNode.
CPDF_DataAvail::PageNode::~PageNode() = default;
1044