xref: /aosp_15_r20/external/pdfium/core/fpdfapi/parser/cpdf_data_avail.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_data_avail.h"
8 
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12 
13 #include "core/fpdfapi/parser/cpdf_array.h"
14 #include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
18 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
19 #include "core/fpdfapi/parser/cpdf_name.h"
20 #include "core/fpdfapi/parser/cpdf_number.h"
21 #include "core/fpdfapi/parser/cpdf_page_object_avail.h"
22 #include "core/fpdfapi/parser/cpdf_read_validator.h"
23 #include "core/fpdfapi/parser/cpdf_reference.h"
24 #include "core/fpdfapi/parser/cpdf_stream.h"
25 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27 #include "core/fxcrt/autorestorer.h"
28 #include "core/fxcrt/fx_extension.h"
29 #include "core/fxcrt/fx_safe_types.h"
30 #include "core/fxcrt/stl_util.h"
31 #include "third_party/base/check.h"
32 #include "third_party/base/containers/contains.h"
33 #include "third_party/base/notreached.h"
34 #include "third_party/base/numerics/safe_conversions.h"
35 
36 namespace {
37 
GetResourceObject(RetainPtr<CPDF_Dictionary> pDict)38 RetainPtr<CPDF_Object> GetResourceObject(RetainPtr<CPDF_Dictionary> pDict) {
39   constexpr size_t kMaxHierarchyDepth = 64;
40   size_t depth = 0;
41 
42   while (pDict) {
43     RetainPtr<CPDF_Object> result = pDict->GetMutableObjectFor("Resources");
44     if (result)
45       return result;
46     if (++depth > kMaxHierarchyDepth) {
47       // We have cycle in parents hierarchy.
48       return nullptr;
49     }
50     RetainPtr<CPDF_Object> parent = pDict->GetMutableObjectFor("Parent");
51     pDict = parent ? parent->GetMutableDict() : nullptr;
52   }
53   return nullptr;
54 }
55 
56 class HintsScope {
57  public:
HintsScope(RetainPtr<CPDF_ReadValidator> validator,CPDF_DataAvail::DownloadHints * hints)58   HintsScope(RetainPtr<CPDF_ReadValidator> validator,
59              CPDF_DataAvail::DownloadHints* hints)
60       : validator_(std::move(validator)) {
61     DCHECK(validator_);
62     validator_->SetDownloadHints(hints);
63   }
64 
~HintsScope()65   ~HintsScope() { validator_->SetDownloadHints(nullptr); }
66 
67  private:
68   RetainPtr<CPDF_ReadValidator> validator_;
69 };
70 
71 }  // namespace
72 
73 CPDF_DataAvail::FileAvail::~FileAvail() = default;
74 
75 CPDF_DataAvail::DownloadHints::~DownloadHints() = default;
76 
CPDF_DataAvail(FileAvail * pFileAvail,RetainPtr<IFX_SeekableReadStream> pFileRead)77 CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail,
78                                RetainPtr<IFX_SeekableReadStream> pFileRead)
79     : m_pFileRead(pdfium::MakeRetain<CPDF_ReadValidator>(std::move(pFileRead),
80                                                          pFileAvail)),
81       m_dwFileLen(m_pFileRead->GetSize()) {}
82 
~CPDF_DataAvail()83 CPDF_DataAvail::~CPDF_DataAvail() {
84   m_pHintTables.reset();
85   if (m_pDocument)
86     m_pDocument->RemoveObserver(this);
87 }
88 
OnObservableDestroyed()89 void CPDF_DataAvail::OnObservableDestroyed() {
90   m_pDocument = nullptr;
91   m_pFormAvail.reset();
92   m_PagesArray.clear();
93   m_PagesObjAvail.clear();
94   m_PagesResourcesAvail.clear();
95 }
96 
IsDocAvail(DownloadHints * pHints)97 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
98     DownloadHints* pHints) {
99   if (!m_dwFileLen)
100     return kDataError;
101 
102   DCHECK(m_SeenPageObjList.empty());
103   AutoRestorer<std::set<uint32_t>> seen_objects_restorer(&m_SeenPageObjList);
104   const HintsScope hints_scope(GetValidator(), pHints);
105   while (!m_bDocAvail) {
106     if (!CheckDocStatus())
107       return kDataNotAvailable;
108   }
109 
110   return kDataAvailable;
111 }
112 
CheckDocStatus()113 bool CPDF_DataAvail::CheckDocStatus() {
114   switch (m_internalStatus) {
115     case InternalStatus::kHeader:
116       return CheckHeader();
117     case InternalStatus::kFirstPage:
118       return CheckFirstPage();
119     case InternalStatus::kHintTable:
120       return CheckHintTables();
121     case InternalStatus::kLoadAllCrossRef:
122       return CheckAndLoadAllXref();
123     case InternalStatus::kLoadAllFile:
124       return LoadAllFile();
125     case InternalStatus::kRoot:
126       return CheckRoot();
127     case InternalStatus::kInfo:
128       return CheckInfo();
129     case InternalStatus::kPageTree:
130       if (m_bTotalLoadPageTree)
131         return CheckPages();
132       return LoadDocPages();
133     case InternalStatus::kPage:
134       if (m_bTotalLoadPageTree)
135         return CheckPage();
136       m_internalStatus = InternalStatus::kPageLaterLoad;
137       return true;
138     case InternalStatus::kError:
139       return LoadAllFile();
140     case InternalStatus::kPageLaterLoad:
141       m_internalStatus = InternalStatus::kPage;
142       [[fallthrough]];
143     default:
144       m_bDocAvail = true;
145       return true;
146   }
147 }
148 
CheckPageStatus()149 bool CPDF_DataAvail::CheckPageStatus() {
150   switch (m_internalStatus) {
151     case InternalStatus::kPageTree:
152       return CheckPages();
153     case InternalStatus::kPage:
154       return CheckPage();
155     case InternalStatus::kError:
156       return LoadAllFile();
157     default:
158       m_bPagesTreeLoad = true;
159       m_bPagesLoad = true;
160       return true;
161   }
162 }
163 
LoadAllFile()164 bool CPDF_DataAvail::LoadAllFile() {
165   if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) {
166     m_internalStatus = InternalStatus::kDone;
167     return true;
168   }
169   return false;
170 }
171 
CheckAndLoadAllXref()172 bool CPDF_DataAvail::CheckAndLoadAllXref() {
173   if (!m_pCrossRefAvail) {
174     CPDF_ReadValidator::ScopedSession read_session(GetValidator());
175     const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
176     if (GetValidator()->has_read_problems())
177       return false;
178 
179     if (last_xref_offset <= 0) {
180       m_internalStatus = InternalStatus::kError;
181       return false;
182     }
183 
184     m_pCrossRefAvail = std::make_unique<CPDF_CrossRefAvail>(GetSyntaxParser(),
185                                                             last_xref_offset);
186   }
187 
188   switch (m_pCrossRefAvail->CheckAvail()) {
189     case kDataAvailable:
190       break;
191     case kDataNotAvailable:
192       return false;
193     case kDataError:
194       m_internalStatus = InternalStatus::kError;
195       return false;
196   }
197 
198   if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) &&
199       !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) {
200     m_internalStatus = InternalStatus::kLoadAllFile;
201     return false;
202   }
203 
204   m_internalStatus = InternalStatus::kRoot;
205   return true;
206 }
207 
GetObject(uint32_t objnum,bool * pExistInFile)208 RetainPtr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
209                                                  bool* pExistInFile) {
210   *pExistInFile = false;
211   CPDF_Parser* pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser;
212   if (!pParser)
213     return nullptr;
214 
215   CPDF_ReadValidator::ScopedSession read_session(GetValidator());
216   RetainPtr<CPDF_Object> pRet = pParser->ParseIndirectObject(objnum);
217   if (!pRet)
218     return nullptr;
219 
220   *pExistInFile = true;
221   if (GetValidator()->has_read_problems())
222     return nullptr;
223 
224   return pRet;
225 }
226 
CheckInfo()227 bool CPDF_DataAvail::CheckInfo() {
228   const uint32_t dwInfoObjNum = m_parser.GetInfoObjNum();
229   if (dwInfoObjNum == CPDF_Object::kInvalidObjNum) {
230     m_internalStatus = InternalStatus::kPageTree;
231     return true;
232   }
233 
234   CPDF_ReadValidator::ScopedSession read_session(GetValidator());
235   m_parser.ParseIndirectObject(dwInfoObjNum);
236   if (GetValidator()->has_read_problems())
237     return false;
238 
239   m_internalStatus = InternalStatus::kPageTree;
240   return true;
241 }
242 
CheckRoot()243 bool CPDF_DataAvail::CheckRoot() {
244   const uint32_t dwRootObjNum = m_parser.GetRootObjNum();
245   if (dwRootObjNum == CPDF_Object::kInvalidObjNum) {
246     m_internalStatus = InternalStatus::kError;
247     return true;
248   }
249 
250   CPDF_ReadValidator::ScopedSession read_session(GetValidator());
251   m_pRoot = ToDictionary(m_parser.ParseIndirectObject(dwRootObjNum));
252   if (GetValidator()->has_read_problems())
253     return false;
254 
255   if (!m_pRoot) {
256     m_internalStatus = InternalStatus::kError;
257     return false;
258   }
259 
260   RetainPtr<const CPDF_Reference> pRef =
261       ToReference(m_pRoot->GetObjectFor("Pages"));
262   if (!pRef) {
263     m_internalStatus = InternalStatus::kError;
264     return false;
265   }
266 
267   m_PagesObjNum = pRef->GetRefObjNum();
268   m_internalStatus = InternalStatus::kInfo;
269   return true;
270 }
271 
PreparePageItem()272 bool CPDF_DataAvail::PreparePageItem() {
273   const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
274   if (!pRoot) {
275     m_internalStatus = InternalStatus::kError;
276     return false;
277   }
278 
279   RetainPtr<const CPDF_Reference> pRef =
280       ToReference(pRoot->GetObjectFor("Pages"));
281   if (!pRef) {
282     m_internalStatus = InternalStatus::kError;
283     return false;
284   }
285 
286   m_PagesObjNum = pRef->GetRefObjNum();
287   m_internalStatus = InternalStatus::kPageTree;
288   return true;
289 }
290 
IsFirstCheck(uint32_t dwPage)291 bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
292   return m_pageMapCheckState.insert(dwPage).second;
293 }
294 
ResetFirstCheck(uint32_t dwPage)295 void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
296   m_pageMapCheckState.erase(dwPage);
297 }
298 
CheckPage()299 bool CPDF_DataAvail::CheckPage() {
300   std::vector<uint32_t> UnavailObjList;
301   for (uint32_t dwPageObjNum : m_PageObjList) {
302     bool bExists = false;
303     RetainPtr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
304     if (!pObj) {
305       if (bExists)
306         UnavailObjList.push_back(dwPageObjNum);
307       continue;
308     }
309 
310     switch (pObj->GetType()) {
311       case CPDF_Object::kArray: {
312         CPDF_ArrayLocker locker(pObj->AsArray());
313         for (const auto& pArrayObj : locker) {
314           const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
315           if (pRef)
316             UnavailObjList.push_back(pRef->GetRefObjNum());
317         }
318         break;
319       }
320       case CPDF_Object::kDictionary:
321         if (pObj->GetDict()->GetNameFor("Type") == "Pages")
322           m_PagesArray.push_back(std::move(pObj));
323         break;
324       default:
325         break;
326     }
327   }
328   m_PageObjList.clear();
329   if (!UnavailObjList.empty()) {
330     m_PageObjList = std::move(UnavailObjList);
331     return false;
332   }
333   size_t iPages = m_PagesArray.size();
334   for (size_t i = 0; i < iPages; ++i) {
335     RetainPtr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
336     if (pPages && !GetPageKids(pPages.Get())) {
337       m_PagesArray.clear();
338       m_internalStatus = InternalStatus::kError;
339       return false;
340     }
341   }
342   m_PagesArray.clear();
343   if (m_PageObjList.empty())
344     m_internalStatus = InternalStatus::kDone;
345 
346   return true;
347 }
348 
GetPageKids(CPDF_Object * pPages)349 bool CPDF_DataAvail::GetPageKids(CPDF_Object* pPages) {
350   RetainPtr<const CPDF_Dictionary> pDict = pPages->GetDict();
351   if (!pDict)
352     return true;
353 
354   RetainPtr<const CPDF_Object> pKids = pDict->GetObjectFor("Kids");
355   if (!pKids)
356     return true;
357 
358   std::vector<uint32_t> object_numbers;
359   switch (pKids->GetType()) {
360     case CPDF_Object::kReference:
361       object_numbers.push_back(pKids->AsReference()->GetRefObjNum());
362       break;
363     case CPDF_Object::kArray: {
364       CPDF_ArrayLocker locker(pKids->AsArray());
365       for (const auto& pArrayObj : locker) {
366         const CPDF_Reference* pRef = ToReference(pArrayObj.Get());
367         if (pRef)
368           object_numbers.push_back(pRef->GetRefObjNum());
369       }
370       break;
371     }
372     default:
373       m_internalStatus = InternalStatus::kError;
374       return false;
375   }
376 
377   for (uint32_t num : object_numbers) {
378     bool inserted = m_SeenPageObjList.insert(num).second;
379     if (inserted)
380       m_PageObjList.push_back(num);
381   }
382   return true;
383 }
384 
CheckPages()385 bool CPDF_DataAvail::CheckPages() {
386   bool bExists = false;
387   RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
388   if (!bExists) {
389     m_internalStatus = InternalStatus::kLoadAllFile;
390     return true;
391   }
392 
393   if (!pPages) {
394     if (m_internalStatus == InternalStatus::kError) {
395       m_internalStatus = InternalStatus::kLoadAllFile;
396       return true;
397     }
398     return false;
399   }
400 
401   if (!GetPageKids(pPages.Get())) {
402     m_internalStatus = InternalStatus::kError;
403     return false;
404   }
405 
406   m_internalStatus = InternalStatus::kPage;
407   return true;
408 }
409 
CheckHeader()410 bool CPDF_DataAvail::CheckHeader() {
411   switch (CheckHeaderAndLinearized()) {
412     case kDataAvailable:
413       m_internalStatus = m_pLinearized ? InternalStatus::kFirstPage
414                                        : InternalStatus::kLoadAllCrossRef;
415       return true;
416     case kDataNotAvailable:
417       return false;
418     case kDataError:
419       m_internalStatus = InternalStatus::kError;
420       return true;
421   }
422 }
423 
CheckFirstPage()424 bool CPDF_DataAvail::CheckFirstPage() {
425   if (!m_pLinearized->GetFirstPageEndOffset() ||
426       !m_pLinearized->GetFileSize() ||
427       !m_pLinearized->GetMainXRefTableFirstEntryOffset()) {
428     m_internalStatus = InternalStatus::kError;
429     return false;
430   }
431 
432   uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
433   dwEnd += 512;
434   if ((FX_FILESIZE)dwEnd > m_dwFileLen)
435     dwEnd = (uint32_t)m_dwFileLen;
436 
437   const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen;
438   const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0;
439   if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
440                                                              data_size))
441     return false;
442 
443   m_internalStatus = InternalStatus::kHintTable;
444   return true;
445 }
446 
CheckHintTables()447 bool CPDF_DataAvail::CheckHintTables() {
448   CPDF_ReadValidator::ScopedSession read_session(GetValidator());
449   m_pHintTables =
450       CPDF_HintTables::Parse(GetSyntaxParser(), m_pLinearized.get());
451 
452   if (GetValidator()->read_error()) {
453     m_internalStatus = InternalStatus::kError;
454     return true;
455   }
456   if (GetValidator()->has_unavailable_data())
457     return false;
458 
459   m_internalStatus = InternalStatus::kDone;
460   return true;
461 }
462 
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum,CPDF_IndirectObjectHolder * pObjList) const463 RetainPtr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
464     FX_FILESIZE pos,
465     uint32_t objnum,
466     CPDF_IndirectObjectHolder* pObjList) const {
467   const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos();
468   GetSyntaxParser()->SetPos(pos);
469   RetainPtr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject(
470       pObjList, CPDF_SyntaxParser::ParseType::kLoose);
471   GetSyntaxParser()->SetPos(SavedPos);
472   return (result && (!objnum || result->GetObjNum() == objnum))
473              ? std::move(result)
474              : nullptr;
475 }
476 
IsLinearizedPDF()477 CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
478   switch (CheckHeaderAndLinearized()) {
479     case kDataAvailable:
480       return m_pLinearized ? kLinearized : kNotLinearized;
481     case kDataNotAvailable:
482       return kLinearizationUnknown;
483     case kDataError:
484       return kNotLinearized;
485   }
486 }
487 
CheckHeaderAndLinearized()488 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
489   if (m_bHeaderAvail)
490     return kDataAvailable;
491 
492   CPDF_ReadValidator::ScopedSession read_session(GetValidator());
493   const absl::optional<FX_FILESIZE> header_offset =
494       GetHeaderOffset(GetValidator());
495   if (GetValidator()->has_read_problems())
496     return kDataNotAvailable;
497 
498   if (!header_offset.has_value())
499     return kDataError;
500 
501   m_parser.m_pSyntax = std::make_unique<CPDF_SyntaxParser>(
502       GetValidator(), header_offset.value());
503   m_pLinearized = m_parser.ParseLinearizedHeader();
504   if (GetValidator()->has_read_problems())
505     return kDataNotAvailable;
506 
507   m_bHeaderAvail = true;
508   return kDataAvailable;
509 }
510 
CheckPage(uint32_t dwPage)511 bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
512   while (true) {
513     switch (m_internalStatus) {
514       case InternalStatus::kPageTree:
515         if (!LoadDocPages())
516           return false;
517         break;
518       case InternalStatus::kPage:
519         if (!LoadDocPage(dwPage))
520           return false;
521         break;
522       case InternalStatus::kError:
523         return LoadAllFile();
524       default:
525         m_bPagesTreeLoad = true;
526         m_bPagesLoad = true;
527         m_bCurPageDictLoadOK = true;
528         m_internalStatus = InternalStatus::kPage;
529         return true;
530     }
531   }
532 }
533 
CheckArrayPageNode(uint32_t dwPageNo,PageNode * pPageNode)534 bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
535                                         PageNode* pPageNode) {
536   bool bExists = false;
537   RetainPtr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists);
538   if (!bExists) {
539     m_internalStatus = InternalStatus::kError;
540     return false;
541   }
542 
543   if (!pPages)
544     return false;
545 
546   const CPDF_Array* pArray = pPages->AsArray();
547   if (!pArray) {
548     m_internalStatus = InternalStatus::kError;
549     return false;
550   }
551 
552   pPageNode->m_type = PageNode::Type::kPages;
553   for (size_t i = 0; i < pArray->size(); ++i) {
554     RetainPtr<const CPDF_Reference> pKid = ToReference(pArray->GetObjectAt(i));
555     if (!pKid)
556       continue;
557 
558     auto pNode = std::make_unique<PageNode>();
559     pNode->m_dwPageNo = pKid->GetRefObjNum();
560     pPageNode->m_ChildNodes.push_back(std::move(pNode));
561   }
562   return true;
563 }
564 
CheckUnknownPageNode(uint32_t dwPageNo,PageNode * pPageNode)565 bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
566                                           PageNode* pPageNode) {
567   bool bExists = false;
568   RetainPtr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists);
569   if (!bExists) {
570     m_internalStatus = InternalStatus::kError;
571     return false;
572   }
573 
574   if (!pPage)
575     return false;
576 
577   if (pPage->IsArray()) {
578     pPageNode->m_dwPageNo = dwPageNo;
579     pPageNode->m_type = PageNode::Type::kArray;
580     return true;
581   }
582 
583   if (!pPage->IsDictionary()) {
584     m_internalStatus = InternalStatus::kError;
585     return false;
586   }
587 
588   pPageNode->m_dwPageNo = dwPageNo;
589   RetainPtr<CPDF_Dictionary> pDict = pPage->GetMutableDict();
590   const ByteString type = pDict->GetNameFor("Type");
591   if (type == "Page") {
592     pPageNode->m_type = PageNode::Type::kPage;
593     return true;
594   }
595 
596   if (type != "Pages") {
597     m_internalStatus = InternalStatus::kError;
598     return false;
599   }
600 
601   pPageNode->m_type = PageNode::Type::kPages;
602   RetainPtr<CPDF_Object> pKids = pDict->GetMutableObjectFor("Kids");
603   if (!pKids) {
604     m_internalStatus = InternalStatus::kPage;
605     return true;
606   }
607 
608   switch (pKids->GetType()) {
609     case CPDF_Object::kReference: {
610       const CPDF_Reference* pKid = pKids->AsReference();
611       auto pNode = std::make_unique<PageNode>();
612       pNode->m_dwPageNo = pKid->GetRefObjNum();
613       pPageNode->m_ChildNodes.push_back(std::move(pNode));
614       break;
615     }
616     case CPDF_Object::kArray: {
617       const CPDF_Array* pKidsArray = pKids->AsArray();
618       for (size_t i = 0; i < pKidsArray->size(); ++i) {
619         RetainPtr<const CPDF_Reference> pKid =
620             ToReference(pKidsArray->GetObjectAt(i));
621         if (!pKid)
622           continue;
623 
624         auto pNode = std::make_unique<PageNode>();
625         pNode->m_dwPageNo = pKid->GetRefObjNum();
626         pPageNode->m_ChildNodes.push_back(std::move(pNode));
627       }
628       break;
629     }
630     default:
631       break;
632   }
633   return true;
634 }
635 
CheckPageNode(const CPDF_DataAvail::PageNode & pageNode,int32_t iPage,int32_t & iCount,int level)636 bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
637                                    int32_t iPage,
638                                    int32_t& iCount,
639                                    int level) {
640   if (level >= kMaxPageRecursionDepth)
641     return false;
642 
643   int32_t iSize = fxcrt::CollectionSize<int32_t>(pageNode.m_ChildNodes);
644   if (iSize <= 0 || iPage >= iSize) {
645     m_internalStatus = InternalStatus::kError;
646     return false;
647   }
648   for (int32_t i = 0; i < iSize; ++i) {
649     PageNode* pNode = pageNode.m_ChildNodes[i].get();
650     if (!pNode)
651       continue;
652 
653     if (pNode->m_type == PageNode::Type::kUnknown) {
654       // Updates the type for the unknown page node.
655       if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode))
656         return false;
657     }
658     if (pNode->m_type == PageNode::Type::kArray) {
659       // Updates a more specific type for the array page node.
660       if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode))
661         return false;
662     }
663     switch (pNode->m_type) {
664       case PageNode::Type::kPage:
665         iCount++;
666         if (iPage == iCount && m_pDocument)
667           m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
668         break;
669       case PageNode::Type::kPages:
670         if (!CheckPageNode(*pNode, iPage, iCount, level + 1))
671           return false;
672         break;
673       case PageNode::Type::kUnknown:
674       case PageNode::Type::kArray:
675         // Already converted above, error if we get here.
676         return false;
677     }
678     if (iPage == iCount) {
679       m_internalStatus = InternalStatus::kDone;
680       return true;
681     }
682   }
683   return true;
684 }
685 
LoadDocPage(uint32_t dwPage)686 bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
687   int iPage = pdfium::base::checked_cast<int>(dwPage);
688   if (m_pDocument->GetPageCount() <= iPage ||
689       m_pDocument->IsPageLoaded(iPage)) {
690     m_internalStatus = InternalStatus::kDone;
691     return true;
692   }
693   if (m_PageNode.m_type == PageNode::Type::kPage) {
694     m_internalStatus =
695         iPage == 0 ? InternalStatus::kDone : InternalStatus::kError;
696     return true;
697   }
698   int32_t iCount = -1;
699   return CheckPageNode(m_PageNode, iPage, iCount, 0);
700 }
701 
CheckPageCount()702 bool CPDF_DataAvail::CheckPageCount() {
703   bool bExists = false;
704   RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
705   if (!bExists) {
706     m_internalStatus = InternalStatus::kError;
707     return false;
708   }
709   if (!pPages)
710     return false;
711 
712   RetainPtr<const CPDF_Dictionary> pPagesDict = pPages->GetDict();
713   if (!pPagesDict) {
714     m_internalStatus = InternalStatus::kError;
715     return false;
716   }
717   if (!pPagesDict->KeyExist("Kids"))
718     return true;
719 
720   return pPagesDict->GetIntegerFor("Count") > 0;
721 }
722 
LoadDocPages()723 bool CPDF_DataAvail::LoadDocPages() {
724   if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode))
725     return false;
726 
727   if (CheckPageCount()) {
728     m_internalStatus = InternalStatus::kPage;
729     return true;
730   }
731 
732   m_bTotalLoadPageTree = true;
733   return false;
734 }
735 
LoadPages()736 bool CPDF_DataAvail::LoadPages() {
737   while (!m_bPagesTreeLoad) {
738     if (!CheckPageStatus())
739       return false;
740   }
741 
742   if (m_bPagesLoad)
743     return true;
744 
745   m_pDocument->LoadPages();
746   return false;
747 }
748 
CheckLinearizedData()749 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
750   if (m_bLinearedDataOK)
751     return kDataAvailable;
752   DCHECK(m_pLinearized);
753   if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument ||
754       !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) {
755     return kDataError;
756   }
757 
758   if (!m_bMainXRefLoadTried) {
759     const FX_SAFE_FILESIZE prev =
760         m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev");
761     const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
762     if (main_xref_offset < 0)
763       return kDataError;
764 
765     if (main_xref_offset == 0)
766       return kDataAvailable;
767 
768     FX_SAFE_SIZE_T data_size = m_dwFileLen;
769     data_size -= main_xref_offset;
770     if (!data_size.IsValid())
771       return kDataError;
772 
773     if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
774             main_xref_offset, data_size.ValueOrDie()))
775       return kDataNotAvailable;
776 
777     CPDF_Parser::Error eRet =
778         m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
779     m_bMainXRefLoadTried = true;
780     if (eRet != CPDF_Parser::SUCCESS)
781       return kDataError;
782 
783     if (!PreparePageItem())
784       return kDataNotAvailable;
785 
786     m_bMainXRefLoadedOK = true;
787     m_bLinearedDataOK = true;
788   }
789 
790   return m_bLinearedDataOK ? kDataAvailable : kDataNotAvailable;
791 }
792 
IsPageAvail(uint32_t dwPage,DownloadHints * pHints)793 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
794     uint32_t dwPage,
795     DownloadHints* pHints) {
796   if (!m_pDocument)
797     return kDataError;
798 
799   const int iPage = pdfium::base::checked_cast<int>(dwPage);
800   if (iPage >= m_pDocument->GetPageCount()) {
801     // This is XFA page.
802     return kDataAvailable;
803   }
804 
805   if (IsFirstCheck(dwPage)) {
806     m_bCurPageDictLoadOK = false;
807   }
808 
809   if (pdfium::Contains(m_pagesLoadState, dwPage))
810     return kDataAvailable;
811 
812   const HintsScope hints_scope(GetValidator(), pHints);
813   if (m_pLinearized) {
814     if (dwPage == m_pLinearized->GetFirstPageNo()) {
815       RetainPtr<const CPDF_Dictionary> pPageDict =
816           m_pDocument->GetPageDictionary(iPage);
817       if (!pPageDict)
818         return kDataError;
819 
820       auto page_num_obj =
821           std::make_pair(dwPage, std::make_unique<CPDF_PageObjectAvail>(
822                                      GetValidator(), m_pDocument, pPageDict));
823 
824       CPDF_PageObjectAvail* page_obj_avail =
825           m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
826       // TODO(art-snake): Check resources.
827       return page_obj_avail->CheckAvail();
828     }
829 
830     DocAvailStatus nResult = CheckLinearizedData();
831     if (nResult != kDataAvailable)
832       return nResult;
833 
834     if (m_pHintTables) {
835       nResult = m_pHintTables->CheckPage(dwPage);
836       if (nResult != kDataAvailable)
837         return nResult;
838       if (GetPageDictionary(dwPage)) {
839         m_pagesLoadState.insert(dwPage);
840         return kDataAvailable;
841       }
842     }
843 
844     if (!m_bMainXRefLoadedOK) {
845       if (!LoadAllFile())
846         return kDataNotAvailable;
847       m_pDocument->GetParser()->RebuildCrossRef();
848       ResetFirstCheck(dwPage);
849       return kDataAvailable;
850     }
851     if (m_bTotalLoadPageTree) {
852       if (!LoadPages())
853         return kDataNotAvailable;
854     } else {
855       if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
856         return kDataNotAvailable;
857     }
858   } else {
859     if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
860       return kDataNotAvailable;
861     }
862   }
863 
864   if (CheckAcroForm() == kFormNotAvailable)
865     return kDataNotAvailable;
866 
867   RetainPtr<CPDF_Dictionary> pPageDict =
868       m_pDocument->GetMutablePageDictionary(iPage);
869   if (!pPageDict)
870     return kDataError;
871 
872   {
873     auto page_num_obj =
874         std::make_pair(dwPage, std::make_unique<CPDF_PageObjectAvail>(
875                                    GetValidator(), m_pDocument, pPageDict));
876     CPDF_PageObjectAvail* page_obj_avail =
877         m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
878     const DocAvailStatus status = page_obj_avail->CheckAvail();
879     if (status != kDataAvailable)
880       return status;
881   }
882 
883   const DocAvailStatus resources_status = CheckResources(std::move(pPageDict));
884   if (resources_status != kDataAvailable)
885     return resources_status;
886 
887   m_bCurPageDictLoadOK = false;
888   ResetFirstCheck(dwPage);
889   m_pagesLoadState.insert(dwPage);
890   return kDataAvailable;
891 }
892 
CheckResources(RetainPtr<CPDF_Dictionary> page)893 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources(
894     RetainPtr<CPDF_Dictionary> page) {
895   DCHECK(page);
896   CPDF_ReadValidator::ScopedSession read_session(GetValidator());
897   RetainPtr<CPDF_Object> resources = GetResourceObject(std::move(page));
898   if (GetValidator()->has_read_problems())
899     return kDataNotAvailable;
900 
901   if (!resources)
902     return kDataAvailable;
903 
904   CPDF_PageObjectAvail* resource_avail =
905       m_PagesResourcesAvail
906           .insert(std::make_pair(resources,
907                                  std::make_unique<CPDF_PageObjectAvail>(
908                                      GetValidator(), m_pDocument, resources)))
909           .first->second.get();
910   return resource_avail->CheckAvail();
911 }
912 
GetValidator() const913 RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
914   return m_pFileRead;
915 }
916 
GetSyntaxParser() const917 CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
918   return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get()
919                      : m_parser.m_pSyntax.get();
920 }
921 
GetPageCount() const922 int CPDF_DataAvail::GetPageCount() const {
923   if (m_pLinearized)
924     return m_pLinearized->GetPageCount();
925   return m_pDocument ? m_pDocument->GetPageCount() : 0;
926 }
927 
GetPageDictionary(int index) const928 RetainPtr<const CPDF_Dictionary> CPDF_DataAvail::GetPageDictionary(
929     int index) const {
930   if (!m_pDocument || index < 0 || index >= GetPageCount())
931     return nullptr;
932   RetainPtr<const CPDF_Dictionary> page = m_pDocument->GetPageDictionary(index);
933   if (page)
934     return page;
935   if (!m_pLinearized || !m_pHintTables)
936     return nullptr;
937 
938   if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
939     return nullptr;
940   FX_FILESIZE szPageStartPos = 0;
941   FX_FILESIZE szPageLength = 0;
942   uint32_t dwObjNum = 0;
943   const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
944                                                      &szPageLength, &dwObjNum);
945   if (!bPagePosGot || !dwObjNum)
946     return nullptr;
947   // We should say to the document, which object is the page.
948   m_pDocument->SetPageObjNum(index, dwObjNum);
949   // Page object already can be parsed in document.
950   if (!m_pDocument->GetIndirectObject(dwObjNum)) {
951     m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
952         dwObjNum, ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument));
953   }
954   if (!ValidatePage(index))
955     return nullptr;
956   return m_pDocument->GetPageDictionary(index);
957 }
958 
IsFormAvail(DownloadHints * pHints)959 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
960     DownloadHints* pHints) {
961   const HintsScope hints_scope(GetValidator(), pHints);
962   return CheckAcroForm();
963 }
964 
CheckAcroForm()965 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() {
966   if (!m_pDocument)
967     return kFormAvailable;
968 
969   if (m_pLinearized) {
970     DocAvailStatus nDocStatus = CheckLinearizedData();
971     if (nDocStatus == kDataError)
972       return kFormError;
973     if (nDocStatus == kDataNotAvailable)
974       return kFormNotAvailable;
975   }
976 
977   if (!m_pFormAvail) {
978     const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
979     if (!pRoot)
980       return kFormAvailable;
981 
982     RetainPtr<const CPDF_Object> pAcroForm = pRoot->GetObjectFor("AcroForm");
983     if (!pAcroForm)
984       return kFormNotExist;
985 
986     m_pFormAvail = std::make_unique<CPDF_PageObjectAvail>(
987         GetValidator(), m_pDocument, std::move(pAcroForm));
988   }
989   switch (m_pFormAvail->CheckAvail()) {
990     case kDataError:
991       return kFormError;
992     case kDataNotAvailable:
993       return kFormNotAvailable;
994     case kDataAvailable:
995       return kFormAvailable;
996   }
997 }
998 
ValidatePage(uint32_t dwPage) const999 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) const {
1000   int iPage = pdfium::base::checked_cast<int>(dwPage);
1001   RetainPtr<const CPDF_Dictionary> pPageDict =
1002       m_pDocument->GetPageDictionary(iPage);
1003   if (!pPageDict)
1004     return false;
1005 
1006   CPDF_PageObjectAvail obj_avail(GetValidator(), m_pDocument,
1007                                  std::move(pPageDict));
1008   return obj_avail.CheckAvail() == kDataAvailable;
1009 }
1010 
1011 std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>>
ParseDocument(std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,std::unique_ptr<CPDF_Document::PageDataIface> pPageData,const ByteString & password)1012 CPDF_DataAvail::ParseDocument(
1013     std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,
1014     std::unique_ptr<CPDF_Document::PageDataIface> pPageData,
1015     const ByteString& password) {
1016   if (m_pDocument) {
1017     // We already returned parsed document.
1018     return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1019   }
1020   auto document = std::make_unique<CPDF_Document>(std::move(pRenderData),
1021                                                   std::move(pPageData));
1022   document->AddObserver(this);
1023 
1024   CPDF_ReadValidator::ScopedSession read_session(GetValidator());
1025   CPDF_Parser::Error error =
1026       document->LoadLinearizedDoc(GetValidator(), password);
1027 
1028   // Additional check, that all ok.
1029   if (GetValidator()->has_read_problems()) {
1030     NOTREACHED();
1031     return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1032   }
1033 
1034   if (error != CPDF_Parser::SUCCESS)
1035     return std::make_pair(error, nullptr);
1036 
1037   m_pDocument = document.get();
1038   return std::make_pair(CPDF_Parser::SUCCESS, std::move(document));
1039 }
1040 
1041 CPDF_DataAvail::PageNode::PageNode() = default;
1042 
1043 CPDF_DataAvail::PageNode::~PageNode() = default;
1044