// xref: /aosp_15_r20/external/pdfium/core/fpdfapi/parser/cpdf_data_avail.h (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
// Copyright 2016 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

#ifndef CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
#define CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_

#include <stddef.h>
#include <stdint.h>

#include <functional>
#include <map>
#include <memory>
#include <set>
#include <utility>
#include <vector>

#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_parser.h"
#include "core/fxcrt/retain_ptr.h"
#include "core/fxcrt/unowned_ptr.h"

// Forward declarations. In this header these types are used only through
// pointers, smart pointers, or as template arguments, so complete definitions
// are not required here.
class CPDF_CrossRefAvail;
class CPDF_Dictionary;
class CPDF_HintTables;
class CPDF_IndirectObjectHolder;
class CPDF_LinearizedHeader;
class CPDF_PageObjectAvail;
class CPDF_ReadValidator;
class CPDF_SyntaxParser;

31 class CPDF_DataAvail final : public Observable::ObserverIface {
32  public:
33   // Must match PDF_DATA_* definitions in public/fpdf_dataavail.h, but cannot
34   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
35   // to make sure the two sets of values match.
36   enum DocAvailStatus {
37     kDataError = -1,        // PDF_DATA_ERROR
38     kDataNotAvailable = 0,  // PDF_DATA_NOTAVAIL
39     kDataAvailable = 1,     // PDF_DATA_AVAIL
40   };
41 
42   // Must match PDF_*LINEAR* definitions in public/fpdf_dataavail.h, but cannot
43   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
44   // to make sure the two sets of values match.
45   enum DocLinearizationStatus {
46     kLinearizationUnknown = -1,  // PDF_LINEARIZATION_UNKNOWN
47     kNotLinearized = 0,          // PDF_NOT_LINEARIZED
48     kLinearized = 1,             // PDF_LINEARIZED
49   };
50 
51   // Must match PDF_FORM_* definitions in public/fpdf_dataavail.h, but cannot
52   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
53   // to make sure the two sets of values match.
54   enum DocFormStatus {
55     kFormError = -1,        // PDF_FORM_ERROR
56     kFormNotAvailable = 0,  // PDF_FORM_NOTAVAIL
57     kFormAvailable = 1,     // PDF_FORM_AVAIL
58     kFormNotExist = 2,      // PDF_FORM_NOTEXIST
59   };
60 
61   class FileAvail {
62    public:
63     virtual ~FileAvail();
64     virtual bool IsDataAvail(FX_FILESIZE offset, size_t size) = 0;
65   };
66 
67   class DownloadHints {
68    public:
69     virtual ~DownloadHints();
70     virtual void AddSegment(FX_FILESIZE offset, size_t size) = 0;
71   };
72 
73   CPDF_DataAvail(FileAvail* pFileAvail,
74                  RetainPtr<IFX_SeekableReadStream> pFileRead);
75   ~CPDF_DataAvail() override;
76 
77   // Observable::ObserverIface:
78   void OnObservableDestroyed() override;
79 
80   DocAvailStatus IsDocAvail(DownloadHints* pHints);
81   DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints* pHints);
82   DocFormStatus IsFormAvail(DownloadHints* pHints);
83   DocLinearizationStatus IsLinearizedPDF();
84   int GetPageCount() const;
85   RetainPtr<const CPDF_Dictionary> GetPageDictionary(int index) const;
86   RetainPtr<CPDF_ReadValidator> GetValidator() const;
87 
88   std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>> ParseDocument(
89       std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,
90       std::unique_ptr<CPDF_Document::PageDataIface> pPageData,
91       const ByteString& password);
92 
GetHintTablesForTest()93   const CPDF_HintTables* GetHintTablesForTest() const {
94     return m_pHintTables.get();
95   }
96 
97  private:
98   enum class InternalStatus : uint8_t {
99     kHeader = 0,
100     kFirstPage,
101     kHintTable,
102     kLoadAllCrossRef,
103     kRoot,
104     kInfo,
105     kPageTree,
106     kPage,
107     kPageLaterLoad,
108     kResources,
109     kDone,
110     kError,
111     kLoadAllFile,
112   };
113 
114   class PageNode {
115    public:
116     enum class Type { kUnknown = 0, kPage, kPages, kArray };
117 
118     PageNode();
119     ~PageNode();
120 
121     Type m_type = Type::kUnknown;
122     uint32_t m_dwPageNo = 0;
123     std::vector<std::unique_ptr<PageNode>> m_ChildNodes;
124   };
125 
126   static constexpr int kMaxPageRecursionDepth = 1024;
127 
128   bool CheckDocStatus();
129   bool CheckHeader();
130   bool CheckFirstPage();
131   bool CheckHintTables();
132   bool CheckRoot();
133   bool CheckInfo();
134   bool CheckPages();
135   bool CheckPage();
136   DocAvailStatus CheckResources(RetainPtr<CPDF_Dictionary> page);
137   DocFormStatus CheckAcroForm();
138   bool CheckPageStatus();
139 
140   DocAvailStatus CheckHeaderAndLinearized();
141   RetainPtr<CPDF_Object> ParseIndirectObjectAt(
142       FX_FILESIZE pos,
143       uint32_t objnum,
144       CPDF_IndirectObjectHolder* pObjList) const;
145   RetainPtr<CPDF_Object> GetObject(uint32_t objnum, bool* pExistInFile);
146   bool GetPageKids(CPDF_Object* pPages);
147   bool PreparePageItem();
148   bool LoadPages();
149   bool CheckAndLoadAllXref();
150   bool LoadAllFile();
151   DocAvailStatus CheckLinearizedData();
152 
153   bool CheckPage(uint32_t dwPage);
154   bool LoadDocPages();
155   bool LoadDocPage(uint32_t dwPage);
156   bool CheckPageNode(const PageNode& pageNode,
157                      int32_t iPage,
158                      int32_t& iCount,
159                      int level);
160   bool CheckUnknownPageNode(uint32_t dwPageNo, PageNode* pPageNode);
161   bool CheckArrayPageNode(uint32_t dwPageNo, PageNode* pPageNode);
162   bool CheckPageCount();
163   bool IsFirstCheck(uint32_t dwPage);
164   void ResetFirstCheck(uint32_t dwPage);
165   bool ValidatePage(uint32_t dwPage) const;
166   CPDF_SyntaxParser* GetSyntaxParser() const;
167 
168   RetainPtr<CPDF_ReadValidator> m_pFileRead;
169   CPDF_Parser m_parser;
170   RetainPtr<CPDF_Dictionary> m_pRoot;
171   std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized;
172   bool m_bDocAvail = false;
173   InternalStatus m_internalStatus = InternalStatus::kHeader;
174   std::unique_ptr<CPDF_CrossRefAvail> m_pCrossRefAvail;
175   const FX_FILESIZE m_dwFileLen;
176   UnownedPtr<CPDF_Document> m_pDocument;
177   std::vector<uint32_t> m_PageObjList;
178   std::set<uint32_t> m_SeenPageObjList;
179   uint32_t m_PagesObjNum = 0;
180   bool m_bLinearedDataOK = false;
181   bool m_bMainXRefLoadTried = false;
182   bool m_bMainXRefLoadedOK = false;
183   bool m_bPagesTreeLoad = false;
184   bool m_bPagesLoad = false;
185   std::unique_ptr<CPDF_PageObjectAvail> m_pFormAvail;
186   std::vector<RetainPtr<CPDF_Object>> m_PagesArray;
187   bool m_bTotalLoadPageTree = false;
188   bool m_bCurPageDictLoadOK = false;
189   bool m_bHeaderAvail = false;
190   PageNode m_PageNode;
191   std::set<uint32_t> m_pageMapCheckState;
192   std::set<uint32_t> m_pagesLoadState;
193   std::unique_ptr<CPDF_HintTables> m_pHintTables;
194   std::map<uint32_t, std::unique_ptr<CPDF_PageObjectAvail>> m_PagesObjAvail;
195   std::map<RetainPtr<const CPDF_Object>,
196            std::unique_ptr<CPDF_PageObjectAvail>,
197            std::less<>>
198       m_PagesResourcesAvail;
199 };

#endif  // CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_