1 // Copyright 2016 The PDFium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ 8 #define CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ 9 10 #include <functional> 11 #include <map> 12 #include <memory> 13 #include <set> 14 #include <utility> 15 #include <vector> 16 17 #include "core/fpdfapi/parser/cpdf_document.h" 18 #include "core/fpdfapi/parser/cpdf_parser.h" 19 #include "core/fxcrt/retain_ptr.h" 20 #include "core/fxcrt/unowned_ptr.h" 21 22 class CPDF_CrossRefAvail; 23 class CPDF_Dictionary; 24 class CPDF_HintTables; 25 class CPDF_IndirectObjectHolder; 26 class CPDF_LinearizedHeader; 27 class CPDF_PageObjectAvail; 28 class CPDF_ReadValidator; 29 class CPDF_SyntaxParser; 30 31 class CPDF_DataAvail final : public Observable::ObserverIface { 32 public: 33 // Must match PDF_DATA_* definitions in public/fpdf_dataavail.h, but cannot 34 // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts 35 // to make sure the two sets of values match. 36 enum DocAvailStatus { 37 kDataError = -1, // PDF_DATA_ERROR 38 kDataNotAvailable = 0, // PDF_DATA_NOTAVAIL 39 kDataAvailable = 1, // PDF_DATA_AVAIL 40 }; 41 42 // Must match PDF_*LINEAR* definitions in public/fpdf_dataavail.h, but cannot 43 // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts 44 // to make sure the two sets of values match. 45 enum DocLinearizationStatus { 46 kLinearizationUnknown = -1, // PDF_LINEARIZATION_UNKNOWN 47 kNotLinearized = 0, // PDF_NOT_LINEARIZED 48 kLinearized = 1, // PDF_LINEARIZED 49 }; 50 51 // Must match PDF_FORM_* definitions in public/fpdf_dataavail.h, but cannot 52 // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts 53 // to make sure the two sets of values match. 54 enum DocFormStatus { 55 kFormError = -1, // PDF_FORM_ERROR 56 kFormNotAvailable = 0, // PDF_FORM_NOTAVAIL 57 kFormAvailable = 1, // PDF_FORM_AVAIL 58 kFormNotExist = 2, // PDF_FORM_NOTEXIST 59 }; 60 61 class FileAvail { 62 public: 63 virtual ~FileAvail(); 64 virtual bool IsDataAvail(FX_FILESIZE offset, size_t size) = 0; 65 }; 66 67 class DownloadHints { 68 public: 69 virtual ~DownloadHints(); 70 virtual void AddSegment(FX_FILESIZE offset, size_t size) = 0; 71 }; 72 73 CPDF_DataAvail(FileAvail* pFileAvail, 74 RetainPtr<IFX_SeekableReadStream> pFileRead); 75 ~CPDF_DataAvail() override; 76 77 // Observable::ObserverIface: 78 void OnObservableDestroyed() override; 79 80 DocAvailStatus IsDocAvail(DownloadHints* pHints); 81 DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints* pHints); 82 DocFormStatus IsFormAvail(DownloadHints* pHints); 83 DocLinearizationStatus IsLinearizedPDF(); 84 int GetPageCount() const; 85 RetainPtr<const CPDF_Dictionary> GetPageDictionary(int index) const; 86 RetainPtr<CPDF_ReadValidator> GetValidator() const; 87 88 std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>> ParseDocument( 89 std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData, 90 std::unique_ptr<CPDF_Document::PageDataIface> pPageData, 91 const ByteString& password); 92 GetHintTablesForTest()93 const CPDF_HintTables* GetHintTablesForTest() const { 94 return m_pHintTables.get(); 95 } 96 97 private: 98 enum class InternalStatus : uint8_t { 99 kHeader = 0, 100 kFirstPage, 101 kHintTable, 102 kLoadAllCrossRef, 103 kRoot, 104 kInfo, 105 kPageTree, 106 kPage, 107 kPageLaterLoad, 108 kResources, 109 kDone, 110 kError, 111 kLoadAllFile, 112 }; 113 114 class PageNode { 115 public: 116 enum class Type { kUnknown = 0, kPage, kPages, kArray }; 117 118 PageNode(); 119 ~PageNode(); 120 121 Type m_type = Type::kUnknown; 122 uint32_t m_dwPageNo = 0; 123 std::vector<std::unique_ptr<PageNode>> m_ChildNodes; 124 }; 125 126 static constexpr int kMaxPageRecursionDepth = 1024; 127 128 bool CheckDocStatus(); 129 bool CheckHeader(); 130 bool CheckFirstPage(); 131 bool CheckHintTables(); 132 bool CheckRoot(); 133 bool CheckInfo(); 134 bool CheckPages(); 135 bool CheckPage(); 136 DocAvailStatus CheckResources(RetainPtr<CPDF_Dictionary> page); 137 DocFormStatus CheckAcroForm(); 138 bool CheckPageStatus(); 139 140 DocAvailStatus CheckHeaderAndLinearized(); 141 RetainPtr<CPDF_Object> ParseIndirectObjectAt( 142 FX_FILESIZE pos, 143 uint32_t objnum, 144 CPDF_IndirectObjectHolder* pObjList) const; 145 RetainPtr<CPDF_Object> GetObject(uint32_t objnum, bool* pExistInFile); 146 bool GetPageKids(CPDF_Object* pPages); 147 bool PreparePageItem(); 148 bool LoadPages(); 149 bool CheckAndLoadAllXref(); 150 bool LoadAllFile(); 151 DocAvailStatus CheckLinearizedData(); 152 153 bool CheckPage(uint32_t dwPage); 154 bool LoadDocPages(); 155 bool LoadDocPage(uint32_t dwPage); 156 bool CheckPageNode(const PageNode& pageNode, 157 int32_t iPage, 158 int32_t& iCount, 159 int level); 160 bool CheckUnknownPageNode(uint32_t dwPageNo, PageNode* pPageNode); 161 bool CheckArrayPageNode(uint32_t dwPageNo, PageNode* pPageNode); 162 bool CheckPageCount(); 163 bool IsFirstCheck(uint32_t dwPage); 164 void ResetFirstCheck(uint32_t dwPage); 165 bool ValidatePage(uint32_t dwPage) const; 166 CPDF_SyntaxParser* GetSyntaxParser() const; 167 168 RetainPtr<CPDF_ReadValidator> m_pFileRead; 169 CPDF_Parser m_parser; 170 RetainPtr<CPDF_Dictionary> m_pRoot; 171 std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized; 172 bool m_bDocAvail = false; 173 InternalStatus m_internalStatus = InternalStatus::kHeader; 174 std::unique_ptr<CPDF_CrossRefAvail> m_pCrossRefAvail; 175 const FX_FILESIZE m_dwFileLen; 176 UnownedPtr<CPDF_Document> m_pDocument; 177 std::vector<uint32_t> m_PageObjList; 178 std::set<uint32_t> m_SeenPageObjList; 179 uint32_t m_PagesObjNum = 0; 180 bool m_bLinearedDataOK = false; 181 bool m_bMainXRefLoadTried = false; 182 bool m_bMainXRefLoadedOK = false; 183 bool m_bPagesTreeLoad = false; 184 bool m_bPagesLoad = false; 185 std::unique_ptr<CPDF_PageObjectAvail> m_pFormAvail; 186 std::vector<RetainPtr<CPDF_Object>> m_PagesArray; 187 bool m_bTotalLoadPageTree = false; 188 bool m_bCurPageDictLoadOK = false; 189 bool m_bHeaderAvail = false; 190 PageNode m_PageNode; 191 std::set<uint32_t> m_pageMapCheckState; 192 std::set<uint32_t> m_pagesLoadState; 193 std::unique_ptr<CPDF_HintTables> m_pHintTables; 194 std::map<uint32_t, std::unique_ptr<CPDF_PageObjectAvail>> m_PagesObjAvail; 195 std::map<RetainPtr<const CPDF_Object>, 196 std::unique_ptr<CPDF_PageObjectAvail>, 197 std::less<>> 198 m_PagesResourcesAvail; 199 }; 200 201 #endif // CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_ 202