xref: /aosp_15_r20/external/pdfium/core/fpdfapi/parser/cpdf_document_unittest.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "core/fpdfapi/parser/cpdf_document.h"
6 
7 #include <memory>
8 #include <utility>
9 
10 #include "core/fpdfapi/page/test_with_page_module.h"
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_boolean.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
15 #include "core/fpdfapi/parser/cpdf_name.h"
16 #include "core/fpdfapi/parser/cpdf_number.h"
17 #include "core/fpdfapi/parser/cpdf_parser.h"
18 #include "core/fpdfapi/parser/cpdf_reference.h"
19 #include "core/fpdfapi/parser/cpdf_string.h"
20 #include "core/fpdfapi/parser/cpdf_test_document.h"
21 #include "testing/gtest/include/gtest/gtest.h"
22 #include "third_party/base/check.h"
23 
24 namespace {
25 
// Number of leaf pages that CPDF_TestDocumentForPages puts in its page tree.
constexpr int kNumTestPages = 7;
27 
CreatePageTreeNode(RetainPtr<CPDF_Array> kids,CPDF_Document * pDoc,int count)28 RetainPtr<CPDF_Dictionary> CreatePageTreeNode(RetainPtr<CPDF_Array> kids,
29                                               CPDF_Document* pDoc,
30                                               int count) {
31   uint32_t new_objnum = pDoc->AddIndirectObject(kids);
32   auto pageNode = pDoc->NewIndirect<CPDF_Dictionary>();
33   pageNode->SetNewFor<CPDF_Name>("Type", "Pages");
34   pageNode->SetNewFor<CPDF_Reference>("Kids", pDoc, new_objnum);
35   pageNode->SetNewFor<CPDF_Number>("Count", count);
36   for (size_t i = 0; i < kids->size(); i++) {
37     kids->GetMutableDictAt(i)->SetNewFor<CPDF_Reference>("Parent", pDoc,
38                                                          pageNode->GetObjNum());
39   }
40   return pageNode;
41 }
42 
CreateNumberedPage(size_t number)43 RetainPtr<CPDF_Dictionary> CreateNumberedPage(size_t number) {
44   auto page = pdfium::MakeRetain<CPDF_Dictionary>();
45   page->SetNewFor<CPDF_Name>("Type", "Page");
46   page->SetNewFor<CPDF_Number>("PageNumbering", static_cast<int>(number));
47   return page;
48 }
49 
50 class CPDF_TestDocumentForPages final : public CPDF_TestDocument {
51  public:
CPDF_TestDocumentForPages()52   CPDF_TestDocumentForPages() {
53     // Set up test
54     auto zeroToTwo = pdfium::MakeRetain<CPDF_Array>();
55     zeroToTwo->AppendNew<CPDF_Reference>(
56         this, AddIndirectObject(CreateNumberedPage(0)));
57     zeroToTwo->AppendNew<CPDF_Reference>(
58         this, AddIndirectObject(CreateNumberedPage(1)));
59     zeroToTwo->AppendNew<CPDF_Reference>(
60         this, AddIndirectObject(CreateNumberedPage(2)));
61     RetainPtr<CPDF_Dictionary> branch1 =
62         CreatePageTreeNode(std::move(zeroToTwo), this, 3);
63 
64     auto zeroToThree = pdfium::MakeRetain<CPDF_Array>();
65     zeroToThree->AppendNew<CPDF_Reference>(this, branch1->GetObjNum());
66     zeroToThree->AppendNew<CPDF_Reference>(
67         this, AddIndirectObject(CreateNumberedPage(3)));
68     RetainPtr<CPDF_Dictionary> branch2 =
69         CreatePageTreeNode(std::move(zeroToThree), this, 4);
70 
71     auto fourFive = pdfium::MakeRetain<CPDF_Array>();
72     fourFive->AppendNew<CPDF_Reference>(
73         this, AddIndirectObject(CreateNumberedPage(4)));
74     fourFive->AppendNew<CPDF_Reference>(
75         this, AddIndirectObject(CreateNumberedPage(5)));
76     RetainPtr<CPDF_Dictionary> branch3 =
77         CreatePageTreeNode(std::move(fourFive), this, 2);
78 
79     auto justSix = pdfium::MakeRetain<CPDF_Array>();
80     justSix->AppendNew<CPDF_Reference>(
81         this, AddIndirectObject(CreateNumberedPage(6)));
82     RetainPtr<CPDF_Dictionary> branch4 =
83         CreatePageTreeNode(std::move(justSix), this, 1);
84 
85     auto allPages = pdfium::MakeRetain<CPDF_Array>();
86     allPages->AppendNew<CPDF_Reference>(this, branch2->GetObjNum());
87     allPages->AppendNew<CPDF_Reference>(this, branch3->GetObjNum());
88     allPages->AppendNew<CPDF_Reference>(this, branch4->GetObjNum());
89     RetainPtr<CPDF_Dictionary> pagesDict =
90         CreatePageTreeNode(std::move(allPages), this, kNumTestPages);
91 
92     SetRootForTesting(NewIndirect<CPDF_Dictionary>());
93     GetMutableRoot()->SetNewFor<CPDF_Reference>("Pages", this,
94                                                 pagesDict->GetObjNum());
95     ResizePageListForTesting(kNumTestPages);
96   }
97 
SetTreeSize(int size)98   void SetTreeSize(int size) {
99     GetMutableRoot()->SetNewFor<CPDF_Number>("Count", size);
100     ResizePageListForTesting(size);
101   }
102 };
103 
104 class CPDF_TestDocumentWithPageWithoutPageNum final : public CPDF_TestDocument {
105  public:
CPDF_TestDocumentWithPageWithoutPageNum()106   CPDF_TestDocumentWithPageWithoutPageNum() {
107     // Set up test
108     auto allPages = pdfium::MakeRetain<CPDF_Array>();
109     allPages->AppendNew<CPDF_Reference>(
110         this, AddIndirectObject(CreateNumberedPage(0)));
111     allPages->AppendNew<CPDF_Reference>(
112         this, AddIndirectObject(CreateNumberedPage(1)));
113     // Page without pageNum.
114     inlined_page_ = CreateNumberedPage(2);
115     allPages->Append(inlined_page_);
116     RetainPtr<CPDF_Dictionary> pagesDict =
117         CreatePageTreeNode(std::move(allPages), this, 3);
118     SetRootForTesting(NewIndirect<CPDF_Dictionary>());
119     GetMutableRoot()->SetNewFor<CPDF_Reference>("Pages", this,
120                                                 pagesDict->GetObjNum());
121     ResizePageListForTesting(3);
122   }
123 
inlined_page() const124   const CPDF_Object* inlined_page() const { return inlined_page_.Get(); }
125 
126  private:
127   RetainPtr<CPDF_Object> inlined_page_;
128 };
129 
// CPDF_LinearizedHeader subclass used by the tests to build a header straight
// from a dictionary. The second base-constructor argument is always 0.
// NOTE(review): presumably this subclass exists because the base constructor
// is not directly callable from test code -- confirm against
// cpdf_linearized_header.h.
class TestLinearized final : public CPDF_LinearizedHeader {
 public:
  explicit TestLinearized(CPDF_Dictionary* dict)
      : CPDF_LinearizedHeader(dict, 0) {}
};
135 
136 class CPDF_TestDocPagesWithoutKids final : public CPDF_TestDocument {
137  public:
CPDF_TestDocPagesWithoutKids()138   CPDF_TestDocPagesWithoutKids() {
139     auto pagesDict = NewIndirect<CPDF_Dictionary>();
140     pagesDict->SetNewFor<CPDF_Name>("Type", "Pages");
141     pagesDict->SetNewFor<CPDF_Number>("Count", 3);
142     ResizePageListForTesting(10);
143     SetRootForTesting(NewIndirect<CPDF_Dictionary>());
144     GetMutableRoot()->SetNewFor<CPDF_Reference>("Pages", this,
145                                                 pagesDict->GetObjNum());
146   }
147 };
148 
149 class CPDF_TestDocumentAllowSetParser final : public CPDF_TestDocument {
150  public:
151   CPDF_TestDocumentAllowSetParser() = default;
152 
153   using CPDF_Document::SetParser;
154 };
155 
156 }  // namespace
157 
// Fixture name used by every TEST_F in this file; it is TestWithPageModule
// under another name.
using DocumentTest = TestWithPageModule;
159 
// Looks up every page index in ascending order and checks that each lookup
// returns the page tagged with that index, then checks that the first index
// past the end yields no page.
TEST_F(DocumentTest, GetPages) {
  auto document = std::make_unique<CPDF_TestDocumentForPages>();

  for (int index = 0; index < kNumTestPages; ++index) {
    RetainPtr<const CPDF_Dictionary> page_dict =
        document->GetPageDictionary(index);
    ASSERT_TRUE(page_dict);
    ASSERT_TRUE(page_dict->KeyExist("PageNumbering"));
    EXPECT_EQ(index, page_dict->GetIntegerFor("PageNumbering"));
  }

  // One past the last valid index.
  EXPECT_FALSE(document->GetPageDictionary(kNumTestPages));
}
173 
// Fetches the inline page (index 2) twice and checks that both lookups return
// the very object the fixture stored inline.
TEST_F(DocumentTest, GetPageWithoutObjNumTwice) {
  auto document = std::make_unique<CPDF_TestDocumentWithPageWithoutPageNum>();

  RetainPtr<const CPDF_Dictionary> first_lookup =
      document->GetPageDictionary(2);
  ASSERT_TRUE(first_lookup);
  ASSERT_EQ(document->inlined_page(), first_lookup);

  // A repeated lookup must hand back the same page.
  RetainPtr<const CPDF_Dictionary> second_lookup =
      document->GetPageDictionary(2);
  EXPECT_TRUE(second_lookup);
  EXPECT_EQ(first_lookup, second_lookup);
}
185 
// Looks up every page index in descending order, from the last page down to
// page 0, and checks that each lookup returns the page tagged with that
// index. Finally checks that the first index past the end yields no page.
TEST_F(DocumentTest, GetPagesReverseOrder) {
  std::unique_ptr<CPDF_TestDocumentForPages> document =
      std::make_unique<CPDF_TestDocumentForPages>();
  // Start from the last valid index, derived from the fixture's page count
  // rather than hard-coded, so the test keeps covering the whole tree if
  // kNumTestPages changes.
  for (int i = kNumTestPages - 1; i >= 0; i--) {
    RetainPtr<const CPDF_Dictionary> page = document->GetPageDictionary(i);
    ASSERT_TRUE(page);
    ASSERT_TRUE(page->KeyExist("PageNumbering"));
    EXPECT_EQ(i, page->GetIntegerFor("PageNumbering"));
  }
  RetainPtr<const CPDF_Dictionary> page =
      document->GetPageDictionary(kNumTestPages);
  EXPECT_FALSE(page);
}
199 
// Looks pages up out of sequence: 1, then 3, then an out-of-range index, then
// 6. Each valid lookup must return the page tagged with the requested index.
TEST_F(DocumentTest, GetPagesInDisorder) {
  auto document = std::make_unique<CPDF_TestDocumentForPages>();

  for (int index : {1, 3}) {
    RetainPtr<const CPDF_Dictionary> page_dict =
        document->GetPageDictionary(index);
    ASSERT_TRUE(page_dict);
    ASSERT_TRUE(page_dict->KeyExist("PageNumbering"));
    EXPECT_EQ(index, page_dict->GetIntegerFor("PageNumbering"));
  }

  // One past the last valid index.
  EXPECT_FALSE(document->GetPageDictionary(kNumTestPages));

  // A valid lookup made after the failed one still succeeds.
  RetainPtr<const CPDF_Dictionary> later_page = document->GetPageDictionary(6);
  ASSERT_TRUE(later_page);
  ASSERT_TRUE(later_page->KeyExist("PageNumbering"));
  EXPECT_EQ(6, later_page->GetIntegerFor("PageNumbering"));
}
222 
// Checks CPDF_Document::IsValidPageObject() against four indirect
// dictionaries: only the one whose "Type" is the name "Page" is accepted.
TEST_F(DocumentTest, IsValidPageObject) {
  CPDF_TestDocumentForPages document;

  // "Type" is the name "Page": accepted.
  auto page_by_name = pdfium::MakeRetain<CPDF_Dictionary>();
  page_by_name->SetNewFor<CPDF_Name>("Type", "Page");
  document.AddIndirectObject(page_by_name);
  EXPECT_TRUE(CPDF_Document::IsValidPageObject(page_by_name.Get()));

  // "Type" is the string "Page" rather than a name: rejected.
  auto page_by_string = pdfium::MakeRetain<CPDF_Dictionary>();
  page_by_string->SetNewFor<CPDF_String>("Type", "Page", false);
  document.AddIndirectObject(page_by_string);
  EXPECT_FALSE(CPDF_Document::IsValidPageObject(page_by_string.Get()));

  // "Type" is a name, but not "Page": rejected.
  auto font_by_name = pdfium::MakeRetain<CPDF_Dictionary>();
  font_by_name->SetNewFor<CPDF_Name>("Type", "Font");
  document.AddIndirectObject(font_by_name);
  EXPECT_FALSE(CPDF_Document::IsValidPageObject(font_by_name.Get()));

  // No "Type" entry at all: rejected.
  auto untyped_dict = document.NewIndirect<CPDF_Dictionary>();
  EXPECT_FALSE(CPDF_Document::IsValidPageObject(untyped_dict.Get()));
}
244 
// Checks that a page object number recorded through SetPageObjNum() is used
// by GetPageDictionary() on a document that has no Pages dictionary.
TEST_F(DocumentTest, UseCachedPageObjNumIfHaveNotPagesDict) {
  // ObjNum can be added in CPDF_DataAvail::IsPageAvail(), and PagesDict may not
  // exist in this case, e.g. when hint table is used to page check in
  // CPDF_DataAvail.
  constexpr int kPageCount = 100;
  constexpr int kTestPageNum = 33;

  auto linearization_dict = pdfium::MakeRetain<CPDF_Dictionary>();
  CPDF_TestDocumentAllowSetParser document;

  // Give the document a parser whose linearized header advertises
  // kPageCount pages and names the first page's object number.
  {
    RetainPtr<CPDF_Dictionary> first_page = CreateNumberedPage(0);
    ASSERT_TRUE(first_page);

    const int first_page_objnum = document.AddIndirectObject(first_page);
    ASSERT_NE(kTestPageNum, first_page_objnum);

    linearization_dict->SetNewFor<CPDF_Boolean>("Linearized", true);
    linearization_dict->SetNewFor<CPDF_Number>("N", kPageCount);
    linearization_dict->SetNewFor<CPDF_Number>("O", first_page_objnum);

    auto parser = std::make_unique<CPDF_Parser>();
    auto header = std::make_unique<TestLinearized>(linearization_dict.Get());
    parser->SetLinearizedHeaderForTesting(std::move(header));
    document.SetParser(std::move(parser));
  }

  document.LoadPages();
  ASSERT_EQ(kPageCount, document.GetPageCount());

  auto stub_page = document.NewIndirect<CPDF_Dictionary>();
  const uint32_t stub_objnum = stub_page->GetObjNum();

  // Before an object number is recorded the page can be neither reported as
  // loaded nor fetched.
  EXPECT_FALSE(document.IsPageLoaded(kTestPageNum));
  EXPECT_FALSE(document.GetPageDictionary(kTestPageNum));

  // Once it is recorded, the lookup resolves to the stub dictionary.
  document.SetPageObjNum(kTestPageNum, stub_objnum);
  EXPECT_TRUE(document.IsPageLoaded(kTestPageNum));
  EXPECT_EQ(stub_page, document.GetPageDictionary(kTestPageNum));
}
285 
// Enlarges the page list beyond the pages that actually exist in the tree and
// checks that only the real pages can be fetched.
TEST_F(DocumentTest, CountGreaterThanPageTree) {
  auto document = std::make_unique<CPDF_TestDocumentForPages>();
  document->SetTreeSize(kNumTestPages + 3);

  // Every page that exists in the tree is still reachable.
  for (int index = 0; index < kNumTestPages; ++index) {
    EXPECT_TRUE(document->GetPageDictionary(index));
  }

  // The three extra slots have no page behind them, and the fourth index is
  // past the enlarged page list as well.
  for (int index = kNumTestPages; index < kNumTestPages + 4; ++index) {
    EXPECT_FALSE(document->GetPageDictionary(index));
  }

  // The last real page remains reachable after the failed lookups.
  EXPECT_TRUE(document->GetPageDictionary(kNumTestPages - 1));
}
296 
// With a Pages dictionary that has no kids and Count = 3, index 0 yields a
// dictionary while indices 1 through 4 yield nothing.
TEST_F(DocumentTest, PagesWithoutKids) {
  auto document = std::make_unique<CPDF_TestDocPagesWithoutKids>();
  EXPECT_TRUE(document->GetPageDictionary(0));

  // Lookups past index 0 must not produce a page.
  for (int index = 1; index < 5; ++index) {
    EXPECT_FALSE(document->GetPageDictionary(index));
  }

  // Index 0 is still available after the failed lookups.
  EXPECT_TRUE(document->GetPageDictionary(0));
}
307