xref: /aosp_15_r20/external/pdfium/fpdfsdk/fpdf_dataavail_embeddertest.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2015 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <algorithm>
6 #include <memory>
7 #include <string>
8 #include <utility>
9 #include <vector>
10 
11 #include "core/fxcrt/bytestring.h"
12 #include "public/fpdf_doc.h"
13 #include "public/fpdfview.h"
14 #include "testing/embedder_test.h"
15 #include "testing/fx_string_testhelpers.h"
16 #include "testing/gtest/include/gtest/gtest.h"
17 #include "testing/range_set.h"
18 #include "testing/utils/file_util.h"
19 #include "testing/utils/path_service.h"
20 
21 namespace {
22 
23 class MockDownloadHints final : public FX_DOWNLOADHINTS {
24  public:
SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)25   static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
26   }
27 
MockDownloadHints()28   MockDownloadHints() {
29     FX_DOWNLOADHINTS::version = 1;
30     FX_DOWNLOADHINTS::AddSegment = SAddSegment;
31   }
32 
33   ~MockDownloadHints() = default;
34 };
35 
36 class TestAsyncLoader final : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
37  public:
TestAsyncLoader(const std::string & file_name)38   explicit TestAsyncLoader(const std::string& file_name) {
39     std::string file_path;
40     if (!PathService::GetTestFilePath(file_name, &file_path))
41       return;
42     file_contents_ = GetFileContents(file_path.c_str(), &file_length_);
43     if (!file_contents_)
44       return;
45 
46     file_access_.m_FileLen = static_cast<unsigned long>(file_length_);
47     file_access_.m_GetBlock = SGetBlock;
48     file_access_.m_Param = this;
49 
50     FX_DOWNLOADHINTS::version = 1;
51     FX_DOWNLOADHINTS::AddSegment = SAddSegment;
52 
53     FX_FILEAVAIL::version = 1;
54     FX_FILEAVAIL::IsDataAvail = SIsDataAvail;
55   }
56 
IsOpened() const57   bool IsOpened() const { return !!file_contents_; }
58 
file_access()59   FPDF_FILEACCESS* file_access() { return &file_access_; }
hints()60   FX_DOWNLOADHINTS* hints() { return this; }
file_avail()61   FX_FILEAVAIL* file_avail() { return this; }
62 
requested_segments() const63   const std::vector<std::pair<size_t, size_t>>& requested_segments() const {
64     return requested_segments_;
65   }
66 
max_requested_bound() const67   size_t max_requested_bound() const { return max_requested_bound_; }
68 
ClearRequestedSegments()69   void ClearRequestedSegments() {
70     requested_segments_.clear();
71     max_requested_bound_ = 0;
72   }
73 
is_new_data_available() const74   bool is_new_data_available() const { return is_new_data_available_; }
set_is_new_data_available(bool is_new_data_available)75   void set_is_new_data_available(bool is_new_data_available) {
76     is_new_data_available_ = is_new_data_available;
77   }
78 
max_already_available_bound() const79   size_t max_already_available_bound() const {
80     return available_ranges_.IsEmpty()
81                ? 0
82                : available_ranges_.ranges().rbegin()->second;
83   }
84 
FlushRequestedData()85   void FlushRequestedData() {
86     for (const auto& it : requested_segments_) {
87       SetDataAvailable(it.first, it.second);
88     }
89     ClearRequestedSegments();
90   }
91 
file_contents()92   char* file_contents() { return file_contents_.get(); }
file_length() const93   size_t file_length() const { return file_length_; }
94 
95  private:
SetDataAvailable(size_t start,size_t size)96   void SetDataAvailable(size_t start, size_t size) {
97     available_ranges_.Union(RangeSet::Range(start, start + size));
98   }
99 
CheckDataAlreadyAvailable(size_t start,size_t size) const100   bool CheckDataAlreadyAvailable(size_t start, size_t size) const {
101     return available_ranges_.Contains(RangeSet::Range(start, start + size));
102   }
103 
GetBlockImpl(unsigned long pos,unsigned char * pBuf,unsigned long size)104   int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) {
105     if (!IsDataAvailImpl(pos, size))
106       return 0;
107     const unsigned long end =
108         std::min(static_cast<unsigned long>(file_length_), pos + size);
109     if (end <= pos)
110       return 0;
111     memcpy(pBuf, file_contents_.get() + pos, end - pos);
112     SetDataAvailable(pos, end - pos);
113     return static_cast<int>(end - pos);
114   }
115 
AddSegmentImpl(size_t offset,size_t size)116   void AddSegmentImpl(size_t offset, size_t size) {
117     requested_segments_.push_back(std::make_pair(offset, size));
118     max_requested_bound_ = std::max(max_requested_bound_, offset + size);
119   }
120 
IsDataAvailImpl(size_t offset,size_t size)121   bool IsDataAvailImpl(size_t offset, size_t size) {
122     if (offset + size > file_length_)
123       return false;
124     if (is_new_data_available_) {
125       SetDataAvailable(offset, size);
126       return true;
127     }
128     return CheckDataAlreadyAvailable(offset, size);
129   }
130 
SGetBlock(void * param,unsigned long pos,unsigned char * pBuf,unsigned long size)131   static int SGetBlock(void* param,
132                        unsigned long pos,
133                        unsigned char* pBuf,
134                        unsigned long size) {
135     return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size);
136   }
137 
SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)138   static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
139     return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size);
140   }
141 
SIsDataAvail(FX_FILEAVAIL * pThis,size_t offset,size_t size)142   static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis,
143                                 size_t offset,
144                                 size_t size) {
145     return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size);
146   }
147 
148   FPDF_FILEACCESS file_access_;
149 
150   std::unique_ptr<char, pdfium::FreeDeleter> file_contents_;
151   size_t file_length_ = 0;
152   std::vector<std::pair<size_t, size_t>> requested_segments_;
153   size_t max_requested_bound_ = 0;
154   bool is_new_data_available_ = true;
155 
156   RangeSet available_ranges_;
157 };
158 
159 }  // namespace
160 
161 class FPDFDataAvailEmbedderTest : public EmbedderTest {};
162 
TEST_F(FPDFDataAvailEmbedderTest, TrailerUnterminated) {
  // The file is too malformed to be opened or to become available; what
  // matters is that both calls return without crashing.
  EXPECT_FALSE(OpenDocument("trailer_unterminated.pdf"));

  MockDownloadHints ignored_hints;
  EXPECT_FALSE(FPDFAvail_IsDocAvail(avail(), &ignored_hints));
}
169 
TEST_F(FPDFDataAvailEmbedderTest, TrailerAsHexstring) {
  // The file is too malformed to be opened or to become available; what
  // matters is that both calls return without crashing.
  EXPECT_FALSE(OpenDocument("trailer_as_hexstring.pdf"));

  MockDownloadHints ignored_hints;
  EXPECT_FALSE(FPDFAvail_IsDocAvail(avail(), &ignored_hints));
}
176 
TEST_F(FPDFDataAvailEmbedderTest, LoadUsingHintTables) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Stop here with a clear failure if the test file could not be read,
  // instead of handing an unusable file access object to the library.
  ASSERT_TRUE(loader.IsOpened());

  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 1, loader.hints()));

  // No new data available, to prevent loading the "Pages" node.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), 1));
  EXPECT_TRUE(page);
}
190 
TEST_F(FPDFDataAvailEmbedderTest, CheckFormAvailIfLinearized) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Stop here with a clear failure if the test file could not be read,
  // instead of handing an unusable file access object to the library.
  ASSERT_TRUE(loader.IsOpened());

  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // Prevent access to non-requested data to coerce the parser to send new
  // requests for data that is not available (and was not requested before).
  loader.set_is_new_data_available(false);
  loader.ClearRequestedSegments();

  // Keep delivering exactly what was requested until the form check settles
  // on a final answer.
  int status = PDF_FORM_NOTAVAIL;
  while (status == PDF_FORM_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsFormAvail(avail(), loader.hints());
  }
  EXPECT_NE(PDF_FORM_ERROR, status);
}
210 
TEST_F(FPDFDataAvailEmbedderTest,
       DoNotLoadMainCrossRefForFirstPageIfLinearized) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Stop here with a clear failure if the test file could not be read,
  // instead of handing an unusable file access object to the library.
  ASSERT_TRUE(loader.IsOpened());

  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());
  const int first_page_num = FPDFAvail_GetFirstPageNum(document());

  // The main cross ref table should not be processed.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen,
            loader.max_already_available_bound());

  // Prevent access to non-requested data to coerce the parser to send new
  // requests for data that is not available (and was not requested before).
  // The return value is not checked here; only the requested ranges are
  // inspected afterwards.
  loader.set_is_new_data_available(false);
  FPDFAvail_IsPageAvail(avail(), first_page_num, loader.hints());

  // The main cross ref table should not be requested.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound());

  // Allow the page to be parsed.
  loader.set_is_new_data_available(true);
  ASSERT_EQ(PDF_DATA_AVAIL,
            FPDFAvail_IsPageAvail(avail(), first_page_num, loader.hints()));

  // The main cross ref table should not be processed.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen,
            loader.max_already_available_bound());

  // Prevent loading data while the page is being loaded.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), first_page_num));
  EXPECT_TRUE(page);
}
249 
TEST_F(FPDFDataAvailEmbedderTest, LoadSecondPageIfLinearizedWithHints) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Stop here with a clear failure if the test file could not be read,
  // instead of handing an unusable file access object to the library.
  ASSERT_TRUE(loader.IsOpened());

  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // Zero-based index of the second page. Declared as `int` because that is
  // the type the page APIs below take.
  static constexpr int kSecondPageNum = 1;

  // Prevent access to non-requested data to coerce the parser to send new
  // requests for data that is not available (and was not requested before).
  loader.set_is_new_data_available(false);
  loader.ClearRequestedSegments();

  // Keep delivering exactly what was requested until the page check settles
  // on a final answer.
  int status = PDF_DATA_NOTAVAIL;
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsPageAvail(avail(), kSecondPageNum, loader.hints());
  }
  EXPECT_EQ(PDF_DATA_AVAIL, status);

  // Prevent loading data while the page is being loaded.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), kSecondPageNum));
  EXPECT_TRUE(page);
}
276 
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingWholeDocument) {
  TestAsyncLoader loader("linearized.pdf");
  // Stop here with a clear failure if the test file could not be read,
  // instead of handing an unusable file access object to the library.
  ASSERT_TRUE(loader.IsOpened());

  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Deliver requested data only while the library reports "not available
  // yet". Any other non-success result (e.g. PDF_DATA_ERROR) ends the loop and
  // fails the assertion below instead of spinning forever.
  int doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (doc_status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, doc_status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // The "info" dictionary should still be unavailable.
  EXPECT_FALSE(FPDF_GetMetaText(document(), "CreationDate", nullptr, 0));

  // Simulate receiving the whole file.
  loader.set_is_new_data_available(true);
  // Load second page, to parse additional crossref sections.
  EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 1, loader.hints()));

  EXPECT_TRUE(FPDF_GetMetaText(document(), "CreationDate", nullptr, 0));
}
298 
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingFirstPage) {
  TestAsyncLoader loader("linearized.pdf");
  ASSERT_TRUE(loader.IsOpened());

  // Map "Info" to an object within the first section without breaking
  // linearization. The replacement has the same length (12 bytes) as the
  // text it overwrites, so no offset in the file changes.
  ByteString data(loader.file_contents(), loader.file_length());
  absl::optional<size_t> index = data.Find("/Info 27 0 R");
  ASSERT_TRUE(index);
  memcpy(loader.file_contents() + *index, "/Info 29 0 R", 12);

  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Deliver requested data only while the library reports "not available
  // yet". Any other non-success result (e.g. PDF_DATA_ERROR) ends the loop and
  // fails the assertion below instead of spinning forever.
  int doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (doc_status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, doc_status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // The "Info" dictionary should be available for the linearized document, if
  // it is located in the first page section.
  // Info was remapped to a dictionary with Type "Catalog"
  unsigned short buffer[100] = {0};
  EXPECT_TRUE(FPDF_GetMetaText(document(), "Type", buffer, sizeof(buffer)));
  EXPECT_EQ(L"Catalog", GetPlatformWString(buffer));
}
324 
TEST_F(FPDFDataAvailEmbedderTest, TryLoadInvalidInfo) {
  TestAsyncLoader loader("linearized.pdf");
  ASSERT_TRUE(loader.IsOpened());

  // Map "Info" to an invalid object without breaking linearization. The
  // replacement has the same length (12 bytes) as the text it overwrites, so
  // no offset in the file changes.
  ByteString data(loader.file_contents(), loader.file_length());
  absl::optional<size_t> index = data.Find("/Info 27 0 R");
  ASSERT_TRUE(index);
  memcpy(loader.file_contents() + *index, "/Info 99 0 R", 12);

  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Deliver requested data only while the library reports "not available
  // yet". Any other non-success result (e.g. PDF_DATA_ERROR) ends the loop and
  // fails the assertion below instead of spinning forever.
  int doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (doc_status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, doc_status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // Set all data available.
  loader.set_is_new_data_available(true);
  // Check a page, to load additional crossrefs.
  // NOTE(review): this comment used to say "second page", but the index
  // passed is 0 -- confirm which one is intended.
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 0, loader.hints()));

  // Test that the API is robust enough to handle the bad case.
  EXPECT_FALSE(FPDF_GetMetaText(document(), "Type", nullptr, 0));
}
350 
TEST_F(FPDFDataAvailEmbedderTest, TryLoadNonExistsInfo) {
  TestAsyncLoader loader("linearized.pdf");
  ASSERT_TRUE(loader.IsOpened());

  // Break the "Info" parameter without breaking linearization. The
  // replacement has the same length (12 bytes) as the text it overwrites, so
  // no offset in the file changes.
  ByteString data(loader.file_contents(), loader.file_length());
  absl::optional<size_t> index = data.Find("/Info 27 0 R");
  ASSERT_TRUE(index);
  memcpy(loader.file_contents() + *index, "/I_fo 27 0 R", 12);

  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Deliver requested data only while the library reports "not available
  // yet". Any other non-success result (e.g. PDF_DATA_ERROR) ends the loop and
  // fails the assertion below instead of spinning forever.
  int doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (doc_status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    doc_status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, doc_status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // Set all data available.
  loader.set_is_new_data_available(true);
  // Check a page, to load additional crossrefs.
  // NOTE(review): this comment used to say "second page", but the index
  // passed is 0 -- confirm which one is intended.
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 0, loader.hints()));

  // Test that the API is robust enough to handle the bad case.
  EXPECT_FALSE(FPDF_GetMetaText(document(), "Type", nullptr, 0));
}
376 
TEST_F(FPDFDataAvailEmbedderTest, BadInputsToAPIs) {
  // Every entry point is called with null handles and must answer with its
  // error / "nothing" value.

  // Availability queries.
  EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsDocAvail(nullptr, nullptr));
  EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsPageAvail(nullptr, 0, nullptr));
  EXPECT_EQ(PDF_FORM_ERROR, FPDFAvail_IsFormAvail(nullptr, nullptr));
  EXPECT_EQ(PDF_LINEARIZATION_UNKNOWN, FPDFAvail_IsLinearized(nullptr));

  // Document accessors.
  EXPECT_FALSE(FPDFAvail_GetDocument(nullptr, nullptr));
  EXPECT_EQ(0, FPDFAvail_GetFirstPageNum(nullptr));
}
385 
TEST_F(FPDFDataAvailEmbedderTest, NegativePageIndex) {
  TestAsyncLoader loader("linearized.pdf");
  // Stop here with a clear failure if the test file could not be read,
  // instead of handing an unusable file access object to the library.
  ASSERT_TRUE(loader.IsOpened());

  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));

  // A negative page index is expected to be reported as "not available".
  EXPECT_EQ(PDF_DATA_NOTAVAIL,
            FPDFAvail_IsPageAvail(avail(), -1, loader.hints()));
}
393 
TEST_F(FPDFDataAvailEmbedderTest, Bug_1324189) {
  // Test passes if it doesn't crash.
  TestAsyncLoader loader("bug_1324189.pdf");
  // Make sure the regression file was actually read; otherwise the call below
  // would not exercise it.
  ASSERT_TRUE(loader.IsOpened());

  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_NOTAVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
}
400 
TEST_F(FPDFDataAvailEmbedderTest, Bug_1324503) {
  // Test passes if it doesn't crash.
  TestAsyncLoader loader("bug_1324503.pdf");
  // Make sure the regression file was actually read; otherwise the call below
  // would not exercise it.
  ASSERT_TRUE(loader.IsOpened());

  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_NOTAVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
}
407