1 // Copyright 2015 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <memory>
7 #include <string>
8 #include <utility>
9 #include <vector>
10
11 #include "core/fxcrt/bytestring.h"
12 #include "public/fpdf_doc.h"
13 #include "public/fpdfview.h"
14 #include "testing/embedder_test.h"
15 #include "testing/fx_string_testhelpers.h"
16 #include "testing/gtest/include/gtest/gtest.h"
17 #include "testing/range_set.h"
18 #include "testing/utils/file_util.h"
19 #include "testing/utils/path_service.h"
20
21 namespace {
22
23 class MockDownloadHints final : public FX_DOWNLOADHINTS {
24 public:
SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)25 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
26 }
27
MockDownloadHints()28 MockDownloadHints() {
29 FX_DOWNLOADHINTS::version = 1;
30 FX_DOWNLOADHINTS::AddSegment = SAddSegment;
31 }
32
33 ~MockDownloadHints() = default;
34 };
35
36 class TestAsyncLoader final : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
37 public:
TestAsyncLoader(const std::string & file_name)38 explicit TestAsyncLoader(const std::string& file_name) {
39 std::string file_path;
40 if (!PathService::GetTestFilePath(file_name, &file_path))
41 return;
42 file_contents_ = GetFileContents(file_path.c_str(), &file_length_);
43 if (!file_contents_)
44 return;
45
46 file_access_.m_FileLen = static_cast<unsigned long>(file_length_);
47 file_access_.m_GetBlock = SGetBlock;
48 file_access_.m_Param = this;
49
50 FX_DOWNLOADHINTS::version = 1;
51 FX_DOWNLOADHINTS::AddSegment = SAddSegment;
52
53 FX_FILEAVAIL::version = 1;
54 FX_FILEAVAIL::IsDataAvail = SIsDataAvail;
55 }
56
IsOpened() const57 bool IsOpened() const { return !!file_contents_; }
58
file_access()59 FPDF_FILEACCESS* file_access() { return &file_access_; }
hints()60 FX_DOWNLOADHINTS* hints() { return this; }
file_avail()61 FX_FILEAVAIL* file_avail() { return this; }
62
requested_segments() const63 const std::vector<std::pair<size_t, size_t>>& requested_segments() const {
64 return requested_segments_;
65 }
66
max_requested_bound() const67 size_t max_requested_bound() const { return max_requested_bound_; }
68
ClearRequestedSegments()69 void ClearRequestedSegments() {
70 requested_segments_.clear();
71 max_requested_bound_ = 0;
72 }
73
is_new_data_available() const74 bool is_new_data_available() const { return is_new_data_available_; }
set_is_new_data_available(bool is_new_data_available)75 void set_is_new_data_available(bool is_new_data_available) {
76 is_new_data_available_ = is_new_data_available;
77 }
78
max_already_available_bound() const79 size_t max_already_available_bound() const {
80 return available_ranges_.IsEmpty()
81 ? 0
82 : available_ranges_.ranges().rbegin()->second;
83 }
84
FlushRequestedData()85 void FlushRequestedData() {
86 for (const auto& it : requested_segments_) {
87 SetDataAvailable(it.first, it.second);
88 }
89 ClearRequestedSegments();
90 }
91
file_contents()92 char* file_contents() { return file_contents_.get(); }
file_length() const93 size_t file_length() const { return file_length_; }
94
95 private:
SetDataAvailable(size_t start,size_t size)96 void SetDataAvailable(size_t start, size_t size) {
97 available_ranges_.Union(RangeSet::Range(start, start + size));
98 }
99
CheckDataAlreadyAvailable(size_t start,size_t size) const100 bool CheckDataAlreadyAvailable(size_t start, size_t size) const {
101 return available_ranges_.Contains(RangeSet::Range(start, start + size));
102 }
103
GetBlockImpl(unsigned long pos,unsigned char * pBuf,unsigned long size)104 int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) {
105 if (!IsDataAvailImpl(pos, size))
106 return 0;
107 const unsigned long end =
108 std::min(static_cast<unsigned long>(file_length_), pos + size);
109 if (end <= pos)
110 return 0;
111 memcpy(pBuf, file_contents_.get() + pos, end - pos);
112 SetDataAvailable(pos, end - pos);
113 return static_cast<int>(end - pos);
114 }
115
AddSegmentImpl(size_t offset,size_t size)116 void AddSegmentImpl(size_t offset, size_t size) {
117 requested_segments_.push_back(std::make_pair(offset, size));
118 max_requested_bound_ = std::max(max_requested_bound_, offset + size);
119 }
120
IsDataAvailImpl(size_t offset,size_t size)121 bool IsDataAvailImpl(size_t offset, size_t size) {
122 if (offset + size > file_length_)
123 return false;
124 if (is_new_data_available_) {
125 SetDataAvailable(offset, size);
126 return true;
127 }
128 return CheckDataAlreadyAvailable(offset, size);
129 }
130
SGetBlock(void * param,unsigned long pos,unsigned char * pBuf,unsigned long size)131 static int SGetBlock(void* param,
132 unsigned long pos,
133 unsigned char* pBuf,
134 unsigned long size) {
135 return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size);
136 }
137
SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)138 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
139 return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size);
140 }
141
SIsDataAvail(FX_FILEAVAIL * pThis,size_t offset,size_t size)142 static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis,
143 size_t offset,
144 size_t size) {
145 return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size);
146 }
147
148 FPDF_FILEACCESS file_access_;
149
150 std::unique_ptr<char, pdfium::FreeDeleter> file_contents_;
151 size_t file_length_ = 0;
152 std::vector<std::pair<size_t, size_t>> requested_segments_;
153 size_t max_requested_bound_ = 0;
154 bool is_new_data_available_ = true;
155
156 RangeSet available_ranges_;
157 };
158
159 } // namespace
160
161 class FPDFDataAvailEmbedderTest : public EmbedderTest {};
162
TEST_F(FPDFDataAvailEmbedderTest, TrailerUnterminated) {
  // The file is too malformed to ever become available; what matters is that
  // opening it and querying availability does not crash.
  EXPECT_FALSE(OpenDocument("trailer_unterminated.pdf"));

  MockDownloadHints ignored_hints;
  const int doc_status = FPDFAvail_IsDocAvail(avail(), &ignored_hints);
  EXPECT_FALSE(doc_status);
}
169
TEST_F(FPDFDataAvailEmbedderTest, TrailerAsHexstring) {
  // The file is too malformed to ever become available; what matters is that
  // opening it and querying availability does not crash.
  EXPECT_FALSE(OpenDocument("trailer_as_hexstring.pdf"));

  MockDownloadHints ignored_hints;
  const int doc_status = FPDFAvail_IsDocAvail(avail(), &ignored_hints);
  EXPECT_FALSE(doc_status);
}
176
// Page 1 must be loadable without any further data once the availability API
// has reported it as available.
TEST_F(FPDFDataAvailEmbedderTest, LoadUsingHintTables) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Fail fast if the fixture is missing instead of running the API against an
  // empty loader.
  ASSERT_TRUE(loader.IsOpened());
  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 1, loader.hints()));

  // No new data available, to prevent load "Pages" node.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), 1));
  EXPECT_TRUE(page);
}
190
// Form availability must resolve without an error when data is only supplied
// in response to explicit segment requests.
TEST_F(FPDFDataAvailEmbedderTest, CheckFormAvailIfLinearized) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Fail fast if the fixture is missing instead of running the API against an
  // empty loader.
  ASSERT_TRUE(loader.IsOpened());
  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  loader.set_is_new_data_available(false);
  loader.ClearRequestedSegments();

  // Feed the requested segments back until the API stops asking for more.
  int status = PDF_FORM_NOTAVAIL;
  while (status == PDF_FORM_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsFormAvail(avail(), loader.hints());
  }
  EXPECT_NE(PDF_FORM_ERROR, status);
}
210
// Checking and loading the first page of a linearized file must neither read
// nor request data up to the end of the file, where the main cross reference
// table lives.
TEST_F(FPDFDataAvailEmbedderTest,
       DoNotLoadMainCrossRefForFirstPageIfLinearized) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Fail fast if the fixture is missing instead of running the API against an
  // empty loader.
  ASSERT_TRUE(loader.IsOpened());
  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());
  const int first_page_num = FPDFAvail_GetFirstPageNum(document());

  // The main cross ref table should not be processed.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen,
            loader.max_already_available_bound());

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  loader.set_is_new_data_available(false);
  // Only the recorded segment requests matter here, so the returned status is
  // deliberately ignored.
  FPDFAvail_IsPageAvail(avail(), first_page_num, loader.hints());

  // The main cross ref table should not be requested.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound());

  // Allow parse page.
  loader.set_is_new_data_available(true);
  ASSERT_EQ(PDF_DATA_AVAIL,
            FPDFAvail_IsPageAvail(avail(), first_page_num, loader.hints()));

  // The main cross ref table should not be processed.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen,
            loader.max_already_available_bound());

  // Prevent loading data, while page loading.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), first_page_num));
  EXPECT_TRUE(page);
}
249
// The second page must become available, and then loadable without further
// data, when data is only supplied in response to explicit segment requests.
TEST_F(FPDFDataAvailEmbedderTest, LoadSecondPageIfLinearizedWithHints) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Fail fast if the fixture is missing instead of running the API against an
  // empty loader.
  ASSERT_TRUE(loader.IsOpened());
  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // Page indices are passed to the API as plain `int` elsewhere in this file,
  // so use the same type here and avoid an implicit unsigned-to-signed
  // conversion at each call.
  static constexpr int kSecondPageNum = 1;

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  loader.set_is_new_data_available(false);
  loader.ClearRequestedSegments();

  // Feed the requested segments back until the API stops asking for more.
  int status = PDF_DATA_NOTAVAIL;
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsPageAvail(avail(), kSecondPageNum, loader.hints());
  }
  EXPECT_EQ(PDF_DATA_AVAIL, status);

  // Prevent loading data, while page loading.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), kSecondPageNum));
  EXPECT_TRUE(page);
}
276
// The metadata must be unreadable while only the requested parts of the file
// have arrived, and readable once the whole file is available.
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingWholeDocument) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail fast if the fixture is missing instead of running the API against an
  // empty loader.
  ASSERT_TRUE(loader.IsOpened());
  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Keep supplying the requested segments only while the API reports "not
  // available yet". Any other non-available status ends the loop and fails
  // the assertion below instead of spinning forever.
  int status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // The "info" dictionary should still be unavailable.
  EXPECT_FALSE(FPDF_GetMetaText(document(), "CreationDate", nullptr, 0));

  // Simulate receiving whole file.
  loader.set_is_new_data_available(true);
  // Load second page, to parse additional crossref sections.
  EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 1, loader.hints()));

  EXPECT_TRUE(FPDF_GetMetaText(document(), "CreationDate", nullptr, 0));
}
298
// When "Info" points at an object inside the first-page section, the metadata
// must be readable as soon as the document itself is available.
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingFirstPage) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail fast if the fixture is missing instead of patching an empty buffer.
  ASSERT_TRUE(loader.IsOpened());

  // Map "Info" to an object within the first section without breaking
  // linearization. The patch is done in place, so the replacement must be
  // exactly as long as the text it overwrites.
  static constexpr char kOriginalInfo[] = "/Info 27 0 R";
  static constexpr char kRemappedInfo[] = "/Info 29 0 R";
  static_assert(sizeof(kOriginalInfo) == sizeof(kRemappedInfo),
                "in-place patch must not change the file length");
  ByteString data(loader.file_contents(), loader.file_length());
  absl::optional<size_t> index = data.Find(kOriginalInfo);
  ASSERT_TRUE(index.has_value());
  // Copy everything but the terminating NUL.
  memcpy(loader.file_contents() + *index, kRemappedInfo,
         sizeof(kRemappedInfo) - 1);

  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Keep supplying the requested segments only while the API reports "not
  // available yet". Any other non-available status ends the loop and fails
  // the assertion below instead of spinning forever.
  int status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // The "Info" dictionary should be available for the linearized document, if
  // it is located in the first page section.
  // Info was remapped to a dictionary with Type "Catalog"
  unsigned short buffer[100] = {0};
  EXPECT_TRUE(FPDF_GetMetaText(document(), "Type", buffer, sizeof(buffer)));
  EXPECT_EQ(L"Catalog", GetPlatformWString(buffer));
}
324
// Reading metadata must fail gracefully when "Info" refers to object 99
// instead of the original object 27.
TEST_F(FPDFDataAvailEmbedderTest, TryLoadInvalidInfo) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail fast if the fixture is missing instead of patching an empty buffer.
  ASSERT_TRUE(loader.IsOpened());

  // Map "Info" to an invalid object without breaking linearization. The patch
  // is done in place, so the replacement must be exactly as long as the text
  // it overwrites.
  static constexpr char kOriginalInfo[] = "/Info 27 0 R";
  static constexpr char kInvalidInfo[] = "/Info 99 0 R";
  static_assert(sizeof(kOriginalInfo) == sizeof(kInvalidInfo),
                "in-place patch must not change the file length");
  ByteString data(loader.file_contents(), loader.file_length());
  absl::optional<size_t> index = data.Find(kOriginalInfo);
  ASSERT_TRUE(index.has_value());
  // Copy everything but the terminating NUL.
  memcpy(loader.file_contents() + *index, kInvalidInfo,
         sizeof(kInvalidInfo) - 1);

  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Keep supplying the requested segments only while the API reports "not
  // available yet". Any other non-available status ends the loop and fails
  // the assertion below instead of spinning forever.
  int status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // Set all data available.
  loader.set_is_new_data_available(true);
  // Check a page, to load additional crossrefs.
  // NOTE(review): this comment used to say "second page", but page index 0 is
  // what is passed. Confirm which one is intended.
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 0, loader.hints()));

  // Test that api is robust enough to handle the bad case.
  EXPECT_FALSE(FPDF_GetMetaText(document(), "Type", nullptr, 0));
}
350
// Reading metadata must fail gracefully when the trailer has no usable
// "Info" key.
TEST_F(FPDFDataAvailEmbedderTest, TryLoadNonExistsInfo) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail fast if the fixture is missing instead of patching an empty buffer.
  ASSERT_TRUE(loader.IsOpened());

  // Break the "Info" parameter without breaking linearization. The patch is
  // done in place, so the replacement must be exactly as long as the text it
  // overwrites.
  static constexpr char kOriginalInfo[] = "/Info 27 0 R";
  static constexpr char kBrokenInfo[] = "/I_fo 27 0 R";
  static_assert(sizeof(kOriginalInfo) == sizeof(kBrokenInfo),
                "in-place patch must not change the file length");
  ByteString data(loader.file_contents(), loader.file_length());
  absl::optional<size_t> index = data.Find(kOriginalInfo);
  ASSERT_TRUE(index.has_value());
  // Copy everything but the terminating NUL.
  memcpy(loader.file_contents() + *index, kBrokenInfo,
         sizeof(kBrokenInfo) - 1);

  loader.set_is_new_data_available(false);
  CreateAvail(loader.file_avail(), loader.file_access());

  // Keep supplying the requested segments only while the API reports "not
  // available yet". Any other non-available status ends the loop and fails
  // the assertion below instead of spinning forever.
  int status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsDocAvail(avail(), loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, status);

  SetDocumentFromAvail();
  ASSERT_TRUE(document());

  // Set all data available.
  loader.set_is_new_data_available(true);
  // Check a page, to load additional crossrefs.
  // NOTE(review): this comment used to say "second page", but page index 0 is
  // what is passed. Confirm which one is intended.
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail(), 0, loader.hints()));

  // Test that api is robust enough to handle the bad case.
  EXPECT_FALSE(FPDF_GetMetaText(document(), "Type", nullptr, 0));
}
376
// Every FPDFAvail_* entry point must reject null handles with its documented
// error/default value rather than crashing.
TEST_F(FPDFDataAvailEmbedderTest, BadInputsToAPIs) {
  // Document-level queries.
  EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsDocAvail(nullptr, nullptr));
  EXPECT_FALSE(FPDFAvail_GetDocument(nullptr, nullptr));
  EXPECT_EQ(0, FPDFAvail_GetFirstPageNum(nullptr));

  // Page- and form-level queries.
  EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsPageAvail(nullptr, 0, nullptr));
  EXPECT_EQ(PDF_FORM_ERROR, FPDFAvail_IsFormAvail(nullptr, nullptr));

  // Linearization query.
  EXPECT_EQ(PDF_LINEARIZATION_UNKNOWN, FPDFAvail_IsLinearized(nullptr));
}
385
// A negative page index must be reported as "not available".
TEST_F(FPDFDataAvailEmbedderTest, NegativePageIndex) {
  TestAsyncLoader loader("linearized.pdf");
  // Fail fast if the fixture is missing instead of running the API against an
  // empty loader.
  ASSERT_TRUE(loader.IsOpened());
  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
  EXPECT_EQ(PDF_DATA_NOTAVAIL,
            FPDFAvail_IsPageAvail(avail(), -1, loader.hints()));
}
393
TEST_F(FPDFDataAvailEmbedderTest, Bug_1324189) {
  // Test passes if it doesn't crash.
  TestAsyncLoader loader("bug_1324189.pdf");
  // Without the fixture the regression would not be exercised at all, so
  // treat a missing file as a failure.
  ASSERT_TRUE(loader.IsOpened());
  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_NOTAVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
}
400
TEST_F(FPDFDataAvailEmbedderTest, Bug_1324503) {
  // Test passes if it doesn't crash.
  TestAsyncLoader loader("bug_1324503.pdf");
  // Without the fixture the regression would not be exercised at all, so
  // treat a missing file as a failure.
  ASSERT_TRUE(loader.IsOpened());
  CreateAvail(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_NOTAVAIL, FPDFAvail_IsDocAvail(avail(), loader.hints()));
}
407