1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "public/fpdf_flatten.h"
8
9 #include <limits.h>
10
11 #include <algorithm>
12 #include <sstream>
13 #include <utility>
14 #include <vector>
15
16 #include "constants/annotation_common.h"
17 #include "constants/annotation_flags.h"
18 #include "constants/page_object.h"
19 #include "core/fpdfapi/edit/cpdf_contentstream_write_utils.h"
20 #include "core/fpdfapi/page/cpdf_page.h"
21 #include "core/fpdfapi/page/cpdf_pageobject.h"
22 #include "core/fpdfapi/parser/cpdf_array.h"
23 #include "core/fpdfapi/parser/cpdf_dictionary.h"
24 #include "core/fpdfapi/parser/cpdf_document.h"
25 #include "core/fpdfapi/parser/cpdf_name.h"
26 #include "core/fpdfapi/parser/cpdf_number.h"
27 #include "core/fpdfapi/parser/cpdf_reference.h"
28 #include "core/fpdfapi/parser/cpdf_stream.h"
29 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
30 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
31 #include "core/fpdfdoc/cpdf_annot.h"
32 #include "core/fxcrt/fx_string_wrappers.h"
33 #include "fpdfsdk/cpdfsdk_helpers.h"
34 #include "third_party/base/notreached.h"
35
// Selects which extreme GetMinMaxValue() computes over a set of rectangles.
enum FPDF_TYPE {
  MAX,  // Largest value.
  MIN,  // Smallest value.
};

// Selects which rectangle edge GetMinMaxValue() inspects.
enum FPDF_VALUE {
  TOP,
  LEFT,
  RIGHT,
  BOTTOM,
};
38
39 namespace {
40
IsValidRect(const CFX_FloatRect & rect,const CFX_FloatRect & rcPage)41 bool IsValidRect(const CFX_FloatRect& rect, const CFX_FloatRect& rcPage) {
42 constexpr float kMinSize = 0.000001f;
43 if (rect.IsEmpty() || rect.Width() < kMinSize || rect.Height() < kMinSize)
44 return false;
45
46 if (rcPage.IsEmpty())
47 return true;
48
49 constexpr float kMinBorderSize = 10.000001f;
50 return rect.left - rcPage.left >= -kMinBorderSize &&
51 rect.right - rcPage.right <= kMinBorderSize &&
52 rect.top - rcPage.top <= kMinBorderSize &&
53 rect.bottom - rcPage.bottom >= -kMinBorderSize;
54 }
55
GetContentsRect(CPDF_Document * pDoc,RetainPtr<CPDF_Dictionary> pDict,std::vector<CFX_FloatRect> * pRectArray)56 void GetContentsRect(CPDF_Document* pDoc,
57 RetainPtr<CPDF_Dictionary> pDict,
58 std::vector<CFX_FloatRect>* pRectArray) {
59 auto pPDFPage = pdfium::MakeRetain<CPDF_Page>(pDoc, pDict);
60 pPDFPage->ParseContent();
61
62 for (const auto& pPageObject : *pPDFPage) {
63 const CFX_FloatRect& rc = pPageObject->GetRect();
64 if (IsValidRect(rc, pDict->GetRectFor(pdfium::page_object::kMediaBox)))
65 pRectArray->push_back(rc);
66 }
67 }
68
ParserStream(const CPDF_Dictionary * pPageDic,CPDF_Dictionary * pStream,std::vector<CFX_FloatRect> * pRectArray,std::vector<CPDF_Dictionary * > * pObjectArray)69 void ParserStream(const CPDF_Dictionary* pPageDic,
70 CPDF_Dictionary* pStream,
71 std::vector<CFX_FloatRect>* pRectArray,
72 std::vector<CPDF_Dictionary*>* pObjectArray) {
73 if (!pStream)
74 return;
75 CFX_FloatRect rect;
76 if (pStream->KeyExist("Rect"))
77 rect = pStream->GetRectFor("Rect");
78 else if (pStream->KeyExist("BBox"))
79 rect = pStream->GetRectFor("BBox");
80
81 if (IsValidRect(rect, pPageDic->GetRectFor(pdfium::page_object::kMediaBox)))
82 pRectArray->push_back(rect);
83
84 pObjectArray->push_back(pStream);
85 }
86
ParserAnnots(CPDF_Document * pSourceDoc,RetainPtr<CPDF_Dictionary> pPageDic,std::vector<CFX_FloatRect> * pRectArray,std::vector<CPDF_Dictionary * > * pObjectArray,int nUsage)87 int ParserAnnots(CPDF_Document* pSourceDoc,
88 RetainPtr<CPDF_Dictionary> pPageDic,
89 std::vector<CFX_FloatRect>* pRectArray,
90 std::vector<CPDF_Dictionary*>* pObjectArray,
91 int nUsage) {
92 if (!pSourceDoc)
93 return FLATTEN_FAIL;
94
95 GetContentsRect(pSourceDoc, pPageDic, pRectArray);
96 RetainPtr<const CPDF_Array> pAnnots = pPageDic->GetArrayFor("Annots");
97 if (!pAnnots)
98 return FLATTEN_NOTHINGTODO;
99
100 CPDF_ArrayLocker locker(pAnnots);
101 for (const auto& pAnnot : locker) {
102 RetainPtr<CPDF_Dictionary> pAnnotDict =
103 ToDictionary(pAnnot->GetMutableDirect());
104 if (!pAnnotDict)
105 continue;
106
107 ByteString sSubtype =
108 pAnnotDict->GetByteStringFor(pdfium::annotation::kSubtype);
109 if (sSubtype == "Popup")
110 continue;
111
112 int nAnnotFlag = pAnnotDict->GetIntegerFor("F");
113 if (nAnnotFlag & pdfium::annotation_flags::kHidden)
114 continue;
115
116 bool bParseStream;
117 if (nUsage == FLAT_NORMALDISPLAY)
118 bParseStream = !(nAnnotFlag & pdfium::annotation_flags::kInvisible);
119 else
120 bParseStream = !!(nAnnotFlag & pdfium::annotation_flags::kPrint);
121 if (bParseStream)
122 ParserStream(pPageDic.Get(), pAnnotDict.Get(), pRectArray, pObjectArray);
123 }
124 return FLATTEN_SUCCESS;
125 }
126
GetMinMaxValue(const std::vector<CFX_FloatRect> & array,FPDF_TYPE type,FPDF_VALUE value)127 float GetMinMaxValue(const std::vector<CFX_FloatRect>& array,
128 FPDF_TYPE type,
129 FPDF_VALUE value) {
130 if (array.empty())
131 return 0.0f;
132
133 size_t nRects = array.size();
134 std::vector<float> pArray(nRects);
135 switch (value) {
136 case LEFT:
137 for (size_t i = 0; i < nRects; i++)
138 pArray[i] = array[i].left;
139 break;
140 case TOP:
141 for (size_t i = 0; i < nRects; i++)
142 pArray[i] = array[i].top;
143 break;
144 case RIGHT:
145 for (size_t i = 0; i < nRects; i++)
146 pArray[i] = array[i].right;
147 break;
148 case BOTTOM:
149 for (size_t i = 0; i < nRects; i++)
150 pArray[i] = array[i].bottom;
151 break;
152 }
153
154 float fRet = pArray[0];
155 if (type == MAX) {
156 for (size_t i = 1; i < nRects; i++)
157 fRet = std::max(fRet, pArray[i]);
158 } else {
159 for (size_t i = 1; i < nRects; i++)
160 fRet = std::min(fRet, pArray[i]);
161 }
162 return fRet;
163 }
164
CalculateRect(std::vector<CFX_FloatRect> * pRectArray)165 CFX_FloatRect CalculateRect(std::vector<CFX_FloatRect>* pRectArray) {
166 CFX_FloatRect rcRet;
167
168 rcRet.left = GetMinMaxValue(*pRectArray, MIN, LEFT);
169 rcRet.top = GetMinMaxValue(*pRectArray, MAX, TOP);
170 rcRet.right = GetMinMaxValue(*pRectArray, MAX, RIGHT);
171 rcRet.bottom = GetMinMaxValue(*pRectArray, MIN, BOTTOM);
172
173 return rcRet;
174 }
175
// Builds the content-stream snippet that paints the XObject resource named
// |key|: the "Do" operator preceded by an identity "cm" matrix, wrapped in a
// "q" ... "Q" pair.
ByteString GenerateFlattenedContent(const ByteString& key) {
  ByteString content("q 1 0 0 1 0 0 cm /");
  content += key;
  content += " Do Q";
  return content;
}
179
NewIndirectContentsStreamReference(CPDF_Document * pDocument,const ByteString & contents)180 RetainPtr<CPDF_Reference> NewIndirectContentsStreamReference(
181 CPDF_Document* pDocument,
182 const ByteString& contents) {
183 auto pNewContents =
184 pDocument->NewIndirect<CPDF_Stream>(pDocument->New<CPDF_Dictionary>());
185 pNewContents->SetData(contents.raw_span());
186 return pNewContents->MakeReference(pDocument);
187 }
188
SetPageContents(const ByteString & key,CPDF_Dictionary * pPage,CPDF_Document * pDocument)189 void SetPageContents(const ByteString& key,
190 CPDF_Dictionary* pPage,
191 CPDF_Document* pDocument) {
192 RetainPtr<CPDF_Array> pContentsArray =
193 pPage->GetMutableArrayFor(pdfium::page_object::kContents);
194 RetainPtr<CPDF_Stream> pContentsStream =
195 pPage->GetMutableStreamFor(pdfium::page_object::kContents);
196 if (!pContentsStream && !pContentsArray) {
197 if (!key.IsEmpty()) {
198 pPage->SetFor(pdfium::page_object::kContents,
199 NewIndirectContentsStreamReference(
200 pDocument, GenerateFlattenedContent(key)));
201 }
202 return;
203 }
204
205 pPage->ConvertToIndirectObjectFor(pdfium::page_object::kContents, pDocument);
206 if (pContentsArray) {
207 pContentsArray->InsertAt(
208 0, NewIndirectContentsStreamReference(pDocument, "q"));
209 pContentsArray->Append(NewIndirectContentsStreamReference(pDocument, "Q"));
210 } else {
211 ByteString sStream = "q\n";
212 {
213 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pContentsStream);
214 pAcc->LoadAllDataFiltered();
215 sStream += ByteString(pAcc->GetSpan());
216 sStream += "\nQ";
217 }
218 pContentsStream->SetDataAndRemoveFilter(sStream.raw_span());
219 pContentsArray = pDocument->NewIndirect<CPDF_Array>();
220 pContentsArray->AppendNew<CPDF_Reference>(pDocument,
221 pContentsStream->GetObjNum());
222 pPage->SetNewFor<CPDF_Reference>(pdfium::page_object::kContents, pDocument,
223 pContentsArray->GetObjNum());
224 }
225 if (!key.IsEmpty()) {
226 pContentsArray->Append(NewIndirectContentsStreamReference(
227 pDocument, GenerateFlattenedContent(key)));
228 }
229 }
230
// Computes the scale-and-translate matrix that maps |rcStream|, after it has
// been transformed by |matrix| and normalized, onto |rcAnnot|.
//
// Returns the default-constructed matrix when |rcStream| is empty, or when the
// transformed rectangle is empty so that no finite scale factor exists.
CFX_Matrix GetMatrix(const CFX_FloatRect& rcAnnot,
                     const CFX_FloatRect& rcStream,
                     const CFX_Matrix& matrix) {
  if (rcStream.IsEmpty())
    return CFX_Matrix();

  CFX_FloatRect rcTransformed = matrix.TransformRect(rcStream);
  rcTransformed.Normalize();

  // A degenerate |matrix| can collapse a non-empty |rcStream| to zero width
  // or height. Dividing by that would be a float divide-by-zero and would put
  // inf/NaN into the returned matrix.
  if (rcTransformed.IsEmpty())
    return CFX_Matrix();

  const float a = rcAnnot.Width() / rcTransformed.Width();
  const float d = rcAnnot.Height() / rcTransformed.Height();

  // Translate so the scaled lower-left corner lands on |rcAnnot|'s.
  const float e = rcAnnot.left - rcTransformed.left * a;
  const float f = rcAnnot.bottom - rcTransformed.bottom * d;
  return CFX_Matrix(a, 0.0f, 0.0f, d, e, f);
}
247
248 } // namespace
249
FPDFPage_Flatten(FPDF_PAGE page,int nFlag)250 FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag) {
251 CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
252 if (!page)
253 return FLATTEN_FAIL;
254
255 CPDF_Document* pDocument = pPage->GetDocument();
256 RetainPtr<CPDF_Dictionary> pPageDict = pPage->GetMutableDict();
257 if (!pDocument)
258 return FLATTEN_FAIL;
259
260 std::vector<CPDF_Dictionary*> ObjectArray;
261 std::vector<CFX_FloatRect> RectArray;
262 int iRet =
263 ParserAnnots(pDocument, pPageDict, &RectArray, &ObjectArray, nFlag);
264 if (iRet == FLATTEN_NOTHINGTODO || iRet == FLATTEN_FAIL)
265 return iRet;
266
267 CFX_FloatRect rcMerger = CalculateRect(&RectArray);
268 CFX_FloatRect rcOriginalMB =
269 pPageDict->GetRectFor(pdfium::page_object::kMediaBox);
270 if (pPageDict->KeyExist(pdfium::page_object::kCropBox))
271 rcOriginalMB = pPageDict->GetRectFor(pdfium::page_object::kCropBox);
272
273 rcOriginalMB.Normalize();
274 if (rcOriginalMB.IsEmpty())
275 rcOriginalMB = CFX_FloatRect(0.0f, 0.0f, 612.0f, 792.0f);
276
277 CFX_FloatRect rcOriginalCB;
278 if (pPageDict->KeyExist(pdfium::page_object::kCropBox)) {
279 rcOriginalCB = pPageDict->GetRectFor(pdfium::page_object::kCropBox);
280 rcOriginalCB.Normalize();
281 }
282 if (rcOriginalCB.IsEmpty())
283 rcOriginalCB = rcOriginalMB;
284
285 rcMerger.left = std::max(rcMerger.left, rcOriginalMB.left);
286 rcMerger.right = std::min(rcMerger.right, rcOriginalMB.right);
287 rcMerger.bottom = std::max(rcMerger.bottom, rcOriginalMB.bottom);
288 rcMerger.top = std::min(rcMerger.top, rcOriginalMB.top);
289
290 pPageDict->SetRectFor(pdfium::page_object::kMediaBox, rcOriginalMB);
291 pPageDict->SetRectFor(pdfium::page_object::kCropBox, rcOriginalCB);
292
293 RetainPtr<CPDF_Dictionary> pRes =
294 pPageDict->GetOrCreateDictFor(pdfium::page_object::kResources);
295 auto pNewXObject =
296 pDocument->NewIndirect<CPDF_Stream>(pDocument->New<CPDF_Dictionary>());
297 RetainPtr<CPDF_Dictionary> pPageXObject = pRes->GetOrCreateDictFor("XObject");
298
299 ByteString key;
300 if (!ObjectArray.empty()) {
301 int i = 0;
302 while (i < INT_MAX) {
303 ByteString sKey = ByteString::Format("FFT%d", i);
304 if (!pPageXObject->KeyExist(sKey)) {
305 key = std::move(sKey);
306 break;
307 }
308 ++i;
309 }
310 }
311
312 SetPageContents(key, pPageDict.Get(), pDocument);
313
314 RetainPtr<CPDF_Dictionary> pNewXORes;
315 if (!key.IsEmpty()) {
316 pPageXObject->SetNewFor<CPDF_Reference>(key, pDocument,
317 pNewXObject->GetObjNum());
318
319 RetainPtr<CPDF_Dictionary> pNewOXbjectDic = pNewXObject->GetMutableDict();
320 pNewXORes = pNewOXbjectDic->SetNewFor<CPDF_Dictionary>("Resources");
321 pNewOXbjectDic->SetNewFor<CPDF_Name>("Type", "XObject");
322 pNewOXbjectDic->SetNewFor<CPDF_Name>("Subtype", "Form");
323 pNewOXbjectDic->SetNewFor<CPDF_Number>("FormType", 1);
324 pNewOXbjectDic->SetRectFor("BBox", rcOriginalCB);
325 }
326
327 for (size_t i = 0; i < ObjectArray.size(); ++i) {
328 CPDF_Dictionary* pAnnotDict = ObjectArray[i];
329 if (!pAnnotDict)
330 continue;
331
332 CFX_FloatRect rcAnnot = pAnnotDict->GetRectFor(pdfium::annotation::kRect);
333 rcAnnot.Normalize();
334
335 ByteString sAnnotState = pAnnotDict->GetByteStringFor("AS");
336 RetainPtr<CPDF_Dictionary> pAnnotAP =
337 pAnnotDict->GetMutableDictFor(pdfium::annotation::kAP);
338 if (!pAnnotAP)
339 continue;
340
341 RetainPtr<CPDF_Stream> pAPStream = pAnnotAP->GetMutableStreamFor("N");
342 if (!pAPStream) {
343 RetainPtr<CPDF_Dictionary> pAPDict = pAnnotAP->GetMutableDictFor("N");
344 if (!pAPDict)
345 continue;
346
347 if (!sAnnotState.IsEmpty()) {
348 pAPStream = pAPDict->GetMutableStreamFor(sAnnotState);
349 } else {
350 if (pAPDict->size() > 0) {
351 CPDF_DictionaryLocker locker(pAPDict);
352 RetainPtr<CPDF_Object> pFirstObj = locker.begin()->second;
353 if (pFirstObj) {
354 if (pFirstObj->IsReference())
355 pFirstObj = pFirstObj->GetMutableDirect();
356 if (!pFirstObj->IsStream())
357 continue;
358 pAPStream.Reset(pFirstObj->AsMutableStream());
359 }
360 }
361 }
362 }
363 if (!pAPStream)
364 continue;
365
366 RetainPtr<const CPDF_Dictionary> pAPDict = pAPStream->GetDict();
367 CFX_FloatRect rcStream;
368 if (pAPDict->KeyExist("Rect"))
369 rcStream = pAPDict->GetRectFor("Rect");
370 else if (pAPDict->KeyExist("BBox"))
371 rcStream = pAPDict->GetRectFor("BBox");
372 rcStream.Normalize();
373
374 if (rcStream.IsEmpty())
375 continue;
376
377 RetainPtr<CPDF_Object> pObj = pAPStream;
378 if (pObj->IsInline()) {
379 pObj = pObj->Clone();
380 pDocument->AddIndirectObject(pObj);
381 }
382
383 RetainPtr<CPDF_Dictionary> pObjDict = pObj->GetMutableDict();
384 if (pObjDict) {
385 pObjDict->SetNewFor<CPDF_Name>("Type", "XObject");
386 pObjDict->SetNewFor<CPDF_Name>("Subtype", "Form");
387 }
388
389 RetainPtr<CPDF_Dictionary> pXObject =
390 pNewXORes->GetOrCreateDictFor("XObject");
391 ByteString sFormName = ByteString::Format("F%d", i);
392 pXObject->SetNewFor<CPDF_Reference>(sFormName, pDocument,
393 pObj->GetObjNum());
394
395 ByteString sStream;
396 {
397 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pNewXObject);
398 pAcc->LoadAllDataFiltered();
399 sStream = ByteString(pAcc->GetSpan());
400 }
401 CFX_Matrix matrix = pAPDict->GetMatrixFor("Matrix");
402 CFX_Matrix m = GetMatrix(rcAnnot, rcStream, matrix);
403 m.b = 0;
404 m.c = 0;
405 fxcrt::ostringstream buf;
406 WriteMatrix(buf, m);
407 ByteString str(buf);
408 sStream += ByteString::Format("q %s cm /%s Do Q\n", str.c_str(),
409 sFormName.c_str());
410 pNewXObject->SetDataAndRemoveFilter(sStream.raw_span());
411 }
412 pPageDict->RemoveFor("Annots");
413 return FLATTEN_SUCCESS;
414 }
415