xref: /aosp_15_r20/external/pdfium/fpdfsdk/fpdf_flatten.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2014 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "public/fpdf_flatten.h"
8 
9 #include <limits.h>
10 
11 #include <algorithm>
12 #include <sstream>
13 #include <utility>
14 #include <vector>
15 
16 #include "constants/annotation_common.h"
17 #include "constants/annotation_flags.h"
18 #include "constants/page_object.h"
19 #include "core/fpdfapi/edit/cpdf_contentstream_write_utils.h"
20 #include "core/fpdfapi/page/cpdf_page.h"
21 #include "core/fpdfapi/page/cpdf_pageobject.h"
22 #include "core/fpdfapi/parser/cpdf_array.h"
23 #include "core/fpdfapi/parser/cpdf_dictionary.h"
24 #include "core/fpdfapi/parser/cpdf_document.h"
25 #include "core/fpdfapi/parser/cpdf_name.h"
26 #include "core/fpdfapi/parser/cpdf_number.h"
27 #include "core/fpdfapi/parser/cpdf_reference.h"
28 #include "core/fpdfapi/parser/cpdf_stream.h"
29 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
30 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
31 #include "core/fpdfdoc/cpdf_annot.h"
32 #include "core/fxcrt/fx_string_wrappers.h"
33 #include "fpdfsdk/cpdfsdk_helpers.h"
34 #include "third_party/base/notreached.h"
35 
// Selects which extreme GetMinMaxValue() computes.
enum FPDF_TYPE {
  MAX = 0,
  MIN = 1,
};

// Selects which rectangle edge GetMinMaxValue() reads.
enum FPDF_VALUE {
  TOP = 0,
  LEFT = 1,
  RIGHT = 2,
  BOTTOM = 3,
};
38 
39 namespace {
40 
IsValidRect(const CFX_FloatRect & rect,const CFX_FloatRect & rcPage)41 bool IsValidRect(const CFX_FloatRect& rect, const CFX_FloatRect& rcPage) {
42   constexpr float kMinSize = 0.000001f;
43   if (rect.IsEmpty() || rect.Width() < kMinSize || rect.Height() < kMinSize)
44     return false;
45 
46   if (rcPage.IsEmpty())
47     return true;
48 
49   constexpr float kMinBorderSize = 10.000001f;
50   return rect.left - rcPage.left >= -kMinBorderSize &&
51          rect.right - rcPage.right <= kMinBorderSize &&
52          rect.top - rcPage.top <= kMinBorderSize &&
53          rect.bottom - rcPage.bottom >= -kMinBorderSize;
54 }
55 
GetContentsRect(CPDF_Document * pDoc,RetainPtr<CPDF_Dictionary> pDict,std::vector<CFX_FloatRect> * pRectArray)56 void GetContentsRect(CPDF_Document* pDoc,
57                      RetainPtr<CPDF_Dictionary> pDict,
58                      std::vector<CFX_FloatRect>* pRectArray) {
59   auto pPDFPage = pdfium::MakeRetain<CPDF_Page>(pDoc, pDict);
60   pPDFPage->ParseContent();
61 
62   for (const auto& pPageObject : *pPDFPage) {
63     const CFX_FloatRect& rc = pPageObject->GetRect();
64     if (IsValidRect(rc, pDict->GetRectFor(pdfium::page_object::kMediaBox)))
65       pRectArray->push_back(rc);
66   }
67 }
68 
ParserStream(const CPDF_Dictionary * pPageDic,CPDF_Dictionary * pStream,std::vector<CFX_FloatRect> * pRectArray,std::vector<CPDF_Dictionary * > * pObjectArray)69 void ParserStream(const CPDF_Dictionary* pPageDic,
70                   CPDF_Dictionary* pStream,
71                   std::vector<CFX_FloatRect>* pRectArray,
72                   std::vector<CPDF_Dictionary*>* pObjectArray) {
73   if (!pStream)
74     return;
75   CFX_FloatRect rect;
76   if (pStream->KeyExist("Rect"))
77     rect = pStream->GetRectFor("Rect");
78   else if (pStream->KeyExist("BBox"))
79     rect = pStream->GetRectFor("BBox");
80 
81   if (IsValidRect(rect, pPageDic->GetRectFor(pdfium::page_object::kMediaBox)))
82     pRectArray->push_back(rect);
83 
84   pObjectArray->push_back(pStream);
85 }
86 
ParserAnnots(CPDF_Document * pSourceDoc,RetainPtr<CPDF_Dictionary> pPageDic,std::vector<CFX_FloatRect> * pRectArray,std::vector<CPDF_Dictionary * > * pObjectArray,int nUsage)87 int ParserAnnots(CPDF_Document* pSourceDoc,
88                  RetainPtr<CPDF_Dictionary> pPageDic,
89                  std::vector<CFX_FloatRect>* pRectArray,
90                  std::vector<CPDF_Dictionary*>* pObjectArray,
91                  int nUsage) {
92   if (!pSourceDoc)
93     return FLATTEN_FAIL;
94 
95   GetContentsRect(pSourceDoc, pPageDic, pRectArray);
96   RetainPtr<const CPDF_Array> pAnnots = pPageDic->GetArrayFor("Annots");
97   if (!pAnnots)
98     return FLATTEN_NOTHINGTODO;
99 
100   CPDF_ArrayLocker locker(pAnnots);
101   for (const auto& pAnnot : locker) {
102     RetainPtr<CPDF_Dictionary> pAnnotDict =
103         ToDictionary(pAnnot->GetMutableDirect());
104     if (!pAnnotDict)
105       continue;
106 
107     ByteString sSubtype =
108         pAnnotDict->GetByteStringFor(pdfium::annotation::kSubtype);
109     if (sSubtype == "Popup")
110       continue;
111 
112     int nAnnotFlag = pAnnotDict->GetIntegerFor("F");
113     if (nAnnotFlag & pdfium::annotation_flags::kHidden)
114       continue;
115 
116     bool bParseStream;
117     if (nUsage == FLAT_NORMALDISPLAY)
118       bParseStream = !(nAnnotFlag & pdfium::annotation_flags::kInvisible);
119     else
120       bParseStream = !!(nAnnotFlag & pdfium::annotation_flags::kPrint);
121     if (bParseStream)
122       ParserStream(pPageDic.Get(), pAnnotDict.Get(), pRectArray, pObjectArray);
123   }
124   return FLATTEN_SUCCESS;
125 }
126 
GetMinMaxValue(const std::vector<CFX_FloatRect> & array,FPDF_TYPE type,FPDF_VALUE value)127 float GetMinMaxValue(const std::vector<CFX_FloatRect>& array,
128                      FPDF_TYPE type,
129                      FPDF_VALUE value) {
130   if (array.empty())
131     return 0.0f;
132 
133   size_t nRects = array.size();
134   std::vector<float> pArray(nRects);
135   switch (value) {
136     case LEFT:
137       for (size_t i = 0; i < nRects; i++)
138         pArray[i] = array[i].left;
139       break;
140     case TOP:
141       for (size_t i = 0; i < nRects; i++)
142         pArray[i] = array[i].top;
143       break;
144     case RIGHT:
145       for (size_t i = 0; i < nRects; i++)
146         pArray[i] = array[i].right;
147       break;
148     case BOTTOM:
149       for (size_t i = 0; i < nRects; i++)
150         pArray[i] = array[i].bottom;
151       break;
152   }
153 
154   float fRet = pArray[0];
155   if (type == MAX) {
156     for (size_t i = 1; i < nRects; i++)
157       fRet = std::max(fRet, pArray[i]);
158   } else {
159     for (size_t i = 1; i < nRects; i++)
160       fRet = std::min(fRet, pArray[i]);
161   }
162   return fRet;
163 }
164 
CalculateRect(std::vector<CFX_FloatRect> * pRectArray)165 CFX_FloatRect CalculateRect(std::vector<CFX_FloatRect>* pRectArray) {
166   CFX_FloatRect rcRet;
167 
168   rcRet.left = GetMinMaxValue(*pRectArray, MIN, LEFT);
169   rcRet.top = GetMinMaxValue(*pRectArray, MAX, TOP);
170   rcRet.right = GetMinMaxValue(*pRectArray, MAX, RIGHT);
171   rcRet.bottom = GetMinMaxValue(*pRectArray, MIN, BOTTOM);
172 
173   return rcRet;
174 }
175 
// Builds the content-stream snippet that paints the XObject resource named
// |key| between a q / Q pair.
ByteString GenerateFlattenedContent(const ByteString& key) {
  ByteString sContent = "q 1 0 0 1 0 0 cm /";
  sContent += key;
  sContent += " Do Q";
  return sContent;
}
179 
NewIndirectContentsStreamReference(CPDF_Document * pDocument,const ByteString & contents)180 RetainPtr<CPDF_Reference> NewIndirectContentsStreamReference(
181     CPDF_Document* pDocument,
182     const ByteString& contents) {
183   auto pNewContents =
184       pDocument->NewIndirect<CPDF_Stream>(pDocument->New<CPDF_Dictionary>());
185   pNewContents->SetData(contents.raw_span());
186   return pNewContents->MakeReference(pDocument);
187 }
188 
SetPageContents(const ByteString & key,CPDF_Dictionary * pPage,CPDF_Document * pDocument)189 void SetPageContents(const ByteString& key,
190                      CPDF_Dictionary* pPage,
191                      CPDF_Document* pDocument) {
192   RetainPtr<CPDF_Array> pContentsArray =
193       pPage->GetMutableArrayFor(pdfium::page_object::kContents);
194   RetainPtr<CPDF_Stream> pContentsStream =
195       pPage->GetMutableStreamFor(pdfium::page_object::kContents);
196   if (!pContentsStream && !pContentsArray) {
197     if (!key.IsEmpty()) {
198       pPage->SetFor(pdfium::page_object::kContents,
199                     NewIndirectContentsStreamReference(
200                         pDocument, GenerateFlattenedContent(key)));
201     }
202     return;
203   }
204 
205   pPage->ConvertToIndirectObjectFor(pdfium::page_object::kContents, pDocument);
206   if (pContentsArray) {
207     pContentsArray->InsertAt(
208         0, NewIndirectContentsStreamReference(pDocument, "q"));
209     pContentsArray->Append(NewIndirectContentsStreamReference(pDocument, "Q"));
210   } else {
211     ByteString sStream = "q\n";
212     {
213       auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pContentsStream);
214       pAcc->LoadAllDataFiltered();
215       sStream += ByteString(pAcc->GetSpan());
216       sStream += "\nQ";
217     }
218     pContentsStream->SetDataAndRemoveFilter(sStream.raw_span());
219     pContentsArray = pDocument->NewIndirect<CPDF_Array>();
220     pContentsArray->AppendNew<CPDF_Reference>(pDocument,
221                                               pContentsStream->GetObjNum());
222     pPage->SetNewFor<CPDF_Reference>(pdfium::page_object::kContents, pDocument,
223                                      pContentsArray->GetObjNum());
224   }
225   if (!key.IsEmpty()) {
226     pContentsArray->Append(NewIndirectContentsStreamReference(
227         pDocument, GenerateFlattenedContent(key)));
228   }
229 }
230 
// Computes the scale-and-translate matrix that maps |rcStream|, after it has
// been transformed by |matrix| and normalized, onto |rcAnnot|.
//
// Returns a default-constructed matrix when |rcStream| is empty or when the
// transformed box has no positive width or height.
CFX_Matrix GetMatrix(const CFX_FloatRect& rcAnnot,
                     const CFX_FloatRect& rcStream,
                     const CFX_Matrix& matrix) {
  if (rcStream.IsEmpty())
    return CFX_Matrix();

  CFX_FloatRect rcTransformed = matrix.TransformRect(rcStream);
  rcTransformed.Normalize();

  // A degenerate |matrix| can collapse a non-empty |rcStream| to zero width
  // or height. Dividing by that would put inf/NaN into the returned matrix,
  // so fall back to the same result used for an empty |rcStream|. The negated
  // comparison also rejects NaN extents.
  const float fWidth = rcTransformed.Width();
  const float fHeight = rcTransformed.Height();
  if (!(fWidth > 0.0f) || !(fHeight > 0.0f))
    return CFX_Matrix();

  // Scale factors that stretch the transformed box to the annotation size.
  const float a = rcAnnot.Width() / fWidth;
  const float d = rcAnnot.Height() / fHeight;

  // Translation that aligns the scaled lower-left corner with |rcAnnot|'s.
  const float e = rcAnnot.left - rcTransformed.left * a;
  const float f = rcAnnot.bottom - rcTransformed.bottom * d;
  return CFX_Matrix(a, 0.0f, 0.0f, d, e, f);
}
247 
248 }  // namespace
249 
FPDFPage_Flatten(FPDF_PAGE page,int nFlag)250 FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag) {
251   CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
252   if (!page)
253     return FLATTEN_FAIL;
254 
255   CPDF_Document* pDocument = pPage->GetDocument();
256   RetainPtr<CPDF_Dictionary> pPageDict = pPage->GetMutableDict();
257   if (!pDocument)
258     return FLATTEN_FAIL;
259 
260   std::vector<CPDF_Dictionary*> ObjectArray;
261   std::vector<CFX_FloatRect> RectArray;
262   int iRet =
263       ParserAnnots(pDocument, pPageDict, &RectArray, &ObjectArray, nFlag);
264   if (iRet == FLATTEN_NOTHINGTODO || iRet == FLATTEN_FAIL)
265     return iRet;
266 
267   CFX_FloatRect rcMerger = CalculateRect(&RectArray);
268   CFX_FloatRect rcOriginalMB =
269       pPageDict->GetRectFor(pdfium::page_object::kMediaBox);
270   if (pPageDict->KeyExist(pdfium::page_object::kCropBox))
271     rcOriginalMB = pPageDict->GetRectFor(pdfium::page_object::kCropBox);
272 
273   rcOriginalMB.Normalize();
274   if (rcOriginalMB.IsEmpty())
275     rcOriginalMB = CFX_FloatRect(0.0f, 0.0f, 612.0f, 792.0f);
276 
277   CFX_FloatRect rcOriginalCB;
278   if (pPageDict->KeyExist(pdfium::page_object::kCropBox)) {
279     rcOriginalCB = pPageDict->GetRectFor(pdfium::page_object::kCropBox);
280     rcOriginalCB.Normalize();
281   }
282   if (rcOriginalCB.IsEmpty())
283     rcOriginalCB = rcOriginalMB;
284 
285   rcMerger.left = std::max(rcMerger.left, rcOriginalMB.left);
286   rcMerger.right = std::min(rcMerger.right, rcOriginalMB.right);
287   rcMerger.bottom = std::max(rcMerger.bottom, rcOriginalMB.bottom);
288   rcMerger.top = std::min(rcMerger.top, rcOriginalMB.top);
289 
290   pPageDict->SetRectFor(pdfium::page_object::kMediaBox, rcOriginalMB);
291   pPageDict->SetRectFor(pdfium::page_object::kCropBox, rcOriginalCB);
292 
293   RetainPtr<CPDF_Dictionary> pRes =
294       pPageDict->GetOrCreateDictFor(pdfium::page_object::kResources);
295   auto pNewXObject =
296       pDocument->NewIndirect<CPDF_Stream>(pDocument->New<CPDF_Dictionary>());
297   RetainPtr<CPDF_Dictionary> pPageXObject = pRes->GetOrCreateDictFor("XObject");
298 
299   ByteString key;
300   if (!ObjectArray.empty()) {
301     int i = 0;
302     while (i < INT_MAX) {
303       ByteString sKey = ByteString::Format("FFT%d", i);
304       if (!pPageXObject->KeyExist(sKey)) {
305         key = std::move(sKey);
306         break;
307       }
308       ++i;
309     }
310   }
311 
312   SetPageContents(key, pPageDict.Get(), pDocument);
313 
314   RetainPtr<CPDF_Dictionary> pNewXORes;
315   if (!key.IsEmpty()) {
316     pPageXObject->SetNewFor<CPDF_Reference>(key, pDocument,
317                                             pNewXObject->GetObjNum());
318 
319     RetainPtr<CPDF_Dictionary> pNewOXbjectDic = pNewXObject->GetMutableDict();
320     pNewXORes = pNewOXbjectDic->SetNewFor<CPDF_Dictionary>("Resources");
321     pNewOXbjectDic->SetNewFor<CPDF_Name>("Type", "XObject");
322     pNewOXbjectDic->SetNewFor<CPDF_Name>("Subtype", "Form");
323     pNewOXbjectDic->SetNewFor<CPDF_Number>("FormType", 1);
324     pNewOXbjectDic->SetRectFor("BBox", rcOriginalCB);
325   }
326 
327   for (size_t i = 0; i < ObjectArray.size(); ++i) {
328     CPDF_Dictionary* pAnnotDict = ObjectArray[i];
329     if (!pAnnotDict)
330       continue;
331 
332     CFX_FloatRect rcAnnot = pAnnotDict->GetRectFor(pdfium::annotation::kRect);
333     rcAnnot.Normalize();
334 
335     ByteString sAnnotState = pAnnotDict->GetByteStringFor("AS");
336     RetainPtr<CPDF_Dictionary> pAnnotAP =
337         pAnnotDict->GetMutableDictFor(pdfium::annotation::kAP);
338     if (!pAnnotAP)
339       continue;
340 
341     RetainPtr<CPDF_Stream> pAPStream = pAnnotAP->GetMutableStreamFor("N");
342     if (!pAPStream) {
343       RetainPtr<CPDF_Dictionary> pAPDict = pAnnotAP->GetMutableDictFor("N");
344       if (!pAPDict)
345         continue;
346 
347       if (!sAnnotState.IsEmpty()) {
348         pAPStream = pAPDict->GetMutableStreamFor(sAnnotState);
349       } else {
350         if (pAPDict->size() > 0) {
351           CPDF_DictionaryLocker locker(pAPDict);
352           RetainPtr<CPDF_Object> pFirstObj = locker.begin()->second;
353           if (pFirstObj) {
354             if (pFirstObj->IsReference())
355               pFirstObj = pFirstObj->GetMutableDirect();
356             if (!pFirstObj->IsStream())
357               continue;
358             pAPStream.Reset(pFirstObj->AsMutableStream());
359           }
360         }
361       }
362     }
363     if (!pAPStream)
364       continue;
365 
366     RetainPtr<const CPDF_Dictionary> pAPDict = pAPStream->GetDict();
367     CFX_FloatRect rcStream;
368     if (pAPDict->KeyExist("Rect"))
369       rcStream = pAPDict->GetRectFor("Rect");
370     else if (pAPDict->KeyExist("BBox"))
371       rcStream = pAPDict->GetRectFor("BBox");
372     rcStream.Normalize();
373 
374     if (rcStream.IsEmpty())
375       continue;
376 
377     RetainPtr<CPDF_Object> pObj = pAPStream;
378     if (pObj->IsInline()) {
379       pObj = pObj->Clone();
380       pDocument->AddIndirectObject(pObj);
381     }
382 
383     RetainPtr<CPDF_Dictionary> pObjDict = pObj->GetMutableDict();
384     if (pObjDict) {
385       pObjDict->SetNewFor<CPDF_Name>("Type", "XObject");
386       pObjDict->SetNewFor<CPDF_Name>("Subtype", "Form");
387     }
388 
389     RetainPtr<CPDF_Dictionary> pXObject =
390         pNewXORes->GetOrCreateDictFor("XObject");
391     ByteString sFormName = ByteString::Format("F%d", i);
392     pXObject->SetNewFor<CPDF_Reference>(sFormName, pDocument,
393                                         pObj->GetObjNum());
394 
395     ByteString sStream;
396     {
397       auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pNewXObject);
398       pAcc->LoadAllDataFiltered();
399       sStream = ByteString(pAcc->GetSpan());
400     }
401     CFX_Matrix matrix = pAPDict->GetMatrixFor("Matrix");
402     CFX_Matrix m = GetMatrix(rcAnnot, rcStream, matrix);
403     m.b = 0;
404     m.c = 0;
405     fxcrt::ostringstream buf;
406     WriteMatrix(buf, m);
407     ByteString str(buf);
408     sStream += ByteString::Format("q %s cm /%s Do Q\n", str.c_str(),
409                                   sFormName.c_str());
410     pNewXObject->SetDataAndRemoveFilter(sStream.raw_span());
411   }
412   pPageDict->RemoveFor("Annots");
413   return FLATTEN_SUCCESS;
414 }
415