xref: /aosp_15_r20/external/pdfium/core/fpdfapi/page/cpdf_contentparser.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/page/cpdf_contentparser.h"
8 
9 #include <utility>
10 
11 #include "constants/page_object.h"
12 #include "core/fpdfapi/font/cpdf_type3char.h"
13 #include "core/fpdfapi/page/cpdf_allstates.h"
14 #include "core/fpdfapi/page/cpdf_page.h"
15 #include "core/fpdfapi/page/cpdf_pageobject.h"
16 #include "core/fpdfapi/page/cpdf_path.h"
17 #include "core/fpdfapi/parser/cpdf_array.h"
18 #include "core/fpdfapi/parser/cpdf_dictionary.h"
19 #include "core/fpdfapi/parser/cpdf_stream.h"
20 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
21 #include "core/fxcrt/fixed_try_alloc_zeroed_data_vector.h"
22 #include "core/fxcrt/fx_safe_types.h"
23 #include "core/fxcrt/pauseindicator_iface.h"
24 #include "core/fxcrt/span_util.h"
25 #include "core/fxcrt/stl_util.h"
26 #include "core/fxge/cfx_fillrenderoptions.h"
27 #include "third_party/base/check.h"
28 #include "third_party/base/check_op.h"
29 
CPDF_ContentParser(CPDF_Page * pPage)30 CPDF_ContentParser::CPDF_ContentParser(CPDF_Page* pPage)
31     : m_CurrentStage(Stage::kGetContent), m_pPageObjectHolder(pPage) {
32   DCHECK(pPage);
33   if (!pPage->GetDocument()) {
34     m_CurrentStage = Stage::kComplete;
35     return;
36   }
37 
38   RetainPtr<CPDF_Object> pContent =
39       pPage->GetMutableDict()->GetMutableDirectObjectFor(
40           pdfium::page_object::kContents);
41   if (!pContent) {
42     HandlePageContentFailure();
43     return;
44   }
45 
46   const CPDF_Stream* pStream = pContent->AsStream();
47   if (pStream) {
48     HandlePageContentStream(pStream);
49     return;
50   }
51 
52   const CPDF_Array* pArray = pContent->AsArray();
53   if (pArray && HandlePageContentArray(pArray))
54     return;
55 
56   HandlePageContentFailure();
57 }
58 
CPDF_ContentParser(RetainPtr<const CPDF_Stream> pStream,CPDF_PageObjectHolder * pPageObjectHolder,const CPDF_AllStates * pGraphicStates,const CFX_Matrix * pParentMatrix,CPDF_Type3Char * pType3Char,CPDF_Form::RecursionState * recursion_state)59 CPDF_ContentParser::CPDF_ContentParser(
60     RetainPtr<const CPDF_Stream> pStream,
61     CPDF_PageObjectHolder* pPageObjectHolder,
62     const CPDF_AllStates* pGraphicStates,
63     const CFX_Matrix* pParentMatrix,
64     CPDF_Type3Char* pType3Char,
65     CPDF_Form::RecursionState* recursion_state)
66     : m_CurrentStage(Stage::kParse),
67       m_pPageObjectHolder(pPageObjectHolder),
68       m_pType3Char(pType3Char) {
69   DCHECK(m_pPageObjectHolder);
70   CFX_Matrix form_matrix =
71       m_pPageObjectHolder->GetDict()->GetMatrixFor("Matrix");
72   if (pGraphicStates)
73     form_matrix.Concat(pGraphicStates->m_CTM);
74 
75   RetainPtr<const CPDF_Array> pBBox =
76       m_pPageObjectHolder->GetDict()->GetArrayFor("BBox");
77   CFX_FloatRect form_bbox;
78   CPDF_Path ClipPath;
79   if (pBBox) {
80     form_bbox = pBBox->GetRect();
81     ClipPath.Emplace();
82     ClipPath.AppendFloatRect(form_bbox);
83     ClipPath.Transform(form_matrix);
84     if (pParentMatrix)
85       ClipPath.Transform(*pParentMatrix);
86 
87     form_bbox = form_matrix.TransformRect(form_bbox);
88     if (pParentMatrix)
89       form_bbox = pParentMatrix->TransformRect(form_bbox);
90   }
91 
92   RetainPtr<CPDF_Dictionary> pResources =
93       m_pPageObjectHolder->GetMutableDict()->GetMutableDictFor("Resources");
94   m_pParser = std::make_unique<CPDF_StreamContentParser>(
95       m_pPageObjectHolder->GetDocument(),
96       m_pPageObjectHolder->GetMutablePageResources(),
97       m_pPageObjectHolder->GetMutableResources(), pParentMatrix,
98       m_pPageObjectHolder, std::move(pResources), form_bbox, pGraphicStates,
99       recursion_state);
100   m_pParser->GetCurStates()->m_CTM = form_matrix;
101   m_pParser->GetCurStates()->m_ParentMatrix = form_matrix;
102   if (ClipPath.HasRef()) {
103     m_pParser->GetCurStates()->m_ClipPath.AppendPathWithAutoMerge(
104         ClipPath, CFX_FillRenderOptions::FillType::kWinding);
105   }
106   if (m_pPageObjectHolder->GetTransparency().IsGroup()) {
107     CPDF_GeneralState* pState = &m_pParser->GetCurStates()->m_GeneralState;
108     pState->SetBlendType(BlendMode::kNormal);
109     pState->SetStrokeAlpha(1.0f);
110     pState->SetFillAlpha(1.0f);
111     pState->SetSoftMask(nullptr);
112   }
113   m_pSingleStream = pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pStream));
114   m_pSingleStream->LoadAllDataFiltered();
115   m_Data = m_pSingleStream->GetSpan();
116 }
117 
118 CPDF_ContentParser::~CPDF_ContentParser() = default;
119 
120 // Returning |true| means that there is more content to be processed and
121 // Continue() should be called again. Returning |false| means that we've
122 // completed the parse and Continue() is complete.
Continue(PauseIndicatorIface * pPause)123 bool CPDF_ContentParser::Continue(PauseIndicatorIface* pPause) {
124   while (m_CurrentStage == Stage::kGetContent) {
125     m_CurrentStage = GetContent();
126     if (pPause && pPause->NeedToPauseNow())
127       return true;
128   }
129 
130   if (m_CurrentStage == Stage::kPrepareContent)
131     m_CurrentStage = PrepareContent();
132 
133   while (m_CurrentStage == Stage::kParse) {
134     m_CurrentStage = Parse();
135     if (pPause && pPause->NeedToPauseNow())
136       return true;
137   }
138 
139   if (m_CurrentStage == Stage::kCheckClip)
140     m_CurrentStage = CheckClip();
141 
142   DCHECK_EQ(m_CurrentStage, Stage::kComplete);
143   return false;
144 }
145 
GetContent()146 CPDF_ContentParser::Stage CPDF_ContentParser::GetContent() {
147   DCHECK_EQ(m_CurrentStage, Stage::kGetContent);
148   DCHECK(m_pPageObjectHolder->IsPage());
149   RetainPtr<const CPDF_Array> pContent =
150       m_pPageObjectHolder->GetDict()->GetArrayFor(
151           pdfium::page_object::kContents);
152   RetainPtr<const CPDF_Stream> pStreamObj = ToStream(
153       pContent ? pContent->GetDirectObjectAt(m_CurrentOffset) : nullptr);
154   m_StreamArray[m_CurrentOffset] =
155       pdfium::MakeRetain<CPDF_StreamAcc>(std::move(pStreamObj));
156   m_StreamArray[m_CurrentOffset]->LoadAllDataFiltered();
157   m_CurrentOffset++;
158 
159   return m_CurrentOffset == m_nStreams ? Stage::kPrepareContent
160                                        : Stage::kGetContent;
161 }
162 
PrepareContent()163 CPDF_ContentParser::Stage CPDF_ContentParser::PrepareContent() {
164   m_CurrentOffset = 0;
165 
166   if (m_StreamArray.empty()) {
167     m_Data = m_pSingleStream->GetSpan();
168     return Stage::kParse;
169   }
170 
171   FX_SAFE_UINT32 safe_size = 0;
172   for (const auto& stream : m_StreamArray) {
173     m_StreamSegmentOffsets.push_back(safe_size.ValueOrDie());
174     safe_size += stream->GetSize();
175     safe_size += 1;
176     if (!safe_size.IsValid())
177       return Stage::kComplete;
178   }
179 
180   const size_t buffer_size = safe_size.ValueOrDie();
181   FixedTryAllocZeroedDataVector<uint8_t> buffer(buffer_size);
182   if (buffer.empty()) {
183     m_Data.emplace<pdfium::span<const uint8_t>>();
184     return Stage::kComplete;
185   }
186 
187   size_t pos = 0;
188   auto data_span = buffer.writable_span();
189   for (const auto& stream : m_StreamArray) {
190     fxcrt::spancpy(data_span.subspan(pos), stream->GetSpan());
191     pos += stream->GetSize();
192     data_span[pos++] = ' ';
193   }
194   m_StreamArray.clear();
195   m_Data = std::move(buffer);
196   return Stage::kParse;
197 }
198 
Parse()199 CPDF_ContentParser::Stage CPDF_ContentParser::Parse() {
200   if (!m_pParser) {
201     m_RecursionState.parsed_set.clear();
202     m_RecursionState.form_count = 0;
203     m_pParser = std::make_unique<CPDF_StreamContentParser>(
204         m_pPageObjectHolder->GetDocument(),
205         m_pPageObjectHolder->GetMutablePageResources(), nullptr, nullptr,
206         m_pPageObjectHolder, m_pPageObjectHolder->GetMutableResources(),
207         m_pPageObjectHolder->GetBBox(), nullptr, &m_RecursionState);
208     m_pParser->GetCurStates()->m_ColorState.SetDefault();
209   }
210   if (m_CurrentOffset >= GetData().size())
211     return Stage::kCheckClip;
212 
213   if (m_StreamSegmentOffsets.empty())
214     m_StreamSegmentOffsets.push_back(0);
215 
216   static constexpr uint32_t kParseStepLimit = 100;
217   m_CurrentOffset += m_pParser->Parse(GetData(), m_CurrentOffset,
218                                       kParseStepLimit, m_StreamSegmentOffsets);
219   return Stage::kParse;
220 }
221 
CheckClip()222 CPDF_ContentParser::Stage CPDF_ContentParser::CheckClip() {
223   if (m_pType3Char) {
224     m_pType3Char->InitializeFromStreamData(m_pParser->IsColored(),
225                                            m_pParser->GetType3Data());
226   }
227 
228   for (auto& pObj : *m_pPageObjectHolder) {
229     if (!pObj->m_ClipPath.HasRef())
230       continue;
231     if (pObj->m_ClipPath.GetPathCount() != 1)
232       continue;
233     if (pObj->m_ClipPath.GetTextCount() > 0)
234       continue;
235 
236     CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0);
237     if (!ClipPath.IsRect() || pObj->IsShading())
238       continue;
239 
240     CFX_PointF point0 = ClipPath.GetPoint(0);
241     CFX_PointF point2 = ClipPath.GetPoint(2);
242     CFX_FloatRect old_rect(point0.x, point0.y, point2.x, point2.y);
243     if (old_rect.Contains(pObj->GetRect()))
244       pObj->m_ClipPath.SetNull();
245   }
246   return Stage::kComplete;
247 }
248 
HandlePageContentStream(const CPDF_Stream * pStream)249 void CPDF_ContentParser::HandlePageContentStream(const CPDF_Stream* pStream) {
250   m_pSingleStream =
251       pdfium::MakeRetain<CPDF_StreamAcc>(pdfium::WrapRetain(pStream));
252   m_pSingleStream->LoadAllDataFiltered();
253   m_CurrentStage = Stage::kPrepareContent;
254 }
255 
HandlePageContentArray(const CPDF_Array * pArray)256 bool CPDF_ContentParser::HandlePageContentArray(const CPDF_Array* pArray) {
257   m_nStreams = fxcrt::CollectionSize<uint32_t>(*pArray);
258   if (m_nStreams == 0)
259     return false;
260 
261   m_StreamArray.resize(m_nStreams);
262   return true;
263 }
264 
HandlePageContentFailure()265 void CPDF_ContentParser::HandlePageContentFailure() {
266   m_CurrentStage = Stage::kComplete;
267 }
268 
GetData() const269 pdfium::span<const uint8_t> CPDF_ContentParser::GetData() const {
270   if (is_owned())
271     return absl::get<FixedTryAllocZeroedDataVector<uint8_t>>(m_Data).span();
272   return absl::get<pdfium::span<const uint8_t>>(m_Data);
273 }
274