xref: /aosp_15_r20/external/pdfium/core/fpdfdoc/cpdf_structtree.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2017 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfdoc/cpdf_structtree.h"
8 
9 #include <utility>
10 
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_dictionary.h"
13 #include "core/fpdfapi/parser/cpdf_document.h"
14 #include "core/fpdfapi/parser/cpdf_number.h"
15 #include "core/fpdfapi/parser/cpdf_reference.h"
16 #include "core/fpdfdoc/cpdf_numbertree.h"
17 #include "core/fpdfdoc/cpdf_structelement.h"
18 
19 namespace {
20 
IsTagged(const CPDF_Document * pDoc)21 bool IsTagged(const CPDF_Document* pDoc) {
22   RetainPtr<const CPDF_Dictionary> pMarkInfo =
23       pDoc->GetRoot()->GetDictFor("MarkInfo");
24   return pMarkInfo && pMarkInfo->GetIntegerFor("Marked");
25 }
26 
27 }  // namespace
28 
29 // static
LoadPage(const CPDF_Document * pDoc,RetainPtr<const CPDF_Dictionary> pPageDict)30 std::unique_ptr<CPDF_StructTree> CPDF_StructTree::LoadPage(
31     const CPDF_Document* pDoc,
32     RetainPtr<const CPDF_Dictionary> pPageDict) {
33   if (!IsTagged(pDoc))
34     return nullptr;
35 
36   auto pTree = std::make_unique<CPDF_StructTree>(pDoc);
37   pTree->LoadPageTree(std::move(pPageDict));
38   return pTree;
39 }
40 
CPDF_StructTree(const CPDF_Document * pDoc)41 CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc)
42     : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")),
43       m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr) {}
44 
45 CPDF_StructTree::~CPDF_StructTree() = default;
46 
// Translates a structure type through the role map. Falls back to `type`
// itself when there is no role map or the role map has no non-empty name
// entry for it.
ByteString CPDF_StructTree::GetRoleMapNameFor(const ByteString& type) const {
  if (!m_pRoleMap)
    return type;

  ByteString role = m_pRoleMap->GetNameFor(type);
  return role.IsEmpty() ? type : role;
}
55 
LoadPageTree(RetainPtr<const CPDF_Dictionary> pPageDict)56 void CPDF_StructTree::LoadPageTree(RetainPtr<const CPDF_Dictionary> pPageDict) {
57   m_pPage = std::move(pPageDict);
58   if (!m_pTreeRoot)
59     return;
60 
61   RetainPtr<const CPDF_Object> pKids = m_pTreeRoot->GetDirectObjectFor("K");
62   if (!pKids)
63     return;
64 
65   size_t kids_count;
66   if (pKids->IsDictionary())
67     kids_count = 1;
68   else if (const CPDF_Array* pArray = pKids->AsArray())
69     kids_count = pArray->size();
70   else
71     return;
72 
73   m_Kids.clear();
74   m_Kids.resize(kids_count);
75 
76   RetainPtr<const CPDF_Dictionary> pParentTree =
77       m_pTreeRoot->GetDictFor("ParentTree");
78   if (!pParentTree)
79     return;
80 
81   CPDF_NumberTree parent_tree(std::move(pParentTree));
82   int parents_id = m_pPage->GetIntegerFor("StructParents", -1);
83   if (parents_id < 0)
84     return;
85 
86   RetainPtr<const CPDF_Array> pParentArray =
87       ToArray(parent_tree.LookupValue(parents_id));
88   if (!pParentArray)
89     return;
90 
91   StructElementMap element_map;
92   for (size_t i = 0; i < pParentArray->size(); i++) {
93     RetainPtr<const CPDF_Dictionary> pParent = pParentArray->GetDictAt(i);
94     if (pParent)
95       AddPageNode(std::move(pParent), &element_map, 0);
96   }
97 }
98 
AddPageNode(RetainPtr<const CPDF_Dictionary> pDict,StructElementMap * map,int nLevel)99 RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode(
100     RetainPtr<const CPDF_Dictionary> pDict,
101     StructElementMap* map,
102     int nLevel) {
103   static constexpr int kStructTreeMaxRecursion = 32;
104   if (nLevel > kStructTreeMaxRecursion)
105     return nullptr;
106 
107   auto it = map->find(pDict);
108   if (it != map->end())
109     return it->second;
110 
111   RetainPtr<const CPDF_Dictionary> key(pDict);
112   auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, pDict);
113   (*map)[key] = pElement;
114   RetainPtr<const CPDF_Dictionary> pParent = pDict->GetDictFor("P");
115   if (!pParent || pParent->GetNameFor("Type") == "StructTreeRoot") {
116     if (!AddTopLevelNode(pDict, pElement))
117       map->erase(key);
118     return pElement;
119   }
120 
121   RetainPtr<CPDF_StructElement> pParentElement =
122       AddPageNode(std::move(pParent), map, nLevel + 1);
123   if (!pParentElement)
124     return pElement;
125 
126   if (!pParentElement->UpdateKidIfElement(pDict, pElement.Get())) {
127     map->erase(key);
128     return pElement;
129   }
130 
131   pElement->SetParent(pParentElement.Get());
132   return pElement;
133 }
134 
AddTopLevelNode(const CPDF_Dictionary * pDict,const RetainPtr<CPDF_StructElement> & pElement)135 bool CPDF_StructTree::AddTopLevelNode(
136     const CPDF_Dictionary* pDict,
137     const RetainPtr<CPDF_StructElement>& pElement) {
138   RetainPtr<const CPDF_Object> pObj = m_pTreeRoot->GetDirectObjectFor("K");
139   if (!pObj)
140     return false;
141 
142   if (pObj->IsDictionary()) {
143     if (pObj->GetObjNum() != pDict->GetObjNum())
144       return false;
145     m_Kids[0] = pElement;
146   }
147 
148   const CPDF_Array* pTopKids = pObj->AsArray();
149   if (!pTopKids)
150     return true;
151 
152   bool bSave = false;
153   for (size_t i = 0; i < pTopKids->size(); i++) {
154     RetainPtr<const CPDF_Reference> pKidRef =
155         ToReference(pTopKids->GetObjectAt(i));
156     if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) {
157       m_Kids[i] = pElement;
158       bSave = true;
159     }
160   }
161   return bSave;
162 }
163