xref: /aosp_15_r20/external/pdfium/core/fpdfdoc/cpdf_structelement.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2017 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfdoc/cpdf_structelement.h"
8 
9 #include <utility>
10 
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_dictionary.h"
13 #include "core/fpdfapi/parser/cpdf_name.h"
14 #include "core/fpdfapi/parser/cpdf_number.h"
15 #include "core/fpdfapi/parser/cpdf_object.h"
16 #include "core/fpdfapi/parser/cpdf_reference.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfdoc/cpdf_structtree.h"
19 #include "third_party/base/check.h"
20 
// Out-of-line definitions of Kid's special member functions. All three use
// the compiler-generated behavior.

// Default constructor.
CPDF_StructElement::Kid::Kid() = default;

// Copy constructor: member-wise copy of `that`.
CPDF_StructElement::Kid::Kid(const Kid& that) = default;

// Destructor.
CPDF_StructElement::Kid::~Kid() = default;
26 
// Creates a structure element for `pDict` belonging to `pTree`, then loads
// its kids from the dictionary.
//
// Both `pTree` and `pDict` are dereferenced here without a null check, so
// callers must pass non-null values.
CPDF_StructElement::CPDF_StructElement(const CPDF_StructTree* pTree,
                                       RetainPtr<const CPDF_Dictionary> pDict)
    : m_pTree(pTree),
      m_pDict(std::move(pDict)),
      // The element type is the "S" name of the dictionary passed through the
      // tree's role map. This reads m_pTree and m_pDict, so it relies on both
      // being initialized first, i.e. declared before m_Type in the class.
      // NOTE(review): the declaration order is not visible here - confirm.
      m_Type(m_pTree->GetRoleMapNameFor(m_pDict->GetNameFor("S"))) {
  LoadKids();
}
34 
~CPDF_StructElement()35 CPDF_StructElement::~CPDF_StructElement() {
36   for (auto& kid : m_Kids) {
37     if (kid.m_Type == Kid::kElement && kid.m_pElement) {
38       kid.m_pElement->SetParent(nullptr);
39     }
40   }
41 }
42 
GetObjType() const43 ByteString CPDF_StructElement::GetObjType() const {
44   return m_pDict->GetByteStringFor("Type");
45 }
46 
GetAltText() const47 WideString CPDF_StructElement::GetAltText() const {
48   return m_pDict->GetUnicodeTextFor("Alt");
49 }
50 
GetActualText() const51 WideString CPDF_StructElement::GetActualText() const {
52   return m_pDict->GetUnicodeTextFor("ActualText");
53 }
54 
GetTitle() const55 WideString CPDF_StructElement::GetTitle() const {
56   return m_pDict->GetUnicodeTextFor("T");
57 }
58 
GetID() const59 absl::optional<WideString> CPDF_StructElement::GetID() const {
60   RetainPtr<const CPDF_Object> obj = m_pDict->GetObjectFor("ID");
61   if (!obj || !obj->IsString())
62     return absl::nullopt;
63   return obj->GetUnicodeText();
64 }
65 
GetLang() const66 absl::optional<WideString> CPDF_StructElement::GetLang() const {
67   RetainPtr<const CPDF_Object> obj = m_pDict->GetObjectFor("Lang");
68   if (!obj || !obj->IsString())
69     return absl::nullopt;
70   return obj->GetUnicodeText();
71 }
72 
GetA() const73 RetainPtr<const CPDF_Object> CPDF_StructElement::GetA() const {
74   return m_pDict->GetObjectFor("A");
75 }
76 
GetK() const77 RetainPtr<const CPDF_Object> CPDF_StructElement::GetK() const {
78   return m_pDict->GetObjectFor("K");
79 }
80 
CountKids() const81 size_t CPDF_StructElement::CountKids() const {
82   return m_Kids.size();
83 }
84 
GetKidIfElement(size_t index) const85 CPDF_StructElement* CPDF_StructElement::GetKidIfElement(size_t index) const {
86   return m_Kids[index].m_Type == Kid::kElement ? m_Kids[index].m_pElement.Get()
87                                                : nullptr;
88 }
89 
UpdateKidIfElement(const CPDF_Dictionary * pDict,CPDF_StructElement * pElement)90 bool CPDF_StructElement::UpdateKidIfElement(const CPDF_Dictionary* pDict,
91                                             CPDF_StructElement* pElement) {
92   bool bSave = false;
93   for (auto& kid : m_Kids) {
94     if (kid.m_Type == Kid::kElement && kid.m_pDict == pDict) {
95       kid.m_pElement.Reset(pElement);
96       bSave = true;
97     }
98   }
99   return bSave;
100 }
101 
LoadKids()102 void CPDF_StructElement::LoadKids() {
103   RetainPtr<const CPDF_Object> pObj = m_pDict->GetObjectFor("Pg");
104   const CPDF_Reference* pRef = ToReference(pObj.Get());
105   const uint32_t page_obj_num = pRef ? pRef->GetRefObjNum() : 0;
106   RetainPtr<const CPDF_Object> pKids = m_pDict->GetDirectObjectFor("K");
107   if (!pKids)
108     return;
109 
110   DCHECK(m_Kids.empty());
111   if (const CPDF_Array* pArray = pKids->AsArray()) {
112     m_Kids.resize(pArray->size());
113     for (size_t i = 0; i < pArray->size(); ++i) {
114       LoadKid(page_obj_num, pArray->GetDirectObjectAt(i), m_Kids[i]);
115     }
116     return;
117   }
118 
119   m_Kids.resize(1);
120   LoadKid(page_obj_num, std::move(pKids), m_Kids[0]);
121 }
122 
LoadKid(uint32_t page_obj_num,RetainPtr<const CPDF_Object> pKidObj,Kid & kid)123 void CPDF_StructElement::LoadKid(uint32_t page_obj_num,
124                                  RetainPtr<const CPDF_Object> pKidObj,
125                                  Kid& kid) {
126   if (!pKidObj)
127     return;
128 
129   if (pKidObj->IsNumber()) {
130     if (m_pTree->GetPageObjNum() != page_obj_num) {
131       return;
132     }
133 
134     kid.m_Type = Kid::kPageContent;
135     kid.m_ContentId = pKidObj->GetInteger();
136     kid.m_PageObjNum = page_obj_num;
137     return;
138   }
139 
140   const CPDF_Dictionary* pKidDict = pKidObj->AsDictionary();
141   if (!pKidDict)
142     return;
143 
144   if (RetainPtr<const CPDF_Reference> pRef =
145           ToReference(pKidDict->GetObjectFor("Pg"))) {
146     page_obj_num = pRef->GetRefObjNum();
147   }
148   ByteString type = pKidDict->GetNameFor("Type");
149   if ((type == "MCR" || type == "OBJR") &&
150       m_pTree->GetPageObjNum() != page_obj_num) {
151     return;
152   }
153 
154   if (type == "MCR") {
155     kid.m_Type = Kid::kStreamContent;
156     RetainPtr<const CPDF_Reference> pRef =
157         ToReference(pKidDict->GetObjectFor("Stm"));
158     kid.m_RefObjNum = pRef ? pRef->GetRefObjNum() : 0;
159     kid.m_PageObjNum = page_obj_num;
160     kid.m_ContentId = pKidDict->GetIntegerFor("MCID");
161     return;
162   }
163 
164   if (type == "OBJR") {
165     kid.m_Type = Kid::kObject;
166     RetainPtr<const CPDF_Reference> pObj =
167         ToReference(pKidDict->GetObjectFor("Obj"));
168     kid.m_RefObjNum = pObj ? pObj->GetRefObjNum() : 0;
169     kid.m_PageObjNum = page_obj_num;
170     return;
171   }
172 
173   kid.m_Type = Kid::kElement;
174   kid.m_pDict.Reset(pKidDict);
175 }
176