1 // Copyright 2017 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfdoc/cpdf_structelement.h"
8
9 #include <utility>
10
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_dictionary.h"
13 #include "core/fpdfapi/parser/cpdf_name.h"
14 #include "core/fpdfapi/parser/cpdf_number.h"
15 #include "core/fpdfapi/parser/cpdf_object.h"
16 #include "core/fpdfapi/parser/cpdf_reference.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfdoc/cpdf_structtree.h"
19 #include "third_party/base/check.h"
20
21 CPDF_StructElement::Kid::Kid() = default;
22
23 CPDF_StructElement::Kid::Kid(const Kid& that) = default;
24
25 CPDF_StructElement::Kid::~Kid() = default;
26
CPDF_StructElement(const CPDF_StructTree * pTree,RetainPtr<const CPDF_Dictionary> pDict)27 CPDF_StructElement::CPDF_StructElement(const CPDF_StructTree* pTree,
28 RetainPtr<const CPDF_Dictionary> pDict)
29 : m_pTree(pTree),
30 m_pDict(std::move(pDict)),
31 m_Type(m_pTree->GetRoleMapNameFor(m_pDict->GetNameFor("S"))) {
32 LoadKids();
33 }
34
~CPDF_StructElement()35 CPDF_StructElement::~CPDF_StructElement() {
36 for (auto& kid : m_Kids) {
37 if (kid.m_Type == Kid::kElement && kid.m_pElement) {
38 kid.m_pElement->SetParent(nullptr);
39 }
40 }
41 }
42
GetObjType() const43 ByteString CPDF_StructElement::GetObjType() const {
44 return m_pDict->GetByteStringFor("Type");
45 }
46
GetAltText() const47 WideString CPDF_StructElement::GetAltText() const {
48 return m_pDict->GetUnicodeTextFor("Alt");
49 }
50
GetActualText() const51 WideString CPDF_StructElement::GetActualText() const {
52 return m_pDict->GetUnicodeTextFor("ActualText");
53 }
54
GetTitle() const55 WideString CPDF_StructElement::GetTitle() const {
56 return m_pDict->GetUnicodeTextFor("T");
57 }
58
GetID() const59 absl::optional<WideString> CPDF_StructElement::GetID() const {
60 RetainPtr<const CPDF_Object> obj = m_pDict->GetObjectFor("ID");
61 if (!obj || !obj->IsString())
62 return absl::nullopt;
63 return obj->GetUnicodeText();
64 }
65
GetLang() const66 absl::optional<WideString> CPDF_StructElement::GetLang() const {
67 RetainPtr<const CPDF_Object> obj = m_pDict->GetObjectFor("Lang");
68 if (!obj || !obj->IsString())
69 return absl::nullopt;
70 return obj->GetUnicodeText();
71 }
72
GetA() const73 RetainPtr<const CPDF_Object> CPDF_StructElement::GetA() const {
74 return m_pDict->GetObjectFor("A");
75 }
76
GetK() const77 RetainPtr<const CPDF_Object> CPDF_StructElement::GetK() const {
78 return m_pDict->GetObjectFor("K");
79 }
80
CountKids() const81 size_t CPDF_StructElement::CountKids() const {
82 return m_Kids.size();
83 }
84
GetKidIfElement(size_t index) const85 CPDF_StructElement* CPDF_StructElement::GetKidIfElement(size_t index) const {
86 return m_Kids[index].m_Type == Kid::kElement ? m_Kids[index].m_pElement.Get()
87 : nullptr;
88 }
89
UpdateKidIfElement(const CPDF_Dictionary * pDict,CPDF_StructElement * pElement)90 bool CPDF_StructElement::UpdateKidIfElement(const CPDF_Dictionary* pDict,
91 CPDF_StructElement* pElement) {
92 bool bSave = false;
93 for (auto& kid : m_Kids) {
94 if (kid.m_Type == Kid::kElement && kid.m_pDict == pDict) {
95 kid.m_pElement.Reset(pElement);
96 bSave = true;
97 }
98 }
99 return bSave;
100 }
101
LoadKids()102 void CPDF_StructElement::LoadKids() {
103 RetainPtr<const CPDF_Object> pObj = m_pDict->GetObjectFor("Pg");
104 const CPDF_Reference* pRef = ToReference(pObj.Get());
105 const uint32_t page_obj_num = pRef ? pRef->GetRefObjNum() : 0;
106 RetainPtr<const CPDF_Object> pKids = m_pDict->GetDirectObjectFor("K");
107 if (!pKids)
108 return;
109
110 DCHECK(m_Kids.empty());
111 if (const CPDF_Array* pArray = pKids->AsArray()) {
112 m_Kids.resize(pArray->size());
113 for (size_t i = 0; i < pArray->size(); ++i) {
114 LoadKid(page_obj_num, pArray->GetDirectObjectAt(i), m_Kids[i]);
115 }
116 return;
117 }
118
119 m_Kids.resize(1);
120 LoadKid(page_obj_num, std::move(pKids), m_Kids[0]);
121 }
122
LoadKid(uint32_t page_obj_num,RetainPtr<const CPDF_Object> pKidObj,Kid & kid)123 void CPDF_StructElement::LoadKid(uint32_t page_obj_num,
124 RetainPtr<const CPDF_Object> pKidObj,
125 Kid& kid) {
126 if (!pKidObj)
127 return;
128
129 if (pKidObj->IsNumber()) {
130 if (m_pTree->GetPageObjNum() != page_obj_num) {
131 return;
132 }
133
134 kid.m_Type = Kid::kPageContent;
135 kid.m_ContentId = pKidObj->GetInteger();
136 kid.m_PageObjNum = page_obj_num;
137 return;
138 }
139
140 const CPDF_Dictionary* pKidDict = pKidObj->AsDictionary();
141 if (!pKidDict)
142 return;
143
144 if (RetainPtr<const CPDF_Reference> pRef =
145 ToReference(pKidDict->GetObjectFor("Pg"))) {
146 page_obj_num = pRef->GetRefObjNum();
147 }
148 ByteString type = pKidDict->GetNameFor("Type");
149 if ((type == "MCR" || type == "OBJR") &&
150 m_pTree->GetPageObjNum() != page_obj_num) {
151 return;
152 }
153
154 if (type == "MCR") {
155 kid.m_Type = Kid::kStreamContent;
156 RetainPtr<const CPDF_Reference> pRef =
157 ToReference(pKidDict->GetObjectFor("Stm"));
158 kid.m_RefObjNum = pRef ? pRef->GetRefObjNum() : 0;
159 kid.m_PageObjNum = page_obj_num;
160 kid.m_ContentId = pKidDict->GetIntegerFor("MCID");
161 return;
162 }
163
164 if (type == "OBJR") {
165 kid.m_Type = Kid::kObject;
166 RetainPtr<const CPDF_Reference> pObj =
167 ToReference(pKidDict->GetObjectFor("Obj"));
168 kid.m_RefObjNum = pObj ? pObj->GetRefObjNum() : 0;
169 kid.m_PageObjNum = page_obj_num;
170 return;
171 }
172
173 kid.m_Type = Kid::kElement;
174 kid.m_pDict.Reset(pKidDict);
175 }
176