1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "public/fpdf_structtree.h"
6
7 #include <memory>
8
9 #include "core/fpdfapi/page/cpdf_page.h"
10 #include "core/fpdfapi/parser/cpdf_array.h"
11 #include "core/fpdfapi/parser/cpdf_dictionary.h"
12 #include "core/fpdfdoc/cpdf_structelement.h"
13 #include "core/fpdfdoc/cpdf_structtree.h"
14 #include "core/fxcrt/fx_safe_types.h"
15 #include "core/fxcrt/stl_util.h"
16 #include "fpdfsdk/cpdfsdk_helpers.h"
17 #include "third_party/base/numerics/safe_conversions.h"
18
19 namespace {
20
WideStringToBuffer(const WideString & str,void * buffer,unsigned long buflen)21 unsigned long WideStringToBuffer(const WideString& str,
22 void* buffer,
23 unsigned long buflen) {
24 if (str.IsEmpty())
25 return 0;
26
27 ByteString encodedStr = str.ToUTF16LE();
28 const unsigned long len =
29 pdfium::base::checked_cast<unsigned long>(encodedStr.GetLength());
30 if (buffer && len <= buflen)
31 memcpy(buffer, encodedStr.c_str(), len);
32 return len;
33 }
34
GetMcidFromDict(const CPDF_Dictionary * dict)35 int GetMcidFromDict(const CPDF_Dictionary* dict) {
36 if (dict && dict->GetNameFor("Type") == "MCR") {
37 RetainPtr<const CPDF_Object> obj = dict->GetObjectFor("MCID");
38 if (obj && obj->IsNumber())
39 return obj->GetInteger();
40 }
41 return -1;
42 }
43
44 } // namespace
45
46 FPDF_EXPORT FPDF_STRUCTTREE FPDF_CALLCONV
FPDF_StructTree_GetForPage(FPDF_PAGE page)47 FPDF_StructTree_GetForPage(FPDF_PAGE page) {
48 CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
49 if (!pPage)
50 return nullptr;
51
52 // Caller takes onwership.
53 return FPDFStructTreeFromCPDFStructTree(
54 CPDF_StructTree::LoadPage(pPage->GetDocument(), pPage->GetDict())
55 .release());
56 }
57
58 FPDF_EXPORT void FPDF_CALLCONV
FPDF_StructTree_Close(FPDF_STRUCTTREE struct_tree)59 FPDF_StructTree_Close(FPDF_STRUCTTREE struct_tree) {
60 std::unique_ptr<CPDF_StructTree>(
61 CPDFStructTreeFromFPDFStructTree(struct_tree));
62 }
63
64 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructTree_CountChildren(FPDF_STRUCTTREE struct_tree)65 FPDF_StructTree_CountChildren(FPDF_STRUCTTREE struct_tree) {
66 CPDF_StructTree* tree = CPDFStructTreeFromFPDFStructTree(struct_tree);
67 if (!tree)
68 return -1;
69
70 FX_SAFE_INT32 tmp_size = tree->CountTopElements();
71 return tmp_size.ValueOrDefault(-1);
72 }
73
74 FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV
FPDF_StructTree_GetChildAtIndex(FPDF_STRUCTTREE struct_tree,int index)75 FPDF_StructTree_GetChildAtIndex(FPDF_STRUCTTREE struct_tree, int index) {
76 CPDF_StructTree* tree = CPDFStructTreeFromFPDFStructTree(struct_tree);
77 if (!tree || index < 0 ||
78 static_cast<size_t>(index) >= tree->CountTopElements()) {
79 return nullptr;
80 }
81 return FPDFStructElementFromCPDFStructElement(
82 tree->GetTopElement(static_cast<size_t>(index)));
83 }
84
85 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)86 FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element,
87 void* buffer,
88 unsigned long buflen) {
89 CPDF_StructElement* elem =
90 CPDFStructElementFromFPDFStructElement(struct_element);
91 return elem ? WideStringToBuffer(elem->GetAltText(), buffer, buflen) : 0;
92 }
93
94 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetActualText(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)95 FPDF_StructElement_GetActualText(FPDF_STRUCTELEMENT struct_element,
96 void* buffer,
97 unsigned long buflen) {
98 CPDF_StructElement* elem =
99 CPDFStructElementFromFPDFStructElement(struct_element);
100 return elem ? WideStringToBuffer(elem->GetActualText(), buffer, buflen) : 0;
101 }
102
103 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetID(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)104 FPDF_StructElement_GetID(FPDF_STRUCTELEMENT struct_element,
105 void* buffer,
106 unsigned long buflen) {
107 CPDF_StructElement* elem =
108 CPDFStructElementFromFPDFStructElement(struct_element);
109 if (!elem)
110 return 0;
111 absl::optional<WideString> id = elem->GetID();
112 if (!id.has_value())
113 return 0;
114 return Utf16EncodeMaybeCopyAndReturnLength(id.value(), buffer, buflen);
115 }
116
117 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetLang(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)118 FPDF_StructElement_GetLang(FPDF_STRUCTELEMENT struct_element,
119 void* buffer,
120 unsigned long buflen) {
121 CPDF_StructElement* elem =
122 CPDFStructElementFromFPDFStructElement(struct_element);
123 if (!elem)
124 return 0;
125 absl::optional<WideString> lang = elem->GetLang();
126 if (!lang.has_value())
127 return 0;
128 return Utf16EncodeMaybeCopyAndReturnLength(lang.value(), buffer, buflen);
129 }
130
131 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetAttributeCount(FPDF_STRUCTELEMENT struct_element)132 FPDF_StructElement_GetAttributeCount(FPDF_STRUCTELEMENT struct_element) {
133 CPDF_StructElement* elem =
134 CPDFStructElementFromFPDFStructElement(struct_element);
135 if (!elem)
136 return -1;
137 RetainPtr<const CPDF_Object> attr_obj = elem->GetA();
138 if (!attr_obj) {
139 return -1;
140 }
141 attr_obj = attr_obj->GetDirect();
142 if (!attr_obj)
143 return -1;
144 if (attr_obj->IsArray())
145 return fxcrt::CollectionSize<int>(*attr_obj->AsArray());
146 return attr_obj->IsDictionary() ? 1 : -1;
147 }
148
149 FPDF_EXPORT FPDF_STRUCTELEMENT_ATTR FPDF_CALLCONV
FPDF_StructElement_GetAttributeAtIndex(FPDF_STRUCTELEMENT struct_element,int index)150 FPDF_StructElement_GetAttributeAtIndex(FPDF_STRUCTELEMENT struct_element,
151 int index) {
152 CPDF_StructElement* elem =
153 CPDFStructElementFromFPDFStructElement(struct_element);
154 if (!elem)
155 return nullptr;
156
157 RetainPtr<const CPDF_Object> attr_obj = elem->GetA();
158 if (!attr_obj)
159 return nullptr;
160
161 attr_obj = attr_obj->GetDirect();
162 if (!attr_obj) {
163 return nullptr;
164 }
165 if (attr_obj->IsDictionary()) {
166 return index == 0 ? FPDFStructElementAttrFromCPDFDictionary(
167 attr_obj->AsDictionary())
168 : nullptr;
169 }
170 if (attr_obj->IsArray()) {
171 const CPDF_Array* array = attr_obj->AsArray();
172 if (index < 0 || static_cast<size_t>(index) >= array->size())
173 return nullptr;
174
175 // TODO(tsepez): should embedder take a reference here?
176 // Unretained reference in public API. NOLINTNEXTLINE
177 return FPDFStructElementAttrFromCPDFDictionary(array->GetDictAt(index));
178 }
179 return nullptr;
180 }
181
182 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetStringAttribute(FPDF_STRUCTELEMENT struct_element,FPDF_BYTESTRING attr_name,void * buffer,unsigned long buflen)183 FPDF_StructElement_GetStringAttribute(FPDF_STRUCTELEMENT struct_element,
184 FPDF_BYTESTRING attr_name,
185 void* buffer,
186 unsigned long buflen) {
187 CPDF_StructElement* elem =
188 CPDFStructElementFromFPDFStructElement(struct_element);
189 if (!elem)
190 return 0;
191 RetainPtr<const CPDF_Array> array = ToArray(elem->GetA());
192 if (!array)
193 return 0;
194 CPDF_ArrayLocker locker(array);
195 for (const RetainPtr<CPDF_Object>& obj : locker) {
196 const CPDF_Dictionary* obj_dict = obj->AsDictionary();
197 if (!obj_dict)
198 continue;
199 RetainPtr<const CPDF_Object> attr = obj_dict->GetObjectFor(attr_name);
200 if (!attr || !(attr->IsString() || attr->IsName()))
201 continue;
202 return Utf16EncodeMaybeCopyAndReturnLength(attr->GetUnicodeText(), buffer,
203 buflen);
204 }
205 return 0;
206 }
207
208 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetMarkedContentID(FPDF_STRUCTELEMENT struct_element)209 FPDF_StructElement_GetMarkedContentID(FPDF_STRUCTELEMENT struct_element) {
210 CPDF_StructElement* elem =
211 CPDFStructElementFromFPDFStructElement(struct_element);
212 if (!elem)
213 return -1;
214 RetainPtr<const CPDF_Object> p = elem->GetK();
215 return p && p->IsNumber() ? p->GetInteger() : -1;
216 }
217
218 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)219 FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element,
220 void* buffer,
221 unsigned long buflen) {
222 CPDF_StructElement* elem =
223 CPDFStructElementFromFPDFStructElement(struct_element);
224 return elem ? WideStringToBuffer(
225 WideString::FromUTF8(elem->GetType().AsStringView()),
226 buffer, buflen)
227 : 0;
228 }
229
230 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetObjType(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)231 FPDF_StructElement_GetObjType(FPDF_STRUCTELEMENT struct_element,
232 void* buffer,
233 unsigned long buflen) {
234 CPDF_StructElement* elem =
235 CPDFStructElementFromFPDFStructElement(struct_element);
236 return elem ? WideStringToBuffer(
237 WideString::FromUTF8(elem->GetObjType().AsStringView()),
238 buffer, buflen)
239 : 0;
240 }
241
242 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)243 FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,
244 void* buffer,
245 unsigned long buflen) {
246 CPDF_StructElement* elem =
247 CPDFStructElementFromFPDFStructElement(struct_element);
248 return elem ? WideStringToBuffer(elem->GetTitle(), buffer, buflen) : 0;
249 }
250
251 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element)252 FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element) {
253 CPDF_StructElement* elem =
254 CPDFStructElementFromFPDFStructElement(struct_element);
255 if (!elem)
256 return -1;
257
258 FX_SAFE_INT32 tmp_size = elem->CountKids();
259 return tmp_size.ValueOrDefault(-1);
260 }
261
262 FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV
FPDF_StructElement_GetChildAtIndex(FPDF_STRUCTELEMENT struct_element,int index)263 FPDF_StructElement_GetChildAtIndex(FPDF_STRUCTELEMENT struct_element,
264 int index) {
265 CPDF_StructElement* elem =
266 CPDFStructElementFromFPDFStructElement(struct_element);
267 if (!elem || index < 0 || static_cast<size_t>(index) >= elem->CountKids())
268 return nullptr;
269
270 return FPDFStructElementFromCPDFStructElement(elem->GetKidIfElement(index));
271 }
272
273 FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV
FPDF_StructElement_GetParent(FPDF_STRUCTELEMENT struct_element)274 FPDF_StructElement_GetParent(FPDF_STRUCTELEMENT struct_element) {
275 CPDF_StructElement* elem =
276 CPDFStructElementFromFPDFStructElement(struct_element);
277 CPDF_StructElement* parent = elem ? elem->GetParent() : nullptr;
278 if (!parent) {
279 return nullptr;
280 }
281 return FPDFStructElementFromCPDFStructElement(parent);
282 }
283
284 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_Attr_GetCount(FPDF_STRUCTELEMENT_ATTR struct_attribute)285 FPDF_StructElement_Attr_GetCount(FPDF_STRUCTELEMENT_ATTR struct_attribute) {
286 const CPDF_Dictionary* dict =
287 CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
288 if (!dict)
289 return -1;
290 return fxcrt::CollectionSize<int>(*dict);
291 }
292
293 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetName(FPDF_STRUCTELEMENT_ATTR struct_attribute,int index,void * buffer,unsigned long buflen,unsigned long * out_buflen)294 FPDF_StructElement_Attr_GetName(FPDF_STRUCTELEMENT_ATTR struct_attribute,
295 int index,
296 void* buffer,
297 unsigned long buflen,
298 unsigned long* out_buflen) {
299 if (!out_buflen) {
300 return false;
301 }
302
303 const CPDF_Dictionary* dict =
304 CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
305 if (!dict)
306 return false;
307
308 CPDF_DictionaryLocker locker(dict);
309 for (auto& it : locker) {
310 if (index == 0) {
311 *out_buflen =
312 NulTerminateMaybeCopyAndReturnLength(it.first, buffer, buflen);
313 return true;
314 }
315 --index;
316 }
317 return false;
318 }
319
320 FPDF_EXPORT FPDF_OBJECT_TYPE FPDF_CALLCONV
FPDF_StructElement_Attr_GetType(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name)321 FPDF_StructElement_Attr_GetType(FPDF_STRUCTELEMENT_ATTR struct_attribute,
322 FPDF_BYTESTRING name) {
323 const CPDF_Dictionary* dict =
324 CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
325 if (!dict)
326 return FPDF_OBJECT_UNKNOWN;
327
328 RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
329 return obj ? obj->GetType() : FPDF_OBJECT_UNKNOWN;
330 }
331
FPDF_StructElement_Attr_GetBooleanValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,FPDF_BOOL * out_value)332 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_StructElement_Attr_GetBooleanValue(
333 FPDF_STRUCTELEMENT_ATTR struct_attribute,
334 FPDF_BYTESTRING name,
335 FPDF_BOOL* out_value) {
336 if (!out_value)
337 return false;
338
339 const CPDF_Dictionary* dict =
340 CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
341 if (!dict)
342 return false;
343
344 RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
345 if (!obj || !obj->IsBoolean())
346 return false;
347
348 *out_value = obj->GetInteger();
349 return true;
350 }
351
352 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetNumberValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,float * out_value)353 FPDF_StructElement_Attr_GetNumberValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,
354 FPDF_BYTESTRING name,
355 float* out_value) {
356 if (!out_value)
357 return false;
358
359 const CPDF_Dictionary* dict =
360 CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
361 if (!dict)
362 return false;
363
364 RetainPtr<const CPDF_Object> obj = dict->GetDirectObjectFor(name);
365 if (!obj || !obj->IsNumber())
366 return false;
367
368 *out_value = obj->GetNumber();
369 return true;
370 }
371
372 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetStringValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,void * buffer,unsigned long buflen,unsigned long * out_buflen)373 FPDF_StructElement_Attr_GetStringValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,
374 FPDF_BYTESTRING name,
375 void* buffer,
376 unsigned long buflen,
377 unsigned long* out_buflen) {
378 if (!out_buflen)
379 return false;
380
381 const CPDF_Dictionary* dict =
382 CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
383 if (!dict)
384 return false;
385
386 RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
387 if (!obj || !(obj->IsString() || obj->IsName()))
388 return false;
389
390 *out_buflen = Utf16EncodeMaybeCopyAndReturnLength(
391 WideString::FromUTF8(obj->GetString().AsStringView()), buffer, buflen);
392 return true;
393 }
394
395 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetBlobValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,void * buffer,unsigned long buflen,unsigned long * out_buflen)396 FPDF_StructElement_Attr_GetBlobValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,
397 FPDF_BYTESTRING name,
398 void* buffer,
399 unsigned long buflen,
400 unsigned long* out_buflen) {
401 if (!out_buflen)
402 return false;
403
404 const CPDF_Dictionary* dict =
405 CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
406 if (!dict)
407 return false;
408
409 RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
410 if (!obj || !obj->IsString())
411 return false;
412
413 ByteString result = obj->GetString();
414 const unsigned long len =
415 pdfium::base::checked_cast<unsigned long>(result.GetLength());
416 if (buffer && len <= buflen)
417 memcpy(buffer, result.c_str(), len);
418
419 *out_buflen = len;
420 return true;
421 }
422
423 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetMarkedContentIdCount(FPDF_STRUCTELEMENT struct_element)424 FPDF_StructElement_GetMarkedContentIdCount(FPDF_STRUCTELEMENT struct_element) {
425 CPDF_StructElement* elem =
426 CPDFStructElementFromFPDFStructElement(struct_element);
427 if (!elem)
428 return -1;
429 RetainPtr<const CPDF_Object> p = elem->GetK();
430 if (!p)
431 return -1;
432
433 if (p->IsNumber() || p->IsDictionary())
434 return 1;
435
436 return p->IsArray() ? fxcrt::CollectionSize<int>(*p->AsArray()) : -1;
437 }
438
439 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetMarkedContentIdAtIndex(FPDF_STRUCTELEMENT struct_element,int index)440 FPDF_StructElement_GetMarkedContentIdAtIndex(FPDF_STRUCTELEMENT struct_element,
441 int index) {
442 CPDF_StructElement* elem =
443 CPDFStructElementFromFPDFStructElement(struct_element);
444 if (!elem)
445 return -1;
446 RetainPtr<const CPDF_Object> p = elem->GetK();
447 if (!p)
448 return -1;
449
450 if (p->IsNumber())
451 return index == 0 ? p->GetInteger() : -1;
452
453 if (p->IsDictionary())
454 return GetMcidFromDict(p->GetDict().Get());
455
456 if (p->IsArray()) {
457 const CPDF_Array* array = p->AsArray();
458 if (index < 0 || static_cast<size_t>(index) >= array->size())
459 return -1;
460 RetainPtr<const CPDF_Object> array_elem = array->GetObjectAt(index);
461 if (array_elem->IsNumber())
462 return array_elem->GetInteger();
463 if (array_elem->IsDictionary()) {
464 return GetMcidFromDict(array_elem->GetDict().Get());
465 }
466 }
467 return -1;
468 }
469