xref: /aosp_15_r20/external/pdfium/fpdfsdk/fpdf_structtree.cpp (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2016 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "public/fpdf_structtree.h"
6 
7 #include <memory>
8 
9 #include "core/fpdfapi/page/cpdf_page.h"
10 #include "core/fpdfapi/parser/cpdf_array.h"
11 #include "core/fpdfapi/parser/cpdf_dictionary.h"
12 #include "core/fpdfdoc/cpdf_structelement.h"
13 #include "core/fpdfdoc/cpdf_structtree.h"
14 #include "core/fxcrt/fx_safe_types.h"
15 #include "core/fxcrt/stl_util.h"
16 #include "fpdfsdk/cpdfsdk_helpers.h"
17 #include "third_party/base/numerics/safe_conversions.h"
18 
19 namespace {
20 
WideStringToBuffer(const WideString & str,void * buffer,unsigned long buflen)21 unsigned long WideStringToBuffer(const WideString& str,
22                                  void* buffer,
23                                  unsigned long buflen) {
24   if (str.IsEmpty())
25     return 0;
26 
27   ByteString encodedStr = str.ToUTF16LE();
28   const unsigned long len =
29       pdfium::base::checked_cast<unsigned long>(encodedStr.GetLength());
30   if (buffer && len <= buflen)
31     memcpy(buffer, encodedStr.c_str(), len);
32   return len;
33 }
34 
GetMcidFromDict(const CPDF_Dictionary * dict)35 int GetMcidFromDict(const CPDF_Dictionary* dict) {
36   if (dict && dict->GetNameFor("Type") == "MCR") {
37     RetainPtr<const CPDF_Object> obj = dict->GetObjectFor("MCID");
38     if (obj && obj->IsNumber())
39       return obj->GetInteger();
40   }
41   return -1;
42 }
43 
44 }  // namespace
45 
46 FPDF_EXPORT FPDF_STRUCTTREE FPDF_CALLCONV
FPDF_StructTree_GetForPage(FPDF_PAGE page)47 FPDF_StructTree_GetForPage(FPDF_PAGE page) {
48   CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
49   if (!pPage)
50     return nullptr;
51 
52   // Caller takes onwership.
53   return FPDFStructTreeFromCPDFStructTree(
54       CPDF_StructTree::LoadPage(pPage->GetDocument(), pPage->GetDict())
55           .release());
56 }
57 
58 FPDF_EXPORT void FPDF_CALLCONV
FPDF_StructTree_Close(FPDF_STRUCTTREE struct_tree)59 FPDF_StructTree_Close(FPDF_STRUCTTREE struct_tree) {
60   std::unique_ptr<CPDF_StructTree>(
61       CPDFStructTreeFromFPDFStructTree(struct_tree));
62 }
63 
64 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructTree_CountChildren(FPDF_STRUCTTREE struct_tree)65 FPDF_StructTree_CountChildren(FPDF_STRUCTTREE struct_tree) {
66   CPDF_StructTree* tree = CPDFStructTreeFromFPDFStructTree(struct_tree);
67   if (!tree)
68     return -1;
69 
70   FX_SAFE_INT32 tmp_size = tree->CountTopElements();
71   return tmp_size.ValueOrDefault(-1);
72 }
73 
74 FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV
FPDF_StructTree_GetChildAtIndex(FPDF_STRUCTTREE struct_tree,int index)75 FPDF_StructTree_GetChildAtIndex(FPDF_STRUCTTREE struct_tree, int index) {
76   CPDF_StructTree* tree = CPDFStructTreeFromFPDFStructTree(struct_tree);
77   if (!tree || index < 0 ||
78       static_cast<size_t>(index) >= tree->CountTopElements()) {
79     return nullptr;
80   }
81   return FPDFStructElementFromCPDFStructElement(
82       tree->GetTopElement(static_cast<size_t>(index)));
83 }
84 
85 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)86 FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element,
87                               void* buffer,
88                               unsigned long buflen) {
89   CPDF_StructElement* elem =
90       CPDFStructElementFromFPDFStructElement(struct_element);
91   return elem ? WideStringToBuffer(elem->GetAltText(), buffer, buflen) : 0;
92 }
93 
94 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetActualText(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)95 FPDF_StructElement_GetActualText(FPDF_STRUCTELEMENT struct_element,
96                                  void* buffer,
97                                  unsigned long buflen) {
98   CPDF_StructElement* elem =
99       CPDFStructElementFromFPDFStructElement(struct_element);
100   return elem ? WideStringToBuffer(elem->GetActualText(), buffer, buflen) : 0;
101 }
102 
103 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetID(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)104 FPDF_StructElement_GetID(FPDF_STRUCTELEMENT struct_element,
105                          void* buffer,
106                          unsigned long buflen) {
107   CPDF_StructElement* elem =
108       CPDFStructElementFromFPDFStructElement(struct_element);
109   if (!elem)
110     return 0;
111   absl::optional<WideString> id = elem->GetID();
112   if (!id.has_value())
113     return 0;
114   return Utf16EncodeMaybeCopyAndReturnLength(id.value(), buffer, buflen);
115 }
116 
117 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetLang(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)118 FPDF_StructElement_GetLang(FPDF_STRUCTELEMENT struct_element,
119                            void* buffer,
120                            unsigned long buflen) {
121   CPDF_StructElement* elem =
122       CPDFStructElementFromFPDFStructElement(struct_element);
123   if (!elem)
124     return 0;
125   absl::optional<WideString> lang = elem->GetLang();
126   if (!lang.has_value())
127     return 0;
128   return Utf16EncodeMaybeCopyAndReturnLength(lang.value(), buffer, buflen);
129 }
130 
131 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetAttributeCount(FPDF_STRUCTELEMENT struct_element)132 FPDF_StructElement_GetAttributeCount(FPDF_STRUCTELEMENT struct_element) {
133   CPDF_StructElement* elem =
134       CPDFStructElementFromFPDFStructElement(struct_element);
135   if (!elem)
136     return -1;
137   RetainPtr<const CPDF_Object> attr_obj = elem->GetA();
138   if (!attr_obj) {
139     return -1;
140   }
141   attr_obj = attr_obj->GetDirect();
142   if (!attr_obj)
143     return -1;
144   if (attr_obj->IsArray())
145     return fxcrt::CollectionSize<int>(*attr_obj->AsArray());
146   return attr_obj->IsDictionary() ? 1 : -1;
147 }
148 
149 FPDF_EXPORT FPDF_STRUCTELEMENT_ATTR FPDF_CALLCONV
FPDF_StructElement_GetAttributeAtIndex(FPDF_STRUCTELEMENT struct_element,int index)150 FPDF_StructElement_GetAttributeAtIndex(FPDF_STRUCTELEMENT struct_element,
151                                        int index) {
152   CPDF_StructElement* elem =
153       CPDFStructElementFromFPDFStructElement(struct_element);
154   if (!elem)
155     return nullptr;
156 
157   RetainPtr<const CPDF_Object> attr_obj = elem->GetA();
158   if (!attr_obj)
159     return nullptr;
160 
161   attr_obj = attr_obj->GetDirect();
162   if (!attr_obj) {
163     return nullptr;
164   }
165   if (attr_obj->IsDictionary()) {
166     return index == 0 ? FPDFStructElementAttrFromCPDFDictionary(
167                             attr_obj->AsDictionary())
168                       : nullptr;
169   }
170   if (attr_obj->IsArray()) {
171     const CPDF_Array* array = attr_obj->AsArray();
172     if (index < 0 || static_cast<size_t>(index) >= array->size())
173       return nullptr;
174 
175     // TODO(tsepez): should embedder take a reference here?
176     // Unretained reference in public API. NOLINTNEXTLINE
177     return FPDFStructElementAttrFromCPDFDictionary(array->GetDictAt(index));
178   }
179   return nullptr;
180 }
181 
182 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetStringAttribute(FPDF_STRUCTELEMENT struct_element,FPDF_BYTESTRING attr_name,void * buffer,unsigned long buflen)183 FPDF_StructElement_GetStringAttribute(FPDF_STRUCTELEMENT struct_element,
184                                       FPDF_BYTESTRING attr_name,
185                                       void* buffer,
186                                       unsigned long buflen) {
187   CPDF_StructElement* elem =
188       CPDFStructElementFromFPDFStructElement(struct_element);
189   if (!elem)
190     return 0;
191   RetainPtr<const CPDF_Array> array = ToArray(elem->GetA());
192   if (!array)
193     return 0;
194   CPDF_ArrayLocker locker(array);
195   for (const RetainPtr<CPDF_Object>& obj : locker) {
196     const CPDF_Dictionary* obj_dict = obj->AsDictionary();
197     if (!obj_dict)
198       continue;
199     RetainPtr<const CPDF_Object> attr = obj_dict->GetObjectFor(attr_name);
200     if (!attr || !(attr->IsString() || attr->IsName()))
201       continue;
202     return Utf16EncodeMaybeCopyAndReturnLength(attr->GetUnicodeText(), buffer,
203                                                buflen);
204   }
205   return 0;
206 }
207 
208 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetMarkedContentID(FPDF_STRUCTELEMENT struct_element)209 FPDF_StructElement_GetMarkedContentID(FPDF_STRUCTELEMENT struct_element) {
210   CPDF_StructElement* elem =
211       CPDFStructElementFromFPDFStructElement(struct_element);
212   if (!elem)
213     return -1;
214   RetainPtr<const CPDF_Object> p = elem->GetK();
215   return p && p->IsNumber() ? p->GetInteger() : -1;
216 }
217 
218 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)219 FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element,
220                            void* buffer,
221                            unsigned long buflen) {
222   CPDF_StructElement* elem =
223       CPDFStructElementFromFPDFStructElement(struct_element);
224   return elem ? WideStringToBuffer(
225                     WideString::FromUTF8(elem->GetType().AsStringView()),
226                     buffer, buflen)
227               : 0;
228 }
229 
230 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetObjType(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)231 FPDF_StructElement_GetObjType(FPDF_STRUCTELEMENT struct_element,
232                               void* buffer,
233                               unsigned long buflen) {
234   CPDF_StructElement* elem =
235       CPDFStructElementFromFPDFStructElement(struct_element);
236   return elem ? WideStringToBuffer(
237                     WideString::FromUTF8(elem->GetObjType().AsStringView()),
238                     buffer, buflen)
239               : 0;
240 }
241 
242 FPDF_EXPORT unsigned long FPDF_CALLCONV
FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,void * buffer,unsigned long buflen)243 FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element,
244                             void* buffer,
245                             unsigned long buflen) {
246   CPDF_StructElement* elem =
247       CPDFStructElementFromFPDFStructElement(struct_element);
248   return elem ? WideStringToBuffer(elem->GetTitle(), buffer, buflen) : 0;
249 }
250 
251 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element)252 FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element) {
253   CPDF_StructElement* elem =
254       CPDFStructElementFromFPDFStructElement(struct_element);
255   if (!elem)
256     return -1;
257 
258   FX_SAFE_INT32 tmp_size = elem->CountKids();
259   return tmp_size.ValueOrDefault(-1);
260 }
261 
262 FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV
FPDF_StructElement_GetChildAtIndex(FPDF_STRUCTELEMENT struct_element,int index)263 FPDF_StructElement_GetChildAtIndex(FPDF_STRUCTELEMENT struct_element,
264                                    int index) {
265   CPDF_StructElement* elem =
266       CPDFStructElementFromFPDFStructElement(struct_element);
267   if (!elem || index < 0 || static_cast<size_t>(index) >= elem->CountKids())
268     return nullptr;
269 
270   return FPDFStructElementFromCPDFStructElement(elem->GetKidIfElement(index));
271 }
272 
273 FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV
FPDF_StructElement_GetParent(FPDF_STRUCTELEMENT struct_element)274 FPDF_StructElement_GetParent(FPDF_STRUCTELEMENT struct_element) {
275   CPDF_StructElement* elem =
276       CPDFStructElementFromFPDFStructElement(struct_element);
277   CPDF_StructElement* parent = elem ? elem->GetParent() : nullptr;
278   if (!parent) {
279     return nullptr;
280   }
281   return FPDFStructElementFromCPDFStructElement(parent);
282 }
283 
284 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_Attr_GetCount(FPDF_STRUCTELEMENT_ATTR struct_attribute)285 FPDF_StructElement_Attr_GetCount(FPDF_STRUCTELEMENT_ATTR struct_attribute) {
286   const CPDF_Dictionary* dict =
287       CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
288   if (!dict)
289     return -1;
290   return fxcrt::CollectionSize<int>(*dict);
291 }
292 
293 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetName(FPDF_STRUCTELEMENT_ATTR struct_attribute,int index,void * buffer,unsigned long buflen,unsigned long * out_buflen)294 FPDF_StructElement_Attr_GetName(FPDF_STRUCTELEMENT_ATTR struct_attribute,
295                                 int index,
296                                 void* buffer,
297                                 unsigned long buflen,
298                                 unsigned long* out_buflen) {
299   if (!out_buflen) {
300     return false;
301   }
302 
303   const CPDF_Dictionary* dict =
304       CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
305   if (!dict)
306     return false;
307 
308   CPDF_DictionaryLocker locker(dict);
309   for (auto& it : locker) {
310     if (index == 0) {
311       *out_buflen =
312           NulTerminateMaybeCopyAndReturnLength(it.first, buffer, buflen);
313       return true;
314     }
315     --index;
316   }
317   return false;
318 }
319 
320 FPDF_EXPORT FPDF_OBJECT_TYPE FPDF_CALLCONV
FPDF_StructElement_Attr_GetType(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name)321 FPDF_StructElement_Attr_GetType(FPDF_STRUCTELEMENT_ATTR struct_attribute,
322                                 FPDF_BYTESTRING name) {
323   const CPDF_Dictionary* dict =
324       CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
325   if (!dict)
326     return FPDF_OBJECT_UNKNOWN;
327 
328   RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
329   return obj ? obj->GetType() : FPDF_OBJECT_UNKNOWN;
330 }
331 
FPDF_StructElement_Attr_GetBooleanValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,FPDF_BOOL * out_value)332 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_StructElement_Attr_GetBooleanValue(
333     FPDF_STRUCTELEMENT_ATTR struct_attribute,
334     FPDF_BYTESTRING name,
335     FPDF_BOOL* out_value) {
336   if (!out_value)
337     return false;
338 
339   const CPDF_Dictionary* dict =
340       CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
341   if (!dict)
342     return false;
343 
344   RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
345   if (!obj || !obj->IsBoolean())
346     return false;
347 
348   *out_value = obj->GetInteger();
349   return true;
350 }
351 
352 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetNumberValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,float * out_value)353 FPDF_StructElement_Attr_GetNumberValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,
354                                        FPDF_BYTESTRING name,
355                                        float* out_value) {
356   if (!out_value)
357     return false;
358 
359   const CPDF_Dictionary* dict =
360       CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
361   if (!dict)
362     return false;
363 
364   RetainPtr<const CPDF_Object> obj = dict->GetDirectObjectFor(name);
365   if (!obj || !obj->IsNumber())
366     return false;
367 
368   *out_value = obj->GetNumber();
369   return true;
370 }
371 
372 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetStringValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,void * buffer,unsigned long buflen,unsigned long * out_buflen)373 FPDF_StructElement_Attr_GetStringValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,
374                                        FPDF_BYTESTRING name,
375                                        void* buffer,
376                                        unsigned long buflen,
377                                        unsigned long* out_buflen) {
378   if (!out_buflen)
379     return false;
380 
381   const CPDF_Dictionary* dict =
382       CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
383   if (!dict)
384     return false;
385 
386   RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
387   if (!obj || !(obj->IsString() || obj->IsName()))
388     return false;
389 
390   *out_buflen = Utf16EncodeMaybeCopyAndReturnLength(
391       WideString::FromUTF8(obj->GetString().AsStringView()), buffer, buflen);
392   return true;
393 }
394 
395 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
FPDF_StructElement_Attr_GetBlobValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,FPDF_BYTESTRING name,void * buffer,unsigned long buflen,unsigned long * out_buflen)396 FPDF_StructElement_Attr_GetBlobValue(FPDF_STRUCTELEMENT_ATTR struct_attribute,
397                                      FPDF_BYTESTRING name,
398                                      void* buffer,
399                                      unsigned long buflen,
400                                      unsigned long* out_buflen) {
401   if (!out_buflen)
402     return false;
403 
404   const CPDF_Dictionary* dict =
405       CPDFDictionaryFromFPDFStructElementAttr(struct_attribute);
406   if (!dict)
407     return false;
408 
409   RetainPtr<const CPDF_Object> obj = dict->GetObjectFor(name);
410   if (!obj || !obj->IsString())
411     return false;
412 
413   ByteString result = obj->GetString();
414   const unsigned long len =
415       pdfium::base::checked_cast<unsigned long>(result.GetLength());
416   if (buffer && len <= buflen)
417     memcpy(buffer, result.c_str(), len);
418 
419   *out_buflen = len;
420   return true;
421 }
422 
423 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetMarkedContentIdCount(FPDF_STRUCTELEMENT struct_element)424 FPDF_StructElement_GetMarkedContentIdCount(FPDF_STRUCTELEMENT struct_element) {
425   CPDF_StructElement* elem =
426       CPDFStructElementFromFPDFStructElement(struct_element);
427   if (!elem)
428     return -1;
429   RetainPtr<const CPDF_Object> p = elem->GetK();
430   if (!p)
431     return -1;
432 
433   if (p->IsNumber() || p->IsDictionary())
434     return 1;
435 
436   return p->IsArray() ? fxcrt::CollectionSize<int>(*p->AsArray()) : -1;
437 }
438 
439 FPDF_EXPORT int FPDF_CALLCONV
FPDF_StructElement_GetMarkedContentIdAtIndex(FPDF_STRUCTELEMENT struct_element,int index)440 FPDF_StructElement_GetMarkedContentIdAtIndex(FPDF_STRUCTELEMENT struct_element,
441                                              int index) {
442   CPDF_StructElement* elem =
443       CPDFStructElementFromFPDFStructElement(struct_element);
444   if (!elem)
445     return -1;
446   RetainPtr<const CPDF_Object> p = elem->GetK();
447   if (!p)
448     return -1;
449 
450   if (p->IsNumber())
451     return index == 0 ? p->GetInteger() : -1;
452 
453   if (p->IsDictionary())
454     return GetMcidFromDict(p->GetDict().Get());
455 
456   if (p->IsArray()) {
457     const CPDF_Array* array = p->AsArray();
458     if (index < 0 || static_cast<size_t>(index) >= array->size())
459       return -1;
460     RetainPtr<const CPDF_Object> array_elem = array->GetObjectAt(index);
461     if (array_elem->IsNumber())
462       return array_elem->GetInteger();
463     if (array_elem->IsDictionary()) {
464       return GetMcidFromDict(array_elem->GetDict().Get());
465     }
466   }
467   return -1;
468 }
469