// Copyright 2016 The PDFium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "public/fpdf_structtree.h" #include #include "core/fpdfapi/page/cpdf_page.h" #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfdoc/cpdf_structelement.h" #include "core/fpdfdoc/cpdf_structtree.h" #include "core/fxcrt/fx_safe_types.h" #include "core/fxcrt/stl_util.h" #include "fpdfsdk/cpdfsdk_helpers.h" #include "third_party/base/numerics/safe_conversions.h" namespace { unsigned long WideStringToBuffer(const WideString& str, void* buffer, unsigned long buflen) { if (str.IsEmpty()) return 0; ByteString encodedStr = str.ToUTF16LE(); const unsigned long len = pdfium::base::checked_cast(encodedStr.GetLength()); if (buffer && len <= buflen) memcpy(buffer, encodedStr.c_str(), len); return len; } int GetMcidFromDict(const CPDF_Dictionary* dict) { if (dict && dict->GetNameFor("Type") == "MCR") { RetainPtr obj = dict->GetObjectFor("MCID"); if (obj && obj->IsNumber()) return obj->GetInteger(); } return -1; } } // namespace FPDF_EXPORT FPDF_STRUCTTREE FPDF_CALLCONV FPDF_StructTree_GetForPage(FPDF_PAGE page) { CPDF_Page* pPage = CPDFPageFromFPDFPage(page); if (!pPage) return nullptr; // Caller takes onwership. return FPDFStructTreeFromCPDFStructTree( CPDF_StructTree::LoadPage(pPage->GetDocument(), pPage->GetDict()) .release()); } FPDF_EXPORT void FPDF_CALLCONV FPDF_StructTree_Close(FPDF_STRUCTTREE struct_tree) { std::unique_ptr( CPDFStructTreeFromFPDFStructTree(struct_tree)); } FPDF_EXPORT int FPDF_CALLCONV FPDF_StructTree_CountChildren(FPDF_STRUCTTREE struct_tree) { CPDF_StructTree* tree = CPDFStructTreeFromFPDFStructTree(struct_tree); if (!tree) return -1; FX_SAFE_INT32 tmp_size = tree->CountTopElements(); return tmp_size.ValueOrDefault(-1); } FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV FPDF_StructTree_GetChildAtIndex(FPDF_STRUCTTREE struct_tree, int index) { CPDF_StructTree* tree = CPDFStructTreeFromFPDFStructTree(struct_tree); if (!tree || index < 0 || static_cast(index) >= tree->CountTopElements()) { return nullptr; } return FPDFStructElementFromCPDFStructElement( tree->GetTopElement(static_cast(index))); } FPDF_EXPORT unsigned long FPDF_CALLCONV FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element, void* buffer, unsigned long buflen) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); return elem ? WideStringToBuffer(elem->GetAltText(), buffer, buflen) : 0; } FPDF_EXPORT unsigned long FPDF_CALLCONV FPDF_StructElement_GetActualText(FPDF_STRUCTELEMENT struct_element, void* buffer, unsigned long buflen) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); return elem ? WideStringToBuffer(elem->GetActualText(), buffer, buflen) : 0; } FPDF_EXPORT unsigned long FPDF_CALLCONV FPDF_StructElement_GetID(FPDF_STRUCTELEMENT struct_element, void* buffer, unsigned long buflen) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); if (!elem) return 0; absl::optional id = elem->GetID(); if (!id.has_value()) return 0; return Utf16EncodeMaybeCopyAndReturnLength(id.value(), buffer, buflen); } FPDF_EXPORT unsigned long FPDF_CALLCONV FPDF_StructElement_GetLang(FPDF_STRUCTELEMENT struct_element, void* buffer, unsigned long buflen) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); if (!elem) return 0; absl::optional lang = elem->GetLang(); if (!lang.has_value()) return 0; return Utf16EncodeMaybeCopyAndReturnLength(lang.value(), buffer, buflen); } FPDF_EXPORT int FPDF_CALLCONV FPDF_StructElement_GetAttributeCount(FPDF_STRUCTELEMENT struct_element) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); if (!elem) return -1; RetainPtr attr_obj = elem->GetA(); if (!attr_obj) { return -1; } attr_obj = attr_obj->GetDirect(); if (!attr_obj) return -1; if (attr_obj->IsArray()) return fxcrt::CollectionSize(*attr_obj->AsArray()); return attr_obj->IsDictionary() ? 1 : -1; } FPDF_EXPORT FPDF_STRUCTELEMENT_ATTR FPDF_CALLCONV FPDF_StructElement_GetAttributeAtIndex(FPDF_STRUCTELEMENT struct_element, int index) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); if (!elem) return nullptr; RetainPtr attr_obj = elem->GetA(); if (!attr_obj) return nullptr; attr_obj = attr_obj->GetDirect(); if (!attr_obj) { return nullptr; } if (attr_obj->IsDictionary()) { return index == 0 ? FPDFStructElementAttrFromCPDFDictionary( attr_obj->AsDictionary()) : nullptr; } if (attr_obj->IsArray()) { const CPDF_Array* array = attr_obj->AsArray(); if (index < 0 || static_cast(index) >= array->size()) return nullptr; // TODO(tsepez): should embedder take a reference here? // Unretained reference in public API. NOLINTNEXTLINE return FPDFStructElementAttrFromCPDFDictionary(array->GetDictAt(index)); } return nullptr; } FPDF_EXPORT unsigned long FPDF_CALLCONV FPDF_StructElement_GetStringAttribute(FPDF_STRUCTELEMENT struct_element, FPDF_BYTESTRING attr_name, void* buffer, unsigned long buflen) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); if (!elem) return 0; RetainPtr array = ToArray(elem->GetA()); if (!array) return 0; CPDF_ArrayLocker locker(array); for (const RetainPtr& obj : locker) { const CPDF_Dictionary* obj_dict = obj->AsDictionary(); if (!obj_dict) continue; RetainPtr attr = obj_dict->GetObjectFor(attr_name); if (!attr || !(attr->IsString() || attr->IsName())) continue; return Utf16EncodeMaybeCopyAndReturnLength(attr->GetUnicodeText(), buffer, buflen); } return 0; } FPDF_EXPORT int FPDF_CALLCONV FPDF_StructElement_GetMarkedContentID(FPDF_STRUCTELEMENT struct_element) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); if (!elem) return -1; RetainPtr p = elem->GetK(); return p && p->IsNumber() ? p->GetInteger() : -1; } FPDF_EXPORT unsigned long FPDF_CALLCONV FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element, void* buffer, unsigned long buflen) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); return elem ? WideStringToBuffer( WideString::FromUTF8(elem->GetType().AsStringView()), buffer, buflen) : 0; } FPDF_EXPORT unsigned long FPDF_CALLCONV FPDF_StructElement_GetObjType(FPDF_STRUCTELEMENT struct_element, void* buffer, unsigned long buflen) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); return elem ? WideStringToBuffer( WideString::FromUTF8(elem->GetObjType().AsStringView()), buffer, buflen) : 0; } FPDF_EXPORT unsigned long FPDF_CALLCONV FPDF_StructElement_GetTitle(FPDF_STRUCTELEMENT struct_element, void* buffer, unsigned long buflen) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); return elem ? WideStringToBuffer(elem->GetTitle(), buffer, buflen) : 0; } FPDF_EXPORT int FPDF_CALLCONV FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); if (!elem) return -1; FX_SAFE_INT32 tmp_size = elem->CountKids(); return tmp_size.ValueOrDefault(-1); } FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV FPDF_StructElement_GetChildAtIndex(FPDF_STRUCTELEMENT struct_element, int index) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); if (!elem || index < 0 || static_cast(index) >= elem->CountKids()) return nullptr; return FPDFStructElementFromCPDFStructElement(elem->GetKidIfElement(index)); } FPDF_EXPORT FPDF_STRUCTELEMENT FPDF_CALLCONV FPDF_StructElement_GetParent(FPDF_STRUCTELEMENT struct_element) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); CPDF_StructElement* parent = elem ? elem->GetParent() : nullptr; if (!parent) { return nullptr; } return FPDFStructElementFromCPDFStructElement(parent); } FPDF_EXPORT int FPDF_CALLCONV FPDF_StructElement_Attr_GetCount(FPDF_STRUCTELEMENT_ATTR struct_attribute) { const CPDF_Dictionary* dict = CPDFDictionaryFromFPDFStructElementAttr(struct_attribute); if (!dict) return -1; return fxcrt::CollectionSize(*dict); } FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_StructElement_Attr_GetName(FPDF_STRUCTELEMENT_ATTR struct_attribute, int index, void* buffer, unsigned long buflen, unsigned long* out_buflen) { if (!out_buflen) { return false; } const CPDF_Dictionary* dict = CPDFDictionaryFromFPDFStructElementAttr(struct_attribute); if (!dict) return false; CPDF_DictionaryLocker locker(dict); for (auto& it : locker) { if (index == 0) { *out_buflen = NulTerminateMaybeCopyAndReturnLength(it.first, buffer, buflen); return true; } --index; } return false; } FPDF_EXPORT FPDF_OBJECT_TYPE FPDF_CALLCONV FPDF_StructElement_Attr_GetType(FPDF_STRUCTELEMENT_ATTR struct_attribute, FPDF_BYTESTRING name) { const CPDF_Dictionary* dict = CPDFDictionaryFromFPDFStructElementAttr(struct_attribute); if (!dict) return FPDF_OBJECT_UNKNOWN; RetainPtr obj = dict->GetObjectFor(name); return obj ? obj->GetType() : FPDF_OBJECT_UNKNOWN; } FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_StructElement_Attr_GetBooleanValue( FPDF_STRUCTELEMENT_ATTR struct_attribute, FPDF_BYTESTRING name, FPDF_BOOL* out_value) { if (!out_value) return false; const CPDF_Dictionary* dict = CPDFDictionaryFromFPDFStructElementAttr(struct_attribute); if (!dict) return false; RetainPtr obj = dict->GetObjectFor(name); if (!obj || !obj->IsBoolean()) return false; *out_value = obj->GetInteger(); return true; } FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_StructElement_Attr_GetNumberValue(FPDF_STRUCTELEMENT_ATTR struct_attribute, FPDF_BYTESTRING name, float* out_value) { if (!out_value) return false; const CPDF_Dictionary* dict = CPDFDictionaryFromFPDFStructElementAttr(struct_attribute); if (!dict) return false; RetainPtr obj = dict->GetDirectObjectFor(name); if (!obj || !obj->IsNumber()) return false; *out_value = obj->GetNumber(); return true; } FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_StructElement_Attr_GetStringValue(FPDF_STRUCTELEMENT_ATTR struct_attribute, FPDF_BYTESTRING name, void* buffer, unsigned long buflen, unsigned long* out_buflen) { if (!out_buflen) return false; const CPDF_Dictionary* dict = CPDFDictionaryFromFPDFStructElementAttr(struct_attribute); if (!dict) return false; RetainPtr obj = dict->GetObjectFor(name); if (!obj || !(obj->IsString() || obj->IsName())) return false; *out_buflen = Utf16EncodeMaybeCopyAndReturnLength( WideString::FromUTF8(obj->GetString().AsStringView()), buffer, buflen); return true; } FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_StructElement_Attr_GetBlobValue(FPDF_STRUCTELEMENT_ATTR struct_attribute, FPDF_BYTESTRING name, void* buffer, unsigned long buflen, unsigned long* out_buflen) { if (!out_buflen) return false; const CPDF_Dictionary* dict = CPDFDictionaryFromFPDFStructElementAttr(struct_attribute); if (!dict) return false; RetainPtr obj = dict->GetObjectFor(name); if (!obj || !obj->IsString()) return false; ByteString result = obj->GetString(); const unsigned long len = pdfium::base::checked_cast(result.GetLength()); if (buffer && len <= buflen) memcpy(buffer, result.c_str(), len); *out_buflen = len; return true; } FPDF_EXPORT int FPDF_CALLCONV FPDF_StructElement_GetMarkedContentIdCount(FPDF_STRUCTELEMENT struct_element) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); if (!elem) return -1; RetainPtr p = elem->GetK(); if (!p) return -1; if (p->IsNumber() || p->IsDictionary()) return 1; return p->IsArray() ? fxcrt::CollectionSize(*p->AsArray()) : -1; } FPDF_EXPORT int FPDF_CALLCONV FPDF_StructElement_GetMarkedContentIdAtIndex(FPDF_STRUCTELEMENT struct_element, int index) { CPDF_StructElement* elem = CPDFStructElementFromFPDFStructElement(struct_element); if (!elem) return -1; RetainPtr p = elem->GetK(); if (!p) return -1; if (p->IsNumber()) return index == 0 ? p->GetInteger() : -1; if (p->IsDictionary()) return GetMcidFromDict(p->GetDict().Get()); if (p->IsArray()) { const CPDF_Array* array = p->AsArray(); if (index < 0 || static_cast(index) >= array->size()) return -1; RetainPtr array_elem = array->GetObjectAt(index); if (array_elem->IsNumber()) return array_elem->GetInteger(); if (array_elem->IsDictionary()) { return GetMcidFromDict(array_elem->GetDict().Get()); } } return -1; }