// Copyright 2016 The PDFium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include #include "public/fpdf_structtree.h" #include "testing/embedder_test.h" #include "testing/fx_string_testhelpers.h" #include "third_party/abseil-cpp/absl/types/optional.h" class FPDFStructTreeEmbedderTest : public EmbedderTest {}; TEST_F(FPDFStructTreeEmbedderTest, GetAltText) { ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); FPDF_STRUCTELEMENT element = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), -1); EXPECT_FALSE(element); element = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 1); EXPECT_FALSE(element); element = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); ASSERT_TRUE(element); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(element)); EXPECT_EQ(0U, FPDF_StructElement_GetAltText(element, nullptr, 0)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(element)); FPDF_STRUCTELEMENT child_element = FPDF_StructElement_GetChildAtIndex(element, -1); EXPECT_FALSE(child_element); child_element = FPDF_StructElement_GetChildAtIndex(element, 1); EXPECT_FALSE(child_element); child_element = FPDF_StructElement_GetChildAtIndex(element, 0); ASSERT_TRUE(child_element); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(child_element)); EXPECT_EQ(0U, FPDF_StructElement_GetAltText(child_element, nullptr, 0)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(child_element)); FPDF_STRUCTELEMENT gchild_element = FPDF_StructElement_GetChildAtIndex(child_element, -1); EXPECT_FALSE(gchild_element); gchild_element = FPDF_StructElement_GetChildAtIndex(child_element, 1); EXPECT_FALSE(gchild_element); gchild_element = FPDF_StructElement_GetChildAtIndex(child_element, 0); ASSERT_TRUE(gchild_element); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(gchild_element)); ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, nullptr, 0)); unsigned short buffer[12]; memset(buffer, 0, sizeof(buffer)); // Deliberately pass in a small buffer size to make sure |buffer| remains // untouched. ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, buffer, 1)); for (size_t i = 0; i < std::size(buffer); ++i) EXPECT_EQ(0U, buffer[i]); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(gchild_element)); ASSERT_EQ(24U, FPDF_StructElement_GetAltText(gchild_element, buffer, sizeof(buffer))); EXPECT_EQ(L"Black Image", GetPlatformWString(buffer)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(gchild_element)); FPDF_STRUCTELEMENT ggchild_element = FPDF_StructElement_GetChildAtIndex(gchild_element, 0); EXPECT_FALSE(ggchild_element); } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, GetActualText) { ASSERT_TRUE(OpenDocument("tagged_actual_text.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); EXPECT_EQ(0U, FPDF_StructElement_GetActualText(nullptr, nullptr, 0)); FPDF_STRUCTELEMENT element = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); ASSERT_TRUE(element); EXPECT_EQ(0U, FPDF_StructElement_GetActualText(element, nullptr, 0)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(element)); FPDF_STRUCTELEMENT child_element = FPDF_StructElement_GetChildAtIndex(element, 0); ASSERT_TRUE(child_element); EXPECT_EQ(0U, FPDF_StructElement_GetActualText(child_element, nullptr, 0)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(child_element)); FPDF_STRUCTELEMENT gchild_element = FPDF_StructElement_GetChildAtIndex(child_element, 0); ASSERT_TRUE(gchild_element); ASSERT_EQ(24U, FPDF_StructElement_GetActualText(gchild_element, nullptr, 0)); unsigned short buffer[12] = {}; // Deliberately pass in a small buffer size to make sure |buffer| remains // untouched. ASSERT_EQ(24U, FPDF_StructElement_GetActualText(gchild_element, buffer, 1)); for (size_t i = 0; i < std::size(buffer); ++i) EXPECT_EQ(0U, buffer[i]); ASSERT_EQ(24U, FPDF_StructElement_GetActualText(gchild_element, buffer, sizeof(buffer))); EXPECT_EQ(L"Actual Text", GetPlatformWString(buffer)); } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, GetStringAttribute) { ASSERT_TRUE(OpenDocument("tagged_table.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); FPDF_STRUCTELEMENT document = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); ASSERT_TRUE(document); constexpr int kBufLen = 100; uint16_t buffer[kBufLen] = {0}; EXPECT_EQ(18U, FPDF_StructElement_GetType(document, buffer, kBufLen)); EXPECT_EQ("Document", GetPlatformString(buffer)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(document)); FPDF_STRUCTELEMENT table = FPDF_StructElement_GetChildAtIndex(document, 0); ASSERT_TRUE(table); EXPECT_EQ(12U, FPDF_StructElement_GetType(table, buffer, kBufLen)); EXPECT_EQ("Table", GetPlatformString(buffer)); // The table should have an attribute "Summary" set to the empty string. EXPECT_EQ(2U, FPDF_StructElement_GetStringAttribute(table, "Summary", buffer, kBufLen)); ASSERT_EQ(2, FPDF_StructElement_CountChildren(table)); FPDF_STRUCTELEMENT row = FPDF_StructElement_GetChildAtIndex(table, 0); ASSERT_TRUE(row); ASSERT_EQ(2, FPDF_StructElement_CountChildren(row)); FPDF_STRUCTELEMENT header_cell = FPDF_StructElement_GetChildAtIndex(row, 0); ASSERT_TRUE(header_cell); EXPECT_EQ(6U, FPDF_StructElement_GetType(header_cell, buffer, kBufLen)); EXPECT_EQ("TH", GetPlatformString(buffer)); // The header should have an attribute "Scope" with a scope of "Row". EXPECT_EQ(8U, FPDF_StructElement_GetStringAttribute(header_cell, "Scope", buffer, kBufLen)); EXPECT_EQ("Row", GetPlatformString(buffer)); // The header has an attribute "ColSpan", but it's not a string so it // returns null. EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(header_cell, "ColSpan", buffer, kBufLen)); // An unsupported attribute should return 0. EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(header_cell, "Other", buffer, kBufLen)); // A null struct element should not crash. EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(nullptr, "Other", buffer, kBufLen)); } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, GetStringAttributeBadStructElement) { ASSERT_TRUE(OpenDocument("tagged_table_bad_elem.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); FPDF_STRUCTELEMENT document = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); ASSERT_TRUE(document); constexpr int kBufLen = 100; uint16_t buffer[kBufLen] = {0}; EXPECT_EQ(18U, FPDF_StructElement_GetType(document, buffer, kBufLen)); EXPECT_EQ("Document", GetPlatformString(buffer)); // The table can be retrieved, even though it does not have /Type. ASSERT_EQ(1, FPDF_StructElement_CountChildren(document)); FPDF_STRUCTELEMENT table = FPDF_StructElement_GetChildAtIndex(document, 0); ASSERT_TRUE(table); EXPECT_EQ(12U, FPDF_StructElement_GetType(table, buffer, kBufLen)); EXPECT_EQ("Table", GetPlatformString(buffer)); // The table entry cannot be retrieved, as the element is malformed. EXPECT_EQ(0U, FPDF_StructElement_GetStringAttribute(table, "Summary", buffer, kBufLen)); // The row can be retrieved, even though it had an invalid /Type. ASSERT_EQ(1, FPDF_StructElement_CountChildren(table)); FPDF_STRUCTELEMENT row = FPDF_StructElement_GetChildAtIndex(table, 0); EXPECT_TRUE(row); } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, GetID) { ASSERT_TRUE(OpenDocument("tagged_table.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); FPDF_STRUCTELEMENT document = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); ASSERT_TRUE(document); constexpr int kBufLen = 100; uint16_t buffer[kBufLen] = {0}; EXPECT_EQ(18U, FPDF_StructElement_GetType(document, buffer, kBufLen)); EXPECT_EQ("Document", GetPlatformString(buffer)); // The document has no ID. EXPECT_EQ(0U, FPDF_StructElement_GetID(document, buffer, kBufLen)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(document)); FPDF_STRUCTELEMENT table = FPDF_StructElement_GetChildAtIndex(document, 0); ASSERT_TRUE(table); EXPECT_EQ(12U, FPDF_StructElement_GetType(table, buffer, kBufLen)); EXPECT_EQ("Table", GetPlatformString(buffer)); // The table has an ID. EXPECT_EQ(14U, FPDF_StructElement_GetID(table, buffer, kBufLen)); EXPECT_EQ("node12", GetPlatformString(buffer)); // The first child of the table is a row, which has an empty ID. // It returns 2U, the length of an empty string, instead of 0U, // representing null. ASSERT_EQ(2, FPDF_StructElement_CountChildren(table)); FPDF_STRUCTELEMENT row = FPDF_StructElement_GetChildAtIndex(table, 0); ASSERT_TRUE(row); EXPECT_EQ(2U, FPDF_StructElement_GetID(row, buffer, kBufLen)); } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, GetLang) { ASSERT_TRUE(OpenDocument("tagged_table.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); FPDF_STRUCTELEMENT document = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); ASSERT_TRUE(document); constexpr int kBufLen = 100; uint16_t buffer[kBufLen] = {0}; EXPECT_EQ(18U, FPDF_StructElement_GetType(document, buffer, kBufLen)); EXPECT_EQ("Document", GetPlatformString(buffer)); // Nullptr test EXPECT_EQ(0U, FPDF_StructElement_GetLang(nullptr, buffer, kBufLen)); // The document has a language. EXPECT_EQ(12U, FPDF_StructElement_GetLang(document, buffer, kBufLen)); EXPECT_EQ("en-US", GetPlatformString(buffer)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(document)); FPDF_STRUCTELEMENT table = FPDF_StructElement_GetChildAtIndex(document, 0); ASSERT_TRUE(table); // The first child is a table, with a language. EXPECT_EQ(12U, FPDF_StructElement_GetType(table, buffer, kBufLen)); EXPECT_EQ("Table", GetPlatformString(buffer)); EXPECT_EQ(6U, FPDF_StructElement_GetLang(table, buffer, kBufLen)); EXPECT_EQ("hu", GetPlatformString(buffer)); // The first child of the table is a row, which doesn't have a // language explicitly set on it. ASSERT_EQ(2, FPDF_StructElement_CountChildren(table)); FPDF_STRUCTELEMENT row = FPDF_StructElement_GetChildAtIndex(table, 0); ASSERT_TRUE(row); EXPECT_EQ(0U, FPDF_StructElement_GetLang(row, buffer, kBufLen)); } UnloadPage(page); } // See also FPDFEditEmbedderTest.TraverseMarkedContentID, which traverses the // marked contents using FPDFPageObj_GetMark() and related API. TEST_F(FPDFStructTreeEmbedderTest, GetMarkedContentID) { ASSERT_TRUE(OpenDocument("marked_content_id.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); FPDF_STRUCTELEMENT element = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); EXPECT_EQ(0, FPDF_StructElement_GetMarkedContentID(element)); } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, GetMarkedContentIdAtIndex) { ASSERT_TRUE(OpenDocument("tagged_marked_content.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(4, FPDF_StructTree_CountChildren(struct_tree.get())); // K is an integer MCID FPDF_STRUCTELEMENT child1 = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); ASSERT_TRUE(child1); // Legacy API EXPECT_EQ(0, FPDF_StructElement_GetMarkedContentID(child1)); // K is a dict containing MCR object reference FPDF_STRUCTELEMENT child2 = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 1); ASSERT_TRUE(child2); // K is an array containing dict MCR object reference and integer MCID FPDF_STRUCTELEMENT child3 = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 2); ASSERT_TRUE(child3); // K does not exist FPDF_STRUCTELEMENT child4 = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 3); ASSERT_TRUE(child4); // New APIs EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdCount(nullptr)); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdAtIndex(nullptr, 0)); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdAtIndex(child1, -1)); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdAtIndex(child1, 1)); EXPECT_EQ(1, FPDF_StructElement_GetMarkedContentIdCount(child1)); EXPECT_EQ(0, FPDF_StructElement_GetMarkedContentIdAtIndex(child1, 0)); EXPECT_EQ(1, FPDF_StructElement_GetMarkedContentIdCount(child2)); EXPECT_EQ(1, FPDF_StructElement_GetMarkedContentIdAtIndex(child2, 0)); EXPECT_EQ(2, FPDF_StructElement_GetMarkedContentIdCount(child3)); EXPECT_EQ(2, FPDF_StructElement_GetMarkedContentIdAtIndex(child3, 0)); EXPECT_EQ(3, FPDF_StructElement_GetMarkedContentIdAtIndex(child3, 1)); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdCount(child4)); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentIdAtIndex(child4, 0)); } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, GetType) { ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); FPDF_STRUCTELEMENT element = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); ASSERT_TRUE(element); // test nullptr inputs unsigned short buffer[12]; ASSERT_EQ(0U, FPDF_StructElement_GetType(nullptr, buffer, sizeof(buffer))); ASSERT_EQ(0U, FPDF_StructElement_GetType(nullptr, nullptr, 0)); ASSERT_EQ(18U, FPDF_StructElement_GetType(element, nullptr, 0)); memset(buffer, 0, sizeof(buffer)); // Deliberately pass in a small buffer size to make sure |buffer| remains // untouched. ASSERT_EQ(18U, FPDF_StructElement_GetType(element, buffer, 1)); for (size_t i = 0; i < std::size(buffer); ++i) EXPECT_EQ(0U, buffer[i]); ASSERT_EQ(18U, FPDF_StructElement_GetType(element, buffer, sizeof(buffer))); EXPECT_EQ(L"Document", GetPlatformWString(buffer)); } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, GetObjType) { ASSERT_TRUE(OpenDocument("tagged_table_bad_elem.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); FPDF_STRUCTELEMENT child = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); ASSERT_TRUE(child); // test nullptr inputs unsigned short buffer[28] = {}; ASSERT_EQ(0U, FPDF_StructElement_GetObjType(nullptr, buffer, sizeof(buffer))); ASSERT_EQ(0U, FPDF_StructElement_GetObjType(nullptr, nullptr, 0)); ASSERT_EQ(22U, FPDF_StructElement_GetObjType(child, nullptr, 0)); // Deliberately pass in a small buffer size to make sure `buffer` remains // untouched. ASSERT_EQ(22U, FPDF_StructElement_GetObjType(child, buffer, 1)); for (size_t i = 0; i < std::size(buffer); ++i) EXPECT_EQ(0U, buffer[i]); ASSERT_EQ(22U, FPDF_StructElement_GetObjType(child, buffer, sizeof(buffer))); EXPECT_EQ(L"StructElem", GetPlatformWString(buffer)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(child)); FPDF_STRUCTELEMENT gchild = FPDF_StructElement_GetChildAtIndex(child, 0); memset(buffer, 0, sizeof(buffer)); // Missing /Type in `gchild` ASSERT_EQ(0U, FPDF_StructElement_GetObjType(gchild, buffer, sizeof(buffer))); // Buffer is untouched. for (size_t i = 0; i < std::size(buffer); ++i) EXPECT_EQ(0U, buffer[i]); ASSERT_EQ(1, FPDF_StructElement_CountChildren(gchild)); FPDF_STRUCTELEMENT ggchild = FPDF_StructElement_GetChildAtIndex(gchild, 0); ASSERT_EQ(28U, FPDF_StructElement_GetObjType(ggchild, buffer, sizeof(buffer))); // Reading bad elem also works. EXPECT_EQ(L"NotStructElem", GetPlatformWString(buffer)); } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, GetParent) { ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); FPDF_STRUCTELEMENT parent = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); ASSERT_TRUE(parent); ASSERT_EQ(1, FPDF_StructElement_CountChildren(parent)); FPDF_STRUCTELEMENT child = FPDF_StructElement_GetChildAtIndex(parent, 0); ASSERT_TRUE(child); // test nullptr inputs ASSERT_EQ(nullptr, FPDF_StructElement_GetParent(nullptr)); ASSERT_EQ(parent, FPDF_StructElement_GetParent(child)); // The parent of `parent` is StructTreeRoot and no longer a StructElement. // We currently handle this case by returning a nullptr. ASSERT_EQ(nullptr, FPDF_StructElement_GetParent(parent)); } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, GetTitle) { ASSERT_TRUE(OpenDocument("tagged_alt_text.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); FPDF_STRUCTELEMENT element = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); ASSERT_TRUE(element); // test nullptr inputs unsigned short buffer[13]; ASSERT_EQ(0U, FPDF_StructElement_GetTitle(nullptr, buffer, sizeof(buffer))); ASSERT_EQ(0U, FPDF_StructElement_GetTitle(nullptr, nullptr, 0)); ASSERT_EQ(20U, FPDF_StructElement_GetTitle(element, nullptr, 0)); memset(buffer, 0, sizeof(buffer)); // Deliberately pass in a small buffer size to make sure |buffer| remains // untouched. ASSERT_EQ(20U, FPDF_StructElement_GetTitle(element, buffer, 1)); for (size_t i = 0; i < std::size(buffer); ++i) EXPECT_EQ(0U, buffer[i]); ASSERT_EQ(20U, FPDF_StructElement_GetTitle(element, buffer, sizeof(buffer))); EXPECT_EQ(L"TitleText", GetPlatformWString(buffer)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(element)); FPDF_STRUCTELEMENT child_element = FPDF_StructElement_GetChildAtIndex(element, 0); ASSERT_TRUE(element); ASSERT_EQ(26U, FPDF_StructElement_GetTitle(child_element, buffer, sizeof(buffer))); EXPECT_EQ(L"symbol: 100k", GetPlatformWString(buffer)); } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, GetAttributes) { ASSERT_TRUE(OpenDocument("tagged_table.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); FPDF_STRUCTELEMENT document = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); ASSERT_TRUE(document); ASSERT_EQ(1, FPDF_StructElement_CountChildren(document)); ASSERT_EQ(-1, FPDF_StructElement_GetAttributeCount(document)); FPDF_STRUCTELEMENT table = FPDF_StructElement_GetChildAtIndex(document, 0); ASSERT_TRUE(table); ASSERT_EQ(2, FPDF_StructElement_CountChildren(table)); { FPDF_STRUCTELEMENT tr = FPDF_StructElement_GetChildAtIndex(table, 0); ASSERT_TRUE(tr); ASSERT_EQ(2, FPDF_StructElement_CountChildren(tr)); FPDF_STRUCTELEMENT th = FPDF_StructElement_GetChildAtIndex(tr, 0); ASSERT_TRUE(th); ASSERT_EQ(2, FPDF_StructElement_GetAttributeCount(th)); // nullptr test ASSERT_EQ(nullptr, FPDF_StructElement_GetAttributeAtIndex(document, 0)); ASSERT_EQ(nullptr, FPDF_StructElement_GetAttributeAtIndex(document, -1)); ASSERT_EQ(nullptr, FPDF_StructElement_GetAttributeAtIndex(th, 2)); FPDF_STRUCTELEMENT_ATTR attr = FPDF_StructElement_GetAttributeAtIndex(th, 1); ASSERT_TRUE(attr); ASSERT_EQ(2, FPDF_StructElement_Attr_GetCount(attr)); ASSERT_FALSE( FPDF_StructElement_Attr_GetName(attr, 1, nullptr, 0U, nullptr)); unsigned long buffer_len_needed = ULONG_MAX; // Pass buffer = nullptr to obtain the size of the buffer needed, ASSERT_TRUE(FPDF_StructElement_Attr_GetName(attr, 1, nullptr, 0, &buffer_len_needed)); EXPECT_EQ(2U, buffer_len_needed); char buffer[8] = {}; unsigned long out_len = ULONG_MAX; // Deliberately pass in a small buffer size to make sure `buffer` remains // untouched. ASSERT_TRUE( FPDF_StructElement_Attr_GetName(attr, 1, buffer, 1, &out_len)); EXPECT_EQ(2U, out_len); for (size_t i = 0; i < std::size(buffer); ++i) EXPECT_EQ(0, buffer[i]); ASSERT_TRUE(FPDF_StructElement_Attr_GetName(attr, 1, buffer, sizeof(buffer), &out_len)); EXPECT_EQ(2U, out_len); EXPECT_STREQ("O", buffer); EXPECT_EQ(FPDF_OBJECT_NAME, FPDF_StructElement_Attr_GetType(attr, buffer)); unsigned short str_val[12] = {}; ASSERT_TRUE(FPDF_StructElement_Attr_GetStringValue( attr, buffer, str_val, sizeof(str_val), &out_len)); EXPECT_EQ(12U, out_len); EXPECT_EQ(L"Table", GetPlatformWString(str_val)); memset(buffer, 0, sizeof(buffer)); ASSERT_TRUE(FPDF_StructElement_Attr_GetName(attr, 0, buffer, sizeof(buffer), &out_len)); EXPECT_EQ(8U, out_len); EXPECT_STREQ("ColSpan", buffer); EXPECT_EQ(FPDF_OBJECT_NUMBER, FPDF_StructElement_Attr_GetType(attr, buffer)); float num_val; ASSERT_TRUE( FPDF_StructElement_Attr_GetNumberValue(attr, buffer, &num_val)); EXPECT_FLOAT_EQ(2.0f, num_val); } { FPDF_STRUCTELEMENT tr = FPDF_StructElement_GetChildAtIndex(table, 1); ASSERT_TRUE(tr); ASSERT_EQ(1, FPDF_StructElement_GetAttributeCount(tr)); // nullptr when index out of range ASSERT_EQ(nullptr, FPDF_StructElement_GetAttributeAtIndex(tr, 1)); ASSERT_EQ(2, FPDF_StructElement_CountChildren(tr)); FPDF_STRUCTELEMENT td = FPDF_StructElement_GetChildAtIndex(tr, 1); ASSERT_TRUE(td); { // Test counting and obtaining attributes via reference ASSERT_EQ(1, FPDF_StructElement_GetAttributeCount(td)); FPDF_STRUCTELEMENT_ATTR attr = FPDF_StructElement_GetAttributeAtIndex(td, 0); ASSERT_TRUE(attr); ASSERT_EQ(4, FPDF_StructElement_Attr_GetCount(attr)); // Test string and blob type { char buffer[16] = {}; unsigned long out_len = ULONG_MAX; ASSERT_TRUE(FPDF_StructElement_Attr_GetName( attr, 0, buffer, sizeof(buffer), &out_len)); EXPECT_EQ(8U, out_len); EXPECT_STREQ("ColProp", buffer); EXPECT_EQ(FPDF_OBJECT_STRING, FPDF_StructElement_Attr_GetType(attr, buffer)); unsigned short str_val[12] = {}; ASSERT_TRUE(FPDF_StructElement_Attr_GetStringValue( attr, buffer, str_val, sizeof(str_val), &out_len)); EXPECT_EQ(8U, out_len); EXPECT_EQ(L"Sum", GetPlatformWString(str_val)); char blob_val[3] = {}; ASSERT_TRUE(FPDF_StructElement_Attr_GetBlobValue( attr, buffer, blob_val, sizeof(blob_val), &out_len)); EXPECT_EQ(3U, out_len); EXPECT_EQ('S', blob_val[0]); EXPECT_EQ('u', blob_val[1]); EXPECT_EQ('m', blob_val[2]); } // Test boolean type { char buffer[16] = {}; unsigned long out_len = ULONG_MAX; ASSERT_TRUE(FPDF_StructElement_Attr_GetName( attr, 1, buffer, sizeof(buffer), &out_len)); EXPECT_EQ(7U, out_len); EXPECT_STREQ("CurUSD", buffer); EXPECT_EQ(FPDF_OBJECT_BOOLEAN, FPDF_StructElement_Attr_GetType(attr, buffer)); FPDF_BOOL val; ASSERT_TRUE( FPDF_StructElement_Attr_GetBooleanValue(attr, buffer, &val)); EXPECT_TRUE(val); } // Test reference to number { char buffer[16] = {}; unsigned long out_len = ULONG_MAX; ASSERT_TRUE(FPDF_StructElement_Attr_GetName( attr, 3, buffer, sizeof(buffer), &out_len)); EXPECT_EQ(8U, out_len); EXPECT_STREQ("RowSpan", buffer); EXPECT_EQ(FPDF_OBJECT_REFERENCE, FPDF_StructElement_Attr_GetType(attr, buffer)); float val; ASSERT_TRUE( FPDF_StructElement_Attr_GetNumberValue(attr, buffer, &val)); EXPECT_FLOAT_EQ(3, val); } } } } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, GetStructTreeForNestedTaggedPDF) { ASSERT_TRUE(OpenDocument("tagged_nested.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { // This call should not crash. https://crbug.com/pdfium/1480 ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, MarkedContentReferenceAndObjectReference) { ASSERT_TRUE(OpenDocument("tagged_mcr_objr.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); FPDF_STRUCTELEMENT object8 = FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0); ASSERT_TRUE(object8); unsigned short buffer[12]; ASSERT_EQ(18U, FPDF_StructElement_GetType(object8, buffer, sizeof(buffer))); EXPECT_EQ(L"Document", GetPlatformWString(buffer)); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object8)); ASSERT_EQ(2, FPDF_StructElement_CountChildren(object8)); // First branch. 10 -> 12 -> 13 -> Inline dict. FPDF_STRUCTELEMENT object10 = FPDF_StructElement_GetChildAtIndex(object8, 0); ASSERT_TRUE(object10); ASSERT_EQ(20U, FPDF_StructElement_GetType(object10, buffer, sizeof(buffer))); EXPECT_EQ(L"NonStruct", GetPlatformWString(buffer)); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object10)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(object10)); FPDF_STRUCTELEMENT object12 = FPDF_StructElement_GetChildAtIndex(object10, 0); ASSERT_TRUE(object12); ASSERT_EQ(4U, FPDF_StructElement_GetType(object12, buffer, sizeof(buffer))); EXPECT_EQ(L"P", GetPlatformWString(buffer)); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object12)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(object12)); FPDF_STRUCTELEMENT object13 = FPDF_StructElement_GetChildAtIndex(object12, 0); ASSERT_TRUE(object13); ASSERT_EQ(20U, FPDF_StructElement_GetType(object13, buffer, sizeof(buffer))); EXPECT_EQ(L"NonStruct", GetPlatformWString(buffer)); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object13)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(object13)); // TODO(crbug.com/pdfium/672): Fetch this child element. EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(object13, 0)); // Second branch. 11 -> 14 -> Inline dict. // -> 15 -> Inline dict. FPDF_STRUCTELEMENT object11 = FPDF_StructElement_GetChildAtIndex(object8, 1); ASSERT_TRUE(object11); ASSERT_EQ(4U, FPDF_StructElement_GetType(object11, buffer, sizeof(buffer))); EXPECT_EQ(L"P", GetPlatformWString(buffer)); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object11)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(object11)); FPDF_STRUCTELEMENT object14 = FPDF_StructElement_GetChildAtIndex(object11, 0); ASSERT_TRUE(object14); ASSERT_EQ(20U, FPDF_StructElement_GetType(object14, buffer, sizeof(buffer))); EXPECT_EQ(L"NonStruct", GetPlatformWString(buffer)); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object14)); ASSERT_EQ(2, FPDF_StructElement_CountChildren(object14)); // TODO(crbug.com/pdfium/672): Object 15 should be at index 1. EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(object14, 1)); FPDF_STRUCTELEMENT object15 = FPDF_StructElement_GetChildAtIndex(object14, 0); ASSERT_TRUE(object15); ASSERT_EQ(20U, FPDF_StructElement_GetType(object15, buffer, sizeof(buffer))); EXPECT_EQ(L"NonStruct", GetPlatformWString(buffer)); EXPECT_EQ(-1, FPDF_StructElement_GetMarkedContentID(object15)); ASSERT_EQ(1, FPDF_StructElement_CountChildren(object15)); // TODO(crbug.com/pdfium/672): Fetch this child element. EXPECT_FALSE(FPDF_StructElement_GetChildAtIndex(object15, 0)); } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, Bug1768) { ASSERT_TRUE(OpenDocument("bug_1768.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); // TODO(crbug.com/pdfium/1768): Fetch this child element. Then consider // writing more of the test to make sure other elements in the tree can be // fetched correctly as well. EXPECT_FALSE(FPDF_StructTree_GetChildAtIndex(struct_tree.get(), 0)); } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, Bug1296920) { ASSERT_TRUE(OpenDocument("bug_1296920.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); // Destroying this tree should not crash. } UnloadPage(page); } TEST_F(FPDFStructTreeEmbedderTest, Bug1443100) { ASSERT_TRUE(OpenDocument("tagged_table_bad_parent.pdf")); FPDF_PAGE page = LoadPage(0); ASSERT_TRUE(page); { // Calling these APIs should not trigger a dangling pointer. ScopedFPDFStructTree struct_tree(FPDF_StructTree_GetForPage(page)); ASSERT_TRUE(struct_tree); ASSERT_EQ(1, FPDF_StructTree_CountChildren(struct_tree.get())); } UnloadPage(page); }