1 // Copyright 2018 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "samples/helpers/dump.h"
6
7 #include <limits.h>
8 #include <string.h>
9
10 #include <algorithm>
11 #include <functional>
12 #include <iterator>
13 #include <string>
14 #include <utility>
15
16 #include "public/cpp/fpdf_scopers.h"
17 #include "public/fpdf_doc.h"
18 #include "public/fpdf_transformpage.h"
19 #include "testing/fx_string_testhelpers.h"
20
21 using GetBoxInfoFunc =
22 std::function<bool(FPDF_PAGE, float*, float*, float*, float*)>;
23
24 namespace {
25
// Builds a std::wstring from the |buf_size| 16-bit units starting at |buf|.
// Each unit is widened to wchar_t individually: embedded zero units are kept
// and no surrogate-pair decoding is attempted. |buf_size| is an element
// count, not a byte count.
std::wstring ConvertToWString(const unsigned short* buf,
                              unsigned long buf_size) {
  return std::wstring(buf, buf + buf_size);
}
33
DumpBoxInfo(GetBoxInfoFunc func,const char * box_type,FPDF_PAGE page,int page_idx)34 void DumpBoxInfo(GetBoxInfoFunc func,
35 const char* box_type,
36 FPDF_PAGE page,
37 int page_idx) {
38 FS_RECTF rect;
39 bool ret = func(page, &rect.left, &rect.bottom, &rect.right, &rect.top);
40 if (!ret) {
41 printf("Page %d: No %s.\n", page_idx, box_type);
42 return;
43 }
44 printf("Page %d: %s: %0.2f %0.2f %0.2f %0.2f\n", page_idx, box_type,
45 rect.left, rect.bottom, rect.right, rect.top);
46 }
47
DumpStructureElementAttributes(FPDF_STRUCTELEMENT_ATTR attr,int indent)48 void DumpStructureElementAttributes(FPDF_STRUCTELEMENT_ATTR attr, int indent) {
49 static const size_t kBufSize = 1024;
50 int count = FPDF_StructElement_Attr_GetCount(attr);
51 for (int i = 0; i < count; i++) {
52 char name[kBufSize] = {};
53 unsigned long len = ULONG_MAX;
54 if (!FPDF_StructElement_Attr_GetName(attr, i, name, sizeof(name), &len)) {
55 printf("%*s FPDF_StructElement_Attr_GetName failed for %d\n", indent, "",
56 i);
57 continue;
58 }
59
60 FPDF_OBJECT_TYPE type = FPDF_StructElement_Attr_GetType(attr, name);
61 if (type == FPDF_OBJECT_BOOLEAN) {
62 int value;
63 if (!FPDF_StructElement_Attr_GetBooleanValue(attr, name, &value)) {
64 printf("%*s %s: Failed FPDF_StructElement_Attr_GetBooleanValue\n",
65 indent, "", name);
66 continue;
67 }
68 printf("%*s %s: %d\n", indent, "", name, value);
69 } else if (type == FPDF_OBJECT_NUMBER) {
70 float value;
71 if (!FPDF_StructElement_Attr_GetNumberValue(attr, name, &value)) {
72 printf("%*s %s: Failed FPDF_StructElement_Attr_GetNumberValue\n",
73 indent, "", name);
74 continue;
75 }
76 printf("%*s %s: %f\n", indent, "", name, value);
77 } else if (type == FPDF_OBJECT_STRING || type == FPDF_OBJECT_NAME) {
78 unsigned short buffer[kBufSize] = {};
79 if (!FPDF_StructElement_Attr_GetStringValue(attr, name, buffer,
80 sizeof(buffer), &len)) {
81 printf("%*s %s: Failed FPDF_StructElement_Attr_GetStringValue\n",
82 indent, "", name);
83 continue;
84 }
85 printf("%*s %s: %ls\n", indent, "", name,
86 ConvertToWString(buffer, len).c_str());
87 } else if (type == FPDF_OBJECT_UNKNOWN) {
88 printf("%*s %s: FPDF_OBJECT_UNKNOWN\n", indent, "", name);
89 } else {
90 printf("%*s %s: NOT_YET_IMPLEMENTED: %d\n", indent, "", name, type);
91 }
92 }
93 }
94
95 } // namespace
96
DumpChildStructure(FPDF_STRUCTELEMENT child,int indent)97 void DumpChildStructure(FPDF_STRUCTELEMENT child, int indent) {
98 static const size_t kBufSize = 1024;
99 unsigned short buf[kBufSize];
100 unsigned long len = FPDF_StructElement_GetType(child, buf, kBufSize);
101 if (len > 0) {
102 printf("%*s S: %ls\n", indent * 2, "", ConvertToWString(buf, len).c_str());
103 }
104
105 int attr_count = FPDF_StructElement_GetAttributeCount(child);
106 for (int i = 0; i < attr_count; i++) {
107 FPDF_STRUCTELEMENT_ATTR child_attr =
108 FPDF_StructElement_GetAttributeAtIndex(child, i);
109 if (!child_attr) {
110 continue;
111 }
112 printf("%*s A[%d]:\n", indent * 2, "", i);
113 DumpStructureElementAttributes(child_attr, indent * 2 + 2);
114 }
115
116 memset(buf, 0, sizeof(buf));
117 len = FPDF_StructElement_GetActualText(child, buf, kBufSize);
118 if (len > 0) {
119 printf("%*s ActualText: %ls\n", indent * 2, "",
120 ConvertToWString(buf, len).c_str());
121 }
122
123 memset(buf, 0, sizeof(buf));
124 len = FPDF_StructElement_GetAltText(child, buf, kBufSize);
125 if (len > 0) {
126 printf("%*s AltText: %ls\n", indent * 2, "",
127 ConvertToWString(buf, len).c_str());
128 }
129
130 memset(buf, 0, sizeof(buf));
131 len = FPDF_StructElement_GetID(child, buf, kBufSize);
132 if (len > 0) {
133 printf("%*s ID: %ls\n", indent * 2, "", ConvertToWString(buf, len).c_str());
134 }
135
136 memset(buf, 0, sizeof(buf));
137 len = FPDF_StructElement_GetLang(child, buf, kBufSize);
138 if (len > 0) {
139 printf("%*s Lang: %ls\n", indent * 2, "",
140 ConvertToWString(buf, len).c_str());
141 }
142
143 int mcid = FPDF_StructElement_GetMarkedContentID(child);
144 if (mcid != -1) {
145 printf("%*s MCID: %d\n", indent * 2, "", mcid);
146 }
147
148 FPDF_STRUCTELEMENT parent = FPDF_StructElement_GetParent(child);
149 if (parent) {
150 memset(buf, 0, sizeof(buf));
151 len = FPDF_StructElement_GetID(parent, buf, kBufSize);
152 if (len > 0) {
153 printf("%*s Parent ID: %ls\n", indent * 2, "",
154 ConvertToWString(buf, len).c_str());
155 }
156 }
157
158 memset(buf, 0, sizeof(buf));
159 len = FPDF_StructElement_GetTitle(child, buf, kBufSize);
160 if (len > 0) {
161 printf("%*s Title: %ls\n", indent * 2, "",
162 ConvertToWString(buf, len).c_str());
163 }
164
165 memset(buf, 0, sizeof(buf));
166 len = FPDF_StructElement_GetObjType(child, buf, kBufSize);
167 if (len > 0) {
168 printf("%*s Type: %ls\n", indent * 2, "",
169 ConvertToWString(buf, len).c_str());
170 }
171
172 for (int i = 0; i < FPDF_StructElement_CountChildren(child); ++i) {
173 FPDF_STRUCTELEMENT sub_child = FPDF_StructElement_GetChildAtIndex(child, i);
174 // If the child is not an Element then this will return null. This can
175 // happen if the element is things like an object reference or a stream.
176 if (!sub_child) {
177 continue;
178 }
179
180 DumpChildStructure(sub_child, indent + 1);
181 }
182 }
183
DumpPageInfo(FPDF_PAGE page,int page_idx)184 void DumpPageInfo(FPDF_PAGE page, int page_idx) {
185 DumpBoxInfo(&FPDFPage_GetMediaBox, "MediaBox", page, page_idx);
186 DumpBoxInfo(&FPDFPage_GetCropBox, "CropBox", page, page_idx);
187 DumpBoxInfo(&FPDFPage_GetBleedBox, "BleedBox", page, page_idx);
188 DumpBoxInfo(&FPDFPage_GetTrimBox, "TrimBox", page, page_idx);
189 DumpBoxInfo(&FPDFPage_GetArtBox, "ArtBox", page, page_idx);
190 }
191
DumpPageStructure(FPDF_PAGE page,int page_idx)192 void DumpPageStructure(FPDF_PAGE page, int page_idx) {
193 ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
194 if (!tree) {
195 fprintf(stderr, "Failed to load struct tree for page %d\n", page_idx);
196 return;
197 }
198
199 printf("Structure Tree for Page %d\n", page_idx);
200 for (int i = 0; i < FPDF_StructTree_CountChildren(tree.get()); ++i) {
201 FPDF_STRUCTELEMENT child = FPDF_StructTree_GetChildAtIndex(tree.get(), i);
202 if (!child) {
203 fprintf(stderr, "Failed to load child %d for page %d\n", i, page_idx);
204 continue;
205 }
206 DumpChildStructure(child, 0);
207 }
208 printf("\n\n");
209 }
210
DumpMetaData(FPDF_DOCUMENT doc)211 void DumpMetaData(FPDF_DOCUMENT doc) {
212 static constexpr const char* kMetaTags[] = {
213 "Title", "Author", "Subject", "Keywords",
214 "Creator", "Producer", "CreationDate", "ModDate"};
215 for (const char* meta_tag : kMetaTags) {
216 char meta_buffer[4096];
217 unsigned long len =
218 FPDF_GetMetaText(doc, meta_tag, meta_buffer, sizeof(meta_buffer));
219 if (!len) {
220 continue;
221 }
222
223 auto* meta_string = reinterpret_cast<unsigned short*>(meta_buffer);
224 printf("%-12s = %ls (%lu bytes)\n", meta_tag,
225 GetPlatformWString(meta_string).c_str(), len);
226 }
227 }
228