xref: /aosp_15_r20/external/pdfium/samples/helpers/dump.cc (revision 3ac0a46f773bac49fa9476ec2b1cf3f8da5ec3a4)
1 // Copyright 2018 The PDFium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "samples/helpers/dump.h"
6 
7 #include <limits.h>
8 #include <string.h>
9 
10 #include <algorithm>
11 #include <functional>
12 #include <iterator>
13 #include <string>
14 #include <utility>
15 
16 #include "public/cpp/fpdf_scopers.h"
17 #include "public/fpdf_doc.h"
18 #include "public/fpdf_transformpage.h"
19 #include "testing/fx_string_testhelpers.h"
20 
21 using GetBoxInfoFunc =
22     std::function<bool(FPDF_PAGE, float*, float*, float*, float*)>;
23 
24 namespace {
25 
// Widens the `buf_size` 16-bit units starting at `buf` into a std::wstring,
// producing exactly one wchar_t per input unit. Every unit in the range is
// copied as-is, including any NUL units, so the result's size() always
// equals `buf_size`.
std::wstring ConvertToWString(const unsigned short* buf,
                              unsigned long buf_size) {
  const unsigned short* const range_end = buf + buf_size;
  return std::wstring(buf, range_end);
}
33 
DumpBoxInfo(GetBoxInfoFunc func,const char * box_type,FPDF_PAGE page,int page_idx)34 void DumpBoxInfo(GetBoxInfoFunc func,
35                  const char* box_type,
36                  FPDF_PAGE page,
37                  int page_idx) {
38   FS_RECTF rect;
39   bool ret = func(page, &rect.left, &rect.bottom, &rect.right, &rect.top);
40   if (!ret) {
41     printf("Page %d: No %s.\n", page_idx, box_type);
42     return;
43   }
44   printf("Page %d: %s: %0.2f %0.2f %0.2f %0.2f\n", page_idx, box_type,
45          rect.left, rect.bottom, rect.right, rect.top);
46 }
47 
DumpStructureElementAttributes(FPDF_STRUCTELEMENT_ATTR attr,int indent)48 void DumpStructureElementAttributes(FPDF_STRUCTELEMENT_ATTR attr, int indent) {
49   static const size_t kBufSize = 1024;
50   int count = FPDF_StructElement_Attr_GetCount(attr);
51   for (int i = 0; i < count; i++) {
52     char name[kBufSize] = {};
53     unsigned long len = ULONG_MAX;
54     if (!FPDF_StructElement_Attr_GetName(attr, i, name, sizeof(name), &len)) {
55       printf("%*s FPDF_StructElement_Attr_GetName failed for %d\n", indent, "",
56              i);
57       continue;
58     }
59 
60     FPDF_OBJECT_TYPE type = FPDF_StructElement_Attr_GetType(attr, name);
61     if (type == FPDF_OBJECT_BOOLEAN) {
62       int value;
63       if (!FPDF_StructElement_Attr_GetBooleanValue(attr, name, &value)) {
64         printf("%*s %s: Failed FPDF_StructElement_Attr_GetBooleanValue\n",
65                indent, "", name);
66         continue;
67       }
68       printf("%*s %s: %d\n", indent, "", name, value);
69     } else if (type == FPDF_OBJECT_NUMBER) {
70       float value;
71       if (!FPDF_StructElement_Attr_GetNumberValue(attr, name, &value)) {
72         printf("%*s %s: Failed FPDF_StructElement_Attr_GetNumberValue\n",
73                indent, "", name);
74         continue;
75       }
76       printf("%*s %s: %f\n", indent, "", name, value);
77     } else if (type == FPDF_OBJECT_STRING || type == FPDF_OBJECT_NAME) {
78       unsigned short buffer[kBufSize] = {};
79       if (!FPDF_StructElement_Attr_GetStringValue(attr, name, buffer,
80                                                   sizeof(buffer), &len)) {
81         printf("%*s %s: Failed FPDF_StructElement_Attr_GetStringValue\n",
82                indent, "", name);
83         continue;
84       }
85       printf("%*s %s: %ls\n", indent, "", name,
86              ConvertToWString(buffer, len).c_str());
87     } else if (type == FPDF_OBJECT_UNKNOWN) {
88       printf("%*s %s: FPDF_OBJECT_UNKNOWN\n", indent, "", name);
89     } else {
90       printf("%*s %s: NOT_YET_IMPLEMENTED: %d\n", indent, "", name, type);
91     }
92   }
93 }
94 
95 }  // namespace
96 
DumpChildStructure(FPDF_STRUCTELEMENT child,int indent)97 void DumpChildStructure(FPDF_STRUCTELEMENT child, int indent) {
98   static const size_t kBufSize = 1024;
99   unsigned short buf[kBufSize];
100   unsigned long len = FPDF_StructElement_GetType(child, buf, kBufSize);
101   if (len > 0) {
102     printf("%*s S: %ls\n", indent * 2, "", ConvertToWString(buf, len).c_str());
103   }
104 
105   int attr_count = FPDF_StructElement_GetAttributeCount(child);
106   for (int i = 0; i < attr_count; i++) {
107     FPDF_STRUCTELEMENT_ATTR child_attr =
108         FPDF_StructElement_GetAttributeAtIndex(child, i);
109     if (!child_attr) {
110       continue;
111     }
112     printf("%*s A[%d]:\n", indent * 2, "", i);
113     DumpStructureElementAttributes(child_attr, indent * 2 + 2);
114   }
115 
116   memset(buf, 0, sizeof(buf));
117   len = FPDF_StructElement_GetActualText(child, buf, kBufSize);
118   if (len > 0) {
119     printf("%*s ActualText: %ls\n", indent * 2, "",
120            ConvertToWString(buf, len).c_str());
121   }
122 
123   memset(buf, 0, sizeof(buf));
124   len = FPDF_StructElement_GetAltText(child, buf, kBufSize);
125   if (len > 0) {
126     printf("%*s AltText: %ls\n", indent * 2, "",
127            ConvertToWString(buf, len).c_str());
128   }
129 
130   memset(buf, 0, sizeof(buf));
131   len = FPDF_StructElement_GetID(child, buf, kBufSize);
132   if (len > 0) {
133     printf("%*s ID: %ls\n", indent * 2, "", ConvertToWString(buf, len).c_str());
134   }
135 
136   memset(buf, 0, sizeof(buf));
137   len = FPDF_StructElement_GetLang(child, buf, kBufSize);
138   if (len > 0) {
139     printf("%*s Lang: %ls\n", indent * 2, "",
140            ConvertToWString(buf, len).c_str());
141   }
142 
143   int mcid = FPDF_StructElement_GetMarkedContentID(child);
144   if (mcid != -1) {
145     printf("%*s MCID: %d\n", indent * 2, "", mcid);
146   }
147 
148   FPDF_STRUCTELEMENT parent = FPDF_StructElement_GetParent(child);
149   if (parent) {
150     memset(buf, 0, sizeof(buf));
151     len = FPDF_StructElement_GetID(parent, buf, kBufSize);
152     if (len > 0) {
153       printf("%*s Parent ID: %ls\n", indent * 2, "",
154              ConvertToWString(buf, len).c_str());
155     }
156   }
157 
158   memset(buf, 0, sizeof(buf));
159   len = FPDF_StructElement_GetTitle(child, buf, kBufSize);
160   if (len > 0) {
161     printf("%*s Title: %ls\n", indent * 2, "",
162            ConvertToWString(buf, len).c_str());
163   }
164 
165   memset(buf, 0, sizeof(buf));
166   len = FPDF_StructElement_GetObjType(child, buf, kBufSize);
167   if (len > 0) {
168     printf("%*s Type: %ls\n", indent * 2, "",
169            ConvertToWString(buf, len).c_str());
170   }
171 
172   for (int i = 0; i < FPDF_StructElement_CountChildren(child); ++i) {
173     FPDF_STRUCTELEMENT sub_child = FPDF_StructElement_GetChildAtIndex(child, i);
174     // If the child is not an Element then this will return null. This can
175     // happen if the element is things like an object reference or a stream.
176     if (!sub_child) {
177       continue;
178     }
179 
180     DumpChildStructure(sub_child, indent + 1);
181   }
182 }
183 
DumpPageInfo(FPDF_PAGE page,int page_idx)184 void DumpPageInfo(FPDF_PAGE page, int page_idx) {
185   DumpBoxInfo(&FPDFPage_GetMediaBox, "MediaBox", page, page_idx);
186   DumpBoxInfo(&FPDFPage_GetCropBox, "CropBox", page, page_idx);
187   DumpBoxInfo(&FPDFPage_GetBleedBox, "BleedBox", page, page_idx);
188   DumpBoxInfo(&FPDFPage_GetTrimBox, "TrimBox", page, page_idx);
189   DumpBoxInfo(&FPDFPage_GetArtBox, "ArtBox", page, page_idx);
190 }
191 
DumpPageStructure(FPDF_PAGE page,int page_idx)192 void DumpPageStructure(FPDF_PAGE page, int page_idx) {
193   ScopedFPDFStructTree tree(FPDF_StructTree_GetForPage(page));
194   if (!tree) {
195     fprintf(stderr, "Failed to load struct tree for page %d\n", page_idx);
196     return;
197   }
198 
199   printf("Structure Tree for Page %d\n", page_idx);
200   for (int i = 0; i < FPDF_StructTree_CountChildren(tree.get()); ++i) {
201     FPDF_STRUCTELEMENT child = FPDF_StructTree_GetChildAtIndex(tree.get(), i);
202     if (!child) {
203       fprintf(stderr, "Failed to load child %d for page %d\n", i, page_idx);
204       continue;
205     }
206     DumpChildStructure(child, 0);
207   }
208   printf("\n\n");
209 }
210 
DumpMetaData(FPDF_DOCUMENT doc)211 void DumpMetaData(FPDF_DOCUMENT doc) {
212   static constexpr const char* kMetaTags[] = {
213       "Title",   "Author",   "Subject",      "Keywords",
214       "Creator", "Producer", "CreationDate", "ModDate"};
215   for (const char* meta_tag : kMetaTags) {
216     char meta_buffer[4096];
217     unsigned long len =
218         FPDF_GetMetaText(doc, meta_tag, meta_buffer, sizeof(meta_buffer));
219     if (!len) {
220       continue;
221     }
222 
223     auto* meta_string = reinterpret_cast<unsigned short*>(meta_buffer);
224     printf("%-12s = %ls (%lu bytes)\n", meta_tag,
225            GetPlatformWString(meta_string).c_str(), len);
226   }
227 }
228