xref: /aosp_15_r20/external/dynamic_depth/internal/xmpmeta/xmp_parser.cc (revision a62be0856e8e1158f43b03e41bbad10f4d005fde)
#include "xmpmeta/xmp_parser.h"

#include <algorithm>
#include <cassert>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <sstream>
#include <stack>

#include "android-base/logging.h"
#include "libxml/parser.h"
#include "strings/case.h"
#include "strings/numbers.h"
#include "xmpmeta/base64.h"
#include "xmpmeta/jpeg_io.h"
#include "xmpmeta/xml/const.h"
#include "xmpmeta/xml/deserializer_impl.h"
#include "xmpmeta/xml/search.h"
#include "xmpmeta/xml/utils.h"
#include "xmpmeta/xmp_const.h"
20 
21 using ::dynamic_depth::xmpmeta::xml::DepthFirstSearch;
22 using ::dynamic_depth::xmpmeta::xml::DeserializerImpl;
23 using ::dynamic_depth::xmpmeta::xml::FromXmlChar;
24 using ::dynamic_depth::xmpmeta::xml::GetFirstDescriptionElement;
25 
26 namespace dynamic_depth {
27 namespace xmpmeta {
28 namespace {
29 
// Lower-case file name suffixes accepted by ReadXmpHeader(filename, ...).
// Note that neither suffix includes the leading '.'.
constexpr char kJpgExtension[] = "jpg";
constexpr char kJpegExtension[] = "jpeg";
32 
BoolStringToBool(const string & bool_str,bool * value)33 bool BoolStringToBool(const string& bool_str, bool* value) {
34   if (dynamic_depth::StringCaseEqual(bool_str, "true")) {
35     *value = true;
36     return true;
37   }
38   if (dynamic_depth::StringCaseEqual(bool_str, "false")) {
39     *value = false;
40     return true;
41   }
42   return false;
43 }
44 
45 // Converts string_property to the type T.
46 template <typename T>
47 bool ConvertStringPropertyToType(const string& string_property, T* value);
48 
49 // Gets the end of the XMP meta content. If there is no packet wrapper, returns
50 // data.length, otherwise returns 1 + the position of last '>' without '?'
51 // before it. Usually the packet wrapper end is "<?xpacket end="w"?>.
GetXmpContentEnd(const string & data)52 size_t GetXmpContentEnd(const string& data) {
53   if (data.empty()) {
54     return 0;
55   }
56   for (size_t i = data.size() - 1; i >= 1; --i) {
57     if (data[i] == '>') {
58       if (data[i - 1] != '?') {
59         return i + 1;
60       }
61     }
62   }
63   // It should not reach here for a valid XMP meta.
64   LOG(WARNING) << "Failed to find the end of the XMP meta content.";
65   return data.size();
66 }
67 
// Returns true when the leading x.size() characters of 's' are exactly 'x'.
// An empty 'x' is a prefix of every string.
bool StartsWith(const string& s, const string& x) {
  if (x.size() > s.size()) {
    return false;
  }
  return s.compare(0, x.size(), x) == 0;
}
// Returns true when the trailing x.size() characters of 's' are exactly 'x'.
// An empty 'x' is a suffix of every string.
bool EndsWith(const string& s, const string& x) {
  if (x.size() > s.size()) {
    return false;
  }
  const auto tail_start = s.size() - x.size();
  return s.compare(tail_start, x.size(), x) == 0;
}
76 
77 // Parses the first valid XMP section. Any other valid XMP section will be
78 // ignored.
ParseFirstValidXMPSection(const std::vector<Section> & sections,XmpData * xmp)79 bool ParseFirstValidXMPSection(const std::vector<Section>& sections,
80                                XmpData* xmp) {
81   for (const Section& section : sections) {
82     if (StartsWith(section.data, XmpConst::Header())) {
83       const size_t end = GetXmpContentEnd(section.data);
84       // Increment header length by 1 for the null termination.
85       const size_t header_length = strlen(XmpConst::Header()) + 1;
86       // Check for integer underflow before subtracting.
87       if (header_length >= end) {
88         LOG(ERROR) << "Invalid content length: "
89                    << static_cast<int>(end - header_length);
90         return false;
91       }
92       const size_t content_length = end - header_length;
93       // header_length is guaranteed to be <= data.size due to the if condition
94       // above. If this contract changes we must add an additonal check.
95       const char* content_start = &section.data[header_length];
96       // xmlReadMemory requires an int. Before casting size_t to int we must
97       // check for integer overflow.
98       if (content_length > INT_MAX) {
99         LOG(ERROR) << "First XMP section too large, size: " << content_length;
100         return false;
101       }
102       *xmp->MutableStandardSection() = xmlReadMemory(
103           content_start, static_cast<int>(content_length), nullptr, nullptr, 0);
104       if (xmp->StandardSection() == nullptr) {
105         LOG(WARNING) << "Failed to parse standard section.";
106         return false;
107       }
108       return true;
109     }
110   }
111   return false;
112 }
113 
114 // Collects the extended XMP sections with the given name into a string. Other
115 // sections will be ignored.
GetExtendedXmpSections(const std::vector<Section> & sections,const string & section_name)116 string GetExtendedXmpSections(const std::vector<Section>& sections,
117                               const string& section_name) {
118   string extended_header = XmpConst::ExtensionHeader();
119   extended_header += '\0' + section_name;
120   // section_name is dynamically extracted from the xml file and can have an
121   // arbitrary size. Check for integer overflow before addition.
122   if (extended_header.size() > SIZE_MAX - XmpConst::ExtensionHeaderOffset()) {
123     return "";
124   }
125   const size_t section_start_offset =
126       extended_header.size() + XmpConst::ExtensionHeaderOffset();
127 
128   // Compute the size of the buffer to parse the extended sections.
129   std::vector<const Section*> xmp_sections;
130   std::vector<size_t> xmp_end_offsets;
131   size_t buffer_size = 0;
132   for (const Section& section : sections) {
133     if (extended_header.empty() || StartsWith(section.data, extended_header)) {
134       const size_t end_offset = section.data.size();
135       const size_t section_size = end_offset - section_start_offset;
136       if (end_offset < section_start_offset ||
137           section_size > SIZE_MAX - buffer_size) {
138         return "";
139       }
140       buffer_size += section_size;
141       xmp_sections.push_back(&section);
142       xmp_end_offsets.push_back(end_offset);
143     }
144   }
145 
146   // Copy all the relevant sections' data into a buffer.
147   string buffer(buffer_size, '\0');
148   if (buffer.size() != buffer_size) {
149     return "";
150   }
151   size_t offset = 0;
152   for (int i = 0; i < xmp_sections.size(); ++i) {
153     const Section* section = xmp_sections[i];
154     const size_t length = xmp_end_offsets[i] - section_start_offset;
155     std::copy_n(&section->data[section_start_offset], length, &buffer[offset]);
156     offset += length;
157   }
158   return buffer;
159 }
160 
161 // Parses the extended XMP sections with the given name. All other sections
162 // will be ignored.
ParseExtendedXmpSections(const std::vector<Section> & sections,const string & section_name,XmpData * xmp_data)163 bool ParseExtendedXmpSections(const std::vector<Section>& sections,
164                               const string& section_name, XmpData* xmp_data) {
165   const string extended_sections =
166       GetExtendedXmpSections(sections, section_name);
167   // xmlReadMemory requires an int. Before casting size_t to int we must check
168   // for integer overflow.
169   if (extended_sections.size() > INT_MAX) {
170     LOG(WARNING) << "Extended sections too large, size: "
171                  << extended_sections.size();
172     return false;
173   }
174   *xmp_data->MutableExtendedSection() = xmlReadMemory(
175       extended_sections.data(), static_cast<int>(extended_sections.size()),
176       nullptr, nullptr, XML_PARSE_HUGE);
177   if (xmp_data->ExtendedSection() == nullptr) {
178     LOG(WARNING) << "Failed to parse extended sections.";
179     return false;
180   }
181   return true;
182 }
183 
184 // Extracts a XmpData from a JPEG image stream.
ExtractXmpMeta(const bool skip_extended,std::istream * file,XmpData * xmp_data)185 bool ExtractXmpMeta(const bool skip_extended, std::istream* file,
186                     XmpData* xmp_data) {
187   // We cannot use CHECK because this is ported to AOSP.
188   assert(xmp_data != nullptr);  // NOLINT
189   xmp_data->Reset();
190 
191   ParseOptions parse_options;
192   parse_options.read_meta_only = true;
193   if (skip_extended) {
194     parse_options.section_header = XmpConst::Header();
195     parse_options.section_header_return_first = true;
196   }
197   const std::vector<Section> sections = Parse(parse_options, file);
198   if (sections.empty()) {
199     LOG(WARNING) << "No sections found.";
200     return false;
201   }
202 
203   if (!ParseFirstValidXMPSection(sections, xmp_data)) {
204     LOG(WARNING) << "Could not parse first section.";
205     return false;
206   }
207   if (skip_extended) {
208     return true;
209   }
210   string extension_name;
211   DeserializerImpl deserializer(
212       GetFirstDescriptionElement(xmp_data->StandardSection()));
213   if (!deserializer.ParseString(XmpConst::HasExtensionPrefix(),
214                                 XmpConst::HasExtension(), &extension_name)) {
215     // No extended sections present, so nothing to parse.
216     return true;
217   }
218   if (!ParseExtendedXmpSections(sections, extension_name, xmp_data)) {
219     LOG(WARNING) << "Extended sections present, but could not be parsed.";
220     return false;
221   }
222   return true;
223 }
224 
225 // Extracts the specified string attribute.
GetStringProperty(const xmlNodePtr node,const char * prefix,const char * property,string * value)226 bool GetStringProperty(const xmlNodePtr node, const char* prefix,
227                        const char* property, string* value) {
228   const xmlDocPtr doc = node->doc;
229   for (const _xmlAttr* attribute = node->properties; attribute != nullptr;
230        attribute = attribute->next) {
231     if (attribute->ns &&
232         strcmp(FromXmlChar(attribute->ns->prefix), prefix) == 0 &&
233         strcmp(FromXmlChar(attribute->name), property) == 0) {
234       xmlChar* attribute_string =
235           xmlNodeListGetString(doc, attribute->children, 1);
236       *value = FromXmlChar(attribute_string);
237       xmlFree(attribute_string);
238       return true;
239     }
240   }
241   return false;
242 }
243 
244 // Reads the contents of a node.
245 // E.g. <prefix:node_name>Contents Here</prefix:node_name>
ReadNodeContent(const xmlNodePtr node,const char * prefix,const char * node_name,string * value)246 bool ReadNodeContent(const xmlNodePtr node, const char* prefix,
247                      const char* node_name, string* value) {
248   auto* element = DepthFirstSearch(node, node_name);
249   if (element == nullptr) {
250     return false;
251   }
252   if (prefix != nullptr &&
253       (element->ns == nullptr || element->ns->prefix == nullptr ||
254        strcmp(FromXmlChar(element->ns->prefix), prefix) != 0)) {
255     return false;
256   }
257   xmlChar* node_content = xmlNodeGetContent(element);
258   *value = FromXmlChar(node_content);
259   free(node_content);
260   return true;
261 }
262 
263 template <typename T>
ConvertStringPropertyToType(const string & string_property,T * value)264 bool ConvertStringPropertyToType(const string& string_property, T* value) {
265   QCHECK(value) << "Cannot call this method on a generic type";
266   return false;
267 }
268 
269 template <>
ConvertStringPropertyToType(const string & string_property,bool * value)270 bool ConvertStringPropertyToType<bool>(const string& string_property,
271                                        bool* value) {
272   return BoolStringToBool(string_property, value);
273 }
274 
275 template <>
ConvertStringPropertyToType(const string & string_property,double * value)276 bool ConvertStringPropertyToType<double>(const string& string_property,
277                                          double* value) {
278   *value = std::stod(string_property);
279   return true;
280 }
281 
282 template <>
ConvertStringPropertyToType(const string & string_property,int * value)283 bool ConvertStringPropertyToType<int>(const string& string_property,
284                                       int* value) {
285   *value = 0;
286   for (int i = 0; i < string_property.size(); ++i) {
287     if (!isdigit(string_property[i])) {
288       return false;
289     }
290   }
291 
292   *value = std::atoi(string_property.c_str());  // NOLINT
293   return true;
294 }
295 
296 template <>
ConvertStringPropertyToType(const string & string_property,int64 * value)297 bool ConvertStringPropertyToType<int64>(const string& string_property,
298                                         int64* value) {
299   *value = std::stol(string_property);
300   return true;
301 }
302 
303 }  // namespace
304 
ReadXmpHeader(const string & filename,const bool skip_extended,XmpData * xmp_data)305 bool ReadXmpHeader(const string& filename, const bool skip_extended,
306                    XmpData* xmp_data) {
307   string filename_lower = filename;
308   std::transform(filename_lower.begin(), filename_lower.end(),
309                  filename_lower.begin(), ::tolower);
310   if (!EndsWith(filename_lower, kJpgExtension) &&
311       !EndsWith(filename_lower, kJpegExtension)) {
312     LOG(WARNING) << "XMP parse: only JPEG file is supported";
313     return false;
314   }
315 
316   std::ifstream file(filename.c_str(), std::ios::binary);
317   if (!file.is_open()) {
318     LOG(WARNING) << " Could not read file: " << filename;
319     return false;
320   }
321   return ExtractXmpMeta(skip_extended, &file, xmp_data);
322 }
323 
ReadXmpFromMemory(const string & jpeg_contents,const bool skip_extended,XmpData * xmp_data)324 bool ReadXmpFromMemory(const string& jpeg_contents, const bool skip_extended,
325                        XmpData* xmp_data) {
326   std::istringstream stream(jpeg_contents);
327   return ExtractXmpMeta(skip_extended, &stream, xmp_data);
328 }
329 
ReadXmpHeader(std::istream * input_stream,bool skip_extended,XmpData * xmp_data)330 bool ReadXmpHeader(std::istream* input_stream, bool skip_extended,
331                    XmpData* xmp_data) {
332   return ExtractXmpMeta(skip_extended, input_stream, xmp_data);
333 }
334 
335 }  // namespace xmpmeta
336 }  // namespace dynamic_depth
337