#include "xmpmeta/jpeg_io.h"

#include <fstream>
#include <sstream>
#include <utility>

#include "android-base/logging.h"
7*a62be085SSadaf Ebrahimi
8*a62be085SSadaf Ebrahimi namespace dynamic_depth {
9*a62be085SSadaf Ebrahimi namespace xmpmeta {
10*a62be085SSadaf Ebrahimi namespace {
11*a62be085SSadaf Ebrahimi
// JPEG marker codes, i.e. the byte that follows a 0xff prefix byte.
// See: http://www.fileformat.info/format/jpeg/egff.htm or
// https://en.wikipedia.org/wiki/JPEG
constexpr int kSoi = 0xd8;   // Start of image marker.
constexpr int kApp1 = 0xe1;  // Start of EXIF section.
constexpr int kSos = 0xda;   // Start of scan.

// Size, in bytes, of the length field stored at the start of a JPEG section.
constexpr int kSectionLengthByteSize = 2;
21*a62be085SSadaf Ebrahimi
// Reports how many bytes lie between the current read position and the end of
// the stream. On success the read position is moved back to where it was when
// the function was called. Returns 0 if a position cannot be determined, if
// seeking to the end fails, or if the stream is already at (or past) its end.
size_t GetBytesAvailable(std::istream* input_stream) {
  const std::streamoff start = input_stream->tellg();
  if (start == -1) {
    return 0;
  }

  input_stream->seekg(0, std::ios::end);
  if (!input_stream->good()) {
    return 0;
  }

  const std::streamoff finish = input_stream->tellg();
  if (finish == -1) {
    return 0;
  }

  // Rewind before computing the result so the caller's position is preserved.
  input_stream->seekg(start);
  return finish > start ? finish - start : 0;
}
46*a62be085SSadaf Ebrahimi
// Extracts a single byte from the stream and returns its value in the range
// [0, 255]. Returns -1 when the stream is not in a good state after the read.
int ReadByteAsInt(std::istream* input_stream) {
  unsigned char value = 0;
  char* const destination = reinterpret_cast<char*>(&value);
  input_stream->read(destination, 1);
  // -1 cannot collide with real data: a byte widened from unsigned char is
  // never negative.
  return input_stream->good() ? static_cast<int>(value) : -1;
}
57*a62be085SSadaf Ebrahimi
58*a62be085SSadaf Ebrahimi // Reads the length of a section from 2 bytes.
Read2ByteLength(std::istream * input_stream,bool * error)59*a62be085SSadaf Ebrahimi size_t Read2ByteLength(std::istream* input_stream, bool* error) {
60*a62be085SSadaf Ebrahimi const int length_high = ReadByteAsInt(input_stream);
61*a62be085SSadaf Ebrahimi const int length_low = ReadByteAsInt(input_stream);
62*a62be085SSadaf Ebrahimi if (length_high == -1 || length_low == -1) {
63*a62be085SSadaf Ebrahimi *error = true;
64*a62be085SSadaf Ebrahimi return 0;
65*a62be085SSadaf Ebrahimi }
66*a62be085SSadaf Ebrahimi *error = false;
67*a62be085SSadaf Ebrahimi return length_high << 8 | length_low;
68*a62be085SSadaf Ebrahimi }
69*a62be085SSadaf Ebrahimi
// Returns true when |to_check| begins with |prefix|. An empty prefix matches
// every string.
bool HasPrefixString(const string& to_check, const string& prefix) {
  const bool long_enough = to_check.size() >= prefix.size();
  return long_enough && to_check.compare(0, prefix.size(), prefix) == 0;
}
76*a62be085SSadaf Ebrahimi
77*a62be085SSadaf Ebrahimi } // namespace
78*a62be085SSadaf Ebrahimi
Section(const string & buffer)79*a62be085SSadaf Ebrahimi Section::Section(const string& buffer) {
80*a62be085SSadaf Ebrahimi marker = kApp1;
81*a62be085SSadaf Ebrahimi is_image_section = false;
82*a62be085SSadaf Ebrahimi data = buffer;
83*a62be085SSadaf Ebrahimi }
84*a62be085SSadaf Ebrahimi
IsMarkerApp1()85*a62be085SSadaf Ebrahimi bool Section::IsMarkerApp1() { return marker == kApp1; }
86*a62be085SSadaf Ebrahimi
Parse(const ParseOptions & options,std::istream * input_stream)87*a62be085SSadaf Ebrahimi std::vector<Section> Parse(const ParseOptions& options,
88*a62be085SSadaf Ebrahimi std::istream* input_stream) {
89*a62be085SSadaf Ebrahimi std::vector<Section> sections;
90*a62be085SSadaf Ebrahimi // Return early if this is not the start of a JPEG section.
91*a62be085SSadaf Ebrahimi if (ReadByteAsInt(input_stream) != 0xff ||
92*a62be085SSadaf Ebrahimi ReadByteAsInt(input_stream) != kSoi) {
93*a62be085SSadaf Ebrahimi LOG(WARNING) << "File's first two bytes does not match the sequence \xff"
94*a62be085SSadaf Ebrahimi << kSoi;
95*a62be085SSadaf Ebrahimi return std::vector<Section>();
96*a62be085SSadaf Ebrahimi }
97*a62be085SSadaf Ebrahimi
98*a62be085SSadaf Ebrahimi int chr; // Short for character.
99*a62be085SSadaf Ebrahimi while ((chr = ReadByteAsInt(input_stream)) != -1) {
100*a62be085SSadaf Ebrahimi if (chr != 0xff) {
101*a62be085SSadaf Ebrahimi LOG(WARNING) << "Read non-padding byte: " << chr;
102*a62be085SSadaf Ebrahimi return sections;
103*a62be085SSadaf Ebrahimi }
104*a62be085SSadaf Ebrahimi // Skip padding bytes.
105*a62be085SSadaf Ebrahimi while ((chr = ReadByteAsInt(input_stream)) == 0xff) {
106*a62be085SSadaf Ebrahimi }
107*a62be085SSadaf Ebrahimi if (chr == -1) {
108*a62be085SSadaf Ebrahimi LOG(WARNING) << "No more bytes in file available to be read.";
109*a62be085SSadaf Ebrahimi return sections;
110*a62be085SSadaf Ebrahimi }
111*a62be085SSadaf Ebrahimi
112*a62be085SSadaf Ebrahimi const int marker = chr;
113*a62be085SSadaf Ebrahimi if (marker == kSos) {
114*a62be085SSadaf Ebrahimi // kSos indicates the image data will follow and no metadata after that,
115*a62be085SSadaf Ebrahimi // so read all data at one time.
116*a62be085SSadaf Ebrahimi if (!options.read_meta_only) {
117*a62be085SSadaf Ebrahimi Section section;
118*a62be085SSadaf Ebrahimi section.marker = marker;
119*a62be085SSadaf Ebrahimi section.is_image_section = true;
120*a62be085SSadaf Ebrahimi const size_t bytes_available = GetBytesAvailable(input_stream);
121*a62be085SSadaf Ebrahimi section.data.resize(bytes_available);
122*a62be085SSadaf Ebrahimi input_stream->read(§ion.data[0], bytes_available);
123*a62be085SSadaf Ebrahimi if (input_stream->good()) {
124*a62be085SSadaf Ebrahimi sections.push_back(section);
125*a62be085SSadaf Ebrahimi }
126*a62be085SSadaf Ebrahimi }
127*a62be085SSadaf Ebrahimi // All sections have been read.
128*a62be085SSadaf Ebrahimi return sections;
129*a62be085SSadaf Ebrahimi }
130*a62be085SSadaf Ebrahimi
131*a62be085SSadaf Ebrahimi bool error;
132*a62be085SSadaf Ebrahimi const size_t length = Read2ByteLength(input_stream, &error);
133*a62be085SSadaf Ebrahimi if (error || length < kSectionLengthByteSize) {
134*a62be085SSadaf Ebrahimi // No sections to read.
135*a62be085SSadaf Ebrahimi LOG(WARNING) << "No sections to read; section length is " << length;
136*a62be085SSadaf Ebrahimi return sections;
137*a62be085SSadaf Ebrahimi }
138*a62be085SSadaf Ebrahimi
139*a62be085SSadaf Ebrahimi const size_t bytes_left = GetBytesAvailable(input_stream);
140*a62be085SSadaf Ebrahimi if (length - kSectionLengthByteSize > bytes_left) {
141*a62be085SSadaf Ebrahimi LOG(WARNING) << "Invalid section length = " << length
142*a62be085SSadaf Ebrahimi << " total bytes available = " << bytes_left;
143*a62be085SSadaf Ebrahimi return sections;
144*a62be085SSadaf Ebrahimi }
145*a62be085SSadaf Ebrahimi
146*a62be085SSadaf Ebrahimi if (!options.read_meta_only || marker == kApp1) {
147*a62be085SSadaf Ebrahimi Section section;
148*a62be085SSadaf Ebrahimi section.marker = marker;
149*a62be085SSadaf Ebrahimi section.is_image_section = false;
150*a62be085SSadaf Ebrahimi const size_t data_size = length - kSectionLengthByteSize;
151*a62be085SSadaf Ebrahimi section.data.resize(data_size);
152*a62be085SSadaf Ebrahimi if (section.data.size() != data_size) {
153*a62be085SSadaf Ebrahimi LOG(WARNING) << "Discrepancy in section data size "
154*a62be085SSadaf Ebrahimi << section.data.size() << "and data size " << data_size;
155*a62be085SSadaf Ebrahimi return sections;
156*a62be085SSadaf Ebrahimi }
157*a62be085SSadaf Ebrahimi input_stream->read(§ion.data[0], section.data.size());
158*a62be085SSadaf Ebrahimi if (input_stream->good() &&
159*a62be085SSadaf Ebrahimi (options.section_header.empty() ||
160*a62be085SSadaf Ebrahimi HasPrefixString(section.data, options.section_header))) {
161*a62be085SSadaf Ebrahimi sections.push_back(section);
162*a62be085SSadaf Ebrahimi // Return if we have specified to return the 1st section with
163*a62be085SSadaf Ebrahimi // the given name.
164*a62be085SSadaf Ebrahimi if (options.section_header_return_first) {
165*a62be085SSadaf Ebrahimi return sections;
166*a62be085SSadaf Ebrahimi }
167*a62be085SSadaf Ebrahimi }
168*a62be085SSadaf Ebrahimi } else {
169*a62be085SSadaf Ebrahimi // Skip this section since all EXIF/XMP meta will be in kApp1 section.
170*a62be085SSadaf Ebrahimi input_stream->ignore(length - kSectionLengthByteSize);
171*a62be085SSadaf Ebrahimi }
172*a62be085SSadaf Ebrahimi }
173*a62be085SSadaf Ebrahimi return sections;
174*a62be085SSadaf Ebrahimi }
175*a62be085SSadaf Ebrahimi
WriteSections(const std::vector<Section> & sections,std::ostream * output_stream)176*a62be085SSadaf Ebrahimi void WriteSections(const std::vector<Section>& sections,
177*a62be085SSadaf Ebrahimi std::ostream* output_stream) {
178*a62be085SSadaf Ebrahimi output_stream->put(0xff);
179*a62be085SSadaf Ebrahimi output_stream->put(static_cast<unsigned char>(kSoi));
180*a62be085SSadaf Ebrahimi for (const Section& section : sections) {
181*a62be085SSadaf Ebrahimi output_stream->put(0xff);
182*a62be085SSadaf Ebrahimi output_stream->put(section.marker);
183*a62be085SSadaf Ebrahimi if (!section.is_image_section) {
184*a62be085SSadaf Ebrahimi const int section_length = static_cast<int>(section.data.length()) + 2;
185*a62be085SSadaf Ebrahimi // It's not the image data.
186*a62be085SSadaf Ebrahimi const int lh = section_length >> 8;
187*a62be085SSadaf Ebrahimi const int ll = section_length & 0xff;
188*a62be085SSadaf Ebrahimi output_stream->put(lh);
189*a62be085SSadaf Ebrahimi output_stream->put(ll);
190*a62be085SSadaf Ebrahimi }
191*a62be085SSadaf Ebrahimi output_stream->write(section.data.c_str(), section.data.length());
192*a62be085SSadaf Ebrahimi }
193*a62be085SSadaf Ebrahimi }
194*a62be085SSadaf Ebrahimi
195*a62be085SSadaf Ebrahimi } // namespace xmpmeta
196*a62be085SSadaf Ebrahimi } // namespace dynamic_depth
197