xref: /aosp_15_r20/external/libwebm/webm_parser/src/master_parser.cc (revision 103e46e4cd4b6efcf6001f23fa8665fb110abf8d)
// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS.  All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
8*103e46e4SHarish Mahendrakar #include "src/master_parser.h"
9*103e46e4SHarish Mahendrakar 
10*103e46e4SHarish Mahendrakar #include <cassert>
11*103e46e4SHarish Mahendrakar #include <cstdint>
12*103e46e4SHarish Mahendrakar #include <limits>
13*103e46e4SHarish Mahendrakar 
14*103e46e4SHarish Mahendrakar #include "src/element_parser.h"
15*103e46e4SHarish Mahendrakar #include "src/skip_callback.h"
16*103e46e4SHarish Mahendrakar #include "webm/element.h"
17*103e46e4SHarish Mahendrakar #include "webm/id.h"
18*103e46e4SHarish Mahendrakar #include "webm/reader.h"
19*103e46e4SHarish Mahendrakar #include "webm/status.h"
20*103e46e4SHarish Mahendrakar 
21*103e46e4SHarish Mahendrakar namespace webm {
22*103e46e4SHarish Mahendrakar 
// Spec reference:
// http://matroska.org/technical/specs/index.html#EBML_ex
// https://github.com/Matroska-Org/ebml-specification/blob/master/specification.markdown
Init(const ElementMetadata & metadata,std::uint64_t max_size)26*103e46e4SHarish Mahendrakar Status MasterParser::Init(const ElementMetadata& metadata,
27*103e46e4SHarish Mahendrakar                           std::uint64_t max_size) {
28*103e46e4SHarish Mahendrakar   assert(metadata.size == kUnknownElementSize || metadata.size <= max_size);
29*103e46e4SHarish Mahendrakar 
30*103e46e4SHarish Mahendrakar   InitSetup(metadata.header_size, metadata.size, metadata.position);
31*103e46e4SHarish Mahendrakar 
32*103e46e4SHarish Mahendrakar   if (metadata.size != kUnknownElementSize) {
33*103e46e4SHarish Mahendrakar     max_size_ = metadata.size;
34*103e46e4SHarish Mahendrakar   } else {
35*103e46e4SHarish Mahendrakar     max_size_ = max_size;
36*103e46e4SHarish Mahendrakar   }
37*103e46e4SHarish Mahendrakar 
38*103e46e4SHarish Mahendrakar   if (metadata.size == 0) {
39*103e46e4SHarish Mahendrakar     state_ = State::kEndReached;
40*103e46e4SHarish Mahendrakar   } else {
41*103e46e4SHarish Mahendrakar     state_ = State::kFirstReadOfChildId;
42*103e46e4SHarish Mahendrakar   }
43*103e46e4SHarish Mahendrakar 
44*103e46e4SHarish Mahendrakar   return Status(Status::kOkCompleted);
45*103e46e4SHarish Mahendrakar }
46*103e46e4SHarish Mahendrakar 
InitAfterSeek(const Ancestory & child_ancestory,const ElementMetadata & child_metadata)47*103e46e4SHarish Mahendrakar void MasterParser::InitAfterSeek(const Ancestory& child_ancestory,
48*103e46e4SHarish Mahendrakar                                  const ElementMetadata& child_metadata) {
49*103e46e4SHarish Mahendrakar   InitSetup(kUnknownHeaderSize, kUnknownElementSize, kUnknownElementPosition);
50*103e46e4SHarish Mahendrakar   max_size_ = std::numeric_limits<std::uint64_t>::max();
51*103e46e4SHarish Mahendrakar 
52*103e46e4SHarish Mahendrakar   if (child_ancestory.empty()) {
53*103e46e4SHarish Mahendrakar     child_metadata_ = child_metadata;
54*103e46e4SHarish Mahendrakar     auto iter = parsers_.find(child_metadata_.id);
55*103e46e4SHarish Mahendrakar     assert(iter != parsers_.end());
56*103e46e4SHarish Mahendrakar     child_parser_ = iter->second.get();
57*103e46e4SHarish Mahendrakar     state_ = State::kGettingAction;
58*103e46e4SHarish Mahendrakar   } else {
59*103e46e4SHarish Mahendrakar     child_metadata_.id = child_ancestory.id();
60*103e46e4SHarish Mahendrakar     child_metadata_.header_size = kUnknownHeaderSize;
61*103e46e4SHarish Mahendrakar     child_metadata_.size = kUnknownElementSize;
62*103e46e4SHarish Mahendrakar     child_metadata_.position = kUnknownElementPosition;
63*103e46e4SHarish Mahendrakar 
64*103e46e4SHarish Mahendrakar     auto iter = parsers_.find(child_metadata_.id);
65*103e46e4SHarish Mahendrakar     assert(iter != parsers_.end());
66*103e46e4SHarish Mahendrakar     child_parser_ = iter->second.get();
67*103e46e4SHarish Mahendrakar     child_parser_->InitAfterSeek(child_ancestory.next(), child_metadata);
68*103e46e4SHarish Mahendrakar     state_ = State::kReadingChildBody;
69*103e46e4SHarish Mahendrakar   }
70*103e46e4SHarish Mahendrakar }
71*103e46e4SHarish Mahendrakar 
Feed(Callback * callback,Reader * reader,std::uint64_t * num_bytes_read)72*103e46e4SHarish Mahendrakar Status MasterParser::Feed(Callback* callback, Reader* reader,
73*103e46e4SHarish Mahendrakar                           std::uint64_t* num_bytes_read) {
74*103e46e4SHarish Mahendrakar   assert(callback != nullptr);
75*103e46e4SHarish Mahendrakar   assert(reader != nullptr);
76*103e46e4SHarish Mahendrakar   assert(num_bytes_read != nullptr);
77*103e46e4SHarish Mahendrakar 
78*103e46e4SHarish Mahendrakar   *num_bytes_read = 0;
79*103e46e4SHarish Mahendrakar 
80*103e46e4SHarish Mahendrakar   Callback* const original_callback = callback;
81*103e46e4SHarish Mahendrakar 
82*103e46e4SHarish Mahendrakar   SkipCallback skip_callback;
83*103e46e4SHarish Mahendrakar   if (action_ == Action::kSkip) {
84*103e46e4SHarish Mahendrakar     callback = &skip_callback;
85*103e46e4SHarish Mahendrakar   }
86*103e46e4SHarish Mahendrakar 
87*103e46e4SHarish Mahendrakar   Status status;
88*103e46e4SHarish Mahendrakar   std::uint64_t local_num_bytes_read;
89*103e46e4SHarish Mahendrakar   while (true) {
90*103e46e4SHarish Mahendrakar     switch (state_) {
91*103e46e4SHarish Mahendrakar       case State::kFirstReadOfChildId: {
92*103e46e4SHarish Mahendrakar         // This separate case for the first read of the child ID is needed to
93*103e46e4SHarish Mahendrakar         // avoid potential bugs where calling Feed() twice in a row on an
94*103e46e4SHarish Mahendrakar         // unsized element at the end of the stream would return
95*103e46e4SHarish Mahendrakar         // Status::kOkCompleted instead of Status::kEndOfFile (since we convert
96*103e46e4SHarish Mahendrakar         // Status::kEndOfFile to Status::kOkCompleted when EOF is hit for an
97*103e46e4SHarish Mahendrakar         // unsized element after its children have been fully parsed). Once
98*103e46e4SHarish Mahendrakar         // the ID parser consumes > 0 bytes, this state must be exited.
99*103e46e4SHarish Mahendrakar         assert(child_parser_ == nullptr);
100*103e46e4SHarish Mahendrakar         assert(my_size_ == kUnknownElementSize || total_bytes_read_ < my_size_);
101*103e46e4SHarish Mahendrakar         child_metadata_.position = reader->Position();
102*103e46e4SHarish Mahendrakar         child_metadata_.header_size = 0;
103*103e46e4SHarish Mahendrakar         status = id_parser_.Feed(callback, reader, &local_num_bytes_read);
104*103e46e4SHarish Mahendrakar         *num_bytes_read += local_num_bytes_read;
105*103e46e4SHarish Mahendrakar         total_bytes_read_ += local_num_bytes_read;
106*103e46e4SHarish Mahendrakar         child_metadata_.header_size +=
107*103e46e4SHarish Mahendrakar             static_cast<std::uint32_t>(local_num_bytes_read);
108*103e46e4SHarish Mahendrakar         if (status.code == Status::kEndOfFile &&
109*103e46e4SHarish Mahendrakar             my_size_ == kUnknownElementSize && local_num_bytes_read == 0) {
110*103e46e4SHarish Mahendrakar           state_ = State::kEndReached;
111*103e46e4SHarish Mahendrakar         } else if (!status.ok()) {
112*103e46e4SHarish Mahendrakar           if (local_num_bytes_read > 0) {
113*103e46e4SHarish Mahendrakar             state_ = State::kFinishingReadingChildId;
114*103e46e4SHarish Mahendrakar           }
115*103e46e4SHarish Mahendrakar           return status;
116*103e46e4SHarish Mahendrakar         } else if (status.completed_ok()) {
117*103e46e4SHarish Mahendrakar           state_ = State::kReadingChildSize;
118*103e46e4SHarish Mahendrakar         } else {
119*103e46e4SHarish Mahendrakar           state_ = State::kFinishingReadingChildId;
120*103e46e4SHarish Mahendrakar         }
121*103e46e4SHarish Mahendrakar         continue;
122*103e46e4SHarish Mahendrakar       }
123*103e46e4SHarish Mahendrakar 
124*103e46e4SHarish Mahendrakar       case State::kFinishingReadingChildId: {
125*103e46e4SHarish Mahendrakar         assert(child_parser_ == nullptr);
126*103e46e4SHarish Mahendrakar         assert(my_size_ == kUnknownElementSize || total_bytes_read_ < my_size_);
127*103e46e4SHarish Mahendrakar         status = id_parser_.Feed(callback, reader, &local_num_bytes_read);
128*103e46e4SHarish Mahendrakar         *num_bytes_read += local_num_bytes_read;
129*103e46e4SHarish Mahendrakar         total_bytes_read_ += local_num_bytes_read;
130*103e46e4SHarish Mahendrakar         child_metadata_.header_size +=
131*103e46e4SHarish Mahendrakar             static_cast<std::uint32_t>(local_num_bytes_read);
132*103e46e4SHarish Mahendrakar         if (!status.completed_ok()) {
133*103e46e4SHarish Mahendrakar           return status;
134*103e46e4SHarish Mahendrakar         }
135*103e46e4SHarish Mahendrakar         state_ = State::kReadingChildSize;
136*103e46e4SHarish Mahendrakar         continue;
137*103e46e4SHarish Mahendrakar       }
138*103e46e4SHarish Mahendrakar 
139*103e46e4SHarish Mahendrakar       case State::kReadingChildSize: {
140*103e46e4SHarish Mahendrakar         assert(child_parser_ == nullptr);
141*103e46e4SHarish Mahendrakar         assert(total_bytes_read_ > 0);
142*103e46e4SHarish Mahendrakar         status = size_parser_.Feed(callback, reader, &local_num_bytes_read);
143*103e46e4SHarish Mahendrakar         *num_bytes_read += local_num_bytes_read;
144*103e46e4SHarish Mahendrakar         total_bytes_read_ += local_num_bytes_read;
145*103e46e4SHarish Mahendrakar         child_metadata_.header_size +=
146*103e46e4SHarish Mahendrakar             static_cast<std::uint32_t>(local_num_bytes_read);
147*103e46e4SHarish Mahendrakar         if (!status.completed_ok()) {
148*103e46e4SHarish Mahendrakar           return status;
149*103e46e4SHarish Mahendrakar         }
150*103e46e4SHarish Mahendrakar         child_metadata_.id = id_parser_.id();
151*103e46e4SHarish Mahendrakar         child_metadata_.size = size_parser_.size();
152*103e46e4SHarish Mahendrakar         state_ = State::kValidatingChildSize;
153*103e46e4SHarish Mahendrakar         continue;
154*103e46e4SHarish Mahendrakar       }
155*103e46e4SHarish Mahendrakar 
156*103e46e4SHarish Mahendrakar       case State::kValidatingChildSize: {
157*103e46e4SHarish Mahendrakar         assert(child_parser_ == nullptr);
158*103e46e4SHarish Mahendrakar 
159*103e46e4SHarish Mahendrakar         std::uint64_t byte_count = total_bytes_read_;
160*103e46e4SHarish Mahendrakar         if (child_metadata_.size != kUnknownElementSize) {
161*103e46e4SHarish Mahendrakar           byte_count += child_metadata_.size;
162*103e46e4SHarish Mahendrakar         }
163*103e46e4SHarish Mahendrakar 
164*103e46e4SHarish Mahendrakar         std::uint64_t byte_cap = max_size_;
165*103e46e4SHarish Mahendrakar         // my_size_ is <= max_size_ if it's known, so pick the smaller value.
166*103e46e4SHarish Mahendrakar         if (my_size_ != kUnknownElementSize) {
167*103e46e4SHarish Mahendrakar           byte_cap = my_size_;
168*103e46e4SHarish Mahendrakar         }
169*103e46e4SHarish Mahendrakar 
170*103e46e4SHarish Mahendrakar         if (byte_count > byte_cap) {
171*103e46e4SHarish Mahendrakar           return Status(Status::kElementOverflow);
172*103e46e4SHarish Mahendrakar         }
173*103e46e4SHarish Mahendrakar 
174*103e46e4SHarish Mahendrakar         auto iter = parsers_.find(child_metadata_.id);
175*103e46e4SHarish Mahendrakar         bool unknown_child = iter == parsers_.end();
176*103e46e4SHarish Mahendrakar 
177*103e46e4SHarish Mahendrakar         if (my_size_ == kUnknownElementSize && unknown_child) {
178*103e46e4SHarish Mahendrakar           // The end of an unsized master element is considered to be the first
179*103e46e4SHarish Mahendrakar           // instance of an element that isn't a known/valid child element.
180*103e46e4SHarish Mahendrakar           has_cached_metadata_ = true;
181*103e46e4SHarish Mahendrakar           state_ = State::kEndReached;
182*103e46e4SHarish Mahendrakar           continue;
183*103e46e4SHarish Mahendrakar         } else if (unknown_child &&
184*103e46e4SHarish Mahendrakar                    child_metadata_.size == kUnknownElementSize) {
185*103e46e4SHarish Mahendrakar           // We can't skip or otherwise handle unknown elements with an unknown
186*103e46e4SHarish Mahendrakar           // size.
187*103e46e4SHarish Mahendrakar           return Status(Status::kIndefiniteUnknownElement);
188*103e46e4SHarish Mahendrakar         }
189*103e46e4SHarish Mahendrakar         if (unknown_child) {
190*103e46e4SHarish Mahendrakar           child_parser_ = &unknown_parser_;
191*103e46e4SHarish Mahendrakar         } else {
192*103e46e4SHarish Mahendrakar           child_parser_ = iter->second.get();
193*103e46e4SHarish Mahendrakar         }
194*103e46e4SHarish Mahendrakar         state_ = State::kGettingAction;
195*103e46e4SHarish Mahendrakar         continue;
196*103e46e4SHarish Mahendrakar       }
197*103e46e4SHarish Mahendrakar 
198*103e46e4SHarish Mahendrakar       case State::kGettingAction: {
199*103e46e4SHarish Mahendrakar         assert(child_parser_ != nullptr);
200*103e46e4SHarish Mahendrakar         status = callback->OnElementBegin(child_metadata_, &action_);
201*103e46e4SHarish Mahendrakar         if (!status.completed_ok()) {
202*103e46e4SHarish Mahendrakar           return status;
203*103e46e4SHarish Mahendrakar         }
204*103e46e4SHarish Mahendrakar 
205*103e46e4SHarish Mahendrakar         if (action_ == Action::kSkip) {
206*103e46e4SHarish Mahendrakar           callback = &skip_callback;
207*103e46e4SHarish Mahendrakar           if (child_metadata_.size != kUnknownElementSize) {
208*103e46e4SHarish Mahendrakar             child_parser_ = &skip_parser_;
209*103e46e4SHarish Mahendrakar           }
210*103e46e4SHarish Mahendrakar         }
211*103e46e4SHarish Mahendrakar         state_ = State::kInitializingChildParser;
212*103e46e4SHarish Mahendrakar         continue;
213*103e46e4SHarish Mahendrakar       }
214*103e46e4SHarish Mahendrakar 
215*103e46e4SHarish Mahendrakar       case State::kInitializingChildParser: {
216*103e46e4SHarish Mahendrakar         assert(child_parser_ != nullptr);
217*103e46e4SHarish Mahendrakar         status =
218*103e46e4SHarish Mahendrakar             child_parser_->Init(child_metadata_, max_size_ - total_bytes_read_);
219*103e46e4SHarish Mahendrakar         if (!status.completed_ok()) {
220*103e46e4SHarish Mahendrakar           return status;
221*103e46e4SHarish Mahendrakar         }
222*103e46e4SHarish Mahendrakar         state_ = State::kReadingChildBody;
223*103e46e4SHarish Mahendrakar         continue;
224*103e46e4SHarish Mahendrakar       }
225*103e46e4SHarish Mahendrakar 
226*103e46e4SHarish Mahendrakar       case State::kReadingChildBody: {
227*103e46e4SHarish Mahendrakar         assert(child_parser_ != nullptr);
228*103e46e4SHarish Mahendrakar         status = child_parser_->Feed(callback, reader, &local_num_bytes_read);
229*103e46e4SHarish Mahendrakar         *num_bytes_read += local_num_bytes_read;
230*103e46e4SHarish Mahendrakar         total_bytes_read_ += local_num_bytes_read;
231*103e46e4SHarish Mahendrakar         if (!status.completed_ok()) {
232*103e46e4SHarish Mahendrakar           return status;
233*103e46e4SHarish Mahendrakar         }
234*103e46e4SHarish Mahendrakar         state_ = State::kChildFullyParsed;
235*103e46e4SHarish Mahendrakar         continue;
236*103e46e4SHarish Mahendrakar       }
237*103e46e4SHarish Mahendrakar 
238*103e46e4SHarish Mahendrakar       case State::kChildFullyParsed: {
239*103e46e4SHarish Mahendrakar         assert(child_parser_ != nullptr);
240*103e46e4SHarish Mahendrakar         std::uint64_t byte_cap = max_size_;
241*103e46e4SHarish Mahendrakar         // my_size_ is <= max_size_ if it's known, so pick the smaller value.
242*103e46e4SHarish Mahendrakar         if (my_size_ != kUnknownElementSize) {
243*103e46e4SHarish Mahendrakar           byte_cap = my_size_;
244*103e46e4SHarish Mahendrakar         }
245*103e46e4SHarish Mahendrakar 
246*103e46e4SHarish Mahendrakar         if (total_bytes_read_ > byte_cap) {
247*103e46e4SHarish Mahendrakar           return Status(Status::kElementOverflow);
248*103e46e4SHarish Mahendrakar         } else if (total_bytes_read_ == byte_cap) {
249*103e46e4SHarish Mahendrakar           state_ = State::kEndReached;
250*103e46e4SHarish Mahendrakar           continue;
251*103e46e4SHarish Mahendrakar         }
252*103e46e4SHarish Mahendrakar 
253*103e46e4SHarish Mahendrakar         if (child_parser_->GetCachedMetadata(&child_metadata_)) {
254*103e46e4SHarish Mahendrakar           state_ = State::kValidatingChildSize;
255*103e46e4SHarish Mahendrakar         } else {
256*103e46e4SHarish Mahendrakar           state_ = State::kFirstReadOfChildId;
257*103e46e4SHarish Mahendrakar         }
258*103e46e4SHarish Mahendrakar         PrepareForNextChild();
259*103e46e4SHarish Mahendrakar         callback = original_callback;
260*103e46e4SHarish Mahendrakar         continue;
261*103e46e4SHarish Mahendrakar       }
262*103e46e4SHarish Mahendrakar 
263*103e46e4SHarish Mahendrakar       case State::kEndReached: {
264*103e46e4SHarish Mahendrakar         return Status(Status::kOkCompleted);
265*103e46e4SHarish Mahendrakar       }
266*103e46e4SHarish Mahendrakar     }
267*103e46e4SHarish Mahendrakar   }
268*103e46e4SHarish Mahendrakar }
269*103e46e4SHarish Mahendrakar 
GetCachedMetadata(ElementMetadata * metadata)270*103e46e4SHarish Mahendrakar bool MasterParser::GetCachedMetadata(ElementMetadata* metadata) {
271*103e46e4SHarish Mahendrakar   assert(metadata != nullptr);
272*103e46e4SHarish Mahendrakar 
273*103e46e4SHarish Mahendrakar   if (has_cached_metadata_) {
274*103e46e4SHarish Mahendrakar     *metadata = child_metadata_;
275*103e46e4SHarish Mahendrakar   }
276*103e46e4SHarish Mahendrakar   return has_cached_metadata_;
277*103e46e4SHarish Mahendrakar }
278*103e46e4SHarish Mahendrakar 
InitSetup(std::uint32_t header_size,std::uint64_t size_in_bytes,std::uint64_t position)279*103e46e4SHarish Mahendrakar void MasterParser::InitSetup(std::uint32_t header_size,
280*103e46e4SHarish Mahendrakar                              std::uint64_t size_in_bytes,
281*103e46e4SHarish Mahendrakar                              std::uint64_t position) {
282*103e46e4SHarish Mahendrakar   PrepareForNextChild();
283*103e46e4SHarish Mahendrakar   header_size_ = header_size;
284*103e46e4SHarish Mahendrakar   my_size_ = size_in_bytes;
285*103e46e4SHarish Mahendrakar   my_position_ = position;
286*103e46e4SHarish Mahendrakar   total_bytes_read_ = 0;
287*103e46e4SHarish Mahendrakar   has_cached_metadata_ = false;
288*103e46e4SHarish Mahendrakar }
289*103e46e4SHarish Mahendrakar 
PrepareForNextChild()290*103e46e4SHarish Mahendrakar void MasterParser::PrepareForNextChild() {
291*103e46e4SHarish Mahendrakar   // Do not reset child_metadata_ here.
292*103e46e4SHarish Mahendrakar   id_parser_ = {};
293*103e46e4SHarish Mahendrakar   size_parser_ = {};
294*103e46e4SHarish Mahendrakar   child_parser_ = nullptr;
295*103e46e4SHarish Mahendrakar   action_ = Action::kRead;
296*103e46e4SHarish Mahendrakar }
297*103e46e4SHarish Mahendrakar 
298*103e46e4SHarish Mahendrakar }  // namespace webm
299