// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
8*103e46e4SHarish Mahendrakar #include "src/master_parser.h"
9*103e46e4SHarish Mahendrakar
10*103e46e4SHarish Mahendrakar #include <cassert>
11*103e46e4SHarish Mahendrakar #include <cstdint>
12*103e46e4SHarish Mahendrakar #include <limits>
13*103e46e4SHarish Mahendrakar
14*103e46e4SHarish Mahendrakar #include "src/element_parser.h"
15*103e46e4SHarish Mahendrakar #include "src/skip_callback.h"
16*103e46e4SHarish Mahendrakar #include "webm/element.h"
17*103e46e4SHarish Mahendrakar #include "webm/id.h"
18*103e46e4SHarish Mahendrakar #include "webm/reader.h"
19*103e46e4SHarish Mahendrakar #include "webm/status.h"
20*103e46e4SHarish Mahendrakar
21*103e46e4SHarish Mahendrakar namespace webm {
22*103e46e4SHarish Mahendrakar
// Spec reference:
// http://matroska.org/technical/specs/index.html#EBML_ex
// https://github.com/Matroska-Org/ebml-specification/blob/master/specification.markdown
Init(const ElementMetadata & metadata,std::uint64_t max_size)26*103e46e4SHarish Mahendrakar Status MasterParser::Init(const ElementMetadata& metadata,
27*103e46e4SHarish Mahendrakar std::uint64_t max_size) {
28*103e46e4SHarish Mahendrakar assert(metadata.size == kUnknownElementSize || metadata.size <= max_size);
29*103e46e4SHarish Mahendrakar
30*103e46e4SHarish Mahendrakar InitSetup(metadata.header_size, metadata.size, metadata.position);
31*103e46e4SHarish Mahendrakar
32*103e46e4SHarish Mahendrakar if (metadata.size != kUnknownElementSize) {
33*103e46e4SHarish Mahendrakar max_size_ = metadata.size;
34*103e46e4SHarish Mahendrakar } else {
35*103e46e4SHarish Mahendrakar max_size_ = max_size;
36*103e46e4SHarish Mahendrakar }
37*103e46e4SHarish Mahendrakar
38*103e46e4SHarish Mahendrakar if (metadata.size == 0) {
39*103e46e4SHarish Mahendrakar state_ = State::kEndReached;
40*103e46e4SHarish Mahendrakar } else {
41*103e46e4SHarish Mahendrakar state_ = State::kFirstReadOfChildId;
42*103e46e4SHarish Mahendrakar }
43*103e46e4SHarish Mahendrakar
44*103e46e4SHarish Mahendrakar return Status(Status::kOkCompleted);
45*103e46e4SHarish Mahendrakar }
46*103e46e4SHarish Mahendrakar
InitAfterSeek(const Ancestory & child_ancestory,const ElementMetadata & child_metadata)47*103e46e4SHarish Mahendrakar void MasterParser::InitAfterSeek(const Ancestory& child_ancestory,
48*103e46e4SHarish Mahendrakar const ElementMetadata& child_metadata) {
49*103e46e4SHarish Mahendrakar InitSetup(kUnknownHeaderSize, kUnknownElementSize, kUnknownElementPosition);
50*103e46e4SHarish Mahendrakar max_size_ = std::numeric_limits<std::uint64_t>::max();
51*103e46e4SHarish Mahendrakar
52*103e46e4SHarish Mahendrakar if (child_ancestory.empty()) {
53*103e46e4SHarish Mahendrakar child_metadata_ = child_metadata;
54*103e46e4SHarish Mahendrakar auto iter = parsers_.find(child_metadata_.id);
55*103e46e4SHarish Mahendrakar assert(iter != parsers_.end());
56*103e46e4SHarish Mahendrakar child_parser_ = iter->second.get();
57*103e46e4SHarish Mahendrakar state_ = State::kGettingAction;
58*103e46e4SHarish Mahendrakar } else {
59*103e46e4SHarish Mahendrakar child_metadata_.id = child_ancestory.id();
60*103e46e4SHarish Mahendrakar child_metadata_.header_size = kUnknownHeaderSize;
61*103e46e4SHarish Mahendrakar child_metadata_.size = kUnknownElementSize;
62*103e46e4SHarish Mahendrakar child_metadata_.position = kUnknownElementPosition;
63*103e46e4SHarish Mahendrakar
64*103e46e4SHarish Mahendrakar auto iter = parsers_.find(child_metadata_.id);
65*103e46e4SHarish Mahendrakar assert(iter != parsers_.end());
66*103e46e4SHarish Mahendrakar child_parser_ = iter->second.get();
67*103e46e4SHarish Mahendrakar child_parser_->InitAfterSeek(child_ancestory.next(), child_metadata);
68*103e46e4SHarish Mahendrakar state_ = State::kReadingChildBody;
69*103e46e4SHarish Mahendrakar }
70*103e46e4SHarish Mahendrakar }
71*103e46e4SHarish Mahendrakar
Feed(Callback * callback,Reader * reader,std::uint64_t * num_bytes_read)72*103e46e4SHarish Mahendrakar Status MasterParser::Feed(Callback* callback, Reader* reader,
73*103e46e4SHarish Mahendrakar std::uint64_t* num_bytes_read) {
74*103e46e4SHarish Mahendrakar assert(callback != nullptr);
75*103e46e4SHarish Mahendrakar assert(reader != nullptr);
76*103e46e4SHarish Mahendrakar assert(num_bytes_read != nullptr);
77*103e46e4SHarish Mahendrakar
78*103e46e4SHarish Mahendrakar *num_bytes_read = 0;
79*103e46e4SHarish Mahendrakar
80*103e46e4SHarish Mahendrakar Callback* const original_callback = callback;
81*103e46e4SHarish Mahendrakar
82*103e46e4SHarish Mahendrakar SkipCallback skip_callback;
83*103e46e4SHarish Mahendrakar if (action_ == Action::kSkip) {
84*103e46e4SHarish Mahendrakar callback = &skip_callback;
85*103e46e4SHarish Mahendrakar }
86*103e46e4SHarish Mahendrakar
87*103e46e4SHarish Mahendrakar Status status;
88*103e46e4SHarish Mahendrakar std::uint64_t local_num_bytes_read;
89*103e46e4SHarish Mahendrakar while (true) {
90*103e46e4SHarish Mahendrakar switch (state_) {
91*103e46e4SHarish Mahendrakar case State::kFirstReadOfChildId: {
92*103e46e4SHarish Mahendrakar // This separate case for the first read of the child ID is needed to
93*103e46e4SHarish Mahendrakar // avoid potential bugs where calling Feed() twice in a row on an
94*103e46e4SHarish Mahendrakar // unsized element at the end of the stream would return
95*103e46e4SHarish Mahendrakar // Status::kOkCompleted instead of Status::kEndOfFile (since we convert
96*103e46e4SHarish Mahendrakar // Status::kEndOfFile to Status::kOkCompleted when EOF is hit for an
97*103e46e4SHarish Mahendrakar // unsized element after its children have been fully parsed). Once
98*103e46e4SHarish Mahendrakar // the ID parser consumes > 0 bytes, this state must be exited.
99*103e46e4SHarish Mahendrakar assert(child_parser_ == nullptr);
100*103e46e4SHarish Mahendrakar assert(my_size_ == kUnknownElementSize || total_bytes_read_ < my_size_);
101*103e46e4SHarish Mahendrakar child_metadata_.position = reader->Position();
102*103e46e4SHarish Mahendrakar child_metadata_.header_size = 0;
103*103e46e4SHarish Mahendrakar status = id_parser_.Feed(callback, reader, &local_num_bytes_read);
104*103e46e4SHarish Mahendrakar *num_bytes_read += local_num_bytes_read;
105*103e46e4SHarish Mahendrakar total_bytes_read_ += local_num_bytes_read;
106*103e46e4SHarish Mahendrakar child_metadata_.header_size +=
107*103e46e4SHarish Mahendrakar static_cast<std::uint32_t>(local_num_bytes_read);
108*103e46e4SHarish Mahendrakar if (status.code == Status::kEndOfFile &&
109*103e46e4SHarish Mahendrakar my_size_ == kUnknownElementSize && local_num_bytes_read == 0) {
110*103e46e4SHarish Mahendrakar state_ = State::kEndReached;
111*103e46e4SHarish Mahendrakar } else if (!status.ok()) {
112*103e46e4SHarish Mahendrakar if (local_num_bytes_read > 0) {
113*103e46e4SHarish Mahendrakar state_ = State::kFinishingReadingChildId;
114*103e46e4SHarish Mahendrakar }
115*103e46e4SHarish Mahendrakar return status;
116*103e46e4SHarish Mahendrakar } else if (status.completed_ok()) {
117*103e46e4SHarish Mahendrakar state_ = State::kReadingChildSize;
118*103e46e4SHarish Mahendrakar } else {
119*103e46e4SHarish Mahendrakar state_ = State::kFinishingReadingChildId;
120*103e46e4SHarish Mahendrakar }
121*103e46e4SHarish Mahendrakar continue;
122*103e46e4SHarish Mahendrakar }
123*103e46e4SHarish Mahendrakar
124*103e46e4SHarish Mahendrakar case State::kFinishingReadingChildId: {
125*103e46e4SHarish Mahendrakar assert(child_parser_ == nullptr);
126*103e46e4SHarish Mahendrakar assert(my_size_ == kUnknownElementSize || total_bytes_read_ < my_size_);
127*103e46e4SHarish Mahendrakar status = id_parser_.Feed(callback, reader, &local_num_bytes_read);
128*103e46e4SHarish Mahendrakar *num_bytes_read += local_num_bytes_read;
129*103e46e4SHarish Mahendrakar total_bytes_read_ += local_num_bytes_read;
130*103e46e4SHarish Mahendrakar child_metadata_.header_size +=
131*103e46e4SHarish Mahendrakar static_cast<std::uint32_t>(local_num_bytes_read);
132*103e46e4SHarish Mahendrakar if (!status.completed_ok()) {
133*103e46e4SHarish Mahendrakar return status;
134*103e46e4SHarish Mahendrakar }
135*103e46e4SHarish Mahendrakar state_ = State::kReadingChildSize;
136*103e46e4SHarish Mahendrakar continue;
137*103e46e4SHarish Mahendrakar }
138*103e46e4SHarish Mahendrakar
139*103e46e4SHarish Mahendrakar case State::kReadingChildSize: {
140*103e46e4SHarish Mahendrakar assert(child_parser_ == nullptr);
141*103e46e4SHarish Mahendrakar assert(total_bytes_read_ > 0);
142*103e46e4SHarish Mahendrakar status = size_parser_.Feed(callback, reader, &local_num_bytes_read);
143*103e46e4SHarish Mahendrakar *num_bytes_read += local_num_bytes_read;
144*103e46e4SHarish Mahendrakar total_bytes_read_ += local_num_bytes_read;
145*103e46e4SHarish Mahendrakar child_metadata_.header_size +=
146*103e46e4SHarish Mahendrakar static_cast<std::uint32_t>(local_num_bytes_read);
147*103e46e4SHarish Mahendrakar if (!status.completed_ok()) {
148*103e46e4SHarish Mahendrakar return status;
149*103e46e4SHarish Mahendrakar }
150*103e46e4SHarish Mahendrakar child_metadata_.id = id_parser_.id();
151*103e46e4SHarish Mahendrakar child_metadata_.size = size_parser_.size();
152*103e46e4SHarish Mahendrakar state_ = State::kValidatingChildSize;
153*103e46e4SHarish Mahendrakar continue;
154*103e46e4SHarish Mahendrakar }
155*103e46e4SHarish Mahendrakar
156*103e46e4SHarish Mahendrakar case State::kValidatingChildSize: {
157*103e46e4SHarish Mahendrakar assert(child_parser_ == nullptr);
158*103e46e4SHarish Mahendrakar
159*103e46e4SHarish Mahendrakar std::uint64_t byte_count = total_bytes_read_;
160*103e46e4SHarish Mahendrakar if (child_metadata_.size != kUnknownElementSize) {
161*103e46e4SHarish Mahendrakar byte_count += child_metadata_.size;
162*103e46e4SHarish Mahendrakar }
163*103e46e4SHarish Mahendrakar
164*103e46e4SHarish Mahendrakar std::uint64_t byte_cap = max_size_;
165*103e46e4SHarish Mahendrakar // my_size_ is <= max_size_ if it's known, so pick the smaller value.
166*103e46e4SHarish Mahendrakar if (my_size_ != kUnknownElementSize) {
167*103e46e4SHarish Mahendrakar byte_cap = my_size_;
168*103e46e4SHarish Mahendrakar }
169*103e46e4SHarish Mahendrakar
170*103e46e4SHarish Mahendrakar if (byte_count > byte_cap) {
171*103e46e4SHarish Mahendrakar return Status(Status::kElementOverflow);
172*103e46e4SHarish Mahendrakar }
173*103e46e4SHarish Mahendrakar
174*103e46e4SHarish Mahendrakar auto iter = parsers_.find(child_metadata_.id);
175*103e46e4SHarish Mahendrakar bool unknown_child = iter == parsers_.end();
176*103e46e4SHarish Mahendrakar
177*103e46e4SHarish Mahendrakar if (my_size_ == kUnknownElementSize && unknown_child) {
178*103e46e4SHarish Mahendrakar // The end of an unsized master element is considered to be the first
179*103e46e4SHarish Mahendrakar // instance of an element that isn't a known/valid child element.
180*103e46e4SHarish Mahendrakar has_cached_metadata_ = true;
181*103e46e4SHarish Mahendrakar state_ = State::kEndReached;
182*103e46e4SHarish Mahendrakar continue;
183*103e46e4SHarish Mahendrakar } else if (unknown_child &&
184*103e46e4SHarish Mahendrakar child_metadata_.size == kUnknownElementSize) {
185*103e46e4SHarish Mahendrakar // We can't skip or otherwise handle unknown elements with an unknown
186*103e46e4SHarish Mahendrakar // size.
187*103e46e4SHarish Mahendrakar return Status(Status::kIndefiniteUnknownElement);
188*103e46e4SHarish Mahendrakar }
189*103e46e4SHarish Mahendrakar if (unknown_child) {
190*103e46e4SHarish Mahendrakar child_parser_ = &unknown_parser_;
191*103e46e4SHarish Mahendrakar } else {
192*103e46e4SHarish Mahendrakar child_parser_ = iter->second.get();
193*103e46e4SHarish Mahendrakar }
194*103e46e4SHarish Mahendrakar state_ = State::kGettingAction;
195*103e46e4SHarish Mahendrakar continue;
196*103e46e4SHarish Mahendrakar }
197*103e46e4SHarish Mahendrakar
198*103e46e4SHarish Mahendrakar case State::kGettingAction: {
199*103e46e4SHarish Mahendrakar assert(child_parser_ != nullptr);
200*103e46e4SHarish Mahendrakar status = callback->OnElementBegin(child_metadata_, &action_);
201*103e46e4SHarish Mahendrakar if (!status.completed_ok()) {
202*103e46e4SHarish Mahendrakar return status;
203*103e46e4SHarish Mahendrakar }
204*103e46e4SHarish Mahendrakar
205*103e46e4SHarish Mahendrakar if (action_ == Action::kSkip) {
206*103e46e4SHarish Mahendrakar callback = &skip_callback;
207*103e46e4SHarish Mahendrakar if (child_metadata_.size != kUnknownElementSize) {
208*103e46e4SHarish Mahendrakar child_parser_ = &skip_parser_;
209*103e46e4SHarish Mahendrakar }
210*103e46e4SHarish Mahendrakar }
211*103e46e4SHarish Mahendrakar state_ = State::kInitializingChildParser;
212*103e46e4SHarish Mahendrakar continue;
213*103e46e4SHarish Mahendrakar }
214*103e46e4SHarish Mahendrakar
215*103e46e4SHarish Mahendrakar case State::kInitializingChildParser: {
216*103e46e4SHarish Mahendrakar assert(child_parser_ != nullptr);
217*103e46e4SHarish Mahendrakar status =
218*103e46e4SHarish Mahendrakar child_parser_->Init(child_metadata_, max_size_ - total_bytes_read_);
219*103e46e4SHarish Mahendrakar if (!status.completed_ok()) {
220*103e46e4SHarish Mahendrakar return status;
221*103e46e4SHarish Mahendrakar }
222*103e46e4SHarish Mahendrakar state_ = State::kReadingChildBody;
223*103e46e4SHarish Mahendrakar continue;
224*103e46e4SHarish Mahendrakar }
225*103e46e4SHarish Mahendrakar
226*103e46e4SHarish Mahendrakar case State::kReadingChildBody: {
227*103e46e4SHarish Mahendrakar assert(child_parser_ != nullptr);
228*103e46e4SHarish Mahendrakar status = child_parser_->Feed(callback, reader, &local_num_bytes_read);
229*103e46e4SHarish Mahendrakar *num_bytes_read += local_num_bytes_read;
230*103e46e4SHarish Mahendrakar total_bytes_read_ += local_num_bytes_read;
231*103e46e4SHarish Mahendrakar if (!status.completed_ok()) {
232*103e46e4SHarish Mahendrakar return status;
233*103e46e4SHarish Mahendrakar }
234*103e46e4SHarish Mahendrakar state_ = State::kChildFullyParsed;
235*103e46e4SHarish Mahendrakar continue;
236*103e46e4SHarish Mahendrakar }
237*103e46e4SHarish Mahendrakar
238*103e46e4SHarish Mahendrakar case State::kChildFullyParsed: {
239*103e46e4SHarish Mahendrakar assert(child_parser_ != nullptr);
240*103e46e4SHarish Mahendrakar std::uint64_t byte_cap = max_size_;
241*103e46e4SHarish Mahendrakar // my_size_ is <= max_size_ if it's known, so pick the smaller value.
242*103e46e4SHarish Mahendrakar if (my_size_ != kUnknownElementSize) {
243*103e46e4SHarish Mahendrakar byte_cap = my_size_;
244*103e46e4SHarish Mahendrakar }
245*103e46e4SHarish Mahendrakar
246*103e46e4SHarish Mahendrakar if (total_bytes_read_ > byte_cap) {
247*103e46e4SHarish Mahendrakar return Status(Status::kElementOverflow);
248*103e46e4SHarish Mahendrakar } else if (total_bytes_read_ == byte_cap) {
249*103e46e4SHarish Mahendrakar state_ = State::kEndReached;
250*103e46e4SHarish Mahendrakar continue;
251*103e46e4SHarish Mahendrakar }
252*103e46e4SHarish Mahendrakar
253*103e46e4SHarish Mahendrakar if (child_parser_->GetCachedMetadata(&child_metadata_)) {
254*103e46e4SHarish Mahendrakar state_ = State::kValidatingChildSize;
255*103e46e4SHarish Mahendrakar } else {
256*103e46e4SHarish Mahendrakar state_ = State::kFirstReadOfChildId;
257*103e46e4SHarish Mahendrakar }
258*103e46e4SHarish Mahendrakar PrepareForNextChild();
259*103e46e4SHarish Mahendrakar callback = original_callback;
260*103e46e4SHarish Mahendrakar continue;
261*103e46e4SHarish Mahendrakar }
262*103e46e4SHarish Mahendrakar
263*103e46e4SHarish Mahendrakar case State::kEndReached: {
264*103e46e4SHarish Mahendrakar return Status(Status::kOkCompleted);
265*103e46e4SHarish Mahendrakar }
266*103e46e4SHarish Mahendrakar }
267*103e46e4SHarish Mahendrakar }
268*103e46e4SHarish Mahendrakar }
269*103e46e4SHarish Mahendrakar
GetCachedMetadata(ElementMetadata * metadata)270*103e46e4SHarish Mahendrakar bool MasterParser::GetCachedMetadata(ElementMetadata* metadata) {
271*103e46e4SHarish Mahendrakar assert(metadata != nullptr);
272*103e46e4SHarish Mahendrakar
273*103e46e4SHarish Mahendrakar if (has_cached_metadata_) {
274*103e46e4SHarish Mahendrakar *metadata = child_metadata_;
275*103e46e4SHarish Mahendrakar }
276*103e46e4SHarish Mahendrakar return has_cached_metadata_;
277*103e46e4SHarish Mahendrakar }
278*103e46e4SHarish Mahendrakar
InitSetup(std::uint32_t header_size,std::uint64_t size_in_bytes,std::uint64_t position)279*103e46e4SHarish Mahendrakar void MasterParser::InitSetup(std::uint32_t header_size,
280*103e46e4SHarish Mahendrakar std::uint64_t size_in_bytes,
281*103e46e4SHarish Mahendrakar std::uint64_t position) {
282*103e46e4SHarish Mahendrakar PrepareForNextChild();
283*103e46e4SHarish Mahendrakar header_size_ = header_size;
284*103e46e4SHarish Mahendrakar my_size_ = size_in_bytes;
285*103e46e4SHarish Mahendrakar my_position_ = position;
286*103e46e4SHarish Mahendrakar total_bytes_read_ = 0;
287*103e46e4SHarish Mahendrakar has_cached_metadata_ = false;
288*103e46e4SHarish Mahendrakar }
289*103e46e4SHarish Mahendrakar
PrepareForNextChild()290*103e46e4SHarish Mahendrakar void MasterParser::PrepareForNextChild() {
291*103e46e4SHarish Mahendrakar // Do not reset child_metadata_ here.
292*103e46e4SHarish Mahendrakar id_parser_ = {};
293*103e46e4SHarish Mahendrakar size_parser_ = {};
294*103e46e4SHarish Mahendrakar child_parser_ = nullptr;
295*103e46e4SHarish Mahendrakar action_ = Action::kRead;
296*103e46e4SHarish Mahendrakar }
297*103e46e4SHarish Mahendrakar
298*103e46e4SHarish Mahendrakar } // namespace webm
299