1 #include "image_io/base/data_scanner.h"
2
3 #include <algorithm>
4
5 namespace photos_editing_formats {
6 namespace image_io {
7
8 using std::string;
9
10 namespace {
11
/// The characters accepted by the whitespace scanning helpers in this file.
constexpr char kWhitespaceChars[] = " \t\n\r";

/// The character appended to the base64 character set when padding is
/// requested.
constexpr char kBase64PadChar = '=';

/// The base64 alphabet, not including the pad character.
constexpr char kBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";
16
/// This function is like strspn but does not assume a null-terminated string.
/// Only the first slen bytes of s are ever read, so s does not need to be
/// readable beyond that length (and may be null when slen is zero).
/// @param s The bytes to examine.
/// @param slen The number of bytes available at s.
/// @param accept A null-terminated set of characters to accept. Its
///     terminating null is not part of the set, so a null byte in s always
///     ends the span.
/// @return The length of the leading run of bytes in s that are all in accept.
size_t memspn(const char* s, size_t slen, const char* accept) {
  size_t span_length = 0;
  for (; span_length < slen; ++span_length) {
    // The bound is checked before this read, so s[slen] is never touched.
    const char current = s[span_length];
    const char* candidate = accept;
    while (*candidate != '\0' && *candidate != current) {
      ++candidate;
    }
    if (*candidate == '\0') {
      // Ran off the end of the accept set: current is not an accepted char.
      break;
    }
  }
  return span_length;
}
30
/// Tests a character against an inclusive range.
/// @param value The character to test.
/// @param lo The smallest accepted character.
/// @param hi The largest accepted character.
/// @return Whether value is in the range [lo:hi].
bool InRange(char value, char lo, char hi) {
  if (value < lo) {
    return false;
  }
  return !(value > hi);
}
35
36 /// @return Whether the value is the first character of a kName type scanner.
IsFirstNameChar(char value)37 bool IsFirstNameChar(char value) {
38 return InRange(value, 'A', 'Z') || InRange(value, 'a', 'z') || value == '_' ||
39 value == ':';
40 }
41
42 /// Scans the characters in the s string, where the characters can be any legal
43 /// character in the name.
44 /// @return The number of name characters scanned.
ScanOptionalNameChars(const char * s,size_t slen)45 size_t ScanOptionalNameChars(const char* s, size_t slen) {
46 const char* kOptionalChars =
47 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.-_:";
48 return memspn(s, slen, kOptionalChars);
49 }
50
51 /// Scans the whitespace characters in the s string.
52 /// @return The number of whitepace characters scanned.
ScanWhitespaceChars(const char * s,size_t slen)53 size_t ScanWhitespaceChars(const char* s, size_t slen) {
54 return memspn(s, slen, kWhitespaceChars);
55 }
56
57 } // namespace
58
GetWhitespaceChars()59 string DataScanner::GetWhitespaceChars() { return kWhitespaceChars; }
60
GetBase64Chars(bool include_pad_char)61 string DataScanner::GetBase64Chars(bool include_pad_char) {
62 string chars(kBase64Chars);
63 if (include_pad_char) chars += kBase64PadChar;
64 return chars;
65 }
66
GetBase64PadChar()67 string DataScanner::GetBase64PadChar() { return string(1, kBase64PadChar); }
68
CreateLiteralScanner(const string & literal)69 DataScanner DataScanner::CreateLiteralScanner(const string& literal) {
70 return DataScanner(DataScanner::kLiteral, literal);
71 }
72
CreateNameScanner()73 DataScanner DataScanner::CreateNameScanner() {
74 return DataScanner(DataScanner::kName);
75 }
76
CreateQuotedStringScanner()77 DataScanner DataScanner::CreateQuotedStringScanner() {
78 return DataScanner(DataScanner::kQuotedString);
79 }
80
CreateSentinelScanner(const string & sentinels)81 DataScanner DataScanner::CreateSentinelScanner(const string& sentinels) {
82 return DataScanner(DataScanner::kSentinel, sentinels);
83 }
84
CreateThroughLiteralScanner(const string & literal)85 DataScanner DataScanner::CreateThroughLiteralScanner(const string& literal) {
86 return DataScanner(DataScanner::kThroughLiteral, literal);
87 }
88
CreateWhitespaceScanner()89 DataScanner DataScanner::CreateWhitespaceScanner() {
90 return DataScanner(DataScanner::kWhitespace);
91 }
92
CreateOptionalWhitespaceScanner()93 DataScanner DataScanner::CreateOptionalWhitespaceScanner() {
94 return DataScanner(DataScanner::kOptionalWhitespace);
95 }
96
ScanChars(const char * s,size_t slen,const char * scanset)97 size_t DataScanner::ScanChars(const char* s, size_t slen, const char* scanset) {
98 return memspn(s, slen, scanset);
99 }
100
ExtendTokenLength(size_t delta_length)101 size_t DataScanner::ExtendTokenLength(size_t delta_length) {
102 token_range_ =
103 DataRange(token_range_.GetBegin(), token_range_.GetEnd() + delta_length);
104 return token_range_.GetLength();
105 }
106
SetInternalError(const DataContext & context,const string & error_description,DataMatchResult * result)107 void DataScanner::SetInternalError(const DataContext& context,
108 const string& error_description,
109 DataMatchResult* result) {
110 result->SetType(DataMatchResult::kError);
111 result->SetMessage(
112 Message::kInternalError,
113 context.GetErrorText({}, {GetDescription()}, error_description, ""));
114 }
115
SetSyntaxError(const DataContext & context,const string & error_description,DataMatchResult * result)116 void DataScanner::SetSyntaxError(const DataContext& context,
117 const string& error_description,
118 DataMatchResult* result) {
119 result->SetType(DataMatchResult::kError);
120 result->SetMessage(Message::kSyntaxError,
121 context.GetErrorText(error_description, GetDescription()));
122 }
123
ScanLiteral(const char * cbytes,size_t bytes_available,const DataContext & context)124 DataMatchResult DataScanner::ScanLiteral(const char* cbytes,
125 size_t bytes_available,
126 const DataContext& context) {
127 DataMatchResult result;
128 size_t token_length = token_range_.GetLength();
129 if (token_length >= literal_or_sentinels_.length()) {
130 SetInternalError(context, "Literal already scanned", &result);
131 return result;
132 }
133 size_t bytes_still_needed = literal_or_sentinels_.length() - token_length;
134 size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available);
135 if (strncmp(&literal_or_sentinels_[token_length], cbytes, bytes_to_compare) ==
136 0) {
137 token_length = ExtendTokenLength(bytes_to_compare);
138 result.SetBytesConsumed(bytes_to_compare);
139 result.SetType(token_length == literal_or_sentinels_.length()
140 ? DataMatchResult::kFull
141 : DataMatchResult::kPartialOutOfData);
142 } else {
143 SetSyntaxError(context, "Expected literal", &result);
144 }
145 return result;
146 }
147
ScanName(const char * cbytes,size_t bytes_available,const DataContext & context)148 DataMatchResult DataScanner::ScanName(const char* cbytes,
149 size_t bytes_available,
150 const DataContext& context) {
151 DataMatchResult result;
152 size_t token_length = token_range_.GetLength();
153 if (token_length == 0) {
154 if (!IsFirstNameChar(*cbytes)) {
155 SetSyntaxError(context, "Expected first character of a name", &result);
156 return result;
157 }
158 token_length = ExtendTokenLength(1);
159 result.SetBytesConsumed(1);
160 bytes_available -= 1;
161 cbytes += 1;
162 }
163 size_t optional_bytes_consumed =
164 ScanOptionalNameChars(cbytes, bytes_available);
165 token_length = ExtendTokenLength(optional_bytes_consumed);
166 result.IncrementBytesConsumed(optional_bytes_consumed);
167 if (result.GetBytesConsumed() == 0 && token_length > 0) {
168 result.SetType(DataMatchResult::kFull);
169 } else if (optional_bytes_consumed < bytes_available) {
170 result.SetType(DataMatchResult::kFull);
171 } else {
172 result.SetType(DataMatchResult::kPartialOutOfData);
173 }
174 return result;
175 }
176
ScanQuotedString(const char * cbytes,size_t bytes_available,const DataContext & context)177 DataMatchResult DataScanner::ScanQuotedString(const char* cbytes,
178 size_t bytes_available,
179 const DataContext& context) {
180 const size_t kStart = 0;
181 const size_t kDone = '.';
182 const size_t kSquote = '\'';
183 const size_t kDquote = '"';
184 DataMatchResult result;
185 size_t token_length = token_range_.GetLength();
186 if ((data_ == kStart && token_length != 0) ||
187 (data_ != kStart && data_ != kSquote && data_ != kDquote)) {
188 SetInternalError(context, "Inconsistent state", &result);
189 return result;
190 }
191 if (data_ == kStart) {
192 if (*cbytes != kSquote && *cbytes != kDquote) {
193 SetSyntaxError(context, "Expected start of a quoted string", &result);
194 return result;
195 }
196 data_ = *cbytes++;
197 bytes_available--;
198 result.SetBytesConsumed(1);
199 token_length = ExtendTokenLength(1);
200 }
201 const char* ebytes = reinterpret_cast<const char*>(
202 memchr(cbytes, static_cast<int>(data_), bytes_available));
203 size_t bytes_scanned = ebytes ? ebytes - cbytes : bytes_available;
204 result.IncrementBytesConsumed(bytes_scanned);
205 token_length = ExtendTokenLength(bytes_scanned);
206 if (bytes_scanned == bytes_available) {
207 result.SetType(DataMatchResult::kPartialOutOfData);
208 } else {
209 result.SetType(DataMatchResult::kFull);
210 result.IncrementBytesConsumed(1);
211 ExtendTokenLength(1);
212 data_ = kDone;
213 }
214 return result;
215 }
216
ScanSentinel(const char * cbytes,size_t bytes_available,const DataContext & context)217 DataMatchResult DataScanner::ScanSentinel(const char* cbytes,
218 size_t bytes_available,
219 const DataContext& context) {
220 DataMatchResult result;
221 if (data_ != 0) {
222 SetInternalError(context, "Sentinel already scanned", &result);
223 return result;
224 }
225 char cbyte = *cbytes;
226 for (size_t index = 0; index < literal_or_sentinels_.size(); ++index) {
227 char sentinel = literal_or_sentinels_[index];
228 if ((sentinel == '~' && IsFirstNameChar(cbyte)) || cbyte == sentinel) {
229 ExtendTokenLength(1);
230 result.SetBytesConsumed(1).SetType(DataMatchResult::kFull);
231 data_ = sentinel;
232 break;
233 }
234 }
235 if (result.GetBytesConsumed() == 0) {
236 SetSyntaxError(context, "Unexpected character encountered", &result);
237 }
238 return result;
239 }
240
ScanThroughLiteral(const char * cbytes,size_t bytes_available,const DataContext & context)241 DataMatchResult DataScanner::ScanThroughLiteral(const char* cbytes,
242 size_t bytes_available,
243 const DataContext& context) {
244 DataMatchResult result;
245 size_t& scanned_literal_length = data_;
246 if (scanned_literal_length >= literal_or_sentinels_.length()) {
247 SetInternalError(context, "Literal already scanned", &result);
248 return result;
249 }
250 while (bytes_available > 0) {
251 if (scanned_literal_length == 0) {
252 // Literal scan not in progress. Find the first char of the literal.
253 auto* matched_byte = reinterpret_cast<const char*>(
254 memchr(cbytes, literal_or_sentinels_[0], bytes_available));
255 if (matched_byte == nullptr) {
256 // first char not found and chars exhausted.
257 ExtendTokenLength(bytes_available);
258 result.IncrementBytesConsumed(bytes_available);
259 result.SetType(DataMatchResult::kPartialOutOfData);
260 break;
261 } else {
262 // found the first char of the literal.
263 size_t bytes_scanned = (matched_byte - cbytes) + 1;
264 result.IncrementBytesConsumed(bytes_scanned);
265 bytes_available -= bytes_scanned;
266 cbytes += bytes_scanned;
267 ExtendTokenLength(bytes_scanned);
268 scanned_literal_length = 1;
269 }
270 }
271 // check if the rest of the literal is there.
272 size_t bytes_still_needed =
273 literal_or_sentinels_.length() - scanned_literal_length;
274 size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available);
275 if (strncmp(&literal_or_sentinels_[scanned_literal_length], cbytes,
276 bytes_to_compare) == 0) {
277 // Yes, the whole literal is there or chars are exhausted.
278 ExtendTokenLength(bytes_to_compare);
279 scanned_literal_length += bytes_to_compare;
280 result.IncrementBytesConsumed(bytes_to_compare);
281 result.SetType(scanned_literal_length == literal_or_sentinels_.length()
282 ? DataMatchResult::kFull
283 : DataMatchResult::kPartialOutOfData);
284 break;
285 }
286 // false alarm, the firsts char of the literal were found, but not the
287 // whole enchilada. Keep searching at one past the first char of the match.
288 scanned_literal_length = 0;
289 }
290 return result;
291 }
292
ScanWhitespace(const char * cbytes,size_t bytes_available,const DataContext & context)293 DataMatchResult DataScanner::ScanWhitespace(const char* cbytes,
294 size_t bytes_available,
295 const DataContext& context) {
296 DataMatchResult result;
297 size_t token_length = token_range_.GetLength();
298 result.SetBytesConsumed(ScanWhitespaceChars(cbytes, bytes_available));
299 token_length = ExtendTokenLength(result.GetBytesConsumed());
300 if (result.GetBytesConsumed() == 0) {
301 if (token_length == 0 && type_ == kWhitespace) {
302 SetSyntaxError(context, "Expected whitespace", &result);
303 } else {
304 result.SetType(DataMatchResult::kFull);
305 }
306 } else {
307 result.SetType((result.GetBytesConsumed() < bytes_available)
308 ? DataMatchResult::kFull
309 : DataMatchResult::kPartialOutOfData);
310 }
311 return result;
312 }
313
Scan(const DataContext & context)314 DataMatchResult DataScanner::Scan(const DataContext& context) {
315 scan_call_count_ += 1;
316 DataMatchResult result;
317 if (!context.IsValidLocationAndRange()) {
318 SetInternalError(context, context.GetInvalidLocationAndRangeErrorText(),
319 &result);
320 return result;
321 }
322 if (!token_range_.IsValid()) {
323 token_range_ = DataRange(context.GetLocation(), context.GetLocation());
324 }
325 size_t bytes_available = context.GetRange().GetEnd() - context.GetLocation();
326 const char* cbytes = context.GetCharBytes();
327 switch (type_) {
328 case kLiteral:
329 result = ScanLiteral(cbytes, bytes_available, context);
330 break;
331 case kName:
332 result = ScanName(cbytes, bytes_available, context);
333 break;
334 case kQuotedString:
335 result = ScanQuotedString(cbytes, bytes_available, context);
336 break;
337 case kSentinel:
338 result = ScanSentinel(cbytes, bytes_available, context);
339 break;
340 case kThroughLiteral:
341 result = ScanThroughLiteral(cbytes, bytes_available, context);
342 break;
343 case kWhitespace:
344 case kOptionalWhitespace:
345 result = ScanWhitespace(cbytes, bytes_available, context);
346 break;
347 default:
348 SetInternalError(context, "Undefined scanner type", &result);
349 break;
350 }
351 return result;
352 }
353
ResetTokenRange()354 void DataScanner::ResetTokenRange() { token_range_ = DataRange(); }
355
Reset()356 void DataScanner::Reset() {
357 data_ = 0;
358 scan_call_count_ = 0;
359 ResetTokenRange();
360 }
361
GetDescription() const362 string DataScanner::GetDescription() const {
363 if (!description_.empty()) {
364 return description_;
365 }
366 string description;
367 switch (type_) {
368 case kLiteral:
369 description = "Literal:'";
370 description += literal_or_sentinels_;
371 description += "'";
372 break;
373 case kName:
374 description = "Name";
375 break;
376 case kQuotedString:
377 description = "QuotedString";
378 break;
379 case kSentinel:
380 description = "OneOf:'";
381 description += literal_or_sentinels_;
382 description += "'";
383 break;
384 case kThroughLiteral:
385 description = "ThruLiteral:'";
386 description += literal_or_sentinels_;
387 description += "'";
388 break;
389 case kWhitespace:
390 description = "Whitespace";
391 break;
392 case kOptionalWhitespace:
393 description = "OptionalWhitespace";
394 break;
395 }
396 return description;
397 }
398
GetLiteral() const399 string DataScanner::GetLiteral() const {
400 return type_ == kLiteral || type_ == kThroughLiteral ? literal_or_sentinels_
401 : "";
402 }
403
GetSentenels() const404 string DataScanner::GetSentenels() const {
405 return type_ == kSentinel ? literal_or_sentinels_ : "";
406 }
407
GetSentinel() const408 char DataScanner::GetSentinel() const { return type_ == kSentinel ? data_ : 0; }
409
410 } // namespace image_io
411 } // namespace photos_editing_formats
412