xref: /aosp_15_r20/external/image_io/src/base/data_scanner.cc (revision ca0779eb572efbbfda2e47f806647c3c7eeea8c3)
1 #include "image_io/base/data_scanner.h"
2 
3 #include <algorithm>
#include <cstring>
4 
5 namespace photos_editing_formats {
6 namespace image_io {
7 
8 using std::string;
9 
10 namespace {
11 
/// The characters accepted as whitespace by ScanWhitespaceChars() and returned
/// by DataScanner::GetWhitespaceChars().
12 const char kWhitespaceChars[] = " \t\n\r";
/// The character appended by DataScanner::GetBase64Chars() when the pad
/// character is requested, and returned by DataScanner::GetBase64PadChar().
13 const char kBase64PadChar = '=';
/// The base64 alphabet: upper case letters, lower case letters, digits, then
/// '+' and '/'.
14 const char kBase64Chars[] =
15     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
16 
/// Like strspn(), but for a length-delimited buffer instead of a
/// null-terminated string.
/// @param s The bytes to scan; they need not be null-terminated.
/// @param slen The number of bytes that may be read from s.
/// @param accept A null-terminated set of characters allowed in the span.
/// @return The length of the leading run of bytes in s that all occur in
///     accept. A '\0' byte in s always ends the run.
size_t memspn(const char* s, size_t slen, const char* accept) {
  size_t span = 0;
  // The bound is tested before every read so that s[slen] is never touched
  // and s is not dereferenced at all when slen is zero.
  for (; span < slen; ++span) {
    const char c = s[span];
    const char* candidate = accept;
    while (*candidate != '\0' && *candidate != c) ++candidate;
    // Reaching the terminator means c is not in the accept set.
    if (*candidate == '\0') break;
  }
  return span;
}
30 
/// Tests whether a character lies within an inclusive range.
/// @param value The character to test.
/// @param lo The smallest character of the range.
/// @param hi The largest character of the range.
/// @return Whether value is in the range [lo:hi].
bool InRange(char value, char lo, char hi) {
  if (value < lo) return false;
  if (value > hi) return false;
  return true;
}
35 
36 /// @return Whether the value is the first character of a kName type scanner.
IsFirstNameChar(char value)37 bool IsFirstNameChar(char value) {
38   return InRange(value, 'A', 'Z') || InRange(value, 'a', 'z') || value == '_' ||
39          value == ':';
40 }
41 
42 /// Scans the characters in the s string, where the characters can be any legal
43 /// character in the name.
44 /// @return The number of name characters scanned.
ScanOptionalNameChars(const char * s,size_t slen)45 size_t ScanOptionalNameChars(const char* s, size_t slen) {
46   const char* kOptionalChars =
47       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.-_:";
48   return memspn(s, slen, kOptionalChars);
49 }
50 
51 /// Scans the whitespace characters in the s string.
52 /// @return The number of whitepace characters scanned.
ScanWhitespaceChars(const char * s,size_t slen)53 size_t ScanWhitespaceChars(const char* s, size_t slen) {
54   return memspn(s, slen, kWhitespaceChars);
55 }
56 
57 }  // namespace
58 
GetWhitespaceChars()59 string DataScanner::GetWhitespaceChars() { return kWhitespaceChars; }
60 
GetBase64Chars(bool include_pad_char)61 string DataScanner::GetBase64Chars(bool include_pad_char) {
62   string chars(kBase64Chars);
63   if (include_pad_char) chars += kBase64PadChar;
64   return chars;
65 }
66 
GetBase64PadChar()67 string DataScanner::GetBase64PadChar() { return string(1, kBase64PadChar); }
68 
CreateLiteralScanner(const string & literal)69 DataScanner DataScanner::CreateLiteralScanner(const string& literal) {
70   return DataScanner(DataScanner::kLiteral, literal);
71 }
72 
CreateNameScanner()73 DataScanner DataScanner::CreateNameScanner() {
74   return DataScanner(DataScanner::kName);
75 }
76 
CreateQuotedStringScanner()77 DataScanner DataScanner::CreateQuotedStringScanner() {
78   return DataScanner(DataScanner::kQuotedString);
79 }
80 
CreateSentinelScanner(const string & sentinels)81 DataScanner DataScanner::CreateSentinelScanner(const string& sentinels) {
82   return DataScanner(DataScanner::kSentinel, sentinels);
83 }
84 
CreateThroughLiteralScanner(const string & literal)85 DataScanner DataScanner::CreateThroughLiteralScanner(const string& literal) {
86   return DataScanner(DataScanner::kThroughLiteral, literal);
87 }
88 
CreateWhitespaceScanner()89 DataScanner DataScanner::CreateWhitespaceScanner() {
90   return DataScanner(DataScanner::kWhitespace);
91 }
92 
CreateOptionalWhitespaceScanner()93 DataScanner DataScanner::CreateOptionalWhitespaceScanner() {
94   return DataScanner(DataScanner::kOptionalWhitespace);
95 }
96 
ScanChars(const char * s,size_t slen,const char * scanset)97 size_t DataScanner::ScanChars(const char* s, size_t slen, const char* scanset) {
98   return memspn(s, slen, scanset);
99 }
100 
ExtendTokenLength(size_t delta_length)101 size_t DataScanner::ExtendTokenLength(size_t delta_length) {
102   token_range_ =
103       DataRange(token_range_.GetBegin(), token_range_.GetEnd() + delta_length);
104   return token_range_.GetLength();
105 }
106 
SetInternalError(const DataContext & context,const string & error_description,DataMatchResult * result)107 void DataScanner::SetInternalError(const DataContext& context,
108                                    const string& error_description,
109                                    DataMatchResult* result) {
110   result->SetType(DataMatchResult::kError);
111   result->SetMessage(
112       Message::kInternalError,
113       context.GetErrorText({}, {GetDescription()}, error_description, ""));
114 }
115 
SetSyntaxError(const DataContext & context,const string & error_description,DataMatchResult * result)116 void DataScanner::SetSyntaxError(const DataContext& context,
117                                  const string& error_description,
118                                  DataMatchResult* result) {
119   result->SetType(DataMatchResult::kError);
120   result->SetMessage(Message::kSyntaxError,
121                      context.GetErrorText(error_description, GetDescription()));
122 }
123 
ScanLiteral(const char * cbytes,size_t bytes_available,const DataContext & context)124 DataMatchResult DataScanner::ScanLiteral(const char* cbytes,
125                                          size_t bytes_available,
126                                          const DataContext& context) {
127   DataMatchResult result;
128   size_t token_length = token_range_.GetLength();
129   if (token_length >= literal_or_sentinels_.length()) {
130     SetInternalError(context, "Literal already scanned", &result);
131     return result;
132   }
133   size_t bytes_still_needed = literal_or_sentinels_.length() - token_length;
134   size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available);
135   if (strncmp(&literal_or_sentinels_[token_length], cbytes, bytes_to_compare) ==
136       0) {
137     token_length = ExtendTokenLength(bytes_to_compare);
138     result.SetBytesConsumed(bytes_to_compare);
139     result.SetType(token_length == literal_or_sentinels_.length()
140                        ? DataMatchResult::kFull
141                        : DataMatchResult::kPartialOutOfData);
142   } else {
143     SetSyntaxError(context, "Expected literal", &result);
144   }
145   return result;
146 }
147 
ScanName(const char * cbytes,size_t bytes_available,const DataContext & context)148 DataMatchResult DataScanner::ScanName(const char* cbytes,
149                                       size_t bytes_available,
150                                       const DataContext& context) {
151   DataMatchResult result;
152   size_t token_length = token_range_.GetLength();
153   if (token_length == 0) {
154     if (!IsFirstNameChar(*cbytes)) {
155       SetSyntaxError(context, "Expected first character of a name", &result);
156       return result;
157     }
158     token_length = ExtendTokenLength(1);
159     result.SetBytesConsumed(1);
160     bytes_available -= 1;
161     cbytes += 1;
162   }
163   size_t optional_bytes_consumed =
164       ScanOptionalNameChars(cbytes, bytes_available);
165   token_length = ExtendTokenLength(optional_bytes_consumed);
166   result.IncrementBytesConsumed(optional_bytes_consumed);
167   if (result.GetBytesConsumed() == 0 && token_length > 0) {
168     result.SetType(DataMatchResult::kFull);
169   } else if (optional_bytes_consumed < bytes_available) {
170     result.SetType(DataMatchResult::kFull);
171   } else {
172     result.SetType(DataMatchResult::kPartialOutOfData);
173   }
174   return result;
175 }
176 
ScanQuotedString(const char * cbytes,size_t bytes_available,const DataContext & context)177 DataMatchResult DataScanner::ScanQuotedString(const char* cbytes,
178                                               size_t bytes_available,
179                                               const DataContext& context) {
180   const size_t kStart = 0;
181   const size_t kDone = '.';
182   const size_t kSquote = '\'';
183   const size_t kDquote = '"';
184   DataMatchResult result;
185   size_t token_length = token_range_.GetLength();
186   if ((data_ == kStart && token_length != 0) ||
187       (data_ != kStart && data_ != kSquote && data_ != kDquote)) {
188     SetInternalError(context, "Inconsistent state", &result);
189     return result;
190   }
191   if (data_ == kStart) {
192     if (*cbytes != kSquote && *cbytes != kDquote) {
193       SetSyntaxError(context, "Expected start of a quoted string", &result);
194       return result;
195     }
196     data_ = *cbytes++;
197     bytes_available--;
198     result.SetBytesConsumed(1);
199     token_length = ExtendTokenLength(1);
200   }
201   const char* ebytes = reinterpret_cast<const char*>(
202       memchr(cbytes, static_cast<int>(data_), bytes_available));
203   size_t bytes_scanned = ebytes ? ebytes - cbytes : bytes_available;
204   result.IncrementBytesConsumed(bytes_scanned);
205   token_length = ExtendTokenLength(bytes_scanned);
206   if (bytes_scanned == bytes_available) {
207     result.SetType(DataMatchResult::kPartialOutOfData);
208   } else {
209     result.SetType(DataMatchResult::kFull);
210     result.IncrementBytesConsumed(1);
211     ExtendTokenLength(1);
212     data_ = kDone;
213   }
214   return result;
215 }
216 
ScanSentinel(const char * cbytes,size_t bytes_available,const DataContext & context)217 DataMatchResult DataScanner::ScanSentinel(const char* cbytes,
218                                           size_t bytes_available,
219                                           const DataContext& context) {
220   DataMatchResult result;
221   if (data_ != 0) {
222     SetInternalError(context, "Sentinel already scanned", &result);
223     return result;
224   }
225   char cbyte = *cbytes;
226   for (size_t index = 0; index < literal_or_sentinels_.size(); ++index) {
227     char sentinel = literal_or_sentinels_[index];
228     if ((sentinel == '~' && IsFirstNameChar(cbyte)) || cbyte == sentinel) {
229       ExtendTokenLength(1);
230       result.SetBytesConsumed(1).SetType(DataMatchResult::kFull);
231       data_ = sentinel;
232       break;
233     }
234   }
235   if (result.GetBytesConsumed() == 0) {
236     SetSyntaxError(context, "Unexpected character encountered", &result);
237   }
238   return result;
239 }
240 
ScanThroughLiteral(const char * cbytes,size_t bytes_available,const DataContext & context)241 DataMatchResult DataScanner::ScanThroughLiteral(const char* cbytes,
242                                                 size_t bytes_available,
243                                                 const DataContext& context) {
244   DataMatchResult result;
245   size_t& scanned_literal_length = data_;
246   if (scanned_literal_length >= literal_or_sentinels_.length()) {
247     SetInternalError(context, "Literal already scanned", &result);
248     return result;
249   }
250   while (bytes_available > 0) {
251     if (scanned_literal_length == 0) {
252       // Literal scan not in progress. Find the first char of the literal.
253       auto* matched_byte = reinterpret_cast<const char*>(
254           memchr(cbytes, literal_or_sentinels_[0], bytes_available));
255       if (matched_byte == nullptr) {
256         // first char not found and chars exhausted.
257         ExtendTokenLength(bytes_available);
258         result.IncrementBytesConsumed(bytes_available);
259         result.SetType(DataMatchResult::kPartialOutOfData);
260         break;
261       } else {
262         // found the first char of the literal.
263         size_t bytes_scanned = (matched_byte - cbytes) + 1;
264         result.IncrementBytesConsumed(bytes_scanned);
265         bytes_available -= bytes_scanned;
266         cbytes += bytes_scanned;
267         ExtendTokenLength(bytes_scanned);
268         scanned_literal_length = 1;
269       }
270     }
271     // check if the rest of the literal is there.
272     size_t bytes_still_needed =
273         literal_or_sentinels_.length() - scanned_literal_length;
274     size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available);
275     if (strncmp(&literal_or_sentinels_[scanned_literal_length], cbytes,
276                 bytes_to_compare) == 0) {
277       // Yes, the whole literal is there or chars are exhausted.
278       ExtendTokenLength(bytes_to_compare);
279       scanned_literal_length += bytes_to_compare;
280       result.IncrementBytesConsumed(bytes_to_compare);
281       result.SetType(scanned_literal_length == literal_or_sentinels_.length()
282                          ? DataMatchResult::kFull
283                          : DataMatchResult::kPartialOutOfData);
284       break;
285     }
286     // false alarm, the firsts char of the literal were found, but not the
287     // whole enchilada. Keep searching at one past the first char of the match.
288     scanned_literal_length = 0;
289   }
290   return result;
291 }
292 
ScanWhitespace(const char * cbytes,size_t bytes_available,const DataContext & context)293 DataMatchResult DataScanner::ScanWhitespace(const char* cbytes,
294                                             size_t bytes_available,
295                                             const DataContext& context) {
296   DataMatchResult result;
297   size_t token_length = token_range_.GetLength();
298   result.SetBytesConsumed(ScanWhitespaceChars(cbytes, bytes_available));
299   token_length = ExtendTokenLength(result.GetBytesConsumed());
300   if (result.GetBytesConsumed() == 0) {
301     if (token_length == 0 && type_ == kWhitespace) {
302       SetSyntaxError(context, "Expected whitespace", &result);
303     } else {
304       result.SetType(DataMatchResult::kFull);
305     }
306   } else {
307     result.SetType((result.GetBytesConsumed() < bytes_available)
308                        ? DataMatchResult::kFull
309                        : DataMatchResult::kPartialOutOfData);
310   }
311   return result;
312 }
313 
Scan(const DataContext & context)314 DataMatchResult DataScanner::Scan(const DataContext& context) {
315   scan_call_count_ += 1;
316   DataMatchResult result;
317   if (!context.IsValidLocationAndRange()) {
318     SetInternalError(context, context.GetInvalidLocationAndRangeErrorText(),
319                      &result);
320     return result;
321   }
322   if (!token_range_.IsValid()) {
323     token_range_ = DataRange(context.GetLocation(), context.GetLocation());
324   }
325   size_t bytes_available = context.GetRange().GetEnd() - context.GetLocation();
326   const char* cbytes = context.GetCharBytes();
327   switch (type_) {
328     case kLiteral:
329       result = ScanLiteral(cbytes, bytes_available, context);
330       break;
331     case kName:
332       result = ScanName(cbytes, bytes_available, context);
333       break;
334     case kQuotedString:
335       result = ScanQuotedString(cbytes, bytes_available, context);
336       break;
337     case kSentinel:
338       result = ScanSentinel(cbytes, bytes_available, context);
339       break;
340     case kThroughLiteral:
341       result = ScanThroughLiteral(cbytes, bytes_available, context);
342       break;
343     case kWhitespace:
344     case kOptionalWhitespace:
345       result = ScanWhitespace(cbytes, bytes_available, context);
346       break;
347     default:
348       SetInternalError(context, "Undefined scanner type", &result);
349       break;
350   }
351   return result;
352 }
353 
ResetTokenRange()354 void DataScanner::ResetTokenRange() { token_range_ = DataRange(); }
355 
Reset()356 void DataScanner::Reset() {
357   data_ = 0;
358   scan_call_count_ = 0;
359   ResetTokenRange();
360 }
361 
GetDescription() const362 string DataScanner::GetDescription() const {
363   if (!description_.empty()) {
364     return description_;
365   }
366   string description;
367   switch (type_) {
368     case kLiteral:
369       description = "Literal:'";
370       description += literal_or_sentinels_;
371       description += "'";
372       break;
373     case kName:
374       description = "Name";
375       break;
376     case kQuotedString:
377       description = "QuotedString";
378       break;
379     case kSentinel:
380       description = "OneOf:'";
381       description += literal_or_sentinels_;
382       description += "'";
383       break;
384     case kThroughLiteral:
385       description = "ThruLiteral:'";
386       description += literal_or_sentinels_;
387       description += "'";
388       break;
389     case kWhitespace:
390       description = "Whitespace";
391       break;
392     case kOptionalWhitespace:
393       description = "OptionalWhitespace";
394       break;
395   }
396   return description;
397 }
398 
GetLiteral() const399 string DataScanner::GetLiteral() const {
400   return type_ == kLiteral || type_ == kThroughLiteral ? literal_or_sentinels_
401                                                        : "";
402 }
403 
GetSentenels() const404 string DataScanner::GetSentenels() const {
405   return type_ == kSentinel ? literal_or_sentinels_ : "";
406 }
407 
GetSentinel() const408 char DataScanner::GetSentinel() const { return type_ == kSentinel ? data_ : 0; }
409 
410 }  // namespace image_io
411 }  // namespace photos_editing_formats
412