xref: /aosp_15_r20/external/jsoncpp/src/lib_json/json_reader.cpp (revision 4484440890e2bc6e07362b4feaf15601abfe0071)
1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6 
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
13 #include <algorithm>
14 #include <cassert>
15 #include <cstring>
16 #include <iostream>
17 #include <istream>
18 #include <limits>
19 #include <memory>
20 #include <set>
21 #include <sstream>
22 #include <utility>
23 
24 #include <cstdio>
25 #if __cplusplus >= 201103L
26 
27 #if !defined(sscanf)
28 #define sscanf std::sscanf
29 #endif
30 
31 #endif //__cplusplus
32 
33 #if defined(_MSC_VER)
34 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
35 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
36 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
37 #endif //_MSC_VER
38 
39 #if defined(_MSC_VER)
40 // Disable warning about strdup being deprecated.
41 #pragma warning(disable : 4996)
42 #endif
43 
// Nesting limit enforced by the deprecated Reader. Define
// JSONCPP_DEPRECATED_STACK_LIMIT as an integer at compile time to override
// the default of 1000.
#ifndef JSONCPP_DEPRECATED_STACK_LIMIT
#define JSONCPP_DEPRECATED_STACK_LIMIT 1000
#endif

// Compared against nodes_.size() in Reader::readValue().
static const size_t stackLimit_g = JSONCPP_DEPRECATED_STACK_LIMIT;
52 
53 namespace Json {
54 
55 #if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
56 using CharReaderPtr = std::unique_ptr<CharReader>;
57 #else
58 using CharReaderPtr = std::auto_ptr<CharReader>;
59 #endif
60 
61 // Implementation of class Features
62 // ////////////////////////////////
63 
64 Features::Features() = default;
65 
all()66 Features Features::all() { return {}; }
67 
strictMode()68 Features Features::strictMode() {
69   Features features;
70   features.allowComments_ = false;
71   features.strictRoot_ = true;
72   features.allowDroppedNullPlaceholders_ = false;
73   features.allowNumericKeys_ = false;
74   return features;
75 }
76 
77 // Implementation of class Reader
78 // ////////////////////////////////
79 
containsNewLine(Reader::Location begin,Reader::Location end)80 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
81   return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
82 }
83 
84 // Class Reader
85 // //////////////////////////////////////////////////////////////////
86 
Reader()87 Reader::Reader() : features_(Features::all()) {}
88 
Reader(const Features & features)89 Reader::Reader(const Features& features) : features_(features) {}
90 
parse(const std::string & document,Value & root,bool collectComments)91 bool Reader::parse(const std::string& document, Value& root,
92                    bool collectComments) {
93   document_.assign(document.begin(), document.end());
94   const char* begin = document_.c_str();
95   const char* end = begin + document_.length();
96   return parse(begin, end, root, collectComments);
97 }
98 
parse(std::istream & is,Value & root,bool collectComments)99 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
100   // std::istream_iterator<char> begin(is);
101   // std::istream_iterator<char> end;
102   // Those would allow streamed input from a file, if parse() were a
103   // template function.
104 
105   // Since String is reference-counted, this at least does not
106   // create an extra copy.
107   String doc(std::istreambuf_iterator<char>(is), {});
108   return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
109 }
110 
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)111 bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
112                    bool collectComments) {
113   if (!features_.allowComments_) {
114     collectComments = false;
115   }
116 
117   begin_ = beginDoc;
118   end_ = endDoc;
119   collectComments_ = collectComments;
120   current_ = begin_;
121   lastValueEnd_ = nullptr;
122   lastValue_ = nullptr;
123   commentsBefore_.clear();
124   errors_.clear();
125   while (!nodes_.empty())
126     nodes_.pop();
127   nodes_.push(&root);
128 
129   bool successful = readValue();
130   Token token;
131   skipCommentTokens(token);
132   if (collectComments_ && !commentsBefore_.empty())
133     root.setComment(commentsBefore_, commentAfter);
134   if (features_.strictRoot_) {
135     if (!root.isArray() && !root.isObject()) {
136       // Set error location to start of doc, ideally should be first token found
137       // in doc
138       token.type_ = tokenError;
139       token.start_ = beginDoc;
140       token.end_ = endDoc;
141       addError(
142           "A valid JSON document must be either an array or an object value.",
143           token);
144       return false;
145     }
146   }
147   return successful;
148 }
149 
readValue()150 bool Reader::readValue() {
151   // readValue() may call itself only if it calls readObject() or ReadArray().
152   // These methods execute nodes_.push() just before and nodes_.pop)() just
153   // after calling readValue(). parse() executes one nodes_.push(), so > instead
154   // of >=.
155   if (nodes_.size() > stackLimit_g)
156     throwRuntimeError("Exceeded stackLimit in readValue().");
157 
158   Token token;
159   skipCommentTokens(token);
160   bool successful = true;
161 
162   if (collectComments_ && !commentsBefore_.empty()) {
163     currentValue().setComment(commentsBefore_, commentBefore);
164     commentsBefore_.clear();
165   }
166 
167   switch (token.type_) {
168   case tokenObjectBegin:
169     successful = readObject(token);
170     currentValue().setOffsetLimit(current_ - begin_);
171     break;
172   case tokenArrayBegin:
173     successful = readArray(token);
174     currentValue().setOffsetLimit(current_ - begin_);
175     break;
176   case tokenNumber:
177     successful = decodeNumber(token);
178     break;
179   case tokenString:
180     successful = decodeString(token);
181     break;
182   case tokenTrue: {
183     Value v(true);
184     currentValue().swapPayload(v);
185     currentValue().setOffsetStart(token.start_ - begin_);
186     currentValue().setOffsetLimit(token.end_ - begin_);
187   } break;
188   case tokenFalse: {
189     Value v(false);
190     currentValue().swapPayload(v);
191     currentValue().setOffsetStart(token.start_ - begin_);
192     currentValue().setOffsetLimit(token.end_ - begin_);
193   } break;
194   case tokenNull: {
195     Value v;
196     currentValue().swapPayload(v);
197     currentValue().setOffsetStart(token.start_ - begin_);
198     currentValue().setOffsetLimit(token.end_ - begin_);
199   } break;
200   case tokenArraySeparator:
201   case tokenObjectEnd:
202   case tokenArrayEnd:
203     if (features_.allowDroppedNullPlaceholders_) {
204       // "Un-read" the current token and mark the current value as a null
205       // token.
206       current_--;
207       Value v;
208       currentValue().swapPayload(v);
209       currentValue().setOffsetStart(current_ - begin_ - 1);
210       currentValue().setOffsetLimit(current_ - begin_);
211       break;
212     } // Else, fall through...
213   default:
214     currentValue().setOffsetStart(token.start_ - begin_);
215     currentValue().setOffsetLimit(token.end_ - begin_);
216     return addError("Syntax error: value, object or array expected.", token);
217   }
218 
219   if (collectComments_) {
220     lastValueEnd_ = current_;
221     lastValue_ = &currentValue();
222   }
223 
224   return successful;
225 }
226 
skipCommentTokens(Token & token)227 void Reader::skipCommentTokens(Token& token) {
228   if (features_.allowComments_) {
229     do {
230       readToken(token);
231     } while (token.type_ == tokenComment);
232   } else {
233     readToken(token);
234   }
235 }
236 
readToken(Token & token)237 bool Reader::readToken(Token& token) {
238   skipSpaces();
239   token.start_ = current_;
240   Char c = getNextChar();
241   bool ok = true;
242   switch (c) {
243   case '{':
244     token.type_ = tokenObjectBegin;
245     break;
246   case '}':
247     token.type_ = tokenObjectEnd;
248     break;
249   case '[':
250     token.type_ = tokenArrayBegin;
251     break;
252   case ']':
253     token.type_ = tokenArrayEnd;
254     break;
255   case '"':
256     token.type_ = tokenString;
257     ok = readString();
258     break;
259   case '/':
260     token.type_ = tokenComment;
261     ok = readComment();
262     break;
263   case '0':
264   case '1':
265   case '2':
266   case '3':
267   case '4':
268   case '5':
269   case '6':
270   case '7':
271   case '8':
272   case '9':
273   case '-':
274     token.type_ = tokenNumber;
275     readNumber();
276     break;
277   case 't':
278     token.type_ = tokenTrue;
279     ok = match("rue", 3);
280     break;
281   case 'f':
282     token.type_ = tokenFalse;
283     ok = match("alse", 4);
284     break;
285   case 'n':
286     token.type_ = tokenNull;
287     ok = match("ull", 3);
288     break;
289   case ',':
290     token.type_ = tokenArraySeparator;
291     break;
292   case ':':
293     token.type_ = tokenMemberSeparator;
294     break;
295   case 0:
296     token.type_ = tokenEndOfStream;
297     break;
298   default:
299     ok = false;
300     break;
301   }
302   if (!ok)
303     token.type_ = tokenError;
304   token.end_ = current_;
305   return ok;
306 }
307 
skipSpaces()308 void Reader::skipSpaces() {
309   while (current_ != end_) {
310     Char c = *current_;
311     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
312       ++current_;
313     else
314       break;
315   }
316 }
317 
match(const Char * pattern,int patternLength)318 bool Reader::match(const Char* pattern, int patternLength) {
319   if (end_ - current_ < patternLength)
320     return false;
321   int index = patternLength;
322   while (index--)
323     if (current_[index] != pattern[index])
324       return false;
325   current_ += patternLength;
326   return true;
327 }
328 
readComment()329 bool Reader::readComment() {
330   Location commentBegin = current_ - 1;
331   Char c = getNextChar();
332   bool successful = false;
333   if (c == '*')
334     successful = readCStyleComment();
335   else if (c == '/')
336     successful = readCppStyleComment();
337   if (!successful)
338     return false;
339 
340   if (collectComments_) {
341     CommentPlacement placement = commentBefore;
342     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
343       if (c != '*' || !containsNewLine(commentBegin, current_))
344         placement = commentAfterOnSameLine;
345     }
346 
347     addComment(commentBegin, current_, placement);
348   }
349   return true;
350 }
351 
normalizeEOL(Reader::Location begin,Reader::Location end)352 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
353   String normalized;
354   normalized.reserve(static_cast<size_t>(end - begin));
355   Reader::Location current = begin;
356   while (current != end) {
357     char c = *current++;
358     if (c == '\r') {
359       if (current != end && *current == '\n')
360         // convert dos EOL
361         ++current;
362       // convert Mac EOL
363       normalized += '\n';
364     } else {
365       normalized += c;
366     }
367   }
368   return normalized;
369 }
370 
addComment(Location begin,Location end,CommentPlacement placement)371 void Reader::addComment(Location begin, Location end,
372                         CommentPlacement placement) {
373   assert(collectComments_);
374   const String& normalized = normalizeEOL(begin, end);
375   if (placement == commentAfterOnSameLine) {
376     assert(lastValue_ != nullptr);
377     lastValue_->setComment(normalized, placement);
378   } else {
379     commentsBefore_ += normalized;
380   }
381 }
382 
readCStyleComment()383 bool Reader::readCStyleComment() {
384   while ((current_ + 1) < end_) {
385     Char c = getNextChar();
386     if (c == '*' && *current_ == '/')
387       break;
388   }
389   return getNextChar() == '/';
390 }
391 
readCppStyleComment()392 bool Reader::readCppStyleComment() {
393   while (current_ != end_) {
394     Char c = getNextChar();
395     if (c == '\n')
396       break;
397     if (c == '\r') {
398       // Consume DOS EOL. It will be normalized in addComment.
399       if (current_ != end_ && *current_ == '\n')
400         getNextChar();
401       // Break on Moc OS 9 EOL.
402       break;
403     }
404   }
405   return true;
406 }
407 
readNumber()408 void Reader::readNumber() {
409   Location p = current_;
410   char c = '0'; // stopgap for already consumed character
411   // integral part
412   while (c >= '0' && c <= '9')
413     c = (current_ = p) < end_ ? *p++ : '\0';
414   // fractional part
415   if (c == '.') {
416     c = (current_ = p) < end_ ? *p++ : '\0';
417     while (c >= '0' && c <= '9')
418       c = (current_ = p) < end_ ? *p++ : '\0';
419   }
420   // exponential part
421   if (c == 'e' || c == 'E') {
422     c = (current_ = p) < end_ ? *p++ : '\0';
423     if (c == '+' || c == '-')
424       c = (current_ = p) < end_ ? *p++ : '\0';
425     while (c >= '0' && c <= '9')
426       c = (current_ = p) < end_ ? *p++ : '\0';
427   }
428 }
429 
readString()430 bool Reader::readString() {
431   Char c = '\0';
432   while (current_ != end_) {
433     c = getNextChar();
434     if (c == '\\')
435       getNextChar();
436     else if (c == '"')
437       break;
438   }
439   return c == '"';
440 }
441 
readObject(Token & token)442 bool Reader::readObject(Token& token) {
443   Token tokenName;
444   String name;
445   Value init(objectValue);
446   currentValue().swapPayload(init);
447   currentValue().setOffsetStart(token.start_ - begin_);
448   while (readToken(tokenName)) {
449     bool initialTokenOk = true;
450     while (tokenName.type_ == tokenComment && initialTokenOk)
451       initialTokenOk = readToken(tokenName);
452     if (!initialTokenOk)
453       break;
454     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
455       return true;
456     name.clear();
457     if (tokenName.type_ == tokenString) {
458       if (!decodeString(tokenName, name))
459         return recoverFromError(tokenObjectEnd);
460     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
461       Value numberName;
462       if (!decodeNumber(tokenName, numberName))
463         return recoverFromError(tokenObjectEnd);
464       name = numberName.asString();
465     } else {
466       break;
467     }
468 
469     Token colon;
470     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
471       return addErrorAndRecover("Missing ':' after object member name", colon,
472                                 tokenObjectEnd);
473     }
474     Value& value = currentValue()[name];
475     nodes_.push(&value);
476     bool ok = readValue();
477     nodes_.pop();
478     if (!ok) // error already set
479       return recoverFromError(tokenObjectEnd);
480 
481     Token comma;
482     if (!readToken(comma) ||
483         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
484          comma.type_ != tokenComment)) {
485       return addErrorAndRecover("Missing ',' or '}' in object declaration",
486                                 comma, tokenObjectEnd);
487     }
488     bool finalizeTokenOk = true;
489     while (comma.type_ == tokenComment && finalizeTokenOk)
490       finalizeTokenOk = readToken(comma);
491     if (comma.type_ == tokenObjectEnd)
492       return true;
493   }
494   return addErrorAndRecover("Missing '}' or object member name", tokenName,
495                             tokenObjectEnd);
496 }
497 
readArray(Token & token)498 bool Reader::readArray(Token& token) {
499   Value init(arrayValue);
500   currentValue().swapPayload(init);
501   currentValue().setOffsetStart(token.start_ - begin_);
502   skipSpaces();
503   if (current_ != end_ && *current_ == ']') // empty array
504   {
505     Token endArray;
506     readToken(endArray);
507     return true;
508   }
509   int index = 0;
510   for (;;) {
511     Value& value = currentValue()[index++];
512     nodes_.push(&value);
513     bool ok = readValue();
514     nodes_.pop();
515     if (!ok) // error already set
516       return recoverFromError(tokenArrayEnd);
517 
518     Token currentToken;
519     // Accept Comment after last item in the array.
520     ok = readToken(currentToken);
521     while (currentToken.type_ == tokenComment && ok) {
522       ok = readToken(currentToken);
523     }
524     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
525                          currentToken.type_ != tokenArrayEnd);
526     if (!ok || badTokenType) {
527       return addErrorAndRecover("Missing ',' or ']' in array declaration",
528                                 currentToken, tokenArrayEnd);
529     }
530     if (currentToken.type_ == tokenArrayEnd)
531       break;
532   }
533   return true;
534 }
535 
decodeNumber(Token & token)536 bool Reader::decodeNumber(Token& token) {
537   Value decoded;
538   if (!decodeNumber(token, decoded))
539     return false;
540   currentValue().swapPayload(decoded);
541   currentValue().setOffsetStart(token.start_ - begin_);
542   currentValue().setOffsetLimit(token.end_ - begin_);
543   return true;
544 }
545 
decodeNumber(Token & token,Value & decoded)546 bool Reader::decodeNumber(Token& token, Value& decoded) {
547   // Attempts to parse the number as an integer. If the number is
548   // larger than the maximum supported value of an integer then
549   // we decode the number as a double.
550   Location current = token.start_;
551   bool isNegative = *current == '-';
552   if (isNegative)
553     ++current;
554   // TODO: Help the compiler do the div and mod at compile time or get rid of
555   // them.
556   Value::LargestUInt maxIntegerValue =
557       isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
558                  : Value::maxLargestUInt;
559   Value::LargestUInt threshold = maxIntegerValue / 10;
560   Value::LargestUInt value = 0;
561   while (current < token.end_) {
562     Char c = *current++;
563     if (c < '0' || c > '9')
564       return decodeDouble(token, decoded);
565     auto digit(static_cast<Value::UInt>(c - '0'));
566     if (value >= threshold) {
567       // We've hit or exceeded the max value divided by 10 (rounded down). If
568       // a) we've only just touched the limit, b) this is the last digit, and
569       // c) it's small enough to fit in that rounding delta, we're okay.
570       // Otherwise treat this number as a double to avoid overflow.
571       if (value > threshold || current != token.end_ ||
572           digit > maxIntegerValue % 10) {
573         return decodeDouble(token, decoded);
574       }
575     }
576     value = value * 10 + digit;
577   }
578   if (isNegative && value == maxIntegerValue)
579     decoded = Value::minLargestInt;
580   else if (isNegative)
581     decoded = -Value::LargestInt(value);
582   else if (value <= Value::LargestUInt(Value::maxInt))
583     decoded = Value::LargestInt(value);
584   else
585     decoded = value;
586   return true;
587 }
588 
decodeDouble(Token & token)589 bool Reader::decodeDouble(Token& token) {
590   Value decoded;
591   if (!decodeDouble(token, decoded))
592     return false;
593   currentValue().swapPayload(decoded);
594   currentValue().setOffsetStart(token.start_ - begin_);
595   currentValue().setOffsetLimit(token.end_ - begin_);
596   return true;
597 }
598 
decodeDouble(Token & token,Value & decoded)599 bool Reader::decodeDouble(Token& token, Value& decoded) {
600   double value = 0;
601   String buffer(token.start_, token.end_);
602   IStringStream is(buffer);
603   if (!(is >> value))
604     return addError(
605         "'" + String(token.start_, token.end_) + "' is not a number.", token);
606   decoded = value;
607   return true;
608 }
609 
decodeString(Token & token)610 bool Reader::decodeString(Token& token) {
611   String decoded_string;
612   if (!decodeString(token, decoded_string))
613     return false;
614   Value decoded(decoded_string);
615   currentValue().swapPayload(decoded);
616   currentValue().setOffsetStart(token.start_ - begin_);
617   currentValue().setOffsetLimit(token.end_ - begin_);
618   return true;
619 }
620 
decodeString(Token & token,String & decoded)621 bool Reader::decodeString(Token& token, String& decoded) {
622   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
623   Location current = token.start_ + 1; // skip '"'
624   Location end = token.end_ - 1;       // do not include '"'
625   while (current != end) {
626     Char c = *current++;
627     if (c == '"')
628       break;
629     if (c == '\\') {
630       if (current == end)
631         return addError("Empty escape sequence in string", token, current);
632       Char escape = *current++;
633       switch (escape) {
634       case '"':
635         decoded += '"';
636         break;
637       case '/':
638         decoded += '/';
639         break;
640       case '\\':
641         decoded += '\\';
642         break;
643       case 'b':
644         decoded += '\b';
645         break;
646       case 'f':
647         decoded += '\f';
648         break;
649       case 'n':
650         decoded += '\n';
651         break;
652       case 'r':
653         decoded += '\r';
654         break;
655       case 't':
656         decoded += '\t';
657         break;
658       case 'u': {
659         unsigned int unicode;
660         if (!decodeUnicodeCodePoint(token, current, end, unicode))
661           return false;
662         decoded += codePointToUTF8(unicode);
663       } break;
664       default:
665         return addError("Bad escape sequence in string", token, current);
666       }
667     } else {
668       decoded += c;
669     }
670   }
671   return true;
672 }
673 
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)674 bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
675                                     Location end, unsigned int& unicode) {
676 
677   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
678     return false;
679   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
680     // surrogate pairs
681     if (end - current < 6)
682       return addError(
683           "additional six characters expected to parse unicode surrogate pair.",
684           token, current);
685     if (*(current++) == '\\' && *(current++) == 'u') {
686       unsigned int surrogatePair;
687       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
688         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
689       } else
690         return false;
691     } else
692       return addError("expecting another \\u token to begin the second half of "
693                       "a unicode surrogate pair",
694                       token, current);
695   }
696   return true;
697 }
698 
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)699 bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
700                                          Location end,
701                                          unsigned int& ret_unicode) {
702   if (end - current < 4)
703     return addError(
704         "Bad unicode escape sequence in string: four digits expected.", token,
705         current);
706   int unicode = 0;
707   for (int index = 0; index < 4; ++index) {
708     Char c = *current++;
709     unicode *= 16;
710     if (c >= '0' && c <= '9')
711       unicode += c - '0';
712     else if (c >= 'a' && c <= 'f')
713       unicode += c - 'a' + 10;
714     else if (c >= 'A' && c <= 'F')
715       unicode += c - 'A' + 10;
716     else
717       return addError(
718           "Bad unicode escape sequence in string: hexadecimal digit expected.",
719           token, current);
720   }
721   ret_unicode = static_cast<unsigned int>(unicode);
722   return true;
723 }
724 
addError(const String & message,Token & token,Location extra)725 bool Reader::addError(const String& message, Token& token, Location extra) {
726   ErrorInfo info;
727   info.token_ = token;
728   info.message_ = message;
729   info.extra_ = extra;
730   errors_.push_back(info);
731   return false;
732 }
733 
recoverFromError(TokenType skipUntilToken)734 bool Reader::recoverFromError(TokenType skipUntilToken) {
735   size_t const errorCount = errors_.size();
736   Token skip;
737   for (;;) {
738     if (!readToken(skip))
739       errors_.resize(errorCount); // discard errors caused by recovery
740     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
741       break;
742   }
743   errors_.resize(errorCount);
744   return false;
745 }
746 
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)747 bool Reader::addErrorAndRecover(const String& message, Token& token,
748                                 TokenType skipUntilToken) {
749   addError(message, token);
750   return recoverFromError(skipUntilToken);
751 }
752 
currentValue()753 Value& Reader::currentValue() { return *(nodes_.top()); }
754 
getNextChar()755 Reader::Char Reader::getNextChar() {
756   if (current_ == end_)
757     return 0;
758   return *current_++;
759 }
760 
getLocationLineAndColumn(Location location,int & line,int & column) const761 void Reader::getLocationLineAndColumn(Location location, int& line,
762                                       int& column) const {
763   Location current = begin_;
764   Location lastLineStart = current;
765   line = 0;
766   while (current < location && current != end_) {
767     Char c = *current++;
768     if (c == '\r') {
769       if (*current == '\n')
770         ++current;
771       lastLineStart = current;
772       ++line;
773     } else if (c == '\n') {
774       lastLineStart = current;
775       ++line;
776     }
777   }
778   // column & line start at 1
779   column = int(location - lastLineStart) + 1;
780   ++line;
781 }
782 
getLocationLineAndColumn(Location location) const783 String Reader::getLocationLineAndColumn(Location location) const {
784   int line, column;
785   getLocationLineAndColumn(location, line, column);
786   char buffer[18 + 16 + 16 + 1];
787   jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
788   return buffer;
789 }
790 
791 // Deprecated. Preserved for backward compatibility
getFormatedErrorMessages() const792 String Reader::getFormatedErrorMessages() const {
793   return getFormattedErrorMessages();
794 }
795 
getFormattedErrorMessages() const796 String Reader::getFormattedErrorMessages() const {
797   String formattedMessage;
798   for (const auto& error : errors_) {
799     formattedMessage +=
800         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
801     formattedMessage += "  " + error.message_ + "\n";
802     if (error.extra_)
803       formattedMessage +=
804           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
805   }
806   return formattedMessage;
807 }
808 
getStructuredErrors() const809 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
810   std::vector<Reader::StructuredError> allErrors;
811   for (const auto& error : errors_) {
812     Reader::StructuredError structured;
813     structured.offset_start = error.token_.start_ - begin_;
814     structured.offset_limit = error.token_.end_ - begin_;
815     structured.message = error.message_;
816     allErrors.push_back(structured);
817   }
818   return allErrors;
819 }
820 
pushError(const Value & value,const String & message)821 bool Reader::pushError(const Value& value, const String& message) {
822   ptrdiff_t const length = end_ - begin_;
823   if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
824     return false;
825   Token token;
826   token.type_ = tokenError;
827   token.start_ = begin_ + value.getOffsetStart();
828   token.end_ = begin_ + value.getOffsetLimit();
829   ErrorInfo info;
830   info.token_ = token;
831   info.message_ = message;
832   info.extra_ = nullptr;
833   errors_.push_back(info);
834   return true;
835 }
836 
pushError(const Value & value,const String & message,const Value & extra)837 bool Reader::pushError(const Value& value, const String& message,
838                        const Value& extra) {
839   ptrdiff_t const length = end_ - begin_;
840   if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
841       extra.getOffsetLimit() > length)
842     return false;
843   Token token;
844   token.type_ = tokenError;
845   token.start_ = begin_ + value.getOffsetStart();
846   token.end_ = begin_ + value.getOffsetLimit();
847   ErrorInfo info;
848   info.token_ = token;
849   info.message_ = message;
850   info.extra_ = begin_ + extra.getOffsetStart();
851   errors_.push_back(info);
852   return true;
853 }
854 
good() const855 bool Reader::good() const { return errors_.empty(); }
856 
// Originally copied from the Features class (now deprecated), used internally
// for features implementation.
//
// Every member has a default initializer, so a default-constructed object
// holds the same all-false / zero state that all() returns rather than
// indeterminate values.
class OurFeatures {
public:
  // Returns a value-initialized feature set (defined out of line).
  static OurFeatures all();
  bool allowComments_ = false;
  bool allowTrailingCommas_ = false;
  bool strictRoot_ = false;
  bool allowDroppedNullPlaceholders_ = false;
  bool allowNumericKeys_ = false;
  bool allowSingleQuotes_ = false;
  bool failIfExtra_ = false;
  bool rejectDupKeys_ = false;
  bool allowSpecialFloats_ = false;
  bool skipBom_ = false;
  size_t stackLimit_ = 0;
}; // OurFeatures
874 
all()875 OurFeatures OurFeatures::all() { return {}; }
876 
877 // Implementation of class Reader
878 // ////////////////////////////////
879 
880 // Originally copied from the Reader class (now deprecated), used internally
881 // for implementing JSON reading.
882 class OurReader {
883 public:
884   using Char = char;
885   using Location = const Char*;
886   struct StructuredError {
887     ptrdiff_t offset_start;
888     ptrdiff_t offset_limit;
889     String message;
890   };
891 
892   explicit OurReader(OurFeatures const& features);
893   bool parse(const char* beginDoc, const char* endDoc, Value& root,
894              bool collectComments = true);
895   String getFormattedErrorMessages() const;
896   std::vector<StructuredError> getStructuredErrors() const;
897 
898 private:
899   OurReader(OurReader const&);      // no impl
900   void operator=(OurReader const&); // no impl
901 
902   enum TokenType {
903     tokenEndOfStream = 0,
904     tokenObjectBegin,
905     tokenObjectEnd,
906     tokenArrayBegin,
907     tokenArrayEnd,
908     tokenString,
909     tokenNumber,
910     tokenTrue,
911     tokenFalse,
912     tokenNull,
913     tokenNaN,
914     tokenPosInf,
915     tokenNegInf,
916     tokenArraySeparator,
917     tokenMemberSeparator,
918     tokenComment,
919     tokenError
920   };
921 
922   class Token {
923   public:
924     TokenType type_;
925     Location start_;
926     Location end_;
927   };
928 
929   class ErrorInfo {
930   public:
931     Token token_;
932     String message_;
933     Location extra_;
934   };
935 
936   using Errors = std::deque<ErrorInfo>;
937 
938   bool readToken(Token& token);
939   void skipSpaces();
940   void skipBom(bool skipBom);
941   bool match(const Char* pattern, int patternLength);
942   bool readComment();
943   bool readCStyleComment(bool* containsNewLineResult);
944   bool readCppStyleComment();
945   bool readString();
946   bool readStringSingleQuote();
947   bool readNumber(bool checkInf);
948   bool readValue();
949   bool readObject(Token& token);
950   bool readArray(Token& token);
951   bool decodeNumber(Token& token);
952   bool decodeNumber(Token& token, Value& decoded);
953   bool decodeString(Token& token);
954   bool decodeString(Token& token, String& decoded);
955   bool decodeDouble(Token& token);
956   bool decodeDouble(Token& token, Value& decoded);
957   bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
958                               unsigned int& unicode);
959   bool decodeUnicodeEscapeSequence(Token& token, Location& current,
960                                    Location end, unsigned int& unicode);
961   bool addError(const String& message, Token& token, Location extra = nullptr);
962   bool recoverFromError(TokenType skipUntilToken);
963   bool addErrorAndRecover(const String& message, Token& token,
964                           TokenType skipUntilToken);
965   void skipUntilSpace();
966   Value& currentValue();
967   Char getNextChar();
968   void getLocationLineAndColumn(Location location, int& line,
969                                 int& column) const;
970   String getLocationLineAndColumn(Location location) const;
971   void addComment(Location begin, Location end, CommentPlacement placement);
972   void skipCommentTokens(Token& token);
973 
974   static String normalizeEOL(Location begin, Location end);
975   static bool containsNewLine(Location begin, Location end);
976 
977   using Nodes = std::stack<Value*>;
978 
979   Nodes nodes_{};
980   Errors errors_{};
981   String document_{};
982   Location begin_ = nullptr;
983   Location end_ = nullptr;
984   Location current_ = nullptr;
985   Location lastValueEnd_ = nullptr;
986   Value* lastValue_ = nullptr;
987   bool lastValueHasAComment_ = false;
988   String commentsBefore_{};
989 
990   OurFeatures const features_;
991   bool collectComments_ = false;
992 }; // OurReader
993 
994 // complete copy of Read impl, for OurReader
995 
containsNewLine(OurReader::Location begin,OurReader::Location end)996 bool OurReader::containsNewLine(OurReader::Location begin,
997                                 OurReader::Location end) {
998   return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
999 }
1000 
OurReader(OurFeatures const & features)1001 OurReader::OurReader(OurFeatures const& features) : features_(features) {}
1002 
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)1003 bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
1004                       bool collectComments) {
1005   if (!features_.allowComments_) {
1006     collectComments = false;
1007   }
1008 
1009   begin_ = beginDoc;
1010   end_ = endDoc;
1011   collectComments_ = collectComments;
1012   current_ = begin_;
1013   lastValueEnd_ = nullptr;
1014   lastValue_ = nullptr;
1015   commentsBefore_.clear();
1016   errors_.clear();
1017   while (!nodes_.empty())
1018     nodes_.pop();
1019   nodes_.push(&root);
1020 
1021   // skip byte order mark if it exists at the beginning of the UTF-8 text.
1022   skipBom(features_.skipBom_);
1023   bool successful = readValue();
1024   nodes_.pop();
1025   Token token;
1026   skipCommentTokens(token);
1027   if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1028     addError("Extra non-whitespace after JSON value.", token);
1029     return false;
1030   }
1031   if (collectComments_ && !commentsBefore_.empty())
1032     root.setComment(commentsBefore_, commentAfter);
1033   if (features_.strictRoot_) {
1034     if (!root.isArray() && !root.isObject()) {
1035       // Set error location to start of doc, ideally should be first token found
1036       // in doc
1037       token.type_ = tokenError;
1038       token.start_ = beginDoc;
1039       token.end_ = endDoc;
1040       addError(
1041           "A valid JSON document must be either an array or an object value.",
1042           token);
1043       return false;
1044     }
1045   }
1046   return successful;
1047 }
1048 
readValue()1049 bool OurReader::readValue() {
1050   //  To preserve the old behaviour we cast size_t to int.
1051   if (nodes_.size() > features_.stackLimit_)
1052     throwRuntimeError("Exceeded stackLimit in readValue().");
1053   Token token;
1054   skipCommentTokens(token);
1055   bool successful = true;
1056 
1057   if (collectComments_ && !commentsBefore_.empty()) {
1058     currentValue().setComment(commentsBefore_, commentBefore);
1059     commentsBefore_.clear();
1060   }
1061 
1062   switch (token.type_) {
1063   case tokenObjectBegin:
1064     successful = readObject(token);
1065     currentValue().setOffsetLimit(current_ - begin_);
1066     break;
1067   case tokenArrayBegin:
1068     successful = readArray(token);
1069     currentValue().setOffsetLimit(current_ - begin_);
1070     break;
1071   case tokenNumber:
1072     successful = decodeNumber(token);
1073     break;
1074   case tokenString:
1075     successful = decodeString(token);
1076     break;
1077   case tokenTrue: {
1078     Value v(true);
1079     currentValue().swapPayload(v);
1080     currentValue().setOffsetStart(token.start_ - begin_);
1081     currentValue().setOffsetLimit(token.end_ - begin_);
1082   } break;
1083   case tokenFalse: {
1084     Value v(false);
1085     currentValue().swapPayload(v);
1086     currentValue().setOffsetStart(token.start_ - begin_);
1087     currentValue().setOffsetLimit(token.end_ - begin_);
1088   } break;
1089   case tokenNull: {
1090     Value v;
1091     currentValue().swapPayload(v);
1092     currentValue().setOffsetStart(token.start_ - begin_);
1093     currentValue().setOffsetLimit(token.end_ - begin_);
1094   } break;
1095   case tokenNaN: {
1096     Value v(std::numeric_limits<double>::quiet_NaN());
1097     currentValue().swapPayload(v);
1098     currentValue().setOffsetStart(token.start_ - begin_);
1099     currentValue().setOffsetLimit(token.end_ - begin_);
1100   } break;
1101   case tokenPosInf: {
1102     Value v(std::numeric_limits<double>::infinity());
1103     currentValue().swapPayload(v);
1104     currentValue().setOffsetStart(token.start_ - begin_);
1105     currentValue().setOffsetLimit(token.end_ - begin_);
1106   } break;
1107   case tokenNegInf: {
1108     Value v(-std::numeric_limits<double>::infinity());
1109     currentValue().swapPayload(v);
1110     currentValue().setOffsetStart(token.start_ - begin_);
1111     currentValue().setOffsetLimit(token.end_ - begin_);
1112   } break;
1113   case tokenArraySeparator:
1114   case tokenObjectEnd:
1115   case tokenArrayEnd:
1116     if (features_.allowDroppedNullPlaceholders_) {
1117       // "Un-read" the current token and mark the current value as a null
1118       // token.
1119       current_--;
1120       Value v;
1121       currentValue().swapPayload(v);
1122       currentValue().setOffsetStart(current_ - begin_ - 1);
1123       currentValue().setOffsetLimit(current_ - begin_);
1124       break;
1125     } // else, fall through ...
1126   default:
1127     currentValue().setOffsetStart(token.start_ - begin_);
1128     currentValue().setOffsetLimit(token.end_ - begin_);
1129     return addError("Syntax error: value, object or array expected.", token);
1130   }
1131 
1132   if (collectComments_) {
1133     lastValueEnd_ = current_;
1134     lastValueHasAComment_ = false;
1135     lastValue_ = &currentValue();
1136   }
1137 
1138   return successful;
1139 }
1140 
skipCommentTokens(Token & token)1141 void OurReader::skipCommentTokens(Token& token) {
1142   if (features_.allowComments_) {
1143     do {
1144       readToken(token);
1145     } while (token.type_ == tokenComment);
1146   } else {
1147     readToken(token);
1148   }
1149 }
1150 
readToken(Token & token)1151 bool OurReader::readToken(Token& token) {
1152   skipSpaces();
1153   token.start_ = current_;
1154   Char c = getNextChar();
1155   bool ok = true;
1156   switch (c) {
1157   case '{':
1158     token.type_ = tokenObjectBegin;
1159     break;
1160   case '}':
1161     token.type_ = tokenObjectEnd;
1162     break;
1163   case '[':
1164     token.type_ = tokenArrayBegin;
1165     break;
1166   case ']':
1167     token.type_ = tokenArrayEnd;
1168     break;
1169   case '"':
1170     token.type_ = tokenString;
1171     ok = readString();
1172     break;
1173   case '\'':
1174     if (features_.allowSingleQuotes_) {
1175       token.type_ = tokenString;
1176       ok = readStringSingleQuote();
1177     } else {
1178       // If we don't allow single quotes, this is a failure case.
1179       ok = false;
1180     }
1181     break;
1182   case '/':
1183     token.type_ = tokenComment;
1184     ok = readComment();
1185     break;
1186   case '0':
1187   case '1':
1188   case '2':
1189   case '3':
1190   case '4':
1191   case '5':
1192   case '6':
1193   case '7':
1194   case '8':
1195   case '9':
1196     token.type_ = tokenNumber;
1197     readNumber(false);
1198     break;
1199   case '-':
1200     if (readNumber(true)) {
1201       token.type_ = tokenNumber;
1202     } else {
1203       token.type_ = tokenNegInf;
1204       ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1205     }
1206     break;
1207   case '+':
1208     if (readNumber(true)) {
1209       token.type_ = tokenNumber;
1210     } else {
1211       token.type_ = tokenPosInf;
1212       ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1213     }
1214     break;
1215   case 't':
1216     token.type_ = tokenTrue;
1217     ok = match("rue", 3);
1218     break;
1219   case 'f':
1220     token.type_ = tokenFalse;
1221     ok = match("alse", 4);
1222     break;
1223   case 'n':
1224     token.type_ = tokenNull;
1225     ok = match("ull", 3);
1226     break;
1227   case 'N':
1228     if (features_.allowSpecialFloats_) {
1229       token.type_ = tokenNaN;
1230       ok = match("aN", 2);
1231     } else {
1232       ok = false;
1233     }
1234     break;
1235   case 'I':
1236     if (features_.allowSpecialFloats_) {
1237       token.type_ = tokenPosInf;
1238       ok = match("nfinity", 7);
1239     } else {
1240       ok = false;
1241     }
1242     break;
1243   case ',':
1244     token.type_ = tokenArraySeparator;
1245     break;
1246   case ':':
1247     token.type_ = tokenMemberSeparator;
1248     break;
1249   case 0:
1250     token.type_ = tokenEndOfStream;
1251     break;
1252   default:
1253     ok = false;
1254     break;
1255   }
1256   if (!ok)
1257     token.type_ = tokenError;
1258   token.end_ = current_;
1259   return ok;
1260 }
1261 
skipSpaces()1262 void OurReader::skipSpaces() {
1263   while (current_ != end_) {
1264     Char c = *current_;
1265     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1266       ++current_;
1267     else
1268       break;
1269   }
1270 }
1271 
skipBom(bool skipBom)1272 void OurReader::skipBom(bool skipBom) {
1273   // The default behavior is to skip BOM.
1274   if (skipBom) {
1275     if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1276       begin_ += 3;
1277       current_ = begin_;
1278     }
1279   }
1280 }
1281 
match(const Char * pattern,int patternLength)1282 bool OurReader::match(const Char* pattern, int patternLength) {
1283   if (end_ - current_ < patternLength)
1284     return false;
1285   int index = patternLength;
1286   while (index--)
1287     if (current_[index] != pattern[index])
1288       return false;
1289   current_ += patternLength;
1290   return true;
1291 }
1292 
readComment()1293 bool OurReader::readComment() {
1294   const Location commentBegin = current_ - 1;
1295   const Char c = getNextChar();
1296   bool successful = false;
1297   bool cStyleWithEmbeddedNewline = false;
1298 
1299   const bool isCStyleComment = (c == '*');
1300   const bool isCppStyleComment = (c == '/');
1301   if (isCStyleComment) {
1302     successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1303   } else if (isCppStyleComment) {
1304     successful = readCppStyleComment();
1305   }
1306 
1307   if (!successful)
1308     return false;
1309 
1310   if (collectComments_) {
1311     CommentPlacement placement = commentBefore;
1312 
1313     if (!lastValueHasAComment_) {
1314       if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1315         if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1316           placement = commentAfterOnSameLine;
1317           lastValueHasAComment_ = true;
1318         }
1319       }
1320     }
1321 
1322     addComment(commentBegin, current_, placement);
1323   }
1324   return true;
1325 }
1326 
normalizeEOL(OurReader::Location begin,OurReader::Location end)1327 String OurReader::normalizeEOL(OurReader::Location begin,
1328                                OurReader::Location end) {
1329   String normalized;
1330   normalized.reserve(static_cast<size_t>(end - begin));
1331   OurReader::Location current = begin;
1332   while (current != end) {
1333     char c = *current++;
1334     if (c == '\r') {
1335       if (current != end && *current == '\n')
1336         // convert dos EOL
1337         ++current;
1338       // convert Mac EOL
1339       normalized += '\n';
1340     } else {
1341       normalized += c;
1342     }
1343   }
1344   return normalized;
1345 }
1346 
addComment(Location begin,Location end,CommentPlacement placement)1347 void OurReader::addComment(Location begin, Location end,
1348                            CommentPlacement placement) {
1349   assert(collectComments_);
1350   const String& normalized = normalizeEOL(begin, end);
1351   if (placement == commentAfterOnSameLine) {
1352     assert(lastValue_ != nullptr);
1353     lastValue_->setComment(normalized, placement);
1354   } else {
1355     commentsBefore_ += normalized;
1356   }
1357 }
1358 
readCStyleComment(bool * containsNewLineResult)1359 bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1360   *containsNewLineResult = false;
1361 
1362   while ((current_ + 1) < end_) {
1363     Char c = getNextChar();
1364     if (c == '*' && *current_ == '/')
1365       break;
1366     if (c == '\n')
1367       *containsNewLineResult = true;
1368   }
1369 
1370   return getNextChar() == '/';
1371 }
1372 
readCppStyleComment()1373 bool OurReader::readCppStyleComment() {
1374   while (current_ != end_) {
1375     Char c = getNextChar();
1376     if (c == '\n')
1377       break;
1378     if (c == '\r') {
1379       // Consume DOS EOL. It will be normalized in addComment.
1380       if (current_ != end_ && *current_ == '\n')
1381         getNextChar();
1382       // Break on Moc OS 9 EOL.
1383       break;
1384     }
1385   }
1386   return true;
1387 }
1388 
readNumber(bool checkInf)1389 bool OurReader::readNumber(bool checkInf) {
1390   Location p = current_;
1391   if (checkInf && p != end_ && *p == 'I') {
1392     current_ = ++p;
1393     return false;
1394   }
1395   char c = '0'; // stopgap for already consumed character
1396   // integral part
1397   while (c >= '0' && c <= '9')
1398     c = (current_ = p) < end_ ? *p++ : '\0';
1399   // fractional part
1400   if (c == '.') {
1401     c = (current_ = p) < end_ ? *p++ : '\0';
1402     while (c >= '0' && c <= '9')
1403       c = (current_ = p) < end_ ? *p++ : '\0';
1404   }
1405   // exponential part
1406   if (c == 'e' || c == 'E') {
1407     c = (current_ = p) < end_ ? *p++ : '\0';
1408     if (c == '+' || c == '-')
1409       c = (current_ = p) < end_ ? *p++ : '\0';
1410     while (c >= '0' && c <= '9')
1411       c = (current_ = p) < end_ ? *p++ : '\0';
1412   }
1413   return true;
1414 }
readString()1415 bool OurReader::readString() {
1416   Char c = 0;
1417   while (current_ != end_) {
1418     c = getNextChar();
1419     if (c == '\\')
1420       getNextChar();
1421     else if (c == '"')
1422       break;
1423   }
1424   return c == '"';
1425 }
1426 
readStringSingleQuote()1427 bool OurReader::readStringSingleQuote() {
1428   Char c = 0;
1429   while (current_ != end_) {
1430     c = getNextChar();
1431     if (c == '\\')
1432       getNextChar();
1433     else if (c == '\'')
1434       break;
1435   }
1436   return c == '\'';
1437 }
1438 
readObject(Token & token)1439 bool OurReader::readObject(Token& token) {
1440   Token tokenName;
1441   String name;
1442   Value init(objectValue);
1443   currentValue().swapPayload(init);
1444   currentValue().setOffsetStart(token.start_ - begin_);
1445   while (readToken(tokenName)) {
1446     bool initialTokenOk = true;
1447     while (tokenName.type_ == tokenComment && initialTokenOk)
1448       initialTokenOk = readToken(tokenName);
1449     if (!initialTokenOk)
1450       break;
1451     if (tokenName.type_ == tokenObjectEnd &&
1452         (name.empty() ||
1453          features_.allowTrailingCommas_)) // empty object or trailing comma
1454       return true;
1455     name.clear();
1456     if (tokenName.type_ == tokenString) {
1457       if (!decodeString(tokenName, name))
1458         return recoverFromError(tokenObjectEnd);
1459     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1460       Value numberName;
1461       if (!decodeNumber(tokenName, numberName))
1462         return recoverFromError(tokenObjectEnd);
1463       name = numberName.asString();
1464     } else {
1465       break;
1466     }
1467     if (name.length() >= (1U << 30))
1468       throwRuntimeError("keylength >= 2^30");
1469     if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1470       String msg = "Duplicate key: '" + name + "'";
1471       return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
1472     }
1473 
1474     Token colon;
1475     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1476       return addErrorAndRecover("Missing ':' after object member name", colon,
1477                                 tokenObjectEnd);
1478     }
1479     Value& value = currentValue()[name];
1480     nodes_.push(&value);
1481     bool ok = readValue();
1482     nodes_.pop();
1483     if (!ok) // error already set
1484       return recoverFromError(tokenObjectEnd);
1485 
1486     Token comma;
1487     if (!readToken(comma) ||
1488         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1489          comma.type_ != tokenComment)) {
1490       return addErrorAndRecover("Missing ',' or '}' in object declaration",
1491                                 comma, tokenObjectEnd);
1492     }
1493     bool finalizeTokenOk = true;
1494     while (comma.type_ == tokenComment && finalizeTokenOk)
1495       finalizeTokenOk = readToken(comma);
1496     if (comma.type_ == tokenObjectEnd)
1497       return true;
1498   }
1499   return addErrorAndRecover("Missing '}' or object member name", tokenName,
1500                             tokenObjectEnd);
1501 }
1502 
readArray(Token & token)1503 bool OurReader::readArray(Token& token) {
1504   Value init(arrayValue);
1505   currentValue().swapPayload(init);
1506   currentValue().setOffsetStart(token.start_ - begin_);
1507   int index = 0;
1508   for (;;) {
1509     skipSpaces();
1510     if (current_ != end_ && *current_ == ']' &&
1511         (index == 0 ||
1512          (features_.allowTrailingCommas_ &&
1513           !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
1514                                                       // comma
1515     {
1516       Token endArray;
1517       readToken(endArray);
1518       return true;
1519     }
1520     Value& value = currentValue()[index++];
1521     nodes_.push(&value);
1522     bool ok = readValue();
1523     nodes_.pop();
1524     if (!ok) // error already set
1525       return recoverFromError(tokenArrayEnd);
1526 
1527     Token currentToken;
1528     // Accept Comment after last item in the array.
1529     ok = readToken(currentToken);
1530     while (currentToken.type_ == tokenComment && ok) {
1531       ok = readToken(currentToken);
1532     }
1533     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
1534                          currentToken.type_ != tokenArrayEnd);
1535     if (!ok || badTokenType) {
1536       return addErrorAndRecover("Missing ',' or ']' in array declaration",
1537                                 currentToken, tokenArrayEnd);
1538     }
1539     if (currentToken.type_ == tokenArrayEnd)
1540       break;
1541   }
1542   return true;
1543 }
1544 
decodeNumber(Token & token)1545 bool OurReader::decodeNumber(Token& token) {
1546   Value decoded;
1547   if (!decodeNumber(token, decoded))
1548     return false;
1549   currentValue().swapPayload(decoded);
1550   currentValue().setOffsetStart(token.start_ - begin_);
1551   currentValue().setOffsetLimit(token.end_ - begin_);
1552   return true;
1553 }
1554 
decodeNumber(Token & token,Value & decoded)1555 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1556   // Attempts to parse the number as an integer. If the number is
1557   // larger than the maximum supported value of an integer then
1558   // we decode the number as a double.
1559   Location current = token.start_;
1560   const bool isNegative = *current == '-';
1561   if (isNegative) {
1562     ++current;
1563   }
1564 
1565   // We assume we can represent the largest and smallest integer types as
1566   // unsigned integers with separate sign. This is only true if they can fit
1567   // into an unsigned integer.
1568   static_assert(Value::maxLargestInt <= Value::maxLargestUInt,
1569                 "Int must be smaller than UInt");
1570 
1571   // We need to convert minLargestInt into a positive number. The easiest way
1572   // to do this conversion is to assume our "threshold" value of minLargestInt
1573   // divided by 10 can fit in maxLargestInt when absolute valued. This should
1574   // be a safe assumption.
1575   static_assert(Value::minLargestInt <= -Value::maxLargestInt,
1576                 "The absolute value of minLargestInt must be greater than or "
1577                 "equal to maxLargestInt");
1578   static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
1579                 "The absolute value of minLargestInt must be only 1 magnitude "
1580                 "larger than maxLargest Int");
1581 
1582   static constexpr Value::LargestUInt positive_threshold =
1583       Value::maxLargestUInt / 10;
1584   static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;
1585 
1586   // For the negative values, we have to be more careful. Since typically
1587   // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1588   // then take the inverse. This assumes that minLargestInt is only a single
1589   // power of 10 different in magnitude, which we check above. For the last
1590   // digit, we take the modulus before negating for the same reason.
1591   static constexpr auto negative_threshold =
1592       Value::LargestUInt(-(Value::minLargestInt / 10));
1593   static constexpr auto negative_last_digit =
1594       Value::UInt(-(Value::minLargestInt % 10));
1595 
1596   const Value::LargestUInt threshold =
1597       isNegative ? negative_threshold : positive_threshold;
1598   const Value::UInt max_last_digit =
1599       isNegative ? negative_last_digit : positive_last_digit;
1600 
1601   Value::LargestUInt value = 0;
1602   while (current < token.end_) {
1603     Char c = *current++;
1604     if (c < '0' || c > '9')
1605       return decodeDouble(token, decoded);
1606 
1607     const auto digit(static_cast<Value::UInt>(c - '0'));
1608     if (value >= threshold) {
1609       // We've hit or exceeded the max value divided by 10 (rounded down). If
1610       // a) we've only just touched the limit, meaing value == threshold,
1611       // b) this is the last digit, or
1612       // c) it's small enough to fit in that rounding delta, we're okay.
1613       // Otherwise treat this number as a double to avoid overflow.
1614       if (value > threshold || current != token.end_ ||
1615           digit > max_last_digit) {
1616         return decodeDouble(token, decoded);
1617       }
1618     }
1619     value = value * 10 + digit;
1620   }
1621 
1622   if (isNegative) {
1623     // We use the same magnitude assumption here, just in case.
1624     const auto last_digit = static_cast<Value::UInt>(value % 10);
1625     decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1626   } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1627     decoded = Value::LargestInt(value);
1628   } else {
1629     decoded = value;
1630   }
1631 
1632   return true;
1633 }
1634 
decodeDouble(Token & token)1635 bool OurReader::decodeDouble(Token& token) {
1636   Value decoded;
1637   if (!decodeDouble(token, decoded))
1638     return false;
1639   currentValue().swapPayload(decoded);
1640   currentValue().setOffsetStart(token.start_ - begin_);
1641   currentValue().setOffsetLimit(token.end_ - begin_);
1642   return true;
1643 }
1644 
decodeDouble(Token & token,Value & decoded)1645 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1646   double value = 0;
1647   const String buffer(token.start_, token.end_);
1648   IStringStream is(buffer);
1649   if (!(is >> value)) {
1650     return addError(
1651         "'" + String(token.start_, token.end_) + "' is not a number.", token);
1652   }
1653   decoded = value;
1654   return true;
1655 }
1656 
decodeString(Token & token)1657 bool OurReader::decodeString(Token& token) {
1658   String decoded_string;
1659   if (!decodeString(token, decoded_string))
1660     return false;
1661   Value decoded(decoded_string);
1662   currentValue().swapPayload(decoded);
1663   currentValue().setOffsetStart(token.start_ - begin_);
1664   currentValue().setOffsetLimit(token.end_ - begin_);
1665   return true;
1666 }
1667 
decodeString(Token & token,String & decoded)1668 bool OurReader::decodeString(Token& token, String& decoded) {
1669   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1670   Location current = token.start_ + 1; // skip '"'
1671   Location end = token.end_ - 1;       // do not include '"'
1672   while (current != end) {
1673     Char c = *current++;
1674     if (c == '"')
1675       break;
1676     if (c == '\\') {
1677       if (current == end)
1678         return addError("Empty escape sequence in string", token, current);
1679       Char escape = *current++;
1680       switch (escape) {
1681       case '"':
1682         decoded += '"';
1683         break;
1684       case '/':
1685         decoded += '/';
1686         break;
1687       case '\\':
1688         decoded += '\\';
1689         break;
1690       case 'b':
1691         decoded += '\b';
1692         break;
1693       case 'f':
1694         decoded += '\f';
1695         break;
1696       case 'n':
1697         decoded += '\n';
1698         break;
1699       case 'r':
1700         decoded += '\r';
1701         break;
1702       case 't':
1703         decoded += '\t';
1704         break;
1705       case 'u': {
1706         unsigned int unicode;
1707         if (!decodeUnicodeCodePoint(token, current, end, unicode))
1708           return false;
1709         decoded += codePointToUTF8(unicode);
1710       } break;
1711       default:
1712         return addError("Bad escape sequence in string", token, current);
1713       }
1714     } else {
1715       decoded += c;
1716     }
1717   }
1718   return true;
1719 }
1720 
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)1721 bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1722                                        Location end, unsigned int& unicode) {
1723 
1724   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1725     return false;
1726   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1727     // surrogate pairs
1728     if (end - current < 6)
1729       return addError(
1730           "additional six characters expected to parse unicode surrogate pair.",
1731           token, current);
1732     if (*(current++) == '\\' && *(current++) == 'u') {
1733       unsigned int surrogatePair;
1734       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1735         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1736       } else
1737         return false;
1738     } else
1739       return addError("expecting another \\u token to begin the second half of "
1740                       "a unicode surrogate pair",
1741                       token, current);
1742   }
1743   return true;
1744 }
1745 
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)1746 bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1747                                             Location end,
1748                                             unsigned int& ret_unicode) {
1749   if (end - current < 4)
1750     return addError(
1751         "Bad unicode escape sequence in string: four digits expected.", token,
1752         current);
1753   int unicode = 0;
1754   for (int index = 0; index < 4; ++index) {
1755     Char c = *current++;
1756     unicode *= 16;
1757     if (c >= '0' && c <= '9')
1758       unicode += c - '0';
1759     else if (c >= 'a' && c <= 'f')
1760       unicode += c - 'a' + 10;
1761     else if (c >= 'A' && c <= 'F')
1762       unicode += c - 'A' + 10;
1763     else
1764       return addError(
1765           "Bad unicode escape sequence in string: hexadecimal digit expected.",
1766           token, current);
1767   }
1768   ret_unicode = static_cast<unsigned int>(unicode);
1769   return true;
1770 }
1771 
addError(const String & message,Token & token,Location extra)1772 bool OurReader::addError(const String& message, Token& token, Location extra) {
1773   ErrorInfo info;
1774   info.token_ = token;
1775   info.message_ = message;
1776   info.extra_ = extra;
1777   errors_.push_back(info);
1778   return false;
1779 }
1780 
recoverFromError(TokenType skipUntilToken)1781 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1782   size_t errorCount = errors_.size();
1783   Token skip;
1784   for (;;) {
1785     if (!readToken(skip))
1786       errors_.resize(errorCount); // discard errors caused by recovery
1787     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1788       break;
1789   }
1790   errors_.resize(errorCount);
1791   return false;
1792 }
1793 
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)1794 bool OurReader::addErrorAndRecover(const String& message, Token& token,
1795                                    TokenType skipUntilToken) {
1796   addError(message, token);
1797   return recoverFromError(skipUntilToken);
1798 }
1799 
currentValue()1800 Value& OurReader::currentValue() { return *(nodes_.top()); }
1801 
getNextChar()1802 OurReader::Char OurReader::getNextChar() {
1803   if (current_ == end_)
1804     return 0;
1805   return *current_++;
1806 }
1807 
getLocationLineAndColumn(Location location,int & line,int & column) const1808 void OurReader::getLocationLineAndColumn(Location location, int& line,
1809                                          int& column) const {
1810   Location current = begin_;
1811   Location lastLineStart = current;
1812   line = 0;
1813   while (current < location && current != end_) {
1814     Char c = *current++;
1815     if (c == '\r') {
1816       if (*current == '\n')
1817         ++current;
1818       lastLineStart = current;
1819       ++line;
1820     } else if (c == '\n') {
1821       lastLineStart = current;
1822       ++line;
1823     }
1824   }
1825   // column & line start at 1
1826   column = int(location - lastLineStart) + 1;
1827   ++line;
1828 }
1829 
getLocationLineAndColumn(Location location) const1830 String OurReader::getLocationLineAndColumn(Location location) const {
1831   int line, column;
1832   getLocationLineAndColumn(location, line, column);
1833   char buffer[18 + 16 + 16 + 1];
1834   jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1835   return buffer;
1836 }
1837 
getFormattedErrorMessages() const1838 String OurReader::getFormattedErrorMessages() const {
1839   String formattedMessage;
1840   for (const auto& error : errors_) {
1841     formattedMessage +=
1842         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1843     formattedMessage += "  " + error.message_ + "\n";
1844     if (error.extra_)
1845       formattedMessage +=
1846           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1847   }
1848   return formattedMessage;
1849 }
1850 
getStructuredErrors() const1851 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1852   std::vector<OurReader::StructuredError> allErrors;
1853   for (const auto& error : errors_) {
1854     OurReader::StructuredError structured;
1855     structured.offset_start = error.token_.start_ - begin_;
1856     structured.offset_limit = error.token_.end_ - begin_;
1857     structured.message = error.message_;
1858     allErrors.push_back(structured);
1859   }
1860   return allErrors;
1861 }
1862 
1863 class OurCharReader : public CharReader {
1864   bool const collectComments_;
1865   OurReader reader_;
1866 
1867 public:
OurCharReader(bool collectComments,OurFeatures const & features)1868   OurCharReader(bool collectComments, OurFeatures const& features)
1869       : collectComments_(collectComments), reader_(features) {}
parse(char const * beginDoc,char const * endDoc,Value * root,String * errs)1870   bool parse(char const* beginDoc, char const* endDoc, Value* root,
1871              String* errs) override {
1872     bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1873     if (errs) {
1874       *errs = reader_.getFormattedErrorMessages();
1875     }
1876     return ok;
1877   }
1878 };
1879 
CharReaderBuilder()1880 CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
1881 CharReaderBuilder::~CharReaderBuilder() = default;
newCharReader() const1882 CharReader* CharReaderBuilder::newCharReader() const {
1883   bool collectComments = settings_["collectComments"].asBool();
1884   OurFeatures features = OurFeatures::all();
1885   features.allowComments_ = settings_["allowComments"].asBool();
1886   features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1887   features.strictRoot_ = settings_["strictRoot"].asBool();
1888   features.allowDroppedNullPlaceholders_ =
1889       settings_["allowDroppedNullPlaceholders"].asBool();
1890   features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1891   features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1892 
1893   // Stack limit is always a size_t, so we get this as an unsigned int
1894   // regardless of it we have 64-bit integer support enabled.
1895   features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1896   features.failIfExtra_ = settings_["failIfExtra"].asBool();
1897   features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1898   features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1899   features.skipBom_ = settings_["skipBom"].asBool();
1900   return new OurCharReader(collectComments, features);
1901 }
1902 
validate(Json::Value * invalid) const1903 bool CharReaderBuilder::validate(Json::Value* invalid) const {
1904   static const auto& valid_keys = *new std::set<String>{
1905       "collectComments",
1906       "allowComments",
1907       "allowTrailingCommas",
1908       "strictRoot",
1909       "allowDroppedNullPlaceholders",
1910       "allowNumericKeys",
1911       "allowSingleQuotes",
1912       "stackLimit",
1913       "failIfExtra",
1914       "rejectDupKeys",
1915       "allowSpecialFloats",
1916       "skipBom",
1917   };
1918   for (auto si = settings_.begin(); si != settings_.end(); ++si) {
1919     auto key = si.name();
1920     if (valid_keys.count(key))
1921       continue;
1922     if (invalid)
1923       (*invalid)[key] = *si;
1924     else
1925       return false;
1926   }
1927   return invalid ? invalid->empty() : true;
1928 }
1929 
operator [](const String & key)1930 Value& CharReaderBuilder::operator[](const String& key) {
1931   return settings_[key];
1932 }
1933 // static
strictMode(Json::Value * settings)1934 void CharReaderBuilder::strictMode(Json::Value* settings) {
1935   //! [CharReaderBuilderStrictMode]
1936   (*settings)["allowComments"] = false;
1937   (*settings)["allowTrailingCommas"] = false;
1938   (*settings)["strictRoot"] = true;
1939   (*settings)["allowDroppedNullPlaceholders"] = false;
1940   (*settings)["allowNumericKeys"] = false;
1941   (*settings)["allowSingleQuotes"] = false;
1942   (*settings)["stackLimit"] = 1000;
1943   (*settings)["failIfExtra"] = true;
1944   (*settings)["rejectDupKeys"] = true;
1945   (*settings)["allowSpecialFloats"] = false;
1946   (*settings)["skipBom"] = true;
1947   //! [CharReaderBuilderStrictMode]
1948 }
1949 // static
setDefaults(Json::Value * settings)1950 void CharReaderBuilder::setDefaults(Json::Value* settings) {
1951   //! [CharReaderBuilderDefaults]
1952   (*settings)["collectComments"] = true;
1953   (*settings)["allowComments"] = true;
1954   (*settings)["allowTrailingCommas"] = true;
1955   (*settings)["strictRoot"] = false;
1956   (*settings)["allowDroppedNullPlaceholders"] = false;
1957   (*settings)["allowNumericKeys"] = false;
1958   (*settings)["allowSingleQuotes"] = false;
1959   (*settings)["stackLimit"] = 1000;
1960   (*settings)["failIfExtra"] = false;
1961   (*settings)["rejectDupKeys"] = false;
1962   (*settings)["allowSpecialFloats"] = false;
1963   (*settings)["skipBom"] = true;
1964   //! [CharReaderBuilderDefaults]
1965 }
1966 
1967 //////////////////////////////////
1968 // global functions
1969 
parseFromStream(CharReader::Factory const & fact,IStream & sin,Value * root,String * errs)1970 bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
1971                      String* errs) {
1972   OStringStream ssin;
1973   ssin << sin.rdbuf();
1974   String doc = ssin.str();
1975   char const* begin = doc.data();
1976   char const* end = begin + doc.size();
1977   // Note that we do not actually need a null-terminator.
1978   CharReaderPtr const reader(fact.newCharReader());
1979   return reader->parse(begin, end, root, errs);
1980 }
1981 
operator >>(IStream & sin,Value & root)1982 IStream& operator>>(IStream& sin, Value& root) {
1983   CharReaderBuilder b;
1984   String errs;
1985   bool ok = parseFromStream(b, sin, &root, &errs);
1986   if (!ok) {
1987     throwRuntimeError(errs);
1988   }
1989   return sin;
1990 }
1991 
1992 } // namespace Json
1993