1 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
2 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
3 // Distributed under MIT license, or public domain if desired and
4 // recognized in your jurisdiction.
5 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
6
7 #if !defined(JSON_IS_AMALGAMATION)
8 #include "json_tool.h"
9 #include <json/assertions.h>
10 #include <json/reader.h>
11 #include <json/value.h>
12 #endif // if !defined(JSON_IS_AMALGAMATION)
13 #include <algorithm>
14 #include <cassert>
15 #include <cstring>
16 #include <iostream>
17 #include <istream>
18 #include <limits>
19 #include <memory>
20 #include <set>
21 #include <sstream>
22 #include <utility>
23
24 #include <cstdio>
25 #if __cplusplus >= 201103L
26
27 #if !defined(sscanf)
28 #define sscanf std::sscanf
29 #endif
30
31 #endif //__cplusplus
32
33 #if defined(_MSC_VER)
34 #if !defined(_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES)
35 #define _CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES 1
36 #endif //_CRT_SECURE_CPP_OVERLOAD_STANDARD_NAMES
37 #endif //_MSC_VER
38
39 #if defined(_MSC_VER)
40 // Disable warning about strdup being deprecated.
41 #pragma warning(disable : 4996)
42 #endif
43
// The nesting-depth limit of the deprecated Reader is fixed at build time.
// Define JSONCPP_DEPRECATED_STACK_LIMIT as an integer on the compiler command
// line to override the default of 1000.
#ifndef JSONCPP_DEPRECATED_STACK_LIMIT
#define JSONCPP_DEPRECATED_STACK_LIMIT 1000
#endif

// Upper bound on nodes_.size() checked at the top of Reader::readValue().
static const size_t stackLimit_g = JSONCPP_DEPRECATED_STACK_LIMIT;
52
53 namespace Json {
54
55 #if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
56 using CharReaderPtr = std::unique_ptr<CharReader>;
57 #else
58 using CharReaderPtr = std::auto_ptr<CharReader>;
59 #endif
60
61 // Implementation of class Features
62 // ////////////////////////////////
63
// Default constructor: compiler-generated, defined out of line here.
Features::Features() = default;
65
all()66 Features Features::all() { return {}; }
67
strictMode()68 Features Features::strictMode() {
69 Features features;
70 features.allowComments_ = false;
71 features.strictRoot_ = true;
72 features.allowDroppedNullPlaceholders_ = false;
73 features.allowNumericKeys_ = false;
74 return features;
75 }
76
77 // Implementation of class Reader
78 // ////////////////////////////////
79
containsNewLine(Reader::Location begin,Reader::Location end)80 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
81 return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
82 }
83
84 // Class Reader
85 // //////////////////////////////////////////////////////////////////
86
Reader()87 Reader::Reader() : features_(Features::all()) {}
88
Reader(const Features & features)89 Reader::Reader(const Features& features) : features_(features) {}
90
parse(const std::string & document,Value & root,bool collectComments)91 bool Reader::parse(const std::string& document, Value& root,
92 bool collectComments) {
93 document_.assign(document.begin(), document.end());
94 const char* begin = document_.c_str();
95 const char* end = begin + document_.length();
96 return parse(begin, end, root, collectComments);
97 }
98
parse(std::istream & is,Value & root,bool collectComments)99 bool Reader::parse(std::istream& is, Value& root, bool collectComments) {
100 // std::istream_iterator<char> begin(is);
101 // std::istream_iterator<char> end;
102 // Those would allow streamed input from a file, if parse() were a
103 // template function.
104
105 // Since String is reference-counted, this at least does not
106 // create an extra copy.
107 String doc(std::istreambuf_iterator<char>(is), {});
108 return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
109 }
110
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)111 bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
112 bool collectComments) {
113 if (!features_.allowComments_) {
114 collectComments = false;
115 }
116
117 begin_ = beginDoc;
118 end_ = endDoc;
119 collectComments_ = collectComments;
120 current_ = begin_;
121 lastValueEnd_ = nullptr;
122 lastValue_ = nullptr;
123 commentsBefore_.clear();
124 errors_.clear();
125 while (!nodes_.empty())
126 nodes_.pop();
127 nodes_.push(&root);
128
129 bool successful = readValue();
130 Token token;
131 skipCommentTokens(token);
132 if (collectComments_ && !commentsBefore_.empty())
133 root.setComment(commentsBefore_, commentAfter);
134 if (features_.strictRoot_) {
135 if (!root.isArray() && !root.isObject()) {
136 // Set error location to start of doc, ideally should be first token found
137 // in doc
138 token.type_ = tokenError;
139 token.start_ = beginDoc;
140 token.end_ = endDoc;
141 addError(
142 "A valid JSON document must be either an array or an object value.",
143 token);
144 return false;
145 }
146 }
147 return successful;
148 }
149
readValue()150 bool Reader::readValue() {
151 // readValue() may call itself only if it calls readObject() or ReadArray().
152 // These methods execute nodes_.push() just before and nodes_.pop)() just
153 // after calling readValue(). parse() executes one nodes_.push(), so > instead
154 // of >=.
155 if (nodes_.size() > stackLimit_g)
156 throwRuntimeError("Exceeded stackLimit in readValue().");
157
158 Token token;
159 skipCommentTokens(token);
160 bool successful = true;
161
162 if (collectComments_ && !commentsBefore_.empty()) {
163 currentValue().setComment(commentsBefore_, commentBefore);
164 commentsBefore_.clear();
165 }
166
167 switch (token.type_) {
168 case tokenObjectBegin:
169 successful = readObject(token);
170 currentValue().setOffsetLimit(current_ - begin_);
171 break;
172 case tokenArrayBegin:
173 successful = readArray(token);
174 currentValue().setOffsetLimit(current_ - begin_);
175 break;
176 case tokenNumber:
177 successful = decodeNumber(token);
178 break;
179 case tokenString:
180 successful = decodeString(token);
181 break;
182 case tokenTrue: {
183 Value v(true);
184 currentValue().swapPayload(v);
185 currentValue().setOffsetStart(token.start_ - begin_);
186 currentValue().setOffsetLimit(token.end_ - begin_);
187 } break;
188 case tokenFalse: {
189 Value v(false);
190 currentValue().swapPayload(v);
191 currentValue().setOffsetStart(token.start_ - begin_);
192 currentValue().setOffsetLimit(token.end_ - begin_);
193 } break;
194 case tokenNull: {
195 Value v;
196 currentValue().swapPayload(v);
197 currentValue().setOffsetStart(token.start_ - begin_);
198 currentValue().setOffsetLimit(token.end_ - begin_);
199 } break;
200 case tokenArraySeparator:
201 case tokenObjectEnd:
202 case tokenArrayEnd:
203 if (features_.allowDroppedNullPlaceholders_) {
204 // "Un-read" the current token and mark the current value as a null
205 // token.
206 current_--;
207 Value v;
208 currentValue().swapPayload(v);
209 currentValue().setOffsetStart(current_ - begin_ - 1);
210 currentValue().setOffsetLimit(current_ - begin_);
211 break;
212 } // Else, fall through...
213 default:
214 currentValue().setOffsetStart(token.start_ - begin_);
215 currentValue().setOffsetLimit(token.end_ - begin_);
216 return addError("Syntax error: value, object or array expected.", token);
217 }
218
219 if (collectComments_) {
220 lastValueEnd_ = current_;
221 lastValue_ = ¤tValue();
222 }
223
224 return successful;
225 }
226
skipCommentTokens(Token & token)227 void Reader::skipCommentTokens(Token& token) {
228 if (features_.allowComments_) {
229 do {
230 readToken(token);
231 } while (token.type_ == tokenComment);
232 } else {
233 readToken(token);
234 }
235 }
236
readToken(Token & token)237 bool Reader::readToken(Token& token) {
238 skipSpaces();
239 token.start_ = current_;
240 Char c = getNextChar();
241 bool ok = true;
242 switch (c) {
243 case '{':
244 token.type_ = tokenObjectBegin;
245 break;
246 case '}':
247 token.type_ = tokenObjectEnd;
248 break;
249 case '[':
250 token.type_ = tokenArrayBegin;
251 break;
252 case ']':
253 token.type_ = tokenArrayEnd;
254 break;
255 case '"':
256 token.type_ = tokenString;
257 ok = readString();
258 break;
259 case '/':
260 token.type_ = tokenComment;
261 ok = readComment();
262 break;
263 case '0':
264 case '1':
265 case '2':
266 case '3':
267 case '4':
268 case '5':
269 case '6':
270 case '7':
271 case '8':
272 case '9':
273 case '-':
274 token.type_ = tokenNumber;
275 readNumber();
276 break;
277 case 't':
278 token.type_ = tokenTrue;
279 ok = match("rue", 3);
280 break;
281 case 'f':
282 token.type_ = tokenFalse;
283 ok = match("alse", 4);
284 break;
285 case 'n':
286 token.type_ = tokenNull;
287 ok = match("ull", 3);
288 break;
289 case ',':
290 token.type_ = tokenArraySeparator;
291 break;
292 case ':':
293 token.type_ = tokenMemberSeparator;
294 break;
295 case 0:
296 token.type_ = tokenEndOfStream;
297 break;
298 default:
299 ok = false;
300 break;
301 }
302 if (!ok)
303 token.type_ = tokenError;
304 token.end_ = current_;
305 return ok;
306 }
307
skipSpaces()308 void Reader::skipSpaces() {
309 while (current_ != end_) {
310 Char c = *current_;
311 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
312 ++current_;
313 else
314 break;
315 }
316 }
317
match(const Char * pattern,int patternLength)318 bool Reader::match(const Char* pattern, int patternLength) {
319 if (end_ - current_ < patternLength)
320 return false;
321 int index = patternLength;
322 while (index--)
323 if (current_[index] != pattern[index])
324 return false;
325 current_ += patternLength;
326 return true;
327 }
328
readComment()329 bool Reader::readComment() {
330 Location commentBegin = current_ - 1;
331 Char c = getNextChar();
332 bool successful = false;
333 if (c == '*')
334 successful = readCStyleComment();
335 else if (c == '/')
336 successful = readCppStyleComment();
337 if (!successful)
338 return false;
339
340 if (collectComments_) {
341 CommentPlacement placement = commentBefore;
342 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
343 if (c != '*' || !containsNewLine(commentBegin, current_))
344 placement = commentAfterOnSameLine;
345 }
346
347 addComment(commentBegin, current_, placement);
348 }
349 return true;
350 }
351
normalizeEOL(Reader::Location begin,Reader::Location end)352 String Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
353 String normalized;
354 normalized.reserve(static_cast<size_t>(end - begin));
355 Reader::Location current = begin;
356 while (current != end) {
357 char c = *current++;
358 if (c == '\r') {
359 if (current != end && *current == '\n')
360 // convert dos EOL
361 ++current;
362 // convert Mac EOL
363 normalized += '\n';
364 } else {
365 normalized += c;
366 }
367 }
368 return normalized;
369 }
370
addComment(Location begin,Location end,CommentPlacement placement)371 void Reader::addComment(Location begin, Location end,
372 CommentPlacement placement) {
373 assert(collectComments_);
374 const String& normalized = normalizeEOL(begin, end);
375 if (placement == commentAfterOnSameLine) {
376 assert(lastValue_ != nullptr);
377 lastValue_->setComment(normalized, placement);
378 } else {
379 commentsBefore_ += normalized;
380 }
381 }
382
readCStyleComment()383 bool Reader::readCStyleComment() {
384 while ((current_ + 1) < end_) {
385 Char c = getNextChar();
386 if (c == '*' && *current_ == '/')
387 break;
388 }
389 return getNextChar() == '/';
390 }
391
readCppStyleComment()392 bool Reader::readCppStyleComment() {
393 while (current_ != end_) {
394 Char c = getNextChar();
395 if (c == '\n')
396 break;
397 if (c == '\r') {
398 // Consume DOS EOL. It will be normalized in addComment.
399 if (current_ != end_ && *current_ == '\n')
400 getNextChar();
401 // Break on Moc OS 9 EOL.
402 break;
403 }
404 }
405 return true;
406 }
407
readNumber()408 void Reader::readNumber() {
409 Location p = current_;
410 char c = '0'; // stopgap for already consumed character
411 // integral part
412 while (c >= '0' && c <= '9')
413 c = (current_ = p) < end_ ? *p++ : '\0';
414 // fractional part
415 if (c == '.') {
416 c = (current_ = p) < end_ ? *p++ : '\0';
417 while (c >= '0' && c <= '9')
418 c = (current_ = p) < end_ ? *p++ : '\0';
419 }
420 // exponential part
421 if (c == 'e' || c == 'E') {
422 c = (current_ = p) < end_ ? *p++ : '\0';
423 if (c == '+' || c == '-')
424 c = (current_ = p) < end_ ? *p++ : '\0';
425 while (c >= '0' && c <= '9')
426 c = (current_ = p) < end_ ? *p++ : '\0';
427 }
428 }
429
readString()430 bool Reader::readString() {
431 Char c = '\0';
432 while (current_ != end_) {
433 c = getNextChar();
434 if (c == '\\')
435 getNextChar();
436 else if (c == '"')
437 break;
438 }
439 return c == '"';
440 }
441
// Reads the members of an object whose opening '{' is `token`.
//
// The current value is replaced by an empty object. Members are then read in
// a loop: name, ':', value, followed by ',' or '}'. Comment tokens between
// these elements are skipped. On malformed input an error is recorded, the
// reader skips ahead through recoverFromError(tokenObjectEnd), and false is
// returned.
bool Reader::readObject(Token& token) {
  Token tokenName;
  String name;
  Value init(objectValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);
  while (readToken(tokenName)) {
    // Skip any comments that precede the member name.
    bool initialTokenOk = true;
    while (tokenName.type_ == tokenComment && initialTokenOk)
      initialTokenOk = readToken(tokenName);
    if (!initialTokenOk)
      break;
    // `name` is still empty only before the first member has been read.
    if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
      return true;
    name.clear();
    if (tokenName.type_ == tokenString) {
      if (!decodeString(tokenName, name))
        return recoverFromError(tokenObjectEnd);
    } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
      // Numeric keys are converted to their string form.
      Value numberName;
      if (!decodeNumber(tokenName, numberName))
        return recoverFromError(tokenObjectEnd);
      name = numberName.asString();
    } else {
      break;
    }

    Token colon;
    if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
      return addErrorAndRecover("Missing ':' after object member name", colon,
                                tokenObjectEnd);
    }
    // Make the member the current value while it is being read.
    Value& value = currentValue()[name];
    nodes_.push(&value);
    bool ok = readValue();
    nodes_.pop();
    if (!ok) // error already set
      return recoverFromError(tokenObjectEnd);

    Token comma;
    if (!readToken(comma) ||
        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
         comma.type_ != tokenComment)) {
      return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                comma, tokenObjectEnd);
    }
    // Skip comments that follow the member value.
    bool finalizeTokenOk = true;
    while (comma.type_ == tokenComment && finalizeTokenOk)
      finalizeTokenOk = readToken(comma);
    if (comma.type_ == tokenObjectEnd)
      return true;
  }
  return addErrorAndRecover("Missing '}' or object member name", tokenName,
                            tokenObjectEnd);
}
497
readArray(Token & token)498 bool Reader::readArray(Token& token) {
499 Value init(arrayValue);
500 currentValue().swapPayload(init);
501 currentValue().setOffsetStart(token.start_ - begin_);
502 skipSpaces();
503 if (current_ != end_ && *current_ == ']') // empty array
504 {
505 Token endArray;
506 readToken(endArray);
507 return true;
508 }
509 int index = 0;
510 for (;;) {
511 Value& value = currentValue()[index++];
512 nodes_.push(&value);
513 bool ok = readValue();
514 nodes_.pop();
515 if (!ok) // error already set
516 return recoverFromError(tokenArrayEnd);
517
518 Token currentToken;
519 // Accept Comment after last item in the array.
520 ok = readToken(currentToken);
521 while (currentToken.type_ == tokenComment && ok) {
522 ok = readToken(currentToken);
523 }
524 bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
525 currentToken.type_ != tokenArrayEnd);
526 if (!ok || badTokenType) {
527 return addErrorAndRecover("Missing ',' or ']' in array declaration",
528 currentToken, tokenArrayEnd);
529 }
530 if (currentToken.type_ == tokenArrayEnd)
531 break;
532 }
533 return true;
534 }
535
decodeNumber(Token & token)536 bool Reader::decodeNumber(Token& token) {
537 Value decoded;
538 if (!decodeNumber(token, decoded))
539 return false;
540 currentValue().swapPayload(decoded);
541 currentValue().setOffsetStart(token.start_ - begin_);
542 currentValue().setOffsetLimit(token.end_ - begin_);
543 return true;
544 }
545
decodeNumber(Token & token,Value & decoded)546 bool Reader::decodeNumber(Token& token, Value& decoded) {
547 // Attempts to parse the number as an integer. If the number is
548 // larger than the maximum supported value of an integer then
549 // we decode the number as a double.
550 Location current = token.start_;
551 bool isNegative = *current == '-';
552 if (isNegative)
553 ++current;
554 // TODO: Help the compiler do the div and mod at compile time or get rid of
555 // them.
556 Value::LargestUInt maxIntegerValue =
557 isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
558 : Value::maxLargestUInt;
559 Value::LargestUInt threshold = maxIntegerValue / 10;
560 Value::LargestUInt value = 0;
561 while (current < token.end_) {
562 Char c = *current++;
563 if (c < '0' || c > '9')
564 return decodeDouble(token, decoded);
565 auto digit(static_cast<Value::UInt>(c - '0'));
566 if (value >= threshold) {
567 // We've hit or exceeded the max value divided by 10 (rounded down). If
568 // a) we've only just touched the limit, b) this is the last digit, and
569 // c) it's small enough to fit in that rounding delta, we're okay.
570 // Otherwise treat this number as a double to avoid overflow.
571 if (value > threshold || current != token.end_ ||
572 digit > maxIntegerValue % 10) {
573 return decodeDouble(token, decoded);
574 }
575 }
576 value = value * 10 + digit;
577 }
578 if (isNegative && value == maxIntegerValue)
579 decoded = Value::minLargestInt;
580 else if (isNegative)
581 decoded = -Value::LargestInt(value);
582 else if (value <= Value::LargestUInt(Value::maxInt))
583 decoded = Value::LargestInt(value);
584 else
585 decoded = value;
586 return true;
587 }
588
decodeDouble(Token & token)589 bool Reader::decodeDouble(Token& token) {
590 Value decoded;
591 if (!decodeDouble(token, decoded))
592 return false;
593 currentValue().swapPayload(decoded);
594 currentValue().setOffsetStart(token.start_ - begin_);
595 currentValue().setOffsetLimit(token.end_ - begin_);
596 return true;
597 }
598
decodeDouble(Token & token,Value & decoded)599 bool Reader::decodeDouble(Token& token, Value& decoded) {
600 double value = 0;
601 String buffer(token.start_, token.end_);
602 IStringStream is(buffer);
603 if (!(is >> value))
604 return addError(
605 "'" + String(token.start_, token.end_) + "' is not a number.", token);
606 decoded = value;
607 return true;
608 }
609
decodeString(Token & token)610 bool Reader::decodeString(Token& token) {
611 String decoded_string;
612 if (!decodeString(token, decoded_string))
613 return false;
614 Value decoded(decoded_string);
615 currentValue().swapPayload(decoded);
616 currentValue().setOffsetStart(token.start_ - begin_);
617 currentValue().setOffsetLimit(token.end_ - begin_);
618 return true;
619 }
620
decodeString(Token & token,String & decoded)621 bool Reader::decodeString(Token& token, String& decoded) {
622 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
623 Location current = token.start_ + 1; // skip '"'
624 Location end = token.end_ - 1; // do not include '"'
625 while (current != end) {
626 Char c = *current++;
627 if (c == '"')
628 break;
629 if (c == '\\') {
630 if (current == end)
631 return addError("Empty escape sequence in string", token, current);
632 Char escape = *current++;
633 switch (escape) {
634 case '"':
635 decoded += '"';
636 break;
637 case '/':
638 decoded += '/';
639 break;
640 case '\\':
641 decoded += '\\';
642 break;
643 case 'b':
644 decoded += '\b';
645 break;
646 case 'f':
647 decoded += '\f';
648 break;
649 case 'n':
650 decoded += '\n';
651 break;
652 case 'r':
653 decoded += '\r';
654 break;
655 case 't':
656 decoded += '\t';
657 break;
658 case 'u': {
659 unsigned int unicode;
660 if (!decodeUnicodeCodePoint(token, current, end, unicode))
661 return false;
662 decoded += codePointToUTF8(unicode);
663 } break;
664 default:
665 return addError("Bad escape sequence in string", token, current);
666 }
667 } else {
668 decoded += c;
669 }
670 }
671 return true;
672 }
673
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)674 bool Reader::decodeUnicodeCodePoint(Token& token, Location& current,
675 Location end, unsigned int& unicode) {
676
677 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
678 return false;
679 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
680 // surrogate pairs
681 if (end - current < 6)
682 return addError(
683 "additional six characters expected to parse unicode surrogate pair.",
684 token, current);
685 if (*(current++) == '\\' && *(current++) == 'u') {
686 unsigned int surrogatePair;
687 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
688 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
689 } else
690 return false;
691 } else
692 return addError("expecting another \\u token to begin the second half of "
693 "a unicode surrogate pair",
694 token, current);
695 }
696 return true;
697 }
698
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)699 bool Reader::decodeUnicodeEscapeSequence(Token& token, Location& current,
700 Location end,
701 unsigned int& ret_unicode) {
702 if (end - current < 4)
703 return addError(
704 "Bad unicode escape sequence in string: four digits expected.", token,
705 current);
706 int unicode = 0;
707 for (int index = 0; index < 4; ++index) {
708 Char c = *current++;
709 unicode *= 16;
710 if (c >= '0' && c <= '9')
711 unicode += c - '0';
712 else if (c >= 'a' && c <= 'f')
713 unicode += c - 'a' + 10;
714 else if (c >= 'A' && c <= 'F')
715 unicode += c - 'A' + 10;
716 else
717 return addError(
718 "Bad unicode escape sequence in string: hexadecimal digit expected.",
719 token, current);
720 }
721 ret_unicode = static_cast<unsigned int>(unicode);
722 return true;
723 }
724
addError(const String & message,Token & token,Location extra)725 bool Reader::addError(const String& message, Token& token, Location extra) {
726 ErrorInfo info;
727 info.token_ = token;
728 info.message_ = message;
729 info.extra_ = extra;
730 errors_.push_back(info);
731 return false;
732 }
733
recoverFromError(TokenType skipUntilToken)734 bool Reader::recoverFromError(TokenType skipUntilToken) {
735 size_t const errorCount = errors_.size();
736 Token skip;
737 for (;;) {
738 if (!readToken(skip))
739 errors_.resize(errorCount); // discard errors caused by recovery
740 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
741 break;
742 }
743 errors_.resize(errorCount);
744 return false;
745 }
746
addErrorAndRecover(const String & message,Token & token,TokenType skipUntilToken)747 bool Reader::addErrorAndRecover(const String& message, Token& token,
748 TokenType skipUntilToken) {
749 addError(message, token);
750 return recoverFromError(skipUntilToken);
751 }
752
currentValue()753 Value& Reader::currentValue() { return *(nodes_.top()); }
754
getNextChar()755 Reader::Char Reader::getNextChar() {
756 if (current_ == end_)
757 return 0;
758 return *current_++;
759 }
760
getLocationLineAndColumn(Location location,int & line,int & column) const761 void Reader::getLocationLineAndColumn(Location location, int& line,
762 int& column) const {
763 Location current = begin_;
764 Location lastLineStart = current;
765 line = 0;
766 while (current < location && current != end_) {
767 Char c = *current++;
768 if (c == '\r') {
769 if (*current == '\n')
770 ++current;
771 lastLineStart = current;
772 ++line;
773 } else if (c == '\n') {
774 lastLineStart = current;
775 ++line;
776 }
777 }
778 // column & line start at 1
779 column = int(location - lastLineStart) + 1;
780 ++line;
781 }
782
getLocationLineAndColumn(Location location) const783 String Reader::getLocationLineAndColumn(Location location) const {
784 int line, column;
785 getLocationLineAndColumn(location, line, column);
786 char buffer[18 + 16 + 16 + 1];
787 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
788 return buffer;
789 }
790
791 // Deprecated. Preserved for backward compatibility
getFormatedErrorMessages() const792 String Reader::getFormatedErrorMessages() const {
793 return getFormattedErrorMessages();
794 }
795
getFormattedErrorMessages() const796 String Reader::getFormattedErrorMessages() const {
797 String formattedMessage;
798 for (const auto& error : errors_) {
799 formattedMessage +=
800 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
801 formattedMessage += " " + error.message_ + "\n";
802 if (error.extra_)
803 formattedMessage +=
804 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
805 }
806 return formattedMessage;
807 }
808
getStructuredErrors() const809 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
810 std::vector<Reader::StructuredError> allErrors;
811 for (const auto& error : errors_) {
812 Reader::StructuredError structured;
813 structured.offset_start = error.token_.start_ - begin_;
814 structured.offset_limit = error.token_.end_ - begin_;
815 structured.message = error.message_;
816 allErrors.push_back(structured);
817 }
818 return allErrors;
819 }
820
pushError(const Value & value,const String & message)821 bool Reader::pushError(const Value& value, const String& message) {
822 ptrdiff_t const length = end_ - begin_;
823 if (value.getOffsetStart() > length || value.getOffsetLimit() > length)
824 return false;
825 Token token;
826 token.type_ = tokenError;
827 token.start_ = begin_ + value.getOffsetStart();
828 token.end_ = begin_ + value.getOffsetLimit();
829 ErrorInfo info;
830 info.token_ = token;
831 info.message_ = message;
832 info.extra_ = nullptr;
833 errors_.push_back(info);
834 return true;
835 }
836
pushError(const Value & value,const String & message,const Value & extra)837 bool Reader::pushError(const Value& value, const String& message,
838 const Value& extra) {
839 ptrdiff_t const length = end_ - begin_;
840 if (value.getOffsetStart() > length || value.getOffsetLimit() > length ||
841 extra.getOffsetLimit() > length)
842 return false;
843 Token token;
844 token.type_ = tokenError;
845 token.start_ = begin_ + value.getOffsetStart();
846 token.end_ = begin_ + value.getOffsetLimit();
847 ErrorInfo info;
848 info.token_ = token;
849 info.message_ = message;
850 info.extra_ = begin_ + extra.getOffsetStart();
851 errors_.push_back(info);
852 return true;
853 }
854
good() const855 bool Reader::good() const { return errors_.empty(); }
856
857 // Originally copied from the Features class (now deprecated), used internally
858 // for features implementation.
// Parser options consumed by OurReader.
//
// Every member carries a default initializer, so a default-constructed
// object is fully defined: all flags false, stack limit 0. This matches the
// values a value-initialized object had before the initializers were added.
class OurFeatures {
public:
  // Returns a value-initialized feature set.
  static OurFeatures all();
  bool allowComments_ = false; // when false, comment collection is disabled
  bool allowTrailingCommas_ = false;
  bool strictRoot_ = false; // when true, the root must be an array or object
  bool allowDroppedNullPlaceholders_ = false;
  bool allowNumericKeys_ = false;
  bool allowSingleQuotes_ = false;
  bool failIfExtra_ = false; // when true, text after the root is an error
  bool rejectDupKeys_ = false;
  bool allowSpecialFloats_ = false;
  bool skipBom_ = false; // passed to OurReader::skipBom() at parse start
  size_t stackLimit_ = 0; // nesting bound checked in OurReader::readValue()
}; // OurFeatures
874
all()875 OurFeatures OurFeatures::all() { return {}; }
876
// Implementation of class OurReader
878 // ////////////////////////////////
879
880 // Originally copied from the Reader class (now deprecated), used internally
881 // for implementing JSON reading.
882 class OurReader {
883 public:
884 using Char = char;
885 using Location = const Char*;
886 struct StructuredError {
887 ptrdiff_t offset_start;
888 ptrdiff_t offset_limit;
889 String message;
890 };
891
892 explicit OurReader(OurFeatures const& features);
893 bool parse(const char* beginDoc, const char* endDoc, Value& root,
894 bool collectComments = true);
895 String getFormattedErrorMessages() const;
896 std::vector<StructuredError> getStructuredErrors() const;
897
898 private:
899 OurReader(OurReader const&); // no impl
900 void operator=(OurReader const&); // no impl
901
902 enum TokenType {
903 tokenEndOfStream = 0,
904 tokenObjectBegin,
905 tokenObjectEnd,
906 tokenArrayBegin,
907 tokenArrayEnd,
908 tokenString,
909 tokenNumber,
910 tokenTrue,
911 tokenFalse,
912 tokenNull,
913 tokenNaN,
914 tokenPosInf,
915 tokenNegInf,
916 tokenArraySeparator,
917 tokenMemberSeparator,
918 tokenComment,
919 tokenError
920 };
921
922 class Token {
923 public:
924 TokenType type_;
925 Location start_;
926 Location end_;
927 };
928
929 class ErrorInfo {
930 public:
931 Token token_;
932 String message_;
933 Location extra_;
934 };
935
936 using Errors = std::deque<ErrorInfo>;
937
938 bool readToken(Token& token);
939 void skipSpaces();
940 void skipBom(bool skipBom);
941 bool match(const Char* pattern, int patternLength);
942 bool readComment();
943 bool readCStyleComment(bool* containsNewLineResult);
944 bool readCppStyleComment();
945 bool readString();
946 bool readStringSingleQuote();
947 bool readNumber(bool checkInf);
948 bool readValue();
949 bool readObject(Token& token);
950 bool readArray(Token& token);
951 bool decodeNumber(Token& token);
952 bool decodeNumber(Token& token, Value& decoded);
953 bool decodeString(Token& token);
954 bool decodeString(Token& token, String& decoded);
955 bool decodeDouble(Token& token);
956 bool decodeDouble(Token& token, Value& decoded);
957 bool decodeUnicodeCodePoint(Token& token, Location& current, Location end,
958 unsigned int& unicode);
959 bool decodeUnicodeEscapeSequence(Token& token, Location& current,
960 Location end, unsigned int& unicode);
961 bool addError(const String& message, Token& token, Location extra = nullptr);
962 bool recoverFromError(TokenType skipUntilToken);
963 bool addErrorAndRecover(const String& message, Token& token,
964 TokenType skipUntilToken);
965 void skipUntilSpace();
966 Value& currentValue();
967 Char getNextChar();
968 void getLocationLineAndColumn(Location location, int& line,
969 int& column) const;
970 String getLocationLineAndColumn(Location location) const;
971 void addComment(Location begin, Location end, CommentPlacement placement);
972 void skipCommentTokens(Token& token);
973
974 static String normalizeEOL(Location begin, Location end);
975 static bool containsNewLine(Location begin, Location end);
976
977 using Nodes = std::stack<Value*>;
978
979 Nodes nodes_{};
980 Errors errors_{};
981 String document_{};
982 Location begin_ = nullptr;
983 Location end_ = nullptr;
984 Location current_ = nullptr;
985 Location lastValueEnd_ = nullptr;
986 Value* lastValue_ = nullptr;
987 bool lastValueHasAComment_ = false;
988 String commentsBefore_{};
989
990 OurFeatures const features_;
991 bool collectComments_ = false;
992 }; // OurReader
993
// Complete copy of the Reader implementation, adapted for OurReader.
995
containsNewLine(OurReader::Location begin,OurReader::Location end)996 bool OurReader::containsNewLine(OurReader::Location begin,
997 OurReader::Location end) {
998 return std::any_of(begin, end, [](char b) { return b == '\n' || b == '\r'; });
999 }
1000
OurReader(OurFeatures const & features)1001 OurReader::OurReader(OurFeatures const& features) : features_(features) {}
1002
parse(const char * beginDoc,const char * endDoc,Value & root,bool collectComments)1003 bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
1004 bool collectComments) {
1005 if (!features_.allowComments_) {
1006 collectComments = false;
1007 }
1008
1009 begin_ = beginDoc;
1010 end_ = endDoc;
1011 collectComments_ = collectComments;
1012 current_ = begin_;
1013 lastValueEnd_ = nullptr;
1014 lastValue_ = nullptr;
1015 commentsBefore_.clear();
1016 errors_.clear();
1017 while (!nodes_.empty())
1018 nodes_.pop();
1019 nodes_.push(&root);
1020
1021 // skip byte order mark if it exists at the beginning of the UTF-8 text.
1022 skipBom(features_.skipBom_);
1023 bool successful = readValue();
1024 nodes_.pop();
1025 Token token;
1026 skipCommentTokens(token);
1027 if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
1028 addError("Extra non-whitespace after JSON value.", token);
1029 return false;
1030 }
1031 if (collectComments_ && !commentsBefore_.empty())
1032 root.setComment(commentsBefore_, commentAfter);
1033 if (features_.strictRoot_) {
1034 if (!root.isArray() && !root.isObject()) {
1035 // Set error location to start of doc, ideally should be first token found
1036 // in doc
1037 token.type_ = tokenError;
1038 token.start_ = beginDoc;
1039 token.end_ = endDoc;
1040 addError(
1041 "A valid JSON document must be either an array or an object value.",
1042 token);
1043 return false;
1044 }
1045 }
1046 return successful;
1047 }
1048
readValue()1049 bool OurReader::readValue() {
1050 // To preserve the old behaviour we cast size_t to int.
1051 if (nodes_.size() > features_.stackLimit_)
1052 throwRuntimeError("Exceeded stackLimit in readValue().");
1053 Token token;
1054 skipCommentTokens(token);
1055 bool successful = true;
1056
1057 if (collectComments_ && !commentsBefore_.empty()) {
1058 currentValue().setComment(commentsBefore_, commentBefore);
1059 commentsBefore_.clear();
1060 }
1061
1062 switch (token.type_) {
1063 case tokenObjectBegin:
1064 successful = readObject(token);
1065 currentValue().setOffsetLimit(current_ - begin_);
1066 break;
1067 case tokenArrayBegin:
1068 successful = readArray(token);
1069 currentValue().setOffsetLimit(current_ - begin_);
1070 break;
1071 case tokenNumber:
1072 successful = decodeNumber(token);
1073 break;
1074 case tokenString:
1075 successful = decodeString(token);
1076 break;
1077 case tokenTrue: {
1078 Value v(true);
1079 currentValue().swapPayload(v);
1080 currentValue().setOffsetStart(token.start_ - begin_);
1081 currentValue().setOffsetLimit(token.end_ - begin_);
1082 } break;
1083 case tokenFalse: {
1084 Value v(false);
1085 currentValue().swapPayload(v);
1086 currentValue().setOffsetStart(token.start_ - begin_);
1087 currentValue().setOffsetLimit(token.end_ - begin_);
1088 } break;
1089 case tokenNull: {
1090 Value v;
1091 currentValue().swapPayload(v);
1092 currentValue().setOffsetStart(token.start_ - begin_);
1093 currentValue().setOffsetLimit(token.end_ - begin_);
1094 } break;
1095 case tokenNaN: {
1096 Value v(std::numeric_limits<double>::quiet_NaN());
1097 currentValue().swapPayload(v);
1098 currentValue().setOffsetStart(token.start_ - begin_);
1099 currentValue().setOffsetLimit(token.end_ - begin_);
1100 } break;
1101 case tokenPosInf: {
1102 Value v(std::numeric_limits<double>::infinity());
1103 currentValue().swapPayload(v);
1104 currentValue().setOffsetStart(token.start_ - begin_);
1105 currentValue().setOffsetLimit(token.end_ - begin_);
1106 } break;
1107 case tokenNegInf: {
1108 Value v(-std::numeric_limits<double>::infinity());
1109 currentValue().swapPayload(v);
1110 currentValue().setOffsetStart(token.start_ - begin_);
1111 currentValue().setOffsetLimit(token.end_ - begin_);
1112 } break;
1113 case tokenArraySeparator:
1114 case tokenObjectEnd:
1115 case tokenArrayEnd:
1116 if (features_.allowDroppedNullPlaceholders_) {
1117 // "Un-read" the current token and mark the current value as a null
1118 // token.
1119 current_--;
1120 Value v;
1121 currentValue().swapPayload(v);
1122 currentValue().setOffsetStart(current_ - begin_ - 1);
1123 currentValue().setOffsetLimit(current_ - begin_);
1124 break;
1125 } // else, fall through ...
1126 default:
1127 currentValue().setOffsetStart(token.start_ - begin_);
1128 currentValue().setOffsetLimit(token.end_ - begin_);
1129 return addError("Syntax error: value, object or array expected.", token);
1130 }
1131
1132 if (collectComments_) {
1133 lastValueEnd_ = current_;
1134 lastValueHasAComment_ = false;
1135 lastValue_ = ¤tValue();
1136 }
1137
1138 return successful;
1139 }
1140
skipCommentTokens(Token & token)1141 void OurReader::skipCommentTokens(Token& token) {
1142 if (features_.allowComments_) {
1143 do {
1144 readToken(token);
1145 } while (token.type_ == tokenComment);
1146 } else {
1147 readToken(token);
1148 }
1149 }
1150
readToken(Token & token)1151 bool OurReader::readToken(Token& token) {
1152 skipSpaces();
1153 token.start_ = current_;
1154 Char c = getNextChar();
1155 bool ok = true;
1156 switch (c) {
1157 case '{':
1158 token.type_ = tokenObjectBegin;
1159 break;
1160 case '}':
1161 token.type_ = tokenObjectEnd;
1162 break;
1163 case '[':
1164 token.type_ = tokenArrayBegin;
1165 break;
1166 case ']':
1167 token.type_ = tokenArrayEnd;
1168 break;
1169 case '"':
1170 token.type_ = tokenString;
1171 ok = readString();
1172 break;
1173 case '\'':
1174 if (features_.allowSingleQuotes_) {
1175 token.type_ = tokenString;
1176 ok = readStringSingleQuote();
1177 } else {
1178 // If we don't allow single quotes, this is a failure case.
1179 ok = false;
1180 }
1181 break;
1182 case '/':
1183 token.type_ = tokenComment;
1184 ok = readComment();
1185 break;
1186 case '0':
1187 case '1':
1188 case '2':
1189 case '3':
1190 case '4':
1191 case '5':
1192 case '6':
1193 case '7':
1194 case '8':
1195 case '9':
1196 token.type_ = tokenNumber;
1197 readNumber(false);
1198 break;
1199 case '-':
1200 if (readNumber(true)) {
1201 token.type_ = tokenNumber;
1202 } else {
1203 token.type_ = tokenNegInf;
1204 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1205 }
1206 break;
1207 case '+':
1208 if (readNumber(true)) {
1209 token.type_ = tokenNumber;
1210 } else {
1211 token.type_ = tokenPosInf;
1212 ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1213 }
1214 break;
1215 case 't':
1216 token.type_ = tokenTrue;
1217 ok = match("rue", 3);
1218 break;
1219 case 'f':
1220 token.type_ = tokenFalse;
1221 ok = match("alse", 4);
1222 break;
1223 case 'n':
1224 token.type_ = tokenNull;
1225 ok = match("ull", 3);
1226 break;
1227 case 'N':
1228 if (features_.allowSpecialFloats_) {
1229 token.type_ = tokenNaN;
1230 ok = match("aN", 2);
1231 } else {
1232 ok = false;
1233 }
1234 break;
1235 case 'I':
1236 if (features_.allowSpecialFloats_) {
1237 token.type_ = tokenPosInf;
1238 ok = match("nfinity", 7);
1239 } else {
1240 ok = false;
1241 }
1242 break;
1243 case ',':
1244 token.type_ = tokenArraySeparator;
1245 break;
1246 case ':':
1247 token.type_ = tokenMemberSeparator;
1248 break;
1249 case 0:
1250 token.type_ = tokenEndOfStream;
1251 break;
1252 default:
1253 ok = false;
1254 break;
1255 }
1256 if (!ok)
1257 token.type_ = tokenError;
1258 token.end_ = current_;
1259 return ok;
1260 }
1261
skipSpaces()1262 void OurReader::skipSpaces() {
1263 while (current_ != end_) {
1264 Char c = *current_;
1265 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1266 ++current_;
1267 else
1268 break;
1269 }
1270 }
1271
skipBom(bool skipBom)1272 void OurReader::skipBom(bool skipBom) {
1273 // The default behavior is to skip BOM.
1274 if (skipBom) {
1275 if ((end_ - begin_) >= 3 && strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) {
1276 begin_ += 3;
1277 current_ = begin_;
1278 }
1279 }
1280 }
1281
match(const Char * pattern,int patternLength)1282 bool OurReader::match(const Char* pattern, int patternLength) {
1283 if (end_ - current_ < patternLength)
1284 return false;
1285 int index = patternLength;
1286 while (index--)
1287 if (current_[index] != pattern[index])
1288 return false;
1289 current_ += patternLength;
1290 return true;
1291 }
1292
readComment()1293 bool OurReader::readComment() {
1294 const Location commentBegin = current_ - 1;
1295 const Char c = getNextChar();
1296 bool successful = false;
1297 bool cStyleWithEmbeddedNewline = false;
1298
1299 const bool isCStyleComment = (c == '*');
1300 const bool isCppStyleComment = (c == '/');
1301 if (isCStyleComment) {
1302 successful = readCStyleComment(&cStyleWithEmbeddedNewline);
1303 } else if (isCppStyleComment) {
1304 successful = readCppStyleComment();
1305 }
1306
1307 if (!successful)
1308 return false;
1309
1310 if (collectComments_) {
1311 CommentPlacement placement = commentBefore;
1312
1313 if (!lastValueHasAComment_) {
1314 if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1315 if (isCppStyleComment || !cStyleWithEmbeddedNewline) {
1316 placement = commentAfterOnSameLine;
1317 lastValueHasAComment_ = true;
1318 }
1319 }
1320 }
1321
1322 addComment(commentBegin, current_, placement);
1323 }
1324 return true;
1325 }
1326
normalizeEOL(OurReader::Location begin,OurReader::Location end)1327 String OurReader::normalizeEOL(OurReader::Location begin,
1328 OurReader::Location end) {
1329 String normalized;
1330 normalized.reserve(static_cast<size_t>(end - begin));
1331 OurReader::Location current = begin;
1332 while (current != end) {
1333 char c = *current++;
1334 if (c == '\r') {
1335 if (current != end && *current == '\n')
1336 // convert dos EOL
1337 ++current;
1338 // convert Mac EOL
1339 normalized += '\n';
1340 } else {
1341 normalized += c;
1342 }
1343 }
1344 return normalized;
1345 }
1346
addComment(Location begin,Location end,CommentPlacement placement)1347 void OurReader::addComment(Location begin, Location end,
1348 CommentPlacement placement) {
1349 assert(collectComments_);
1350 const String& normalized = normalizeEOL(begin, end);
1351 if (placement == commentAfterOnSameLine) {
1352 assert(lastValue_ != nullptr);
1353 lastValue_->setComment(normalized, placement);
1354 } else {
1355 commentsBefore_ += normalized;
1356 }
1357 }
1358
readCStyleComment(bool * containsNewLineResult)1359 bool OurReader::readCStyleComment(bool* containsNewLineResult) {
1360 *containsNewLineResult = false;
1361
1362 while ((current_ + 1) < end_) {
1363 Char c = getNextChar();
1364 if (c == '*' && *current_ == '/')
1365 break;
1366 if (c == '\n')
1367 *containsNewLineResult = true;
1368 }
1369
1370 return getNextChar() == '/';
1371 }
1372
readCppStyleComment()1373 bool OurReader::readCppStyleComment() {
1374 while (current_ != end_) {
1375 Char c = getNextChar();
1376 if (c == '\n')
1377 break;
1378 if (c == '\r') {
1379 // Consume DOS EOL. It will be normalized in addComment.
1380 if (current_ != end_ && *current_ == '\n')
1381 getNextChar();
1382 // Break on Moc OS 9 EOL.
1383 break;
1384 }
1385 }
1386 return true;
1387 }
1388
readNumber(bool checkInf)1389 bool OurReader::readNumber(bool checkInf) {
1390 Location p = current_;
1391 if (checkInf && p != end_ && *p == 'I') {
1392 current_ = ++p;
1393 return false;
1394 }
1395 char c = '0'; // stopgap for already consumed character
1396 // integral part
1397 while (c >= '0' && c <= '9')
1398 c = (current_ = p) < end_ ? *p++ : '\0';
1399 // fractional part
1400 if (c == '.') {
1401 c = (current_ = p) < end_ ? *p++ : '\0';
1402 while (c >= '0' && c <= '9')
1403 c = (current_ = p) < end_ ? *p++ : '\0';
1404 }
1405 // exponential part
1406 if (c == 'e' || c == 'E') {
1407 c = (current_ = p) < end_ ? *p++ : '\0';
1408 if (c == '+' || c == '-')
1409 c = (current_ = p) < end_ ? *p++ : '\0';
1410 while (c >= '0' && c <= '9')
1411 c = (current_ = p) < end_ ? *p++ : '\0';
1412 }
1413 return true;
1414 }
readString()1415 bool OurReader::readString() {
1416 Char c = 0;
1417 while (current_ != end_) {
1418 c = getNextChar();
1419 if (c == '\\')
1420 getNextChar();
1421 else if (c == '"')
1422 break;
1423 }
1424 return c == '"';
1425 }
1426
readStringSingleQuote()1427 bool OurReader::readStringSingleQuote() {
1428 Char c = 0;
1429 while (current_ != end_) {
1430 c = getNextChar();
1431 if (c == '\\')
1432 getNextChar();
1433 else if (c == '\'')
1434 break;
1435 }
1436 return c == '\'';
1437 }
1438
// Reads the members of a JSON object into currentValue(); `token` is the
// already consumed '{' token and supplies the object's start offset.
//
// Each loop iteration reads one "name : value" member followed by ',' or
// '}'. Comment tokens between these pieces are skipped. On any syntax error
// an error is recorded (unless the failing callee already did so), the input
// is skipped up to the next '}' token or the end of the stream, and false is
// returned. Calls throwRuntimeError for a member name of 2^30 or more
// characters.
bool OurReader::readObject(Token& token) {
  Token tokenName;
  String name;
  // Replace whatever the current value held with an empty object.
  Value init(objectValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);
  while (readToken(tokenName)) {
    // Skip comments that precede the member name.
    bool initialTokenOk = true;
    while (tokenName.type_ == tokenComment && initialTokenOk)
      initialTokenOk = readToken(tokenName);
    if (!initialTokenOk)
      break;
    // `name` still holds the previous member's name here, so it is empty on
    // the first iteration.
    if (tokenName.type_ == tokenObjectEnd &&
        (name.empty() ||
         features_.allowTrailingCommas_)) // empty object or trailing comma
      return true;
    name.clear();
    if (tokenName.type_ == tokenString) {
      if (!decodeString(tokenName, name))
        return recoverFromError(tokenObjectEnd);
    } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
      // Numeric keys are stored under the number's string representation.
      Value numberName;
      if (!decodeNumber(tokenName, numberName))
        return recoverFromError(tokenObjectEnd);
      name = numberName.asString();
    } else {
      // Neither a name nor an acceptable '}': reported after the loop.
      break;
    }
    if (name.length() >= (1U << 30))
      throwRuntimeError("keylength >= 2^30");
    if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
      String msg = "Duplicate key: '" + name + "'";
      return addErrorAndRecover(msg, tokenName, tokenObjectEnd);
    }

    Token colon;
    if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
      return addErrorAndRecover("Missing ':' after object member name", colon,
                                tokenObjectEnd);
    }
    // Make the member the current value while its content is being read.
    Value& value = currentValue()[name];
    nodes_.push(&value);
    bool ok = readValue();
    nodes_.pop();
    if (!ok) // error already set
      return recoverFromError(tokenObjectEnd);

    Token comma;
    if (!readToken(comma) ||
        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
         comma.type_ != tokenComment)) {
      return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                comma, tokenObjectEnd);
    }
    // Skip comments that follow the member value.
    bool finalizeTokenOk = true;
    while (comma.type_ == tokenComment && finalizeTokenOk)
      finalizeTokenOk = readToken(comma);
    if (comma.type_ == tokenObjectEnd)
      return true;
  }
  return addErrorAndRecover("Missing '}' or object member name", tokenName,
                            tokenObjectEnd);
}
1502
// Reads the elements of a JSON array into currentValue(); `token` is the
// already consumed '[' token and supplies the array's start offset.
//
// Each loop iteration reads one element followed by ',' or ']'. Comment
// tokens after an element are skipped. On a syntax error an error is
// recorded (unless readValue() already did so), the input is skipped up to
// the next ']' token or the end of the stream, and false is returned.
bool OurReader::readArray(Token& token) {
  // Replace whatever the current value held with an empty array.
  Value init(arrayValue);
  currentValue().swapPayload(init);
  currentValue().setOffsetStart(token.start_ - begin_);
  int index = 0;
  for (;;) {
    skipSpaces();
    // A ']' right here closes the array only if no element has been read
    // yet, or if trailing commas are allowed while dropped null placeholders
    // are not.
    if (current_ != end_ && *current_ == ']' &&
        (index == 0 ||
         (features_.allowTrailingCommas_ &&
          !features_.allowDroppedNullPlaceholders_))) // empty array or trailing
                                                      // comma
    {
      // Consume the ']' token.
      Token endArray;
      readToken(endArray);
      return true;
    }
    // Make the next element the current value while its content is read.
    Value& value = currentValue()[index++];
    nodes_.push(&value);
    bool ok = readValue();
    nodes_.pop();
    if (!ok) // error already set
      return recoverFromError(tokenArrayEnd);

    Token currentToken;
    // Accept Comment after last item in the array.
    ok = readToken(currentToken);
    while (currentToken.type_ == tokenComment && ok) {
      ok = readToken(currentToken);
    }
    bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
                         currentToken.type_ != tokenArrayEnd);
    if (!ok || badTokenType) {
      return addErrorAndRecover("Missing ',' or ']' in array declaration",
                                currentToken, tokenArrayEnd);
    }
    if (currentToken.type_ == tokenArrayEnd)
      break;
  }
  return true;
}
1544
decodeNumber(Token & token)1545 bool OurReader::decodeNumber(Token& token) {
1546 Value decoded;
1547 if (!decodeNumber(token, decoded))
1548 return false;
1549 currentValue().swapPayload(decoded);
1550 currentValue().setOffsetStart(token.start_ - begin_);
1551 currentValue().setOffsetLimit(token.end_ - begin_);
1552 return true;
1553 }
1554
decodeNumber(Token & token,Value & decoded)1555 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1556 // Attempts to parse the number as an integer. If the number is
1557 // larger than the maximum supported value of an integer then
1558 // we decode the number as a double.
1559 Location current = token.start_;
1560 const bool isNegative = *current == '-';
1561 if (isNegative) {
1562 ++current;
1563 }
1564
1565 // We assume we can represent the largest and smallest integer types as
1566 // unsigned integers with separate sign. This is only true if they can fit
1567 // into an unsigned integer.
1568 static_assert(Value::maxLargestInt <= Value::maxLargestUInt,
1569 "Int must be smaller than UInt");
1570
1571 // We need to convert minLargestInt into a positive number. The easiest way
1572 // to do this conversion is to assume our "threshold" value of minLargestInt
1573 // divided by 10 can fit in maxLargestInt when absolute valued. This should
1574 // be a safe assumption.
1575 static_assert(Value::minLargestInt <= -Value::maxLargestInt,
1576 "The absolute value of minLargestInt must be greater than or "
1577 "equal to maxLargestInt");
1578 static_assert(Value::minLargestInt / 10 >= -Value::maxLargestInt,
1579 "The absolute value of minLargestInt must be only 1 magnitude "
1580 "larger than maxLargest Int");
1581
1582 static constexpr Value::LargestUInt positive_threshold =
1583 Value::maxLargestUInt / 10;
1584 static constexpr Value::UInt positive_last_digit = Value::maxLargestUInt % 10;
1585
1586 // For the negative values, we have to be more careful. Since typically
1587 // -Value::minLargestInt will cause an overflow, we first divide by 10 and
1588 // then take the inverse. This assumes that minLargestInt is only a single
1589 // power of 10 different in magnitude, which we check above. For the last
1590 // digit, we take the modulus before negating for the same reason.
1591 static constexpr auto negative_threshold =
1592 Value::LargestUInt(-(Value::minLargestInt / 10));
1593 static constexpr auto negative_last_digit =
1594 Value::UInt(-(Value::minLargestInt % 10));
1595
1596 const Value::LargestUInt threshold =
1597 isNegative ? negative_threshold : positive_threshold;
1598 const Value::UInt max_last_digit =
1599 isNegative ? negative_last_digit : positive_last_digit;
1600
1601 Value::LargestUInt value = 0;
1602 while (current < token.end_) {
1603 Char c = *current++;
1604 if (c < '0' || c > '9')
1605 return decodeDouble(token, decoded);
1606
1607 const auto digit(static_cast<Value::UInt>(c - '0'));
1608 if (value >= threshold) {
1609 // We've hit or exceeded the max value divided by 10 (rounded down). If
1610 // a) we've only just touched the limit, meaing value == threshold,
1611 // b) this is the last digit, or
1612 // c) it's small enough to fit in that rounding delta, we're okay.
1613 // Otherwise treat this number as a double to avoid overflow.
1614 if (value > threshold || current != token.end_ ||
1615 digit > max_last_digit) {
1616 return decodeDouble(token, decoded);
1617 }
1618 }
1619 value = value * 10 + digit;
1620 }
1621
1622 if (isNegative) {
1623 // We use the same magnitude assumption here, just in case.
1624 const auto last_digit = static_cast<Value::UInt>(value % 10);
1625 decoded = -Value::LargestInt(value / 10) * 10 - last_digit;
1626 } else if (value <= Value::LargestUInt(Value::maxLargestInt)) {
1627 decoded = Value::LargestInt(value);
1628 } else {
1629 decoded = value;
1630 }
1631
1632 return true;
1633 }
1634
decodeDouble(Token & token)1635 bool OurReader::decodeDouble(Token& token) {
1636 Value decoded;
1637 if (!decodeDouble(token, decoded))
1638 return false;
1639 currentValue().swapPayload(decoded);
1640 currentValue().setOffsetStart(token.start_ - begin_);
1641 currentValue().setOffsetLimit(token.end_ - begin_);
1642 return true;
1643 }
1644
decodeDouble(Token & token,Value & decoded)1645 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1646 double value = 0;
1647 const String buffer(token.start_, token.end_);
1648 IStringStream is(buffer);
1649 if (!(is >> value)) {
1650 return addError(
1651 "'" + String(token.start_, token.end_) + "' is not a number.", token);
1652 }
1653 decoded = value;
1654 return true;
1655 }
1656
decodeString(Token & token)1657 bool OurReader::decodeString(Token& token) {
1658 String decoded_string;
1659 if (!decodeString(token, decoded_string))
1660 return false;
1661 Value decoded(decoded_string);
1662 currentValue().swapPayload(decoded);
1663 currentValue().setOffsetStart(token.start_ - begin_);
1664 currentValue().setOffsetLimit(token.end_ - begin_);
1665 return true;
1666 }
1667
decodeString(Token & token,String & decoded)1668 bool OurReader::decodeString(Token& token, String& decoded) {
1669 decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
1670 Location current = token.start_ + 1; // skip '"'
1671 Location end = token.end_ - 1; // do not include '"'
1672 while (current != end) {
1673 Char c = *current++;
1674 if (c == '"')
1675 break;
1676 if (c == '\\') {
1677 if (current == end)
1678 return addError("Empty escape sequence in string", token, current);
1679 Char escape = *current++;
1680 switch (escape) {
1681 case '"':
1682 decoded += '"';
1683 break;
1684 case '/':
1685 decoded += '/';
1686 break;
1687 case '\\':
1688 decoded += '\\';
1689 break;
1690 case 'b':
1691 decoded += '\b';
1692 break;
1693 case 'f':
1694 decoded += '\f';
1695 break;
1696 case 'n':
1697 decoded += '\n';
1698 break;
1699 case 'r':
1700 decoded += '\r';
1701 break;
1702 case 't':
1703 decoded += '\t';
1704 break;
1705 case 'u': {
1706 unsigned int unicode;
1707 if (!decodeUnicodeCodePoint(token, current, end, unicode))
1708 return false;
1709 decoded += codePointToUTF8(unicode);
1710 } break;
1711 default:
1712 return addError("Bad escape sequence in string", token, current);
1713 }
1714 } else {
1715 decoded += c;
1716 }
1717 }
1718 return true;
1719 }
1720
decodeUnicodeCodePoint(Token & token,Location & current,Location end,unsigned int & unicode)1721 bool OurReader::decodeUnicodeCodePoint(Token& token, Location& current,
1722 Location end, unsigned int& unicode) {
1723
1724 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1725 return false;
1726 if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1727 // surrogate pairs
1728 if (end - current < 6)
1729 return addError(
1730 "additional six characters expected to parse unicode surrogate pair.",
1731 token, current);
1732 if (*(current++) == '\\' && *(current++) == 'u') {
1733 unsigned int surrogatePair;
1734 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1735 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1736 } else
1737 return false;
1738 } else
1739 return addError("expecting another \\u token to begin the second half of "
1740 "a unicode surrogate pair",
1741 token, current);
1742 }
1743 return true;
1744 }
1745
decodeUnicodeEscapeSequence(Token & token,Location & current,Location end,unsigned int & ret_unicode)1746 bool OurReader::decodeUnicodeEscapeSequence(Token& token, Location& current,
1747 Location end,
1748 unsigned int& ret_unicode) {
1749 if (end - current < 4)
1750 return addError(
1751 "Bad unicode escape sequence in string: four digits expected.", token,
1752 current);
1753 int unicode = 0;
1754 for (int index = 0; index < 4; ++index) {
1755 Char c = *current++;
1756 unicode *= 16;
1757 if (c >= '0' && c <= '9')
1758 unicode += c - '0';
1759 else if (c >= 'a' && c <= 'f')
1760 unicode += c - 'a' + 10;
1761 else if (c >= 'A' && c <= 'F')
1762 unicode += c - 'A' + 10;
1763 else
1764 return addError(
1765 "Bad unicode escape sequence in string: hexadecimal digit expected.",
1766 token, current);
1767 }
1768 ret_unicode = static_cast<unsigned int>(unicode);
1769 return true;
1770 }
1771
addError(const String & message,Token & token,Location extra)1772 bool OurReader::addError(const String& message, Token& token, Location extra) {
1773 ErrorInfo info;
1774 info.token_ = token;
1775 info.message_ = message;
1776 info.extra_ = extra;
1777 errors_.push_back(info);
1778 return false;
1779 }
1780
recoverFromError(TokenType skipUntilToken)1781 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1782 size_t errorCount = errors_.size();
1783 Token skip;
1784 for (;;) {
1785 if (!readToken(skip))
1786 errors_.resize(errorCount); // discard errors caused by recovery
1787 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1788 break;
1789 }
1790 errors_.resize(errorCount);
1791 return false;
1792 }
1793
// Records `message` as an error for `token`, then skips input until a token
// of type `skipUntilToken` (or the end of the stream) has been read.
// Returns the result of recoverFromError().
bool OurReader::addErrorAndRecover(const String& message, Token& token,
                                   TokenType skipUntilToken) {
  addError(message, token);
  return recoverFromError(skipUntilToken);
}
1799
// Returns the value on top of the node stack, i.e. the value currently being
// filled in. The stack must not be empty.
Value& OurReader::currentValue() { return *(nodes_.top()); }
1801
getNextChar()1802 OurReader::Char OurReader::getNextChar() {
1803 if (current_ == end_)
1804 return 0;
1805 return *current_++;
1806 }
1807
getLocationLineAndColumn(Location location,int & line,int & column) const1808 void OurReader::getLocationLineAndColumn(Location location, int& line,
1809 int& column) const {
1810 Location current = begin_;
1811 Location lastLineStart = current;
1812 line = 0;
1813 while (current < location && current != end_) {
1814 Char c = *current++;
1815 if (c == '\r') {
1816 if (*current == '\n')
1817 ++current;
1818 lastLineStart = current;
1819 ++line;
1820 } else if (c == '\n') {
1821 lastLineStart = current;
1822 ++line;
1823 }
1824 }
1825 // column & line start at 1
1826 column = int(location - lastLineStart) + 1;
1827 ++line;
1828 }
1829
getLocationLineAndColumn(Location location) const1830 String OurReader::getLocationLineAndColumn(Location location) const {
1831 int line, column;
1832 getLocationLineAndColumn(location, line, column);
1833 char buffer[18 + 16 + 16 + 1];
1834 jsoncpp_snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1835 return buffer;
1836 }
1837
getFormattedErrorMessages() const1838 String OurReader::getFormattedErrorMessages() const {
1839 String formattedMessage;
1840 for (const auto& error : errors_) {
1841 formattedMessage +=
1842 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1843 formattedMessage += " " + error.message_ + "\n";
1844 if (error.extra_)
1845 formattedMessage +=
1846 "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1847 }
1848 return formattedMessage;
1849 }
1850
getStructuredErrors() const1851 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1852 std::vector<OurReader::StructuredError> allErrors;
1853 for (const auto& error : errors_) {
1854 OurReader::StructuredError structured;
1855 structured.offset_start = error.token_.start_ - begin_;
1856 structured.offset_limit = error.token_.end_ - begin_;
1857 structured.message = error.message_;
1858 allErrors.push_back(structured);
1859 }
1860 return allErrors;
1861 }
1862
1863 class OurCharReader : public CharReader {
1864 bool const collectComments_;
1865 OurReader reader_;
1866
1867 public:
OurCharReader(bool collectComments,OurFeatures const & features)1868 OurCharReader(bool collectComments, OurFeatures const& features)
1869 : collectComments_(collectComments), reader_(features) {}
parse(char const * beginDoc,char const * endDoc,Value * root,String * errs)1870 bool parse(char const* beginDoc, char const* endDoc, Value* root,
1871 String* errs) override {
1872 bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1873 if (errs) {
1874 *errs = reader_.getFormattedErrorMessages();
1875 }
1876 return ok;
1877 }
1878 };
1879
// A new builder starts out with the default settings (see setDefaults()).
CharReaderBuilder::CharReaderBuilder() { setDefaults(&settings_); }
// Nothing to release explicitly; the compiler-generated destructor is used.
CharReaderBuilder::~CharReaderBuilder() = default;
newCharReader() const1882 CharReader* CharReaderBuilder::newCharReader() const {
1883 bool collectComments = settings_["collectComments"].asBool();
1884 OurFeatures features = OurFeatures::all();
1885 features.allowComments_ = settings_["allowComments"].asBool();
1886 features.allowTrailingCommas_ = settings_["allowTrailingCommas"].asBool();
1887 features.strictRoot_ = settings_["strictRoot"].asBool();
1888 features.allowDroppedNullPlaceholders_ =
1889 settings_["allowDroppedNullPlaceholders"].asBool();
1890 features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1891 features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1892
1893 // Stack limit is always a size_t, so we get this as an unsigned int
1894 // regardless of it we have 64-bit integer support enabled.
1895 features.stackLimit_ = static_cast<size_t>(settings_["stackLimit"].asUInt());
1896 features.failIfExtra_ = settings_["failIfExtra"].asBool();
1897 features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1898 features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1899 features.skipBom_ = settings_["skipBom"].asBool();
1900 return new OurCharReader(collectComments, features);
1901 }
1902
validate(Json::Value * invalid) const1903 bool CharReaderBuilder::validate(Json::Value* invalid) const {
1904 static const auto& valid_keys = *new std::set<String>{
1905 "collectComments",
1906 "allowComments",
1907 "allowTrailingCommas",
1908 "strictRoot",
1909 "allowDroppedNullPlaceholders",
1910 "allowNumericKeys",
1911 "allowSingleQuotes",
1912 "stackLimit",
1913 "failIfExtra",
1914 "rejectDupKeys",
1915 "allowSpecialFloats",
1916 "skipBom",
1917 };
1918 for (auto si = settings_.begin(); si != settings_.end(); ++si) {
1919 auto key = si.name();
1920 if (valid_keys.count(key))
1921 continue;
1922 if (invalid)
1923 (*invalid)[key] = *si;
1924 else
1925 return false;
1926 }
1927 return invalid ? invalid->empty() : true;
1928 }
1929
// Gives read/write access to the setting stored under `key` by forwarding to
// settings_[key].
Value& CharReaderBuilder::operator[](const String& key) {
  return settings_[key];
}
1933 // static
// Fills *settings with the strict configuration: every leniency option is
// turned off, while strictRoot, failIfExtra and rejectDupKeys are turned on.
// "collectComments" is not touched. The //! markers delimit a named snippet
// and must be kept.
void CharReaderBuilder::strictMode(Json::Value* settings) {
  //! [CharReaderBuilderStrictMode]
  (*settings)["allowComments"] = false;
  (*settings)["allowTrailingCommas"] = false;
  (*settings)["strictRoot"] = true;
  (*settings)["allowDroppedNullPlaceholders"] = false;
  (*settings)["allowNumericKeys"] = false;
  (*settings)["allowSingleQuotes"] = false;
  (*settings)["stackLimit"] = 1000;
  (*settings)["failIfExtra"] = true;
  (*settings)["rejectDupKeys"] = true;
  (*settings)["allowSpecialFloats"] = false;
  (*settings)["skipBom"] = true;
  //! [CharReaderBuilderStrictMode]
}
1949 // static
// Fills *settings with the default configuration: comments are allowed and
// collected, trailing commas are allowed, a BOM is skipped, and every other
// option is off, with a stack limit of 1000. The //! markers delimit a named
// snippet and must be kept.
void CharReaderBuilder::setDefaults(Json::Value* settings) {
  //! [CharReaderBuilderDefaults]
  (*settings)["collectComments"] = true;
  (*settings)["allowComments"] = true;
  (*settings)["allowTrailingCommas"] = true;
  (*settings)["strictRoot"] = false;
  (*settings)["allowDroppedNullPlaceholders"] = false;
  (*settings)["allowNumericKeys"] = false;
  (*settings)["allowSingleQuotes"] = false;
  (*settings)["stackLimit"] = 1000;
  (*settings)["failIfExtra"] = false;
  (*settings)["rejectDupKeys"] = false;
  (*settings)["allowSpecialFloats"] = false;
  (*settings)["skipBom"] = true;
  //! [CharReaderBuilderDefaults]
}
1966
1967 //////////////////////////////////
1968 // global functions
1969
parseFromStream(CharReader::Factory const & fact,IStream & sin,Value * root,String * errs)1970 bool parseFromStream(CharReader::Factory const& fact, IStream& sin, Value* root,
1971 String* errs) {
1972 OStringStream ssin;
1973 ssin << sin.rdbuf();
1974 String doc = ssin.str();
1975 char const* begin = doc.data();
1976 char const* end = begin + doc.size();
1977 // Note that we do not actually need a null-terminator.
1978 CharReaderPtr const reader(fact.newCharReader());
1979 return reader->parse(begin, end, root, errs);
1980 }
1981
operator >>(IStream & sin,Value & root)1982 IStream& operator>>(IStream& sin, Value& root) {
1983 CharReaderBuilder b;
1984 String errs;
1985 bool ok = parseFromStream(b, sin, &root, &errs);
1986 if (!ok) {
1987 throwRuntimeError(errs);
1988 }
1989 return sin;
1990 }
1991
1992 } // namespace Json
1993