// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// A JSON parser, converting from a std::string_view to a base::Value.
//
// The JSON spec is:
// https://tools.ietf.org/rfc/rfc8259.txt
// which obsoletes the earlier RFCs 4627, 7158 and 7159.
//
// This RFC should be equivalent to the informal spec:
// https://www.json.org/json-en.html
//
// Implementation choices permitted by the RFC:
// - Nesting is limited (to a configurable depth, 200 by default).
// - Numbers are limited to those representable by a finite double. The
//   conversion from a JSON number (in the std::string_view input) to a
//   double-flavored base::Value may also be lossy.
// - The input (which must be UTF-8) may begin with a BOM (Byte Order Mark).
// - Duplicate object keys (strings) are silently allowed. Last key-value pair
//   wins. Previous pairs are discarded.
//
// Configurable (see the JSONParserOptions type) deviations from the RFC:
// - Allow trailing commas: "[1,2,]".
// - Replace invalid Unicode with U+FFFD REPLACEMENT CHARACTER.
// - Allow "// etc\n" and "/* etc */" C-style comments.
// - Allow ASCII control characters, including literal (not escaped) NUL bytes
//   and new lines, within a JSON string.
// - Allow "\\v" escapes within a JSON string, producing a vertical tab.
// - Allow "\\x23" escapes within a JSON string. Subtly, the 2-digit hex value
//   is a Unicode code point, not a UTF-8 byte. For example, "\\xFF" in the
//   JSON source decodes to a base::Value whose string contains "\xC3\xBF", the
//   UTF-8 encoding of U+00FF LATIN SMALL LETTER Y WITH DIAERESIS. Converting
//   from UTF-8 to UTF-16, e.g. via UTF8ToWide, will recover a 16-bit 0x00FF.
35 36 #ifndef BASE_JSON_JSON_READER_H_ 37 #define BASE_JSON_JSON_READER_H_ 38 39 #include <optional> 40 #include <string> 41 #include <string_view> 42 43 #include "base/base_export.h" 44 #include "base/json/json_common.h" 45 #include "base/strings/string_number_conversions.h" 46 #include "base/types/expected.h" 47 #include "base/values.h" 48 49 namespace base { 50 51 enum JSONParserOptions { 52 // Parses the input strictly according to RFC 8259. 53 JSON_PARSE_RFC = 0, 54 55 // Allows commas to exist after the last element in structures. 56 JSON_ALLOW_TRAILING_COMMAS = 1 << 0, 57 58 // If set the parser replaces invalid code points (i.e. lone 59 // surrogates) with the Unicode replacement character (U+FFFD). If 60 // not set, invalid code points trigger a hard error and parsing 61 // fails. 62 JSON_REPLACE_INVALID_CHARACTERS = 1 << 1, 63 64 // Allows both C (/* */) and C++ (//) style comments. 65 JSON_ALLOW_COMMENTS = 1 << 2, 66 67 // Permits unescaped ASCII control characters (such as unescaped \r and \n) 68 // in the range [0x00,0x1F]. 69 JSON_ALLOW_CONTROL_CHARS = 1 << 3, 70 71 // Permits \\v vertical tab escapes. 72 JSON_ALLOW_VERT_TAB = 1 << 4, 73 74 // Permits \\xNN escapes as described above. 75 JSON_ALLOW_X_ESCAPES = 1 << 5, 76 77 // Permits exactly \r and \n to occur in strings, which is normally not 78 // allowed; this is a subset of the behavior of JSON_ALLOW_CONTROL_CHARS. 79 JSON_ALLOW_NEWLINES_IN_STRINGS = 1 << 6, 80 81 // This parser historically accepted, without configuration flags, 82 // non-standard JSON extensions. This flag enables that traditional parsing 83 // behavior. 84 // 85 // This set of options is mirrored in Rust 86 // base::JsonOptions::with_chromium_extensions(). 
87 JSON_PARSE_CHROMIUM_EXTENSIONS = JSON_ALLOW_COMMENTS | 88 JSON_ALLOW_NEWLINES_IN_STRINGS | 89 JSON_ALLOW_X_ESCAPES, 90 }; 91 92 class BASE_EXPORT JSONReader { 93 public: 94 struct BASE_EXPORT Error { 95 std::string message; 96 int line = 0; 97 int column = 0; 98 ToStringError99 std::string ToString() const { 100 return "line " + base::NumberToString(line) + ", column " + 101 base::NumberToString(column) + ": " + message; 102 } 103 }; 104 105 using Result = base::expected<Value, Error>; 106 107 // This class contains only static methods. 108 JSONReader() = delete; 109 JSONReader(const JSONReader&) = delete; 110 JSONReader& operator=(const JSONReader&) = delete; 111 112 // Reads and parses |json|, returning a Value. 113 // If |json| is not a properly formed JSON string, returns absl::nullopt. 114 static std::optional<Value> Read( 115 std::string_view json, 116 int options = JSON_PARSE_CHROMIUM_EXTENSIONS, 117 size_t max_depth = internal::kAbsoluteMaxDepth); 118 119 // Reads and parses |json|, returning a Value::Dict. 120 // If |json| is not a properly formed JSON dict string, returns absl::nullopt. 121 static std::optional<Value::Dict> ReadDict( 122 std::string_view json, 123 int options = JSON_PARSE_CHROMIUM_EXTENSIONS, 124 size_t max_depth = internal::kAbsoluteMaxDepth); 125 126 // Reads and parses |json| like Read(). On success returns a Value as the 127 // expected value. Otherwise, it returns an Error instance, populated with a 128 // formatted error message, an error code, and the error location if 129 // appropriate as the error value of the expected type. 130 static Result ReadAndReturnValueWithError( 131 std::string_view json, 132 int options = JSON_PARSE_CHROMIUM_EXTENSIONS); 133 134 // Determine whether the Rust parser is in use. 135 static bool UsingRust(); 136 }; 137 138 } // namespace base 139 140 #endif // BASE_JSON_JSON_READER_H_ 141