xref: /aosp_15_r20/external/cronet/base/json/json_reader.h (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // A JSON parser, converting from a std::string_view to a base::Value.
6 //
7 // The JSON spec is:
8 // https://tools.ietf.org/rfc/rfc8259.txt
9 // which obsoletes the earlier RFCs 4627, 7158 and 7159.
10 //
11 // This RFC should be equivalent to the informal spec:
12 // https://www.json.org/json-en.html
13 //
14 // Implementation choices permitted by the RFC:
15 // - Nesting is limited (to a configurable depth, 200 by default).
16 // - Numbers are limited to those representable by a finite double. The
17 //   conversion from a JSON number (in the std::string_view input) to a
18 //   double-flavored base::Value may also be lossy.
19 // - The input (which must be UTF-8) may begin with a BOM (Byte Order Mark).
20 // - Duplicate object keys (strings) are silently allowed. Last key-value pair
21 //   wins. Previous pairs are discarded.
22 //
23 // Configurable (see the JSONParserOptions type) deviations from the RFC:
24 // - Allow trailing commas: "[1,2,]".
25 // - Replace invalid Unicode with U+FFFD REPLACEMENT CHARACTER.
26 // - Allow "// etc\n" and "/* etc */" C-style comments.
27 // - Allow ASCII control characters, including literal (not escaped) NUL bytes
28 //   and new lines, within a JSON string.
29 // - Allow "\\v" escapes within a JSON string, producing a vertical tab.
30 // - Allow "\\x23" escapes within a JSON string. Subtly, the 2-digit hex value
31 //   is a Unicode code point, not a UTF-8 byte. For example, "\\xFF" in the
32 //   JSON source decodes to a base::Value whose string contains "\xC3\xBF", the
33 //   UTF-8 encoding of U+00FF LATIN SMALL LETTER Y WITH DIAERESIS. Converting
34 //   from UTF-8 to UTF-16, e.g. via UTF8ToWide, will recover a 16-bit 0x00FF.
35 
36 #ifndef BASE_JSON_JSON_READER_H_
37 #define BASE_JSON_JSON_READER_H_
38 
39 #include <optional>
40 #include <string>
41 #include <string_view>
42 
43 #include "base/base_export.h"
44 #include "base/json/json_common.h"
45 #include "base/strings/string_number_conversions.h"
46 #include "base/types/expected.h"
47 #include "base/values.h"
48 
49 namespace base {
50 
// Bit flags selecting which deviations from RFC 8259 the JSON parser
// tolerates. Individual flags occupy distinct bits so they can be combined
// with bitwise OR, as JSON_PARSE_CHROMIUM_EXTENSIONS below does.
enum JSONParserOptions {
  // No extensions at all: the input must conform strictly to RFC 8259.
  JSON_PARSE_RFC = 0,

  // Accepts a comma after the final element of a structure.
  JSON_ALLOW_TRAILING_COMMAS = 1 << 0,

  // When set, invalid code points (i.e. lone surrogates) are replaced with
  // the Unicode replacement character (U+FFFD). When clear, an invalid code
  // point is a hard error and parsing fails.
  JSON_REPLACE_INVALID_CHARACTERS = 1 << 1,

  // Accepts both C (/* */) and C++ (//) style comments.
  JSON_ALLOW_COMMENTS = 1 << 2,

  // Accepts unescaped ASCII control characters in the range [0x00,0x1F],
  // such as unescaped \r and \n.
  JSON_ALLOW_CONTROL_CHARS = 1 << 3,

  // Accepts \\v vertical tab escapes.
  JSON_ALLOW_VERT_TAB = 1 << 4,

  // Accepts \\xNN escapes; see the file-level comment for how NN is decoded.
  JSON_ALLOW_X_ESCAPES = 1 << 5,

  // Accepts exactly \r and \n inside strings, which is normally an error.
  // This is a subset of what JSON_ALLOW_CONTROL_CHARS accepts.
  JSON_ALLOW_NEWLINES_IN_STRINGS = 1 << 6,

  // Historically this parser accepted non-standard JSON extensions without
  // any configuration flags. This combination re-enables that traditional
  // behavior.
  //
  // The same set of options is mirrored in Rust by
  // base::JsonOptions::with_chromium_extensions().
  JSON_PARSE_CHROMIUM_EXTENSIONS = JSON_ALLOW_COMMENTS |
                                   JSON_ALLOW_X_ESCAPES |
                                   JSON_ALLOW_NEWLINES_IN_STRINGS,
};
91 
92 class BASE_EXPORT JSONReader {
93  public:
94   struct BASE_EXPORT Error {
95     std::string message;
96     int line = 0;
97     int column = 0;
98 
ToStringError99     std::string ToString() const {
100       return "line " + base::NumberToString(line) + ", column " +
101              base::NumberToString(column) + ": " + message;
102     }
103   };
104 
105   using Result = base::expected<Value, Error>;
106 
107   // This class contains only static methods.
108   JSONReader() = delete;
109   JSONReader(const JSONReader&) = delete;
110   JSONReader& operator=(const JSONReader&) = delete;
111 
112   // Reads and parses |json|, returning a Value.
113   // If |json| is not a properly formed JSON string, returns absl::nullopt.
114   static std::optional<Value> Read(
115       std::string_view json,
116       int options = JSON_PARSE_CHROMIUM_EXTENSIONS,
117       size_t max_depth = internal::kAbsoluteMaxDepth);
118 
119   // Reads and parses |json|, returning a Value::Dict.
120   // If |json| is not a properly formed JSON dict string, returns absl::nullopt.
121   static std::optional<Value::Dict> ReadDict(
122       std::string_view json,
123       int options = JSON_PARSE_CHROMIUM_EXTENSIONS,
124       size_t max_depth = internal::kAbsoluteMaxDepth);
125 
126   // Reads and parses |json| like Read(). On success returns a Value as the
127   // expected value. Otherwise, it returns an Error instance, populated with a
128   // formatted error message, an error code, and the error location if
129   // appropriate as the error value of the expected type.
130   static Result ReadAndReturnValueWithError(
131       std::string_view json,
132       int options = JSON_PARSE_CHROMIUM_EXTENSIONS);
133 
134   // Determine whether the Rust parser is in use.
135   static bool UsingRust();
136 };
137 
138 }  // namespace base
139 
140 #endif  // BASE_JSON_JSON_READER_H_
141