1 #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_NARROW_ENCODING_HPP
2 #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_NARROW_ENCODING_HPP
3 
4 #include <boost/assert.hpp>
5 #include <boost/range/iterator_range_core.hpp>
6 
7 #include <utility>
8 
9 namespace boost { namespace property_tree {
10     namespace json_parser { namespace detail
11 {
12 
13     struct external_ascii_superset_encoding
14     {
15         typedef char external_char;
16 
is_nlboost::property_tree::json_parser::detail::external_ascii_superset_encoding17         bool is_nl(char c) const { return c == '\n'; }
is_wsboost::property_tree::json_parser::detail::external_ascii_superset_encoding18         bool is_ws(char c) const {
19             return c == ' ' || c == '\t' || c == '\n' || c == '\r';
20         }
21 
is_minusboost::property_tree::json_parser::detail::external_ascii_superset_encoding22         bool is_minus(char c) const { return c == '-'; }
is_plusminusboost::property_tree::json_parser::detail::external_ascii_superset_encoding23         bool is_plusminus(char c) const { return c == '+' || c == '-'; }
is_dotboost::property_tree::json_parser::detail::external_ascii_superset_encoding24         bool is_dot(char c) const { return c == '.'; }
is_eEboost::property_tree::json_parser::detail::external_ascii_superset_encoding25         bool is_eE(char c) const { return c == 'e' || c == 'E'; }
is_0boost::property_tree::json_parser::detail::external_ascii_superset_encoding26         bool is_0(char c) const { return c == '0'; }
is_digitboost::property_tree::json_parser::detail::external_ascii_superset_encoding27         bool is_digit(char c) const { return c >= '0' && c <= '9'; }
is_digit0boost::property_tree::json_parser::detail::external_ascii_superset_encoding28         bool is_digit0(char c) const { return c >= '1' && c <= '9'; }
29 
is_quoteboost::property_tree::json_parser::detail::external_ascii_superset_encoding30         bool is_quote(char c) const { return c == '"'; }
is_backslashboost::property_tree::json_parser::detail::external_ascii_superset_encoding31         bool is_backslash(char c) const { return c == '\\'; }
is_slashboost::property_tree::json_parser::detail::external_ascii_superset_encoding32         bool is_slash(char c) const { return c == '/'; }
33 
is_commaboost::property_tree::json_parser::detail::external_ascii_superset_encoding34         bool is_comma(char c) const { return c == ','; }
is_open_bracketboost::property_tree::json_parser::detail::external_ascii_superset_encoding35         bool is_open_bracket(char c) const { return c == '['; }
is_close_bracketboost::property_tree::json_parser::detail::external_ascii_superset_encoding36         bool is_close_bracket(char c) const { return c == ']'; }
is_colonboost::property_tree::json_parser::detail::external_ascii_superset_encoding37         bool is_colon(char c) const { return c == ':'; }
is_open_braceboost::property_tree::json_parser::detail::external_ascii_superset_encoding38         bool is_open_brace(char c) const { return c == '{'; }
is_close_braceboost::property_tree::json_parser::detail::external_ascii_superset_encoding39         bool is_close_brace(char c) const { return c == '}'; }
40 
is_aboost::property_tree::json_parser::detail::external_ascii_superset_encoding41         bool is_a(char c) const { return c == 'a'; }
is_bboost::property_tree::json_parser::detail::external_ascii_superset_encoding42         bool is_b(char c) const { return c == 'b'; }
is_eboost::property_tree::json_parser::detail::external_ascii_superset_encoding43         bool is_e(char c) const { return c == 'e'; }
is_fboost::property_tree::json_parser::detail::external_ascii_superset_encoding44         bool is_f(char c) const { return c == 'f'; }
is_lboost::property_tree::json_parser::detail::external_ascii_superset_encoding45         bool is_l(char c) const { return c == 'l'; }
is_nboost::property_tree::json_parser::detail::external_ascii_superset_encoding46         bool is_n(char c) const { return c == 'n'; }
is_rboost::property_tree::json_parser::detail::external_ascii_superset_encoding47         bool is_r(char c) const { return c == 'r'; }
is_sboost::property_tree::json_parser::detail::external_ascii_superset_encoding48         bool is_s(char c) const { return c == 's'; }
is_tboost::property_tree::json_parser::detail::external_ascii_superset_encoding49         bool is_t(char c) const { return c == 't'; }
is_uboost::property_tree::json_parser::detail::external_ascii_superset_encoding50         bool is_u(char c) const { return c == 'u'; }
51 
decode_hexdigitboost::property_tree::json_parser::detail::external_ascii_superset_encoding52         int decode_hexdigit(char c) {
53             if (c >= '0' && c <= '9') return c - '0';
54             if (c >= 'A' && c <= 'F') return c - 'A' + 10;
55             if (c >= 'a' && c <= 'f') return c - 'a' + 10;
56             return -1;
57         }
58     };
59 
60     struct utf8_utf8_encoding : external_ascii_superset_encoding
61     {
62         typedef char internal_char;
63 
64         template <typename Iterator>
65         boost::iterator_range<Iterator>
to_internalboost::property_tree::json_parser::detail::utf8_utf8_encoding66         to_internal(Iterator first, Iterator last) const {
67             return boost::make_iterator_range(first, last);
68         }
69 
to_internal_trivialboost::property_tree::json_parser::detail::utf8_utf8_encoding70         char to_internal_trivial(char c) const {
71             BOOST_ASSERT(static_cast<unsigned char>(c) <= 0x7f);
72             return c;
73         }
74 
75         template <typename Iterator, typename Sentinel,
76                   typename EncodingErrorFn>
skip_codepointboost::property_tree::json_parser::detail::utf8_utf8_encoding77         void skip_codepoint(Iterator& cur, Sentinel end,
78                             EncodingErrorFn error_fn) const {
79             transcode_codepoint(cur, end, DoNothing(), error_fn);
80         }
81 
82         template <typename Iterator, typename Sentinel, typename TranscodedFn,
83                   typename EncodingErrorFn>
transcode_codepointboost::property_tree::json_parser::detail::utf8_utf8_encoding84         void transcode_codepoint(Iterator& cur, Sentinel end,
85                 TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const {
86             unsigned char c = *cur;
87             ++cur;
88             if (c <= 0x7f) {
89                 // Solo byte, filter out disallowed codepoints.
90                 if (c < 0x20) {
91                     error_fn();
92                 }
93                 transcoded_fn(c);
94                 return;
95             }
96             int trailing = trail_table(c);
97             if (trailing == -1) {
98                 // Standalone trailing byte or overly long sequence.
99                 error_fn();
100             }
101             transcoded_fn(c);
102             for (int i = 0; i < trailing; ++i) {
103                 if (cur == end || !is_trail(*cur)) {
104                     error_fn();
105                 }
106                 transcoded_fn(*cur);
107                 ++cur;
108             }
109         }
110 
111         template <typename TranscodedFn>
feed_codepointboost::property_tree::json_parser::detail::utf8_utf8_encoding112         void feed_codepoint(unsigned codepoint,
113                             TranscodedFn transcoded_fn) const {
114             if (codepoint <= 0x7f) {
115                 transcoded_fn(static_cast<char>(codepoint));
116             } else if (codepoint <= 0x7ff) {
117                 transcoded_fn(static_cast<char>(0xc0 | (codepoint >> 6)));
118                 transcoded_fn(trail(codepoint));
119             } else if (codepoint <= 0xffff) {
120                 transcoded_fn(static_cast<char>(0xe0 | (codepoint >> 12)));
121                 transcoded_fn(trail(codepoint >> 6));
122                 transcoded_fn(trail(codepoint));
123             } else if (codepoint <= 0x10ffff) {
124                 transcoded_fn(static_cast<char>(0xf0 | (codepoint >> 18)));
125                 transcoded_fn(trail(codepoint >> 12));
126                 transcoded_fn(trail(codepoint >> 6));
127                 transcoded_fn(trail(codepoint));
128             }
129         }
130 
131         template <typename Iterator, typename Sentinel>
skip_introductionboost::property_tree::json_parser::detail::utf8_utf8_encoding132         void skip_introduction(Iterator& cur, Sentinel end) const {
133             if (cur != end && static_cast<unsigned char>(*cur) == 0xef) {
134                 if (++cur == end) return;
135                 if (++cur == end) return;
136                 if (++cur == end) return;
137             }
138         }
139 
140     private:
141         struct DoNothing {
operator ()boost::property_tree::json_parser::detail::utf8_utf8_encoding::DoNothing142             void operator ()(char) const {}
143         };
144 
is_trailboost::property_tree::json_parser::detail::utf8_utf8_encoding145         bool is_trail(unsigned char c) const {
146             return (c & 0xc0) == 0x80;
147         }
148 
trail_tableboost::property_tree::json_parser::detail::utf8_utf8_encoding149         int trail_table(unsigned char c) const {
150             static const signed char table[] = {
151                                  /* not a lead byte */
152                 /* 0x10???sss */ -1, -1, -1, -1, -1, -1, -1, -1,
153                 /* 0x110??sss */ 1, 1, 1, 1, /* 1 trailing byte */
154                 /* 0x1110?sss */ 2, 2, /* 2 trailing bytes */
155                 /* 0x11110sss */ 3, /* 3 trailing bytes */
156                 /* 0x11111sss */ -1 /* 4 or 5 trailing bytes, disallowed */
157             };
158             return table[(c & 0x7f) >> 3];
159         }
160 
trailboost::property_tree::json_parser::detail::utf8_utf8_encoding161         char trail(unsigned unmasked) const {
162             return static_cast<char>(0x80 | (unmasked & 0x3f));
163         }
164     };
165 
166 }}}}
167 
168 #endif
169