1 #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP
2 #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP
3 
4 #include <boost/assert.hpp>
5 #include <boost/range/iterator_range_core.hpp>
6 
7 #include <utility>
8 
9 namespace boost { namespace property_tree {
10     namespace json_parser { namespace detail
11 {
12 
13     struct external_wide_encoding
14     {
15         typedef wchar_t external_char;
16 
is_nlboost::property_tree::json_parser::detail::external_wide_encoding17         bool is_nl(wchar_t c) const { return c == L'\n'; }
is_wsboost::property_tree::json_parser::detail::external_wide_encoding18         bool is_ws(wchar_t c) const {
19             return c == L' ' || c == L'\t' || c == L'\n' || c == L'\r';
20         }
21 
is_minusboost::property_tree::json_parser::detail::external_wide_encoding22         bool is_minus(wchar_t c) const { return c == L'-'; }
is_plusminusboost::property_tree::json_parser::detail::external_wide_encoding23         bool is_plusminus(wchar_t c) const { return c == L'+' || c == L'-'; }
is_dotboost::property_tree::json_parser::detail::external_wide_encoding24         bool is_dot(wchar_t c) const { return c == L'.'; }
is_eEboost::property_tree::json_parser::detail::external_wide_encoding25         bool is_eE(wchar_t c) const { return c == L'e' || c == L'E'; }
is_0boost::property_tree::json_parser::detail::external_wide_encoding26         bool is_0(wchar_t c) const { return c == L'0'; }
is_digitboost::property_tree::json_parser::detail::external_wide_encoding27         bool is_digit(wchar_t c) const { return c >= L'0' && c <= L'9'; }
is_digit0boost::property_tree::json_parser::detail::external_wide_encoding28         bool is_digit0(wchar_t c) const { return c >= L'1' && c <= L'9'; }
29 
is_quoteboost::property_tree::json_parser::detail::external_wide_encoding30         bool is_quote(wchar_t c) const { return c == L'"'; }
is_backslashboost::property_tree::json_parser::detail::external_wide_encoding31         bool is_backslash(wchar_t c) const { return c == L'\\'; }
is_slashboost::property_tree::json_parser::detail::external_wide_encoding32         bool is_slash(wchar_t c) const { return c == L'/'; }
33 
is_commaboost::property_tree::json_parser::detail::external_wide_encoding34         bool is_comma(wchar_t c) const { return c == L','; }
is_open_bracketboost::property_tree::json_parser::detail::external_wide_encoding35         bool is_open_bracket(wchar_t c) const { return c == L'['; }
is_close_bracketboost::property_tree::json_parser::detail::external_wide_encoding36         bool is_close_bracket(wchar_t c) const { return c == L']'; }
is_colonboost::property_tree::json_parser::detail::external_wide_encoding37         bool is_colon(wchar_t c) const { return c == L':'; }
is_open_braceboost::property_tree::json_parser::detail::external_wide_encoding38         bool is_open_brace(wchar_t c) const { return c == L'{'; }
is_close_braceboost::property_tree::json_parser::detail::external_wide_encoding39         bool is_close_brace(wchar_t c) const { return c == L'}'; }
40 
is_aboost::property_tree::json_parser::detail::external_wide_encoding41         bool is_a(wchar_t c) const { return c == L'a'; }
is_bboost::property_tree::json_parser::detail::external_wide_encoding42         bool is_b(wchar_t c) const { return c == L'b'; }
is_eboost::property_tree::json_parser::detail::external_wide_encoding43         bool is_e(wchar_t c) const { return c == L'e'; }
is_fboost::property_tree::json_parser::detail::external_wide_encoding44         bool is_f(wchar_t c) const { return c == L'f'; }
is_lboost::property_tree::json_parser::detail::external_wide_encoding45         bool is_l(wchar_t c) const { return c == L'l'; }
is_nboost::property_tree::json_parser::detail::external_wide_encoding46         bool is_n(wchar_t c) const { return c == L'n'; }
is_rboost::property_tree::json_parser::detail::external_wide_encoding47         bool is_r(wchar_t c) const { return c == L'r'; }
is_sboost::property_tree::json_parser::detail::external_wide_encoding48         bool is_s(wchar_t c) const { return c == L's'; }
is_tboost::property_tree::json_parser::detail::external_wide_encoding49         bool is_t(wchar_t c) const { return c == L't'; }
is_uboost::property_tree::json_parser::detail::external_wide_encoding50         bool is_u(wchar_t c) const { return c == L'u'; }
51 
decode_hexdigitboost::property_tree::json_parser::detail::external_wide_encoding52         int decode_hexdigit(wchar_t c) {
53             if (c >= L'0' && c <= L'9') return c - L'0';
54             if (c >= L'A' && c <= L'F') return c - L'A' + 10;
55             if (c >= L'a' && c <= L'f') return c - L'a' + 10;
56             return -1;
57         }
58     };
59 
60     template <bool B> struct is_utf16 {};
61 
62     class wide_wide_encoding : public external_wide_encoding
63     {
64         typedef is_utf16<sizeof(wchar_t) == 2> test_utf16;
65     public:
66         typedef wchar_t internal_char;
67 
68         template <typename Iterator>
69         boost::iterator_range<Iterator>
to_internal(Iterator first,Iterator last) const70         to_internal(Iterator first, Iterator last) const {
71             return boost::make_iterator_range(first, last);
72         }
73 
to_internal_trivial(wchar_t c) const74         wchar_t to_internal_trivial(wchar_t c) const {
75             BOOST_ASSERT(!is_surrogate_high(c) && !is_surrogate_low(c));
76             return c;
77         }
78 
79         template <typename Iterator, typename Sentinel,
80                   typename EncodingErrorFn>
skip_codepoint(Iterator & cur,Sentinel end,EncodingErrorFn error_fn) const81         void skip_codepoint(Iterator& cur, Sentinel end,
82                             EncodingErrorFn error_fn) const {
83             transcode_codepoint(cur, end, DoNothing(), error_fn);
84         }
85 
86         template <typename Iterator, typename Sentinel, typename TranscodedFn,
87                   typename EncodingErrorFn>
transcode_codepoint(Iterator & cur,Sentinel end,TranscodedFn transcoded_fn,EncodingErrorFn error_fn) const88         void transcode_codepoint(Iterator& cur, Sentinel end,
89                 TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const {
90             return transcode_codepoint(cur, end, transcoded_fn, error_fn,
91                                        test_utf16());
92         }
93 
94         template <typename TranscodedFn>
feed_codepoint(unsigned codepoint,TranscodedFn transcoded_fn) const95         void feed_codepoint(unsigned codepoint,
96                             TranscodedFn transcoded_fn) const {
97             feed_codepoint(codepoint, transcoded_fn, test_utf16());
98         }
99 
100         template <typename Iterator, typename Sentinel>
skip_introduction(Iterator & cur,Sentinel end) const101         void skip_introduction(Iterator& cur, Sentinel end) const {
102             // Endianness is already decoded at this level.
103             if (cur != end && *cur == 0xfeff) {
104                 ++cur;
105             }
106         }
107 
108     private:
109         struct DoNothing {
operator ()boost::property_tree::json_parser::detail::wide_wide_encoding::DoNothing110             void operator ()(wchar_t) const {}
111         };
112 
113         template <typename Iterator, typename Sentinel, typename TranscodedFn,
114                   typename EncodingErrorFn>
transcode_codepoint(Iterator & cur,Sentinel,TranscodedFn transcoded_fn,EncodingErrorFn error_fn,is_utf16<false>) const115         void transcode_codepoint(Iterator& cur, Sentinel,
116                                  TranscodedFn transcoded_fn,
117                                  EncodingErrorFn error_fn,
118                                  is_utf16<false>) const {
119             wchar_t c = *cur;
120             if (c < 0x20) {
121                 error_fn();
122             }
123             transcoded_fn(c);
124             ++cur;
125         }
126         template <typename Iterator, typename Sentinel, typename TranscodedFn,
127                   typename EncodingErrorFn>
transcode_codepoint(Iterator & cur,Sentinel end,TranscodedFn transcoded_fn,EncodingErrorFn error_fn,is_utf16<true>) const128         void transcode_codepoint(Iterator& cur, Sentinel end,
129                                  TranscodedFn transcoded_fn,
130                                  EncodingErrorFn error_fn,
131                                  is_utf16<true>) const {
132             wchar_t c = *cur;
133             if (c < 0x20) {
134                 error_fn();
135             }
136             if (is_surrogate_low(c)) {
137                 error_fn();
138             }
139             transcoded_fn(c);
140             ++cur;
141             if (is_surrogate_high(c)) {
142                 if (cur == end) {
143                     error_fn();
144                 }
145                 c = *cur;
146                 if (!is_surrogate_low(c)) {
147                     error_fn();
148                 }
149                 transcoded_fn(c);
150                 ++cur;
151             }
152         }
153 
154         template <typename TranscodedFn>
feed_codepoint(unsigned codepoint,TranscodedFn transcoded_fn,is_utf16<false>) const155         void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn,
156                             is_utf16<false>) const {
157             transcoded_fn(static_cast<wchar_t>(codepoint));
158         }
159         template <typename TranscodedFn>
feed_codepoint(unsigned codepoint,TranscodedFn transcoded_fn,is_utf16<true>) const160         void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn,
161                             is_utf16<true>) const {
162             if (codepoint < 0x10000) {
163                 transcoded_fn(static_cast<wchar_t>(codepoint));
164             } else {
165                 codepoint -= 0x10000;
166                 transcoded_fn(static_cast<wchar_t>((codepoint >> 10) | 0xd800));
167                 transcoded_fn(static_cast<wchar_t>(
168                     (codepoint & 0x3ff) | 0xdc00));
169             }
170         }
171 
is_surrogate_high(unsigned codepoint)172         static bool is_surrogate_high(unsigned codepoint) {
173             return (codepoint & 0xfc00) == 0xd800;
174         }
is_surrogate_low(unsigned codepoint)175         static bool is_surrogate_low(unsigned codepoint) {
176             return (codepoint & 0xfc00) == 0xdc00;
177         }
178     };
179 
180 }}}}
181 
182 #endif
183