1 #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP 2 #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP 3 4 #include <boost/assert.hpp> 5 #include <boost/range/iterator_range_core.hpp> 6 7 #include <utility> 8 9 namespace boost { namespace property_tree { 10 namespace json_parser { namespace detail 11 { 12 13 struct external_wide_encoding 14 { 15 typedef wchar_t external_char; 16 is_nlboost::property_tree::json_parser::detail::external_wide_encoding17 bool is_nl(wchar_t c) const { return c == L'\n'; } is_wsboost::property_tree::json_parser::detail::external_wide_encoding18 bool is_ws(wchar_t c) const { 19 return c == L' ' || c == L'\t' || c == L'\n' || c == L'\r'; 20 } 21 is_minusboost::property_tree::json_parser::detail::external_wide_encoding22 bool is_minus(wchar_t c) const { return c == L'-'; } is_plusminusboost::property_tree::json_parser::detail::external_wide_encoding23 bool is_plusminus(wchar_t c) const { return c == L'+' || c == L'-'; } is_dotboost::property_tree::json_parser::detail::external_wide_encoding24 bool is_dot(wchar_t c) const { return c == L'.'; } is_eEboost::property_tree::json_parser::detail::external_wide_encoding25 bool is_eE(wchar_t c) const { return c == L'e' || c == L'E'; } is_0boost::property_tree::json_parser::detail::external_wide_encoding26 bool is_0(wchar_t c) const { return c == L'0'; } is_digitboost::property_tree::json_parser::detail::external_wide_encoding27 bool is_digit(wchar_t c) const { return c >= L'0' && c <= L'9'; } is_digit0boost::property_tree::json_parser::detail::external_wide_encoding28 bool is_digit0(wchar_t c) const { return c >= L'1' && c <= L'9'; } 29 is_quoteboost::property_tree::json_parser::detail::external_wide_encoding30 bool is_quote(wchar_t c) const { return c == L'"'; } is_backslashboost::property_tree::json_parser::detail::external_wide_encoding31 bool is_backslash(wchar_t c) const { return c == L'\\'; } is_slashboost::property_tree::json_parser::detail::external_wide_encoding32 bool is_slash(wchar_t c) const { return c == L'/'; } 33 is_commaboost::property_tree::json_parser::detail::external_wide_encoding34 bool is_comma(wchar_t c) const { return c == L','; } is_open_bracketboost::property_tree::json_parser::detail::external_wide_encoding35 bool is_open_bracket(wchar_t c) const { return c == L'['; } is_close_bracketboost::property_tree::json_parser::detail::external_wide_encoding36 bool is_close_bracket(wchar_t c) const { return c == L']'; } is_colonboost::property_tree::json_parser::detail::external_wide_encoding37 bool is_colon(wchar_t c) const { return c == L':'; } is_open_braceboost::property_tree::json_parser::detail::external_wide_encoding38 bool is_open_brace(wchar_t c) const { return c == L'{'; } is_close_braceboost::property_tree::json_parser::detail::external_wide_encoding39 bool is_close_brace(wchar_t c) const { return c == L'}'; } 40 is_aboost::property_tree::json_parser::detail::external_wide_encoding41 bool is_a(wchar_t c) const { return c == L'a'; } is_bboost::property_tree::json_parser::detail::external_wide_encoding42 bool is_b(wchar_t c) const { return c == L'b'; } is_eboost::property_tree::json_parser::detail::external_wide_encoding43 bool is_e(wchar_t c) const { return c == L'e'; } is_fboost::property_tree::json_parser::detail::external_wide_encoding44 bool is_f(wchar_t c) const { return c == L'f'; } is_lboost::property_tree::json_parser::detail::external_wide_encoding45 bool is_l(wchar_t c) const { return c == L'l'; } is_nboost::property_tree::json_parser::detail::external_wide_encoding46 bool is_n(wchar_t c) const { return c == L'n'; } is_rboost::property_tree::json_parser::detail::external_wide_encoding47 bool is_r(wchar_t c) const { return c == L'r'; } is_sboost::property_tree::json_parser::detail::external_wide_encoding48 bool is_s(wchar_t c) const { return c == L's'; } is_tboost::property_tree::json_parser::detail::external_wide_encoding49 bool is_t(wchar_t c) const { return c == L't'; } is_uboost::property_tree::json_parser::detail::external_wide_encoding50 bool is_u(wchar_t c) const { return c == L'u'; } 51 decode_hexdigitboost::property_tree::json_parser::detail::external_wide_encoding52 int decode_hexdigit(wchar_t c) { 53 if (c >= L'0' && c <= L'9') return c - L'0'; 54 if (c >= L'A' && c <= L'F') return c - L'A' + 10; 55 if (c >= L'a' && c <= L'f') return c - L'a' + 10; 56 return -1; 57 } 58 }; 59 60 template <bool B> struct is_utf16 {}; 61 62 class wide_wide_encoding : public external_wide_encoding 63 { 64 typedef is_utf16<sizeof(wchar_t) == 2> test_utf16; 65 public: 66 typedef wchar_t internal_char; 67 68 template <typename Iterator> 69 boost::iterator_range<Iterator> to_internal(Iterator first,Iterator last) const70 to_internal(Iterator first, Iterator last) const { 71 return boost::make_iterator_range(first, last); 72 } 73 to_internal_trivial(wchar_t c) const74 wchar_t to_internal_trivial(wchar_t c) const { 75 BOOST_ASSERT(!is_surrogate_high(c) && !is_surrogate_low(c)); 76 return c; 77 } 78 79 template <typename Iterator, typename Sentinel, 80 typename EncodingErrorFn> skip_codepoint(Iterator & cur,Sentinel end,EncodingErrorFn error_fn) const81 void skip_codepoint(Iterator& cur, Sentinel end, 82 EncodingErrorFn error_fn) const { 83 transcode_codepoint(cur, end, DoNothing(), error_fn); 84 } 85 86 template <typename Iterator, typename Sentinel, typename TranscodedFn, 87 typename EncodingErrorFn> transcode_codepoint(Iterator & cur,Sentinel end,TranscodedFn transcoded_fn,EncodingErrorFn error_fn) const88 void transcode_codepoint(Iterator& cur, Sentinel end, 89 TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const { 90 return transcode_codepoint(cur, end, transcoded_fn, error_fn, 91 test_utf16()); 92 } 93 94 template <typename TranscodedFn> feed_codepoint(unsigned codepoint,TranscodedFn transcoded_fn) const95 void feed_codepoint(unsigned codepoint, 96 TranscodedFn transcoded_fn) const { 97 feed_codepoint(codepoint, transcoded_fn, test_utf16()); 98 } 99 100 template <typename Iterator, typename Sentinel> skip_introduction(Iterator & cur,Sentinel end) const101 void skip_introduction(Iterator& cur, Sentinel end) const { 102 // Endianness is already decoded at this level. 103 if (cur != end && *cur == 0xfeff) { 104 ++cur; 105 } 106 } 107 108 private: 109 struct DoNothing { operator ()boost::property_tree::json_parser::detail::wide_wide_encoding::DoNothing110 void operator ()(wchar_t) const {} 111 }; 112 113 template <typename Iterator, typename Sentinel, typename TranscodedFn, 114 typename EncodingErrorFn> transcode_codepoint(Iterator & cur,Sentinel,TranscodedFn transcoded_fn,EncodingErrorFn error_fn,is_utf16<false>) const115 void transcode_codepoint(Iterator& cur, Sentinel, 116 TranscodedFn transcoded_fn, 117 EncodingErrorFn error_fn, 118 is_utf16<false>) const { 119 wchar_t c = *cur; 120 if (c < 0x20) { 121 error_fn(); 122 } 123 transcoded_fn(c); 124 ++cur; 125 } 126 template <typename Iterator, typename Sentinel, typename TranscodedFn, 127 typename EncodingErrorFn> transcode_codepoint(Iterator & cur,Sentinel end,TranscodedFn transcoded_fn,EncodingErrorFn error_fn,is_utf16<true>) const128 void transcode_codepoint(Iterator& cur, Sentinel end, 129 TranscodedFn transcoded_fn, 130 EncodingErrorFn error_fn, 131 is_utf16<true>) const { 132 wchar_t c = *cur; 133 if (c < 0x20) { 134 error_fn(); 135 } 136 if (is_surrogate_low(c)) { 137 error_fn(); 138 } 139 transcoded_fn(c); 140 ++cur; 141 if (is_surrogate_high(c)) { 142 if (cur == end) { 143 error_fn(); 144 } 145 c = *cur; 146 if (!is_surrogate_low(c)) { 147 error_fn(); 148 } 149 transcoded_fn(c); 150 ++cur; 151 } 152 } 153 154 template <typename TranscodedFn> feed_codepoint(unsigned codepoint,TranscodedFn transcoded_fn,is_utf16<false>) const155 void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn, 156 is_utf16<false>) const { 157 transcoded_fn(static_cast<wchar_t>(codepoint)); 158 } 159 template <typename TranscodedFn> feed_codepoint(unsigned codepoint,TranscodedFn transcoded_fn,is_utf16<true>) const160 void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn, 161 is_utf16<true>) const { 162 if (codepoint < 0x10000) { 163 transcoded_fn(static_cast<wchar_t>(codepoint)); 164 } else { 165 codepoint -= 0x10000; 166 transcoded_fn(static_cast<wchar_t>((codepoint >> 10) | 0xd800)); 167 transcoded_fn(static_cast<wchar_t>( 168 (codepoint & 0x3ff) | 0xdc00)); 169 } 170 } 171 is_surrogate_high(unsigned codepoint)172 static bool is_surrogate_high(unsigned codepoint) { 173 return (codepoint & 0xfc00) == 0xd800; 174 } is_surrogate_low(unsigned codepoint)175 static bool is_surrogate_low(unsigned codepoint) { 176 return (codepoint & 0xfc00) == 0xdc00; 177 } 178 }; 179 180 }}}} 181 182 #endif 183