1 #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_NARROW_ENCODING_HPP 2 #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_NARROW_ENCODING_HPP 3 4 #include <boost/assert.hpp> 5 #include <boost/range/iterator_range_core.hpp> 6 7 #include <utility> 8 9 namespace boost { namespace property_tree { 10 namespace json_parser { namespace detail 11 { 12 13 struct external_ascii_superset_encoding 14 { 15 typedef char external_char; 16 is_nlboost::property_tree::json_parser::detail::external_ascii_superset_encoding17 bool is_nl(char c) const { return c == '\n'; } is_wsboost::property_tree::json_parser::detail::external_ascii_superset_encoding18 bool is_ws(char c) const { 19 return c == ' ' || c == '\t' || c == '\n' || c == '\r'; 20 } 21 is_minusboost::property_tree::json_parser::detail::external_ascii_superset_encoding22 bool is_minus(char c) const { return c == '-'; } is_plusminusboost::property_tree::json_parser::detail::external_ascii_superset_encoding23 bool is_plusminus(char c) const { return c == '+' || c == '-'; } is_dotboost::property_tree::json_parser::detail::external_ascii_superset_encoding24 bool is_dot(char c) const { return c == '.'; } is_eEboost::property_tree::json_parser::detail::external_ascii_superset_encoding25 bool is_eE(char c) const { return c == 'e' || c == 'E'; } is_0boost::property_tree::json_parser::detail::external_ascii_superset_encoding26 bool is_0(char c) const { return c == '0'; } is_digitboost::property_tree::json_parser::detail::external_ascii_superset_encoding27 bool is_digit(char c) const { return c >= '0' && c <= '9'; } is_digit0boost::property_tree::json_parser::detail::external_ascii_superset_encoding28 bool is_digit0(char c) const { return c >= '1' && c <= '9'; } 29 is_quoteboost::property_tree::json_parser::detail::external_ascii_superset_encoding30 bool is_quote(char c) const { return c == '"'; } is_backslashboost::property_tree::json_parser::detail::external_ascii_superset_encoding31 bool is_backslash(char c) const { return c == '\\'; } is_slashboost::property_tree::json_parser::detail::external_ascii_superset_encoding32 bool is_slash(char c) const { return c == '/'; } 33 is_commaboost::property_tree::json_parser::detail::external_ascii_superset_encoding34 bool is_comma(char c) const { return c == ','; } is_open_bracketboost::property_tree::json_parser::detail::external_ascii_superset_encoding35 bool is_open_bracket(char c) const { return c == '['; } is_close_bracketboost::property_tree::json_parser::detail::external_ascii_superset_encoding36 bool is_close_bracket(char c) const { return c == ']'; } is_colonboost::property_tree::json_parser::detail::external_ascii_superset_encoding37 bool is_colon(char c) const { return c == ':'; } is_open_braceboost::property_tree::json_parser::detail::external_ascii_superset_encoding38 bool is_open_brace(char c) const { return c == '{'; } is_close_braceboost::property_tree::json_parser::detail::external_ascii_superset_encoding39 bool is_close_brace(char c) const { return c == '}'; } 40 is_aboost::property_tree::json_parser::detail::external_ascii_superset_encoding41 bool is_a(char c) const { return c == 'a'; } is_bboost::property_tree::json_parser::detail::external_ascii_superset_encoding42 bool is_b(char c) const { return c == 'b'; } is_eboost::property_tree::json_parser::detail::external_ascii_superset_encoding43 bool is_e(char c) const { return c == 'e'; } is_fboost::property_tree::json_parser::detail::external_ascii_superset_encoding44 bool is_f(char c) const { return c == 'f'; } is_lboost::property_tree::json_parser::detail::external_ascii_superset_encoding45 bool is_l(char c) const { return c == 'l'; } is_nboost::property_tree::json_parser::detail::external_ascii_superset_encoding46 bool is_n(char c) const { return c == 'n'; } is_rboost::property_tree::json_parser::detail::external_ascii_superset_encoding47 bool is_r(char c) const { return c == 'r'; } is_sboost::property_tree::json_parser::detail::external_ascii_superset_encoding48 bool is_s(char c) const { return c == 's'; } is_tboost::property_tree::json_parser::detail::external_ascii_superset_encoding49 bool is_t(char c) const { return c == 't'; } is_uboost::property_tree::json_parser::detail::external_ascii_superset_encoding50 bool is_u(char c) const { return c == 'u'; } 51 decode_hexdigitboost::property_tree::json_parser::detail::external_ascii_superset_encoding52 int decode_hexdigit(char c) { 53 if (c >= '0' && c <= '9') return c - '0'; 54 if (c >= 'A' && c <= 'F') return c - 'A' + 10; 55 if (c >= 'a' && c <= 'f') return c - 'a' + 10; 56 return -1; 57 } 58 }; 59 60 struct utf8_utf8_encoding : external_ascii_superset_encoding 61 { 62 typedef char internal_char; 63 64 template <typename Iterator> 65 boost::iterator_range<Iterator> to_internalboost::property_tree::json_parser::detail::utf8_utf8_encoding66 to_internal(Iterator first, Iterator last) const { 67 return boost::make_iterator_range(first, last); 68 } 69 to_internal_trivialboost::property_tree::json_parser::detail::utf8_utf8_encoding70 char to_internal_trivial(char c) const { 71 BOOST_ASSERT(static_cast<unsigned char>(c) <= 0x7f); 72 return c; 73 } 74 75 template <typename Iterator, typename Sentinel, 76 typename EncodingErrorFn> skip_codepointboost::property_tree::json_parser::detail::utf8_utf8_encoding77 void skip_codepoint(Iterator& cur, Sentinel end, 78 EncodingErrorFn error_fn) const { 79 transcode_codepoint(cur, end, DoNothing(), error_fn); 80 } 81 82 template <typename Iterator, typename Sentinel, typename TranscodedFn, 83 typename EncodingErrorFn> transcode_codepointboost::property_tree::json_parser::detail::utf8_utf8_encoding84 void transcode_codepoint(Iterator& cur, Sentinel end, 85 TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const { 86 unsigned char c = *cur; 87 ++cur; 88 if (c <= 0x7f) { 89 // Solo byte, filter out disallowed codepoints. 90 if (c < 0x20) { 91 error_fn(); 92 } 93 transcoded_fn(c); 94 return; 95 } 96 int trailing = trail_table(c); 97 if (trailing == -1) { 98 // Standalone trailing byte or overly long sequence. 99 error_fn(); 100 } 101 transcoded_fn(c); 102 for (int i = 0; i < trailing; ++i) { 103 if (cur == end || !is_trail(*cur)) { 104 error_fn(); 105 } 106 transcoded_fn(*cur); 107 ++cur; 108 } 109 } 110 111 template <typename TranscodedFn> feed_codepointboost::property_tree::json_parser::detail::utf8_utf8_encoding112 void feed_codepoint(unsigned codepoint, 113 TranscodedFn transcoded_fn) const { 114 if (codepoint <= 0x7f) { 115 transcoded_fn(static_cast<char>(codepoint)); 116 } else if (codepoint <= 0x7ff) { 117 transcoded_fn(static_cast<char>(0xc0 | (codepoint >> 6))); 118 transcoded_fn(trail(codepoint)); 119 } else if (codepoint <= 0xffff) { 120 transcoded_fn(static_cast<char>(0xe0 | (codepoint >> 12))); 121 transcoded_fn(trail(codepoint >> 6)); 122 transcoded_fn(trail(codepoint)); 123 } else if (codepoint <= 0x10ffff) { 124 transcoded_fn(static_cast<char>(0xf0 | (codepoint >> 18))); 125 transcoded_fn(trail(codepoint >> 12)); 126 transcoded_fn(trail(codepoint >> 6)); 127 transcoded_fn(trail(codepoint)); 128 } 129 } 130 131 template <typename Iterator, typename Sentinel> skip_introductionboost::property_tree::json_parser::detail::utf8_utf8_encoding132 void skip_introduction(Iterator& cur, Sentinel end) const { 133 if (cur != end && static_cast<unsigned char>(*cur) == 0xef) { 134 if (++cur == end) return; 135 if (++cur == end) return; 136 if (++cur == end) return; 137 } 138 } 139 140 private: 141 struct DoNothing { operator ()boost::property_tree::json_parser::detail::utf8_utf8_encoding::DoNothing142 void operator ()(char) const {} 143 }; 144 is_trailboost::property_tree::json_parser::detail::utf8_utf8_encoding145 bool is_trail(unsigned char c) const { 146 return (c & 0xc0) == 0x80; 147 } 148 trail_tableboost::property_tree::json_parser::detail::utf8_utf8_encoding149 int trail_table(unsigned char c) const { 150 static const signed char table[] = { 151 /* not a lead byte */ 152 /* 0x10???sss */ -1, -1, -1, -1, -1, -1, -1, -1, 153 /* 0x110??sss */ 1, 1, 1, 1, /* 1 trailing byte */ 154 /* 0x1110?sss */ 2, 2, /* 2 trailing bytes */ 155 /* 0x11110sss */ 3, /* 3 trailing bytes */ 156 /* 0x11111sss */ -1 /* 4 or 5 trailing bytes, disallowed */ 157 }; 158 return table[(c & 0x7f) >> 3]; 159 } 160 trailboost::property_tree::json_parser::detail::utf8_utf8_encoding161 char trail(unsigned unmasked) const { 162 return static_cast<char>(0x80 | (unmasked & 0x3f)); 163 } 164 }; 165 166 }}}} 167 168 #endif 169