1 // Copyright 2018 The Chromium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef NET_EXTRAS_PRELOAD_DATA_DECODER_H_ 6 #define NET_EXTRAS_PRELOAD_DATA_DECODER_H_ 7 8 #include <stdint.h> 9 10 #include <string> 11 12 namespace net::extras { 13 14 // Decodes an entry from preloaded data. 15 // Clients must implement ReadEntry() method to read the specific type of data 16 // they are interested in. 17 class PreloadDecoder { 18 public: 19 // These must match the values in net/tools/huffman_trie/trie/trie_writer.h. 20 enum : char { kEndOfString = 0, kEndOfTable = 127 }; 21 22 // BitReader is a class that allows a bytestring to be read bit-by-bit. 23 class BitReader { 24 public: 25 BitReader(const uint8_t* bytes, size_t num_bits); 26 27 BitReader(const BitReader&) = delete; 28 BitReader& operator=(const BitReader&) = delete; 29 30 // Next sets |*out| to the next bit from the input. It returns false if no 31 // more bits are available or true otherwise. 32 bool Next(bool* out); 33 34 // Read sets the |num_bits| least-significant bits of |*out| to the value of 35 // the next |num_bits| bits from the input. It returns false if there are 36 // insufficient bits in the input or true otherwise. 37 bool Read(unsigned num_bits, uint32_t* out); 38 39 // Decodes a size_t from the reader, putting the resulting value in |*out|. 40 // Returns false if there are insufficient bits to read and true otherwise. 41 // 42 // This function's inverse is TrieBitBuffer::WriteSize. 43 // 44 // The encoding is a prefix code optimized for small values (less than 4). 45 // It is designed for the lengths of prefixes in the HSTS Preload list trie. 46 // Compared to the unary encoding that was previously used (where the number 47 // of bits used is one plus the value being encoded), this uses one more bit 48 // for encoding 0 and 1, and the same number of bits for encoding 2, and 49 // fewer bits for encoding values greater than 2. At the time of writing, 50 // 35% of the lengths encoded in the trie were 0 or 1, 11% were 2, and the 51 // remaining 54% were greater than 2. 52 // 53 // This encoding scheme uses a variable number of bits to encode each value. 54 // There are fixed values for 0, 1, 2, and 3, and then a simple rule is used 55 // for 4 and greater. 0 uses 2 bits; 1 through 3 use 3 bits. The fixed 56 // values are as follows: 57 // 58 // 0: 0b00 59 // 1: 0b100 60 // 2: 0b101 61 // 3: 0b110 62 // 63 // Note that none of the fixed values are prefixed with 0b01 or 0b111. These 64 // prefixes are used with a unary-like encoding for values 4 and above. 65 // Zero or more 1s, followed by a 0, are appended to one of those prefixes. 66 // Even values use the prefix 0b01, and odd values use the prefix 0b111. The 67 // number of 1s to append is half the value (rounded down) minus 1. 68 bool DecodeSize(size_t* out); 69 70 // Seek sets the current offest in the input to bit number |offset|. It 71 // returns true if |offset| is within the range of the input and false 72 // otherwise. 73 bool Seek(size_t offset); 74 75 private: 76 const uint8_t* const bytes_; 77 const size_t num_bits_; 78 const size_t num_bytes_; 79 // current_byte_index_ contains the current byte offset in |bytes_|. 80 size_t current_byte_index_ = 0; 81 // current_byte_ contains the current byte of the input. 82 uint8_t current_byte_; 83 // num_bits_used_ contains the number of bits of |current_byte_| that have 84 // been read. 85 unsigned num_bits_used_ = 8; 86 }; 87 88 // HuffmanDecoder is a very simple Huffman reader. The input Huffman tree is 89 // simply encoded as a series of two-byte structures. The first byte 90 // determines the "0" pointer for that node and the second the "1" pointer. 91 // Each byte either has the MSB set, in which case the bottom 7 bits are the 92 // value for that position, or else the bottom seven bits contain the index of 93 // a node. 94 // 95 // The tree is decoded by walking rather than a table-driven approach. 96 class HuffmanDecoder { 97 public: 98 HuffmanDecoder(const uint8_t* tree, size_t tree_bytes); 99 100 HuffmanDecoder(const HuffmanDecoder&) = delete; 101 HuffmanDecoder& operator=(const HuffmanDecoder&) = delete; 102 103 bool Decode(PreloadDecoder::BitReader* reader, char* out) const; 104 105 private: 106 const uint8_t* const tree_; 107 const size_t tree_bytes_; 108 }; 109 110 PreloadDecoder(const uint8_t* huffman_tree, 111 size_t huffman_tree_size, 112 const uint8_t* trie, 113 size_t trie_bits, 114 size_t trie_root_position); 115 116 PreloadDecoder(const PreloadDecoder&) = delete; 117 PreloadDecoder& operator=(const PreloadDecoder&) = delete; 118 119 virtual ~PreloadDecoder(); 120 121 // Resolves search keyword given by |search| in the preloaded data. Returns 122 // false on internal error and true otherwise. After a successful return, 123 // |*out_found| is true iff a relevant entry has been found. In the case of 124 // HSTS data, |search| is the hostname being searched. 125 // 126 // Although this code should be robust, it never processes attacker-controlled 127 // data -- it only operates on the preloaded data built into the binary. 128 // 129 // The preloaded data is represented as a trie and matches |search| 130 // backwards. Each node in the trie starts with a number of characters, which 131 // must match exactly. After that is a dispatch table which maps the next 132 // character in the search keyword to another node in the trie. 133 // 134 // In the dispatch table, the zero character represents the "end of string" 135 // (which is the *beginning* of the search keyword since we process it 136 // backwards). The value in that case is special -- rather than an offset to 137 // another trie node, it contains the searched entry (for HSTS data, it 138 // contains whether subdomains are included, pinsets etc.). Clients must 139 // implement ReadEntry to read the entry at this location. 140 // 141 // Dispatch tables are always given in order, but the "end of string" (zero) 142 // value always comes before an entry for '.'. 143 bool Decode(const std::string& search, bool* out_found); 144 145 protected: 146 virtual bool ReadEntry(BitReader* reader, 147 const std::string& search, 148 size_t current_search_offset, 149 bool* out_found) = 0; 150 huffman_decoder()151 const HuffmanDecoder& huffman_decoder() const { return huffman_decoder_; } 152 153 private: 154 HuffmanDecoder huffman_decoder_; 155 BitReader bit_reader_; 156 157 const size_t trie_root_position_; 158 }; 159 160 } // namespace net::extras 161 162 #endif // NET_EXTRAS_PRELOAD_DATA_DECODER_H_ 163