1 // Copyright 2018 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/extras/preload_data/decoder.h"
6 #include "base/check_op.h"
7 #include "base/notreached.h"
8
9 namespace net::extras {
10
BitReader(const uint8_t * bytes,size_t num_bits)11 PreloadDecoder::BitReader::BitReader(const uint8_t* bytes, size_t num_bits)
12 : bytes_(bytes), num_bits_(num_bits), num_bytes_((num_bits + 7) / 8) {}
13
14 // Next sets |*out| to the next bit from the input. It returns false if no
15 // more bits are available or true otherwise.
Next(bool * out)16 bool PreloadDecoder::BitReader::Next(bool* out) {
17 if (num_bits_used_ == 8) {
18 if (current_byte_index_ >= num_bytes_) {
19 return false;
20 }
21 current_byte_ = bytes_[current_byte_index_++];
22 num_bits_used_ = 0;
23 }
24
25 *out = 1 & (current_byte_ >> (7 - num_bits_used_));
26 num_bits_used_++;
27 return true;
28 }
29
30 // Read sets the |num_bits| least-significant bits of |*out| to the value of
31 // the next |num_bits| bits from the input. It returns false if there are
32 // insufficient bits in the input or true otherwise.
Read(unsigned num_bits,uint32_t * out)33 bool PreloadDecoder::BitReader::Read(unsigned num_bits, uint32_t* out) {
34 DCHECK_LE(num_bits, 32u);
35
36 uint32_t ret = 0;
37 for (unsigned i = 0; i < num_bits; ++i) {
38 bool bit;
39 if (!Next(&bit)) {
40 return false;
41 }
42 ret |= static_cast<uint32_t>(bit) << (num_bits - 1 - i);
43 }
44
45 *out = ret;
46 return true;
47 }
48
49 namespace {
50
51 // Reads one bit from |reader|, shifts |*bits| left by 1, and adds the read bit
52 // to the end of |*bits|.
ReadBit(PreloadDecoder::BitReader * reader,uint8_t * bits)53 bool ReadBit(PreloadDecoder::BitReader* reader, uint8_t* bits) {
54 bool bit;
55 if (!reader->Next(&bit)) {
56 return false;
57 }
58 *bits <<= 1;
59 if (bit) {
60 (*bits)++;
61 }
62 return true;
63 }
64
65 } // namespace
66
DecodeSize(size_t * out)67 bool PreloadDecoder::BitReader::DecodeSize(size_t* out) {
68 uint8_t bits = 0;
69 if (!ReadBit(this, &bits) || !ReadBit(this, &bits)) {
70 return false;
71 }
72 if (bits == 0) {
73 *out = 0;
74 return true;
75 }
76 if (!ReadBit(this, &bits)) {
77 return false;
78 }
79 // We've parsed 3 bits so far. Check all possible combinations:
80 bool is_even;
81 switch (bits) {
82 case 0b000:
83 case 0b001:
84 // This should have been handled in the if (bits == 0) check.
85 NOTREACHED();
86 return false;
87 case 0b010:
88 // A specialization of the 0b01 prefix for unary-like even numbers.
89 *out = 4;
90 return true;
91 case 0b011:
92 // This will be handled with the prefixes for unary-like encoding below.
93 is_even = true;
94 break;
95 case 0b100:
96 *out = 1;
97 return true;
98 case 0b101:
99 *out = 2;
100 return true;
101 case 0b110:
102 *out = 3;
103 return true;
104 case 0b111:
105 // This will be handled with the prefixes for unary-like encoding below.
106 is_even = false;
107 break;
108 default:
109 // All cases should be covered above.
110 NOTREACHED();
111 return false;
112 }
113 size_t bit_length = 3;
114 while (true) {
115 bit_length++;
116 bool bit;
117 if (!Next(&bit)) {
118 return false;
119 }
120 if (!bit) {
121 break;
122 }
123 }
124 size_t ret = (bit_length - 2) * 2;
125 if (!is_even) {
126 ret--;
127 }
128 *out = ret;
129 return true;
130 }
131
132 // Seek sets the current offest in the input to bit number |offset|. It
133 // returns true if |offset| is within the range of the input and false
134 // otherwise.
Seek(size_t offset)135 bool PreloadDecoder::BitReader::Seek(size_t offset) {
136 if (offset >= num_bits_) {
137 return false;
138 }
139 current_byte_index_ = offset / 8;
140 current_byte_ = bytes_[current_byte_index_++];
141 num_bits_used_ = offset % 8;
142 return true;
143 }
144
HuffmanDecoder(const uint8_t * tree,size_t tree_bytes)145 PreloadDecoder::HuffmanDecoder::HuffmanDecoder(const uint8_t* tree,
146 size_t tree_bytes)
147 : tree_(tree), tree_bytes_(tree_bytes) {}
148
Decode(PreloadDecoder::BitReader * reader,char * out) const149 bool PreloadDecoder::HuffmanDecoder::Decode(PreloadDecoder::BitReader* reader,
150 char* out) const {
151 const uint8_t* current = &tree_[tree_bytes_ - 2];
152
153 for (;;) {
154 bool bit;
155 if (!reader->Next(&bit)) {
156 return false;
157 }
158
159 uint8_t b = current[bit];
160 if (b & 0x80) {
161 *out = static_cast<char>(b & 0x7f);
162 return true;
163 }
164
165 unsigned offset = static_cast<unsigned>(b) * 2;
166 DCHECK_LT(offset, tree_bytes_);
167 if (offset >= tree_bytes_) {
168 return false;
169 }
170
171 current = &tree_[offset];
172 }
173 }
174
PreloadDecoder(const uint8_t * huffman_tree,size_t huffman_tree_size,const uint8_t * trie,size_t trie_bits,size_t trie_root_position)175 PreloadDecoder::PreloadDecoder(const uint8_t* huffman_tree,
176 size_t huffman_tree_size,
177 const uint8_t* trie,
178 size_t trie_bits,
179 size_t trie_root_position)
180 : huffman_decoder_(huffman_tree, huffman_tree_size),
181 bit_reader_(trie, trie_bits),
182 trie_root_position_(trie_root_position) {}
183
184 PreloadDecoder::~PreloadDecoder() = default;
185
Decode(const std::string & search,bool * out_found)186 bool PreloadDecoder::Decode(const std::string& search, bool* out_found) {
187 size_t bit_offset = trie_root_position_;
188 *out_found = false;
189
190 // current_search_offset contains one more than the index of the current
191 // character in the search keyword that is being considered. It's one greater
192 // so that we can represent the position just before the beginning (with
193 // zero).
194 size_t current_search_offset = search.size();
195
196 for (;;) {
197 // Seek to the desired location.
198 if (!bit_reader_.Seek(bit_offset)) {
199 return false;
200 }
201
202 // Decode the length of the common prefix.
203 size_t prefix_length;
204 if (!bit_reader_.DecodeSize(&prefix_length)) {
205 return false;
206 }
207
208 // Match each character in the prefix.
209 for (size_t i = 0; i < prefix_length; ++i) {
210 if (current_search_offset == 0) {
211 // We can't match the terminator with a prefix string.
212 return true;
213 }
214
215 char c;
216 if (!huffman_decoder_.Decode(&bit_reader_, &c)) {
217 return false;
218 }
219 if (search[current_search_offset - 1] != c) {
220 return true;
221 }
222 current_search_offset--;
223 }
224
225 bool is_first_offset = true;
226 size_t current_offset = 0;
227
228 // Next is the dispatch table.
229 for (;;) {
230 char c;
231 if (!huffman_decoder_.Decode(&bit_reader_, &c)) {
232 return false;
233 }
234 if (c == kEndOfTable) {
235 // No exact match.
236 return true;
237 }
238
239 if (c == kEndOfString) {
240 if (!ReadEntry(&bit_reader_, search, current_search_offset,
241 out_found)) {
242 return false;
243 }
244 if (current_search_offset == 0) {
245 CHECK(*out_found);
246 return true;
247 }
248 continue;
249 }
250
251 // The entries in a dispatch table are in order thus we can tell if there
252 // will be no match if the current character past the one that we want.
253 if (current_search_offset == 0 || search[current_search_offset - 1] < c) {
254 return true;
255 }
256
257 if (is_first_offset) {
258 // The first offset is backwards from the current position.
259 uint32_t jump_delta_bits;
260 uint32_t jump_delta;
261 if (!bit_reader_.Read(5, &jump_delta_bits) ||
262 !bit_reader_.Read(jump_delta_bits, &jump_delta)) {
263 return false;
264 }
265
266 if (bit_offset < jump_delta) {
267 return false;
268 }
269
270 current_offset = bit_offset - jump_delta;
271 is_first_offset = false;
272 } else {
273 // Subsequent offsets are forward from the target of the first offset.
274 uint32_t is_long_jump;
275 if (!bit_reader_.Read(1, &is_long_jump)) {
276 return false;
277 }
278
279 uint32_t jump_delta;
280 if (!is_long_jump) {
281 if (!bit_reader_.Read(7, &jump_delta)) {
282 return false;
283 }
284 } else {
285 uint32_t jump_delta_bits;
286 if (!bit_reader_.Read(4, &jump_delta_bits) ||
287 !bit_reader_.Read(jump_delta_bits + 8, &jump_delta)) {
288 return false;
289 }
290 }
291
292 current_offset += jump_delta;
293 if (current_offset >= bit_offset) {
294 return false;
295 }
296 }
297
298 DCHECK_LT(0u, current_search_offset);
299 if (search[current_search_offset - 1] == c) {
300 bit_offset = current_offset;
301 current_search_offset--;
302 break;
303 }
304 }
305 }
306 NOTREACHED();
307 }
308
309 } // namespace net::extras
310