xref: /aosp_15_r20/external/cronet/net/extras/preload_data/decoder.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2018 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/extras/preload_data/decoder.h"
6 #include "base/check_op.h"
7 #include "base/notreached.h"
8 
9 namespace net::extras {
10 
BitReader(const uint8_t * bytes,size_t num_bits)11 PreloadDecoder::BitReader::BitReader(const uint8_t* bytes, size_t num_bits)
12     : bytes_(bytes), num_bits_(num_bits), num_bytes_((num_bits + 7) / 8) {}
13 
14 // Next sets |*out| to the next bit from the input. It returns false if no
15 // more bits are available or true otherwise.
Next(bool * out)16 bool PreloadDecoder::BitReader::Next(bool* out) {
17   if (num_bits_used_ == 8) {
18     if (current_byte_index_ >= num_bytes_) {
19       return false;
20     }
21     current_byte_ = bytes_[current_byte_index_++];
22     num_bits_used_ = 0;
23   }
24 
25   *out = 1 & (current_byte_ >> (7 - num_bits_used_));
26   num_bits_used_++;
27   return true;
28 }
29 
30 // Read sets the |num_bits| least-significant bits of |*out| to the value of
31 // the next |num_bits| bits from the input. It returns false if there are
32 // insufficient bits in the input or true otherwise.
Read(unsigned num_bits,uint32_t * out)33 bool PreloadDecoder::BitReader::Read(unsigned num_bits, uint32_t* out) {
34   DCHECK_LE(num_bits, 32u);
35 
36   uint32_t ret = 0;
37   for (unsigned i = 0; i < num_bits; ++i) {
38     bool bit;
39     if (!Next(&bit)) {
40       return false;
41     }
42     ret |= static_cast<uint32_t>(bit) << (num_bits - 1 - i);
43   }
44 
45   *out = ret;
46   return true;
47 }
48 
49 namespace {
50 
51 // Reads one bit from |reader|, shifts |*bits| left by 1, and adds the read bit
52 // to the end of |*bits|.
ReadBit(PreloadDecoder::BitReader * reader,uint8_t * bits)53 bool ReadBit(PreloadDecoder::BitReader* reader, uint8_t* bits) {
54   bool bit;
55   if (!reader->Next(&bit)) {
56     return false;
57   }
58   *bits <<= 1;
59   if (bit) {
60     (*bits)++;
61   }
62   return true;
63 }
64 
65 }  // namespace
66 
DecodeSize(size_t * out)67 bool PreloadDecoder::BitReader::DecodeSize(size_t* out) {
68   uint8_t bits = 0;
69   if (!ReadBit(this, &bits) || !ReadBit(this, &bits)) {
70     return false;
71   }
72   if (bits == 0) {
73     *out = 0;
74     return true;
75   }
76   if (!ReadBit(this, &bits)) {
77     return false;
78   }
79   // We've parsed 3 bits so far. Check all possible combinations:
80   bool is_even;
81   switch (bits) {
82     case 0b000:
83     case 0b001:
84       // This should have been handled in the if (bits == 0) check.
85       NOTREACHED();
86       return false;
87     case 0b010:
88       // A specialization of the 0b01 prefix for unary-like even numbers.
89       *out = 4;
90       return true;
91     case 0b011:
92       // This will be handled with the prefixes for unary-like encoding below.
93       is_even = true;
94       break;
95     case 0b100:
96       *out = 1;
97       return true;
98     case 0b101:
99       *out = 2;
100       return true;
101     case 0b110:
102       *out = 3;
103       return true;
104     case 0b111:
105       // This will be handled with the prefixes for unary-like encoding below.
106       is_even = false;
107       break;
108     default:
109       // All cases should be covered above.
110       NOTREACHED();
111       return false;
112   }
113   size_t bit_length = 3;
114   while (true) {
115     bit_length++;
116     bool bit;
117     if (!Next(&bit)) {
118       return false;
119     }
120     if (!bit) {
121       break;
122     }
123   }
124   size_t ret = (bit_length - 2) * 2;
125   if (!is_even) {
126     ret--;
127   }
128   *out = ret;
129   return true;
130 }
131 
132 // Seek sets the current offest in the input to bit number |offset|. It
133 // returns true if |offset| is within the range of the input and false
134 // otherwise.
Seek(size_t offset)135 bool PreloadDecoder::BitReader::Seek(size_t offset) {
136   if (offset >= num_bits_) {
137     return false;
138   }
139   current_byte_index_ = offset / 8;
140   current_byte_ = bytes_[current_byte_index_++];
141   num_bits_used_ = offset % 8;
142   return true;
143 }
144 
HuffmanDecoder(const uint8_t * tree,size_t tree_bytes)145 PreloadDecoder::HuffmanDecoder::HuffmanDecoder(const uint8_t* tree,
146                                                size_t tree_bytes)
147     : tree_(tree), tree_bytes_(tree_bytes) {}
148 
Decode(PreloadDecoder::BitReader * reader,char * out) const149 bool PreloadDecoder::HuffmanDecoder::Decode(PreloadDecoder::BitReader* reader,
150                                             char* out) const {
151   const uint8_t* current = &tree_[tree_bytes_ - 2];
152 
153   for (;;) {
154     bool bit;
155     if (!reader->Next(&bit)) {
156       return false;
157     }
158 
159     uint8_t b = current[bit];
160     if (b & 0x80) {
161       *out = static_cast<char>(b & 0x7f);
162       return true;
163     }
164 
165     unsigned offset = static_cast<unsigned>(b) * 2;
166     DCHECK_LT(offset, tree_bytes_);
167     if (offset >= tree_bytes_) {
168       return false;
169     }
170 
171     current = &tree_[offset];
172   }
173 }
174 
PreloadDecoder(const uint8_t * huffman_tree,size_t huffman_tree_size,const uint8_t * trie,size_t trie_bits,size_t trie_root_position)175 PreloadDecoder::PreloadDecoder(const uint8_t* huffman_tree,
176                                size_t huffman_tree_size,
177                                const uint8_t* trie,
178                                size_t trie_bits,
179                                size_t trie_root_position)
180     : huffman_decoder_(huffman_tree, huffman_tree_size),
181       bit_reader_(trie, trie_bits),
182       trie_root_position_(trie_root_position) {}
183 
184 PreloadDecoder::~PreloadDecoder() = default;
185 
Decode(const std::string & search,bool * out_found)186 bool PreloadDecoder::Decode(const std::string& search, bool* out_found) {
187   size_t bit_offset = trie_root_position_;
188   *out_found = false;
189 
190   // current_search_offset contains one more than the index of the current
191   // character in the search keyword that is being considered. It's one greater
192   // so that we can represent the position just before the beginning (with
193   // zero).
194   size_t current_search_offset = search.size();
195 
196   for (;;) {
197     // Seek to the desired location.
198     if (!bit_reader_.Seek(bit_offset)) {
199       return false;
200     }
201 
202     // Decode the length of the common prefix.
203     size_t prefix_length;
204     if (!bit_reader_.DecodeSize(&prefix_length)) {
205       return false;
206     }
207 
208     // Match each character in the prefix.
209     for (size_t i = 0; i < prefix_length; ++i) {
210       if (current_search_offset == 0) {
211         // We can't match the terminator with a prefix string.
212         return true;
213       }
214 
215       char c;
216       if (!huffman_decoder_.Decode(&bit_reader_, &c)) {
217         return false;
218       }
219       if (search[current_search_offset - 1] != c) {
220         return true;
221       }
222       current_search_offset--;
223     }
224 
225     bool is_first_offset = true;
226     size_t current_offset = 0;
227 
228     // Next is the dispatch table.
229     for (;;) {
230       char c;
231       if (!huffman_decoder_.Decode(&bit_reader_, &c)) {
232         return false;
233       }
234       if (c == kEndOfTable) {
235         // No exact match.
236         return true;
237       }
238 
239       if (c == kEndOfString) {
240         if (!ReadEntry(&bit_reader_, search, current_search_offset,
241                        out_found)) {
242           return false;
243         }
244         if (current_search_offset == 0) {
245           CHECK(*out_found);
246           return true;
247         }
248         continue;
249       }
250 
251       // The entries in a dispatch table are in order thus we can tell if there
252       // will be no match if the current character past the one that we want.
253       if (current_search_offset == 0 || search[current_search_offset - 1] < c) {
254         return true;
255       }
256 
257       if (is_first_offset) {
258         // The first offset is backwards from the current position.
259         uint32_t jump_delta_bits;
260         uint32_t jump_delta;
261         if (!bit_reader_.Read(5, &jump_delta_bits) ||
262             !bit_reader_.Read(jump_delta_bits, &jump_delta)) {
263           return false;
264         }
265 
266         if (bit_offset < jump_delta) {
267           return false;
268         }
269 
270         current_offset = bit_offset - jump_delta;
271         is_first_offset = false;
272       } else {
273         // Subsequent offsets are forward from the target of the first offset.
274         uint32_t is_long_jump;
275         if (!bit_reader_.Read(1, &is_long_jump)) {
276           return false;
277         }
278 
279         uint32_t jump_delta;
280         if (!is_long_jump) {
281           if (!bit_reader_.Read(7, &jump_delta)) {
282             return false;
283           }
284         } else {
285           uint32_t jump_delta_bits;
286           if (!bit_reader_.Read(4, &jump_delta_bits) ||
287               !bit_reader_.Read(jump_delta_bits + 8, &jump_delta)) {
288             return false;
289           }
290         }
291 
292         current_offset += jump_delta;
293         if (current_offset >= bit_offset) {
294           return false;
295         }
296       }
297 
298       DCHECK_LT(0u, current_search_offset);
299       if (search[current_search_offset - 1] == c) {
300         bit_offset = current_offset;
301         current_search_offset--;
302         break;
303       }
304     }
305   }
306   NOTREACHED();
307 }
308 
309 }  // namespace net::extras
310