xref: /aosp_15_r20/external/cronet/net/third_party/quiche/src/quiche/http2/hpack/huffman/hpack_huffman_encoder.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright (c) 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "quiche/http2/hpack/huffman/hpack_huffman_encoder.h"
6 
7 #include "quiche/http2/hpack/huffman/huffman_spec_tables.h"
8 #include "quiche/common/platform/api/quiche_logging.h"
9 
10 namespace http2 {
11 
HuffmanSize(absl::string_view plain)12 size_t HuffmanSize(absl::string_view plain) {
13   size_t bits = 0;
14   for (const uint8_t c : plain) {
15     bits += HuffmanSpecTables::kCodeLengths[c];
16   }
17   return (bits + 7) / 8;
18 }
19 
HuffmanEncode(absl::string_view plain,size_t encoded_size,std::string * huffman)20 void HuffmanEncode(absl::string_view plain, size_t encoded_size,
21                    std::string* huffman) {
22   QUICHE_DCHECK(huffman != nullptr);
23   huffman->reserve(huffman->size() + encoded_size);
24   uint64_t bit_buffer = 0;  // High-bit is next bit to output. Not clear if that
25                             // is more performant than having the low-bit be the
26                             // last to be output.
27   size_t bits_unused = 64;  // Number of bits available for the next code.
28   for (uint8_t c : plain) {
29     size_t code_length = HuffmanSpecTables::kCodeLengths[c];
30     if (bits_unused < code_length) {
31       // There isn't enough room in bit_buffer for the code of c.
32       // Flush until bits_unused > 56 (i.e. 64 - 8).
33       do {
34         char h = static_cast<char>(bit_buffer >> 56);
35         bit_buffer <<= 8;
36         bits_unused += 8;
37         // Perhaps would be more efficient if we populated an array of chars,
38         // so we don't have to call push_back each time. Reconsider if used
39         // for production.
40         huffman->push_back(h);
41       } while (bits_unused <= 56);
42     }
43     uint64_t code = HuffmanSpecTables::kRightCodes[c];
44     size_t shift_by = bits_unused - code_length;
45     bit_buffer |= (code << shift_by);
46     bits_unused -= code_length;
47   }
48   // bit_buffer contains (64-bits_unused) bits that still need to be flushed.
49   // Output whole bytes until we don't have any whole bytes left.
50   size_t bits_used = 64 - bits_unused;
51   while (bits_used >= 8) {
52     char h = static_cast<char>(bit_buffer >> 56);
53     bit_buffer <<= 8;
54     bits_used -= 8;
55     huffman->push_back(h);
56   }
57   if (bits_used > 0) {
58     // We have less than a byte left to output. The spec calls for padding out
59     // the final byte with the leading bits of the EOS symbol (30 1-bits).
60     constexpr uint64_t leading_eos_bits = 0b11111111;
61     bit_buffer |= (leading_eos_bits << (56 - bits_used));
62     char h = static_cast<char>(bit_buffer >> 56);
63     huffman->push_back(h);
64   }
65 }
66 
HuffmanEncodeFast(absl::string_view input,size_t encoded_size,std::string * output)67 void HuffmanEncodeFast(absl::string_view input, size_t encoded_size,
68                        std::string* output) {
69   const size_t original_size = output->size();
70   const size_t final_size = original_size + encoded_size;
71   // Reserve an extra four bytes to avoid accessing unallocated memory (even
72   // though it would only be OR'd with zeros and thus not modified).
73   output->resize(final_size + 4, 0);
74 
75   // Pointer to first appended byte.
76   char* const first = &*output->begin() + original_size;
77   size_t bit_counter = 0;
78   for (uint8_t c : input) {
79     // Align the Huffman code to byte boundaries as it needs to be written.
80     // The longest Huffman code is 30 bits long, and it can be shifted by up to
81     // 7 bits, requiring 37 bits in total.  The most significant 25 bits and
82     // least significant 2 bits of |code| are always zero.
83     uint64_t code = static_cast<uint64_t>(HuffmanSpecTables::kLeftCodes[c])
84                     << (8 - (bit_counter % 8));
85     // The byte where the first bit of |code| needs to be written.
86     char* const current = first + (bit_counter / 8);
87 
88     bit_counter += HuffmanSpecTables::kCodeLengths[c];
89 
90     *current |= code >> 32;
91 
92     // Do not check if this write is zero before executing it, because with
93     // uniformly random shifts and an ideal random input distribution
94     // corresponding to the Huffman tree it would only be zero in 29% of the
95     // cases.
96     *(current + 1) |= (code >> 24) & 0xff;
97 
98     // Continue to next input character if there is nothing else to write.
99     // (If next byte is zero, then rest must also be zero.)
100     if ((code & 0xff0000) == 0) {
101       continue;
102     }
103     *(current + 2) |= (code >> 16) & 0xff;
104 
105     // Continue to next input character if there is nothing else to write.
106     // (If next byte is zero, then rest must also be zero.)
107     if ((code & 0xff00) == 0) {
108       continue;
109     }
110     *(current + 3) |= (code >> 8) & 0xff;
111 
112     // Do not check if this write is zero, because the check would probably be
113     // as expensive as the write.
114     *(current + 4) |= code & 0xff;
115   }
116 
117   QUICHE_DCHECK_EQ(encoded_size, (bit_counter + 7) / 8);
118 
119   // EOF
120   if (bit_counter % 8 != 0) {
121     *(first + encoded_size - 1) |= 0xff >> (bit_counter & 7);
122   }
123 
124   output->resize(final_size);
125 }
126 
127 }  // namespace http2
128