xref: /aosp_15_r20/external/cronet/net/filter/gzip_source_stream.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2016 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/filter/gzip_source_stream.h"
6 
7 #include <algorithm>
8 #include <memory>
9 #include <utility>
10 
11 #include "base/check_op.h"
12 #include "base/functional/bind.h"
13 #include "base/memory/ptr_util.h"
14 #include "base/memory/ref_counted.h"
15 #include "base/notreached.h"
16 #include "base/numerics/checked_math.h"
17 #include "net/base/io_buffer.h"
18 #include "third_party/zlib/zlib.h"
19 
20 namespace net {
21 
namespace {

// Human-readable stream type names returned by GetTypeAsString().
constexpr char kDeflate[] = "DEFLATE";
constexpr char kGzip[] = "GZIP";

// For deflate streams, once at least this many bytes have been received
// without an error and without adding a Zlib header, assume the original
// stream had a Zlib header. In practice, don't need nearly this much data, but
// since the detection logic is a heuristic, best to be safe. Data is freed
// once it's been determined whether the stream has a zlib header or not, so
// larger values shouldn't affect memory usage, in practice.
//
// Declared as size_t because it is only ever compared against byte counts,
// which are size_t; this avoids a signed/unsigned comparison.
constexpr size_t kMaxZlibHeaderSniffBytes = 1000;

}  // namespace
36 
~GzipSourceStream()37 GzipSourceStream::~GzipSourceStream() {
38   if (zlib_stream_)
39     inflateEnd(zlib_stream_.get());
40 }
41 
Create(std::unique_ptr<SourceStream> upstream,SourceStream::SourceType type)42 std::unique_ptr<GzipSourceStream> GzipSourceStream::Create(
43     std::unique_ptr<SourceStream> upstream,
44     SourceStream::SourceType type) {
45   DCHECK(type == TYPE_GZIP || type == TYPE_DEFLATE);
46   auto source =
47       base::WrapUnique(new GzipSourceStream(std::move(upstream), type));
48 
49   if (!source->Init())
50     return nullptr;
51   return source;
52 }
53 
GzipSourceStream(std::unique_ptr<SourceStream> upstream,SourceStream::SourceType type)54 GzipSourceStream::GzipSourceStream(std::unique_ptr<SourceStream> upstream,
55                                    SourceStream::SourceType type)
56     : FilterSourceStream(type, std::move(upstream)) {}
57 
Init()58 bool GzipSourceStream::Init() {
59   zlib_stream_ = std::make_unique<z_stream>();
60   if (!zlib_stream_)
61     return false;
62   memset(zlib_stream_.get(), 0, sizeof(z_stream));
63 
64   int ret;
65   if (type() == TYPE_GZIP) {
66     ret = inflateInit2(zlib_stream_.get(), -MAX_WBITS);
67   } else {
68     ret = inflateInit(zlib_stream_.get());
69   }
70   DCHECK_NE(Z_VERSION_ERROR, ret);
71   return ret == Z_OK;
72 }
73 
GetTypeAsString() const74 std::string GzipSourceStream::GetTypeAsString() const {
75   switch (type()) {
76     case TYPE_GZIP:
77       return kGzip;
78     case TYPE_DEFLATE:
79       return kDeflate;
80     default:
81       NOTREACHED();
82       return "";
83   }
84 }
85 
FilterData(IOBuffer * output_buffer,size_t output_buffer_size,IOBuffer * input_buffer,size_t input_buffer_size,size_t * consumed_bytes,bool upstream_end_reached)86 base::expected<size_t, Error> GzipSourceStream::FilterData(
87     IOBuffer* output_buffer,
88     size_t output_buffer_size,
89     IOBuffer* input_buffer,
90     size_t input_buffer_size,
91     size_t* consumed_bytes,
92     bool upstream_end_reached) {
93   *consumed_bytes = 0;
94   char* input_data = input_buffer->data();
95   size_t input_data_size = input_buffer_size;
96   size_t bytes_out = 0;
97   bool state_compressed_entered = false;
98   while (input_data_size > 0 && bytes_out < output_buffer_size) {
99     InputState state = input_state_;
100     switch (state) {
101       case STATE_START: {
102         if (type() == TYPE_DEFLATE) {
103           input_state_ = STATE_SNIFFING_DEFLATE_HEADER;
104           break;
105         }
106         DCHECK_GT(input_data_size, 0u);
107         input_state_ = STATE_GZIP_HEADER;
108         break;
109       }
110       case STATE_GZIP_HEADER: {
111         DCHECK_NE(TYPE_DEFLATE, type());
112 
113         const size_t kGzipFooterBytes = 8;
114         const char* end = nullptr;
115         GZipHeader::Status status =
116             gzip_header_.ReadMore(input_data, input_data_size, &end);
117         if (status == GZipHeader::INCOMPLETE_HEADER) {
118           input_data += input_data_size;
119           input_data_size = 0;
120         } else if (status == GZipHeader::COMPLETE_HEADER) {
121           // If there is a valid header, there should also be a valid footer.
122           gzip_footer_bytes_left_ = kGzipFooterBytes;
123           size_t bytes_consumed = static_cast<size_t>(end - input_data);
124           input_data += bytes_consumed;
125           input_data_size -= bytes_consumed;
126           input_state_ = STATE_COMPRESSED_BODY;
127         } else if (status == GZipHeader::INVALID_HEADER) {
128           return base::unexpected(ERR_CONTENT_DECODING_FAILED);
129         }
130         break;
131       }
132       case STATE_SNIFFING_DEFLATE_HEADER: {
133         DCHECK_EQ(TYPE_DEFLATE, type());
134 
135         zlib_stream_.get()->next_in = reinterpret_cast<Bytef*>(input_data);
136         zlib_stream_.get()->avail_in = input_data_size;
137         zlib_stream_.get()->next_out =
138             reinterpret_cast<Bytef*>(output_buffer->data());
139         zlib_stream_.get()->avail_out = output_buffer_size;
140 
141         int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
142 
143         // On error, try adding a zlib header and replaying the response. Note
144         // that data just received doesn't have to be replayed, since it hasn't
145         // been removed from input_data yet, only data from previous FilterData
146         // calls needs to be replayed.
147         if (ret != Z_STREAM_END && ret != Z_OK) {
148           if (!InsertZlibHeader())
149             return base::unexpected(ERR_CONTENT_DECODING_FAILED);
150 
151           input_state_ = STATE_REPLAY_DATA;
152           // |replay_state_| should still have its initial value.
153           DCHECK_EQ(STATE_COMPRESSED_BODY, replay_state_);
154           break;
155         }
156 
157         size_t bytes_used = input_data_size - zlib_stream_.get()->avail_in;
158         bytes_out = output_buffer_size - zlib_stream_.get()->avail_out;
159         // If any bytes are output, enough total bytes have been received, or at
160         // the end of the stream, assume the response had a valid Zlib header.
161         if (bytes_out > 0 ||
162             bytes_used + replay_data_.size() >= kMaxZlibHeaderSniffBytes ||
163             ret == Z_STREAM_END) {
164           replay_data_.clear();
165           if (ret == Z_STREAM_END) {
166             input_state_ = STATE_GZIP_FOOTER;
167           } else {
168             input_state_ = STATE_COMPRESSED_BODY;
169           }
170         } else {
171           replay_data_.append(input_data, bytes_used);
172         }
173 
174         input_data_size -= bytes_used;
175         input_data += bytes_used;
176         break;
177       }
178       case STATE_REPLAY_DATA: {
179         DCHECK_EQ(TYPE_DEFLATE, type());
180 
181         if (replay_data_.empty()) {
182           input_state_ = replay_state_;
183           break;
184         }
185 
186         // Call FilterData recursively, after updating |input_state_|, with
187         // |replay_data_|. This recursive call makes handling data from
188         // |replay_data_| and |input_buffer| much simpler than the alternative
189         // operations, though it's not pretty.
190         input_state_ = replay_state_;
191         size_t bytes_used;
192         scoped_refptr<IOBuffer> replay_buffer =
193             base::MakeRefCounted<WrappedIOBuffer>(replay_data_);
194         base::expected<size_t, Error> result =
195             FilterData(output_buffer, output_buffer_size, replay_buffer.get(),
196                        replay_data_.size(), &bytes_used, upstream_end_reached);
197         replay_data_.erase(0, bytes_used);
198         // Back up resulting state, and return state to STATE_REPLAY_DATA.
199         replay_state_ = input_state_;
200         input_state_ = STATE_REPLAY_DATA;
201 
202         // Could continue consuming data in the success case, but simplest not
203         // to.
204         if (!result.has_value() || result.value() != 0)
205           return result;
206         break;
207       }
208       case STATE_COMPRESSED_BODY: {
209         DCHECK(!state_compressed_entered);
210 
211         state_compressed_entered = true;
212         zlib_stream_.get()->next_in = reinterpret_cast<Bytef*>(input_data);
213         zlib_stream_.get()->avail_in = input_data_size;
214         zlib_stream_.get()->next_out =
215             reinterpret_cast<Bytef*>(output_buffer->data());
216         zlib_stream_.get()->avail_out = output_buffer_size;
217 
218         int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
219         if (ret != Z_STREAM_END && ret != Z_OK)
220           return base::unexpected(ERR_CONTENT_DECODING_FAILED);
221 
222         size_t bytes_used = input_data_size - zlib_stream_.get()->avail_in;
223         bytes_out = output_buffer_size - zlib_stream_.get()->avail_out;
224         input_data_size -= bytes_used;
225         input_data += bytes_used;
226         if (ret == Z_STREAM_END)
227           input_state_ = STATE_GZIP_FOOTER;
228         // zlib has written as much data to |output_buffer| as it could.
229         // There might still be some unconsumed data in |input_buffer| if there
230         // is no space in |output_buffer|.
231         break;
232       }
233       case STATE_GZIP_FOOTER: {
234         size_t to_read = std::min(gzip_footer_bytes_left_, input_data_size);
235         gzip_footer_bytes_left_ -= to_read;
236         input_data_size -= to_read;
237         input_data += to_read;
238         if (gzip_footer_bytes_left_ == 0)
239           input_state_ = STATE_IGNORING_EXTRA_BYTES;
240         break;
241       }
242       case STATE_IGNORING_EXTRA_BYTES: {
243         input_data_size = 0;
244         break;
245       }
246     }
247   }
248   *consumed_bytes = input_buffer_size - input_data_size;
249   return bytes_out;
250 }
251 
InsertZlibHeader()252 bool GzipSourceStream::InsertZlibHeader() {
253   char dummy_header[] = {0x78, 0x01};
254   char dummy_output[4];
255 
256   inflateReset(zlib_stream_.get());
257   zlib_stream_.get()->next_in = reinterpret_cast<Bytef*>(&dummy_header[0]);
258   zlib_stream_.get()->avail_in = sizeof(dummy_header);
259   zlib_stream_.get()->next_out = reinterpret_cast<Bytef*>(&dummy_output[0]);
260   zlib_stream_.get()->avail_out = sizeof(dummy_output);
261 
262   int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
263   return ret == Z_OK;
264 }
265 
266 }  // namespace net
267