xref: /aosp_15_r20/external/cronet/net/http/http_chunked_decoder.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Derived from:
6 //   mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp
7 // The license block is:
8 /* ***** BEGIN LICENSE BLOCK *****
9  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
10  *
11  * The contents of this file are subject to the Mozilla Public License Version
12  * 1.1 (the "License"); you may not use this file except in compliance with
13  * the License. You may obtain a copy of the License at
14  * http://www.mozilla.org/MPL/
15  *
16  * Software distributed under the License is distributed on an "AS IS" basis,
17  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
18  * for the specific language governing rights and limitations under the
19  * License.
20  *
21  * The Original Code is Mozilla.
22  *
23  * The Initial Developer of the Original Code is
24  * Netscape Communications.
25  * Portions created by the Initial Developer are Copyright (C) 2001
26  * the Initial Developer. All Rights Reserved.
27  *
28  * Contributor(s):
29  *   Darin Fisher <[email protected]> (original author)
30  *
31  * Alternatively, the contents of this file may be used under the terms of
32  * either the GNU General Public License Version 2 or later (the "GPL"), or
33  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
34  * in which case the provisions of the GPL or the LGPL are applicable instead
35  * of those above. If you wish to allow use of your version of this file only
36  * under the terms of either the GPL or the LGPL, and not to allow others to
37  * use your version of this file under the terms of the MPL, indicate your
38  * decision by deleting the provisions above and replace them with the notice
39  * and other provisions required by the GPL or the LGPL. If you do not delete
40  * the provisions above, a recipient may use your version of this file under
41  * the terms of any one of the MPL, the GPL or the LGPL.
42  *
43  * ***** END LICENSE BLOCK ***** */
44 
45 #include "net/http/http_chunked_decoder.h"
46 
47 #include <algorithm>
48 #include <string_view>
49 
50 #include "base/logging.h"
51 #include "base/ranges/algorithm.h"
52 #include "base/strings/string_number_conversions.h"
53 #include "base/strings/string_util.h"
54 #include "net/base/net_errors.h"
55 
56 namespace net {
57 
// Absurdly long size to avoid imposing a constraint on chunked encoding
// extensions.
//
// This is the maximum number of bytes of an incomplete framing line (one not
// yet terminated by LF) that ScanForChunkRemaining() will buffer across calls
// before rejecting the input with ERR_INVALID_CHUNKED_ENCODING.
const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384;

// Defaulted constructor. No member is assigned here; initial values
// presumably come from in-class initializers in the header -- TODO confirm.
HttpChunkedDecoder::HttpChunkedDecoder() = default;
63 
FilterBuf(char * buf,int buf_len)64 int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) {
65   int result = 0;
66 
67   while (buf_len > 0) {
68     if (chunk_remaining_ > 0) {
69       // Since |chunk_remaining_| is positive and |buf_len| an int, the minimum
70       // of the two must be an int.
71       int num = static_cast<int>(
72           std::min(chunk_remaining_, static_cast<int64_t>(buf_len)));
73 
74       buf_len -= num;
75       chunk_remaining_ -= num;
76 
77       result += num;
78       buf += num;
79 
80       // After each chunk's data there should be a CRLF.
81       if (chunk_remaining_ == 0)
82         chunk_terminator_remaining_ = true;
83       continue;
84     } else if (reached_eof_) {
85       bytes_after_eof_ += buf_len;
86       break;  // Done!
87     }
88 
89     int bytes_consumed = ScanForChunkRemaining(buf, buf_len);
90     if (bytes_consumed < 0)
91       return bytes_consumed; // Error
92 
93     buf_len -= bytes_consumed;
94     if (buf_len > 0)
95       memmove(buf, buf + bytes_consumed, buf_len);
96   }
97 
98   return result;
99 }
100 
ScanForChunkRemaining(const char * buf,int buf_len)101 int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) {
102   DCHECK_EQ(0, chunk_remaining_);
103   DCHECK_GT(buf_len, 0);
104 
105   int bytes_consumed = 0;
106 
107   size_t index_of_lf = std::string_view(buf, buf_len).find('\n');
108   if (index_of_lf != std::string_view::npos) {
109     buf_len = static_cast<int>(index_of_lf);
110     if (buf_len && buf[buf_len - 1] == '\r')  // Eliminate a preceding CR.
111       buf_len--;
112     bytes_consumed = static_cast<int>(index_of_lf) + 1;
113 
114     // Make buf point to the full line buffer to parse.
115     if (!line_buf_.empty()) {
116       line_buf_.append(buf, buf_len);
117       buf = line_buf_.data();
118       buf_len = static_cast<int>(line_buf_.size());
119     }
120 
121     if (reached_last_chunk_) {
122       if (buf_len > 0)
123         DVLOG(1) << "ignoring http trailer";
124       else
125         reached_eof_ = true;
126     } else if (chunk_terminator_remaining_) {
127       if (buf_len > 0) {
128         DLOG(ERROR) << "chunk data not terminated properly";
129         return ERR_INVALID_CHUNKED_ENCODING;
130       }
131       chunk_terminator_remaining_ = false;
132     } else if (buf_len > 0) {
133       // Ignore any chunk-extensions.
134       size_t index_of_semicolon = std::string_view(buf, buf_len).find(';');
135       if (index_of_semicolon != std::string_view::npos) {
136         buf_len = static_cast<int>(index_of_semicolon);
137       }
138 
139       if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) {
140         DLOG(ERROR) << "Failed parsing HEX from: " <<
141             std::string(buf, buf_len);
142         return ERR_INVALID_CHUNKED_ENCODING;
143       }
144 
145       if (chunk_remaining_ == 0)
146         reached_last_chunk_ = true;
147     } else {
148       DLOG(ERROR) << "missing chunk-size";
149       return ERR_INVALID_CHUNKED_ENCODING;
150     }
151     line_buf_.clear();
152   } else {
153     // Save the partial line; wait for more data.
154     bytes_consumed = buf_len;
155 
156     // Ignore a trailing CR
157     if (buf[buf_len - 1] == '\r')
158       buf_len--;
159 
160     if (line_buf_.length() + buf_len > kMaxLineBufLen) {
161       DLOG(ERROR) << "Chunked line length too long";
162       return ERR_INVALID_CHUNKED_ENCODING;
163     }
164 
165     line_buf_.append(buf, buf_len);
166   }
167   return bytes_consumed;
168 }
169 
170 
171 // While the HTTP 1.1 specification defines chunk-size as 1*HEX
172 // some sites rely on more lenient parsing.
173 // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces
174 // (0x20) to be 7 characters long, such as "819b   ".
175 //
176 // A comparison of browsers running on WindowsXP shows that
177 // they will parse the following inputs (egrep syntax):
178 //
179 // Let \X be the character class for a hex digit: [0-9a-fA-F]
180 //
181 //   RFC 7230: ^\X+$
182 //        IE7: ^\X+[^\X]*$
183 // Safari 3.1: ^[\t\r ]*\X+[\t ]*$
184 //  Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$
185 // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$
186 //
187 // Our strategy is to be as strict as possible, while not breaking
188 // known sites.
189 //
190 //         Us: ^\X+[ ]*$
ParseChunkSize(const char * start,int len,int64_t * out)191 bool HttpChunkedDecoder::ParseChunkSize(const char* start,
192                                         int len,
193                                         int64_t* out) {
194   DCHECK_GE(len, 0);
195 
196   // Strip trailing spaces
197   while (len > 0 && start[len - 1] == ' ')
198     len--;
199 
200   // Be more restrictive than HexStringToInt64;
201   // don't allow inputs with leading "-", "+", "0x", "0X"
202   std::string_view chunk_size(start, len);
203   if (!base::ranges::all_of(chunk_size, base::IsHexDigit<char>)) {
204     return false;
205   }
206 
207   int64_t parsed_number;
208   bool ok = base::HexStringToInt64(chunk_size, &parsed_number);
209   if (ok && parsed_number >= 0) {
210     *out = parsed_number;
211     return true;
212   }
213   return false;
214 }
215 
216 }  // namespace net
217