xref: /aosp_15_r20/external/cronet/net/http/http_chunked_decoder.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1*6777b538SAndroid Build Coastguard Worker // Copyright 2011 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
5*6777b538SAndroid Build Coastguard Worker // Derived from:
6*6777b538SAndroid Build Coastguard Worker //   mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp
7*6777b538SAndroid Build Coastguard Worker // The license block is:
8*6777b538SAndroid Build Coastguard Worker /* ***** BEGIN LICENSE BLOCK *****
9*6777b538SAndroid Build Coastguard Worker  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
10*6777b538SAndroid Build Coastguard Worker  *
11*6777b538SAndroid Build Coastguard Worker  * The contents of this file are subject to the Mozilla Public License Version
12*6777b538SAndroid Build Coastguard Worker  * 1.1 (the "License"); you may not use this file except in compliance with
13*6777b538SAndroid Build Coastguard Worker  * the License. You may obtain a copy of the License at
14*6777b538SAndroid Build Coastguard Worker  * http://www.mozilla.org/MPL/
15*6777b538SAndroid Build Coastguard Worker  *
16*6777b538SAndroid Build Coastguard Worker  * Software distributed under the License is distributed on an "AS IS" basis,
17*6777b538SAndroid Build Coastguard Worker  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
18*6777b538SAndroid Build Coastguard Worker  * for the specific language governing rights and limitations under the
19*6777b538SAndroid Build Coastguard Worker  * License.
20*6777b538SAndroid Build Coastguard Worker  *
21*6777b538SAndroid Build Coastguard Worker  * The Original Code is Mozilla.
22*6777b538SAndroid Build Coastguard Worker  *
23*6777b538SAndroid Build Coastguard Worker  * The Initial Developer of the Original Code is
24*6777b538SAndroid Build Coastguard Worker  * Netscape Communications.
25*6777b538SAndroid Build Coastguard Worker  * Portions created by the Initial Developer are Copyright (C) 2001
26*6777b538SAndroid Build Coastguard Worker  * the Initial Developer. All Rights Reserved.
27*6777b538SAndroid Build Coastguard Worker  *
28*6777b538SAndroid Build Coastguard Worker  * Contributor(s):
29*6777b538SAndroid Build Coastguard Worker  *   Darin Fisher <darin@netscape.com> (original author)
30*6777b538SAndroid Build Coastguard Worker  *
31*6777b538SAndroid Build Coastguard Worker  * Alternatively, the contents of this file may be used under the terms of
32*6777b538SAndroid Build Coastguard Worker  * either the GNU General Public License Version 2 or later (the "GPL"), or
33*6777b538SAndroid Build Coastguard Worker  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
34*6777b538SAndroid Build Coastguard Worker  * in which case the provisions of the GPL or the LGPL are applicable instead
35*6777b538SAndroid Build Coastguard Worker  * of those above. If you wish to allow use of your version of this file only
36*6777b538SAndroid Build Coastguard Worker  * under the terms of either the GPL or the LGPL, and not to allow others to
37*6777b538SAndroid Build Coastguard Worker  * use your version of this file under the terms of the MPL, indicate your
38*6777b538SAndroid Build Coastguard Worker  * decision by deleting the provisions above and replace them with the notice
39*6777b538SAndroid Build Coastguard Worker  * and other provisions required by the GPL or the LGPL. If you do not delete
40*6777b538SAndroid Build Coastguard Worker  * the provisions above, a recipient may use your version of this file under
41*6777b538SAndroid Build Coastguard Worker  * the terms of any one of the MPL, the GPL or the LGPL.
42*6777b538SAndroid Build Coastguard Worker  *
43*6777b538SAndroid Build Coastguard Worker  * ***** END LICENSE BLOCK ***** */
44*6777b538SAndroid Build Coastguard Worker 
45*6777b538SAndroid Build Coastguard Worker #include "net/http/http_chunked_decoder.h"
46*6777b538SAndroid Build Coastguard Worker 
47*6777b538SAndroid Build Coastguard Worker #include <algorithm>
48*6777b538SAndroid Build Coastguard Worker #include <string_view>
49*6777b538SAndroid Build Coastguard Worker 
50*6777b538SAndroid Build Coastguard Worker #include "base/logging.h"
51*6777b538SAndroid Build Coastguard Worker #include "base/ranges/algorithm.h"
52*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_number_conversions.h"
53*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_util.h"
54*6777b538SAndroid Build Coastguard Worker #include "net/base/net_errors.h"
55*6777b538SAndroid Build Coastguard Worker 
56*6777b538SAndroid Build Coastguard Worker namespace net {
57*6777b538SAndroid Build Coastguard Worker 
58*6777b538SAndroid Build Coastguard Worker // Absurdly long size to avoid imposing a constraint on chunked encoding
59*6777b538SAndroid Build Coastguard Worker // extensions.
60*6777b538SAndroid Build Coastguard Worker const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384;
61*6777b538SAndroid Build Coastguard Worker 
62*6777b538SAndroid Build Coastguard Worker HttpChunkedDecoder::HttpChunkedDecoder() = default;
63*6777b538SAndroid Build Coastguard Worker 
FilterBuf(char * buf,int buf_len)64*6777b538SAndroid Build Coastguard Worker int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) {
65*6777b538SAndroid Build Coastguard Worker   int result = 0;
66*6777b538SAndroid Build Coastguard Worker 
67*6777b538SAndroid Build Coastguard Worker   while (buf_len > 0) {
68*6777b538SAndroid Build Coastguard Worker     if (chunk_remaining_ > 0) {
69*6777b538SAndroid Build Coastguard Worker       // Since |chunk_remaining_| is positive and |buf_len| an int, the minimum
70*6777b538SAndroid Build Coastguard Worker       // of the two must be an int.
71*6777b538SAndroid Build Coastguard Worker       int num = static_cast<int>(
72*6777b538SAndroid Build Coastguard Worker           std::min(chunk_remaining_, static_cast<int64_t>(buf_len)));
73*6777b538SAndroid Build Coastguard Worker 
74*6777b538SAndroid Build Coastguard Worker       buf_len -= num;
75*6777b538SAndroid Build Coastguard Worker       chunk_remaining_ -= num;
76*6777b538SAndroid Build Coastguard Worker 
77*6777b538SAndroid Build Coastguard Worker       result += num;
78*6777b538SAndroid Build Coastguard Worker       buf += num;
79*6777b538SAndroid Build Coastguard Worker 
80*6777b538SAndroid Build Coastguard Worker       // After each chunk's data there should be a CRLF.
81*6777b538SAndroid Build Coastguard Worker       if (chunk_remaining_ == 0)
82*6777b538SAndroid Build Coastguard Worker         chunk_terminator_remaining_ = true;
83*6777b538SAndroid Build Coastguard Worker       continue;
84*6777b538SAndroid Build Coastguard Worker     } else if (reached_eof_) {
85*6777b538SAndroid Build Coastguard Worker       bytes_after_eof_ += buf_len;
86*6777b538SAndroid Build Coastguard Worker       break;  // Done!
87*6777b538SAndroid Build Coastguard Worker     }
88*6777b538SAndroid Build Coastguard Worker 
89*6777b538SAndroid Build Coastguard Worker     int bytes_consumed = ScanForChunkRemaining(buf, buf_len);
90*6777b538SAndroid Build Coastguard Worker     if (bytes_consumed < 0)
91*6777b538SAndroid Build Coastguard Worker       return bytes_consumed; // Error
92*6777b538SAndroid Build Coastguard Worker 
93*6777b538SAndroid Build Coastguard Worker     buf_len -= bytes_consumed;
94*6777b538SAndroid Build Coastguard Worker     if (buf_len > 0)
95*6777b538SAndroid Build Coastguard Worker       memmove(buf, buf + bytes_consumed, buf_len);
96*6777b538SAndroid Build Coastguard Worker   }
97*6777b538SAndroid Build Coastguard Worker 
98*6777b538SAndroid Build Coastguard Worker   return result;
99*6777b538SAndroid Build Coastguard Worker }
100*6777b538SAndroid Build Coastguard Worker 
ScanForChunkRemaining(const char * buf,int buf_len)101*6777b538SAndroid Build Coastguard Worker int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) {
102*6777b538SAndroid Build Coastguard Worker   DCHECK_EQ(0, chunk_remaining_);
103*6777b538SAndroid Build Coastguard Worker   DCHECK_GT(buf_len, 0);
104*6777b538SAndroid Build Coastguard Worker 
105*6777b538SAndroid Build Coastguard Worker   int bytes_consumed = 0;
106*6777b538SAndroid Build Coastguard Worker 
107*6777b538SAndroid Build Coastguard Worker   size_t index_of_lf = std::string_view(buf, buf_len).find('\n');
108*6777b538SAndroid Build Coastguard Worker   if (index_of_lf != std::string_view::npos) {
109*6777b538SAndroid Build Coastguard Worker     buf_len = static_cast<int>(index_of_lf);
110*6777b538SAndroid Build Coastguard Worker     if (buf_len && buf[buf_len - 1] == '\r')  // Eliminate a preceding CR.
111*6777b538SAndroid Build Coastguard Worker       buf_len--;
112*6777b538SAndroid Build Coastguard Worker     bytes_consumed = static_cast<int>(index_of_lf) + 1;
113*6777b538SAndroid Build Coastguard Worker 
114*6777b538SAndroid Build Coastguard Worker     // Make buf point to the full line buffer to parse.
115*6777b538SAndroid Build Coastguard Worker     if (!line_buf_.empty()) {
116*6777b538SAndroid Build Coastguard Worker       line_buf_.append(buf, buf_len);
117*6777b538SAndroid Build Coastguard Worker       buf = line_buf_.data();
118*6777b538SAndroid Build Coastguard Worker       buf_len = static_cast<int>(line_buf_.size());
119*6777b538SAndroid Build Coastguard Worker     }
120*6777b538SAndroid Build Coastguard Worker 
121*6777b538SAndroid Build Coastguard Worker     if (reached_last_chunk_) {
122*6777b538SAndroid Build Coastguard Worker       if (buf_len > 0)
123*6777b538SAndroid Build Coastguard Worker         DVLOG(1) << "ignoring http trailer";
124*6777b538SAndroid Build Coastguard Worker       else
125*6777b538SAndroid Build Coastguard Worker         reached_eof_ = true;
126*6777b538SAndroid Build Coastguard Worker     } else if (chunk_terminator_remaining_) {
127*6777b538SAndroid Build Coastguard Worker       if (buf_len > 0) {
128*6777b538SAndroid Build Coastguard Worker         DLOG(ERROR) << "chunk data not terminated properly";
129*6777b538SAndroid Build Coastguard Worker         return ERR_INVALID_CHUNKED_ENCODING;
130*6777b538SAndroid Build Coastguard Worker       }
131*6777b538SAndroid Build Coastguard Worker       chunk_terminator_remaining_ = false;
132*6777b538SAndroid Build Coastguard Worker     } else if (buf_len > 0) {
133*6777b538SAndroid Build Coastguard Worker       // Ignore any chunk-extensions.
134*6777b538SAndroid Build Coastguard Worker       size_t index_of_semicolon = std::string_view(buf, buf_len).find(';');
135*6777b538SAndroid Build Coastguard Worker       if (index_of_semicolon != std::string_view::npos) {
136*6777b538SAndroid Build Coastguard Worker         buf_len = static_cast<int>(index_of_semicolon);
137*6777b538SAndroid Build Coastguard Worker       }
138*6777b538SAndroid Build Coastguard Worker 
139*6777b538SAndroid Build Coastguard Worker       if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) {
140*6777b538SAndroid Build Coastguard Worker         DLOG(ERROR) << "Failed parsing HEX from: " <<
141*6777b538SAndroid Build Coastguard Worker             std::string(buf, buf_len);
142*6777b538SAndroid Build Coastguard Worker         return ERR_INVALID_CHUNKED_ENCODING;
143*6777b538SAndroid Build Coastguard Worker       }
144*6777b538SAndroid Build Coastguard Worker 
145*6777b538SAndroid Build Coastguard Worker       if (chunk_remaining_ == 0)
146*6777b538SAndroid Build Coastguard Worker         reached_last_chunk_ = true;
147*6777b538SAndroid Build Coastguard Worker     } else {
148*6777b538SAndroid Build Coastguard Worker       DLOG(ERROR) << "missing chunk-size";
149*6777b538SAndroid Build Coastguard Worker       return ERR_INVALID_CHUNKED_ENCODING;
150*6777b538SAndroid Build Coastguard Worker     }
151*6777b538SAndroid Build Coastguard Worker     line_buf_.clear();
152*6777b538SAndroid Build Coastguard Worker   } else {
153*6777b538SAndroid Build Coastguard Worker     // Save the partial line; wait for more data.
154*6777b538SAndroid Build Coastguard Worker     bytes_consumed = buf_len;
155*6777b538SAndroid Build Coastguard Worker 
156*6777b538SAndroid Build Coastguard Worker     // Ignore a trailing CR
157*6777b538SAndroid Build Coastguard Worker     if (buf[buf_len - 1] == '\r')
158*6777b538SAndroid Build Coastguard Worker       buf_len--;
159*6777b538SAndroid Build Coastguard Worker 
160*6777b538SAndroid Build Coastguard Worker     if (line_buf_.length() + buf_len > kMaxLineBufLen) {
161*6777b538SAndroid Build Coastguard Worker       DLOG(ERROR) << "Chunked line length too long";
162*6777b538SAndroid Build Coastguard Worker       return ERR_INVALID_CHUNKED_ENCODING;
163*6777b538SAndroid Build Coastguard Worker     }
164*6777b538SAndroid Build Coastguard Worker 
165*6777b538SAndroid Build Coastguard Worker     line_buf_.append(buf, buf_len);
166*6777b538SAndroid Build Coastguard Worker   }
167*6777b538SAndroid Build Coastguard Worker   return bytes_consumed;
168*6777b538SAndroid Build Coastguard Worker }
169*6777b538SAndroid Build Coastguard Worker 
170*6777b538SAndroid Build Coastguard Worker 
171*6777b538SAndroid Build Coastguard Worker // While the HTTP 1.1 specification defines chunk-size as 1*HEX
172*6777b538SAndroid Build Coastguard Worker // some sites rely on more lenient parsing.
173*6777b538SAndroid Build Coastguard Worker // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces
174*6777b538SAndroid Build Coastguard Worker // (0x20) to be 7 characters long, such as "819b   ".
175*6777b538SAndroid Build Coastguard Worker //
176*6777b538SAndroid Build Coastguard Worker // A comparison of browsers running on WindowsXP shows that
177*6777b538SAndroid Build Coastguard Worker // they will parse the following inputs (egrep syntax):
178*6777b538SAndroid Build Coastguard Worker //
179*6777b538SAndroid Build Coastguard Worker // Let \X be the character class for a hex digit: [0-9a-fA-F]
180*6777b538SAndroid Build Coastguard Worker //
181*6777b538SAndroid Build Coastguard Worker //   RFC 7230: ^\X+$
182*6777b538SAndroid Build Coastguard Worker //        IE7: ^\X+[^\X]*$
183*6777b538SAndroid Build Coastguard Worker // Safari 3.1: ^[\t\r ]*\X+[\t ]*$
184*6777b538SAndroid Build Coastguard Worker //  Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$
185*6777b538SAndroid Build Coastguard Worker // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$
186*6777b538SAndroid Build Coastguard Worker //
187*6777b538SAndroid Build Coastguard Worker // Our strategy is to be as strict as possible, while not breaking
188*6777b538SAndroid Build Coastguard Worker // known sites.
189*6777b538SAndroid Build Coastguard Worker //
190*6777b538SAndroid Build Coastguard Worker //         Us: ^\X+[ ]*$
ParseChunkSize(const char * start,int len,int64_t * out)191*6777b538SAndroid Build Coastguard Worker bool HttpChunkedDecoder::ParseChunkSize(const char* start,
192*6777b538SAndroid Build Coastguard Worker                                         int len,
193*6777b538SAndroid Build Coastguard Worker                                         int64_t* out) {
194*6777b538SAndroid Build Coastguard Worker   DCHECK_GE(len, 0);
195*6777b538SAndroid Build Coastguard Worker 
196*6777b538SAndroid Build Coastguard Worker   // Strip trailing spaces
197*6777b538SAndroid Build Coastguard Worker   while (len > 0 && start[len - 1] == ' ')
198*6777b538SAndroid Build Coastguard Worker     len--;
199*6777b538SAndroid Build Coastguard Worker 
200*6777b538SAndroid Build Coastguard Worker   // Be more restrictive than HexStringToInt64;
201*6777b538SAndroid Build Coastguard Worker   // don't allow inputs with leading "-", "+", "0x", "0X"
202*6777b538SAndroid Build Coastguard Worker   std::string_view chunk_size(start, len);
203*6777b538SAndroid Build Coastguard Worker   if (!base::ranges::all_of(chunk_size, base::IsHexDigit<char>)) {
204*6777b538SAndroid Build Coastguard Worker     return false;
205*6777b538SAndroid Build Coastguard Worker   }
206*6777b538SAndroid Build Coastguard Worker 
207*6777b538SAndroid Build Coastguard Worker   int64_t parsed_number;
208*6777b538SAndroid Build Coastguard Worker   bool ok = base::HexStringToInt64(chunk_size, &parsed_number);
209*6777b538SAndroid Build Coastguard Worker   if (ok && parsed_number >= 0) {
210*6777b538SAndroid Build Coastguard Worker     *out = parsed_number;
211*6777b538SAndroid Build Coastguard Worker     return true;
212*6777b538SAndroid Build Coastguard Worker   }
213*6777b538SAndroid Build Coastguard Worker   return false;
214*6777b538SAndroid Build Coastguard Worker }
215*6777b538SAndroid Build Coastguard Worker 
216*6777b538SAndroid Build Coastguard Worker }  // namespace net
217