1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // The rules for header parsing were borrowed from Firefox:
6 // http://lxr.mozilla.org/seamonkey/source/netwerk/protocol/http/src/nsHttpResponseHead.cpp
7 // The rules for parsing content-types were also borrowed from Firefox:
8 // http://lxr.mozilla.org/mozilla/source/netwerk/base/src/nsURLHelper.cpp#834
9
10 #include "net/http/http_response_headers.h"
11
12 #include <algorithm>
13 #include <limits>
14 #include <memory>
15 #include <string_view>
16 #include <utility>
17
18 #include "base/format_macros.h"
19 #include "base/logging.h"
20 #include "base/metrics/histogram_macros.h"
21 #include "base/pickle.h"
22 #include "base/ranges/algorithm.h"
23 #include "base/strings/escape.h"
24 #include "base/strings/strcat.h"
25 #include "base/strings/string_number_conversions.h"
26 #include "base/strings/string_util.h"
27 #include "base/strings/stringprintf.h"
28 #include "base/time/time.h"
29 #include "base/values.h"
30 #include "net/base/features.h"
31 #include "net/base/parse_number.h"
32 #include "net/base/tracing.h"
33 #include "net/http/http_byte_range.h"
34 #include "net/http/http_log_util.h"
35 #include "net/http/http_status_code.h"
36 #include "net/http/http_util.h"
37 #include "net/log/net_log_capture_mode.h"
38 #include "net/log/net_log_values.h"
39
40 using base::Time;
41
42 namespace net {
43
44 //-----------------------------------------------------------------------------
45
46 namespace {
47
// Hop-by-hop response headers (RFC 2616). Caches must not store them.
constexpr const char* const kHopByHopResponseHeaders[] = {
    "connection",
    "proxy-connection",
    "keep-alive",
    "trailer",
    "transfer-encoding",
    "upgrade",
};
58
// Authentication challenge response headers. Caches must not store them.
constexpr const char* const kChallengeResponseHeaders[] = {
    "www-authenticate",
    "proxy-authenticate",
};
65
// Cookie-related response headers. They must neither be stored by caches nor
// disclosed otherwise.
constexpr const char* const kCookieResponseHeaders[] = {
    "set-cookie",
    "set-cookie2",
    "clear-site-data",
};
73
// Strict-Transport-Security is not cached by default. The header is defined to
// be valid only on live, error-free HTTPS connections, so replaying it from
// the cache on a later page load would erroneously re-process it.
constexpr const char* const kSecurityStateHeaders[] = {
    "strict-transport-security",
};
80
// Response headers that are never copied from a 304/206 response into the
// cached response headers. The list is based on Mozilla's
// nsHttpResponseHead.cpp.
constexpr const char* const kNonUpdatedHeaders[] = {
    "connection",
    "proxy-connection",
    "keep-alive",
    "www-authenticate",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
    "content-location",
    "content-md5",
    "etag",
    "content-encoding",
    "content-range",
    "content-type",
    "content-length",
    "x-frame-options",
    "x-xss-protection",
};
104
// Header-name prefixes meaning "do not copy this header from a 304 response".
// Matching by prefix avoids having to enumerate every affected header.
constexpr const char* const kNonUpdatedHeaderPrefixes[] = {
    "x-content-",
    "x-webkit-",
};
112
ShouldUpdateHeader(std::string_view name)113 bool ShouldUpdateHeader(std::string_view name) {
114 for (const auto* header : kNonUpdatedHeaders) {
115 if (base::EqualsCaseInsensitiveASCII(name, header))
116 return false;
117 }
118 for (const auto* prefix : kNonUpdatedHeaderPrefixes) {
119 if (base::StartsWith(name, prefix, base::CompareCase::INSENSITIVE_ASCII))
120 return false;
121 }
122 return true;
123 }
124
// Reports whether `str` contains at least one NUL ('\0') character.
bool HasEmbeddedNulls(std::string_view str) {
  return std::find(str.begin(), str.end(), '\0') != str.end();
}
128
CheckDoesNotHaveEmbeddedNulls(std::string_view str)129 void CheckDoesNotHaveEmbeddedNulls(std::string_view str) {
130 // Care needs to be taken when adding values to the raw headers string to
131 // make sure it does not contain embeded NULLs. Any embeded '\0' may be
132 // understood as line terminators and change how header lines get tokenized.
133 CHECK(!HasEmbeddedNulls(str));
134 }
135
// Strips every leading ' ' character from `*s` in place. Only the space
// character is removed; tabs and other whitespace are left alone.
void RemoveLeadingSpaces(std::string_view* s) {
  const size_t first_kept = s->find_first_not_of(' ');
  if (first_kept == std::string_view::npos) {
    // Empty, or nothing but spaces: everything goes.
    s->remove_prefix(s->size());
    return;
  }
  s->remove_prefix(first_kept);
}
139
140 // Parses `status` for response code and status text. Returns the response code,
141 // and appends the response code and trimmed status text preceded by a space to
142 // `append_to`. For example, given the input " 404 Not found " would return 404
143 // and append " 404 Not found" to `append_to`. The odd calling convention is
144 // necessary to avoid extra copies in the implementation of
145 // HttpResponseHeaders::ParseStatusLine().
ParseStatus(std::string_view status,std::string & append_to)146 int ParseStatus(std::string_view status, std::string& append_to) {
147 // Skip whitespace. Tabs are not skipped, for backwards compatibility.
148 RemoveLeadingSpaces(&status);
149
150 auto first_non_digit = std::ranges::find_if(
151 status, [](char c) { return !base::IsAsciiDigit(c); });
152
153 if (first_non_digit == status.begin()) {
154 DVLOG(1) << "missing response status number; assuming 200";
155 append_to.append(" 200");
156 return net::HTTP_OK;
157 }
158
159 append_to.push_back(' ');
160 append_to.append(status.begin(), first_non_digit);
161 int response_code = -1;
162 // For backwards compatibility, overlarge response codes are permitted.
163 // base::StringToInt will clamp the value to INT_MAX.
164 base::StringToInt(base::MakeStringPiece(status.begin(), first_non_digit),
165 &response_code);
166 CHECK_GE(response_code, 0);
167
168 status.remove_prefix(first_non_digit - status.begin());
169
170 // Skip whitespace. Tabs are not skipped, as before.
171 RemoveLeadingSpaces(&status);
172
173 // Trim trailing whitespace. Tabs are not trimmed.
174 const size_t last_non_space_pos = status.find_last_not_of(' ');
175 if (last_non_space_pos != std::string_view::npos) {
176 status.remove_suffix(status.size() - last_non_space_pos - 1);
177 }
178
179 if (status.empty()) {
180 return response_code;
181 }
182
183 CheckDoesNotHaveEmbeddedNulls(status);
184
185 append_to.push_back(' ');
186 append_to.append(status);
187 return response_code;
188 }
189
190 } // namespace
191
192 const char HttpResponseHeaders::kContentRange[] = "Content-Range";
193 const char HttpResponseHeaders::kLastModified[] = "Last-Modified";
194 const char HttpResponseHeaders::kVary[] = "Vary";
195
196 struct HttpResponseHeaders::ParsedHeader {
197 // A header "continuation" contains only a subsequent value for the
198 // preceding header. (Header values are comma separated.)
is_continuationnet::HttpResponseHeaders::ParsedHeader199 bool is_continuation() const { return name_begin == name_end; }
200
201 std::string::const_iterator name_begin;
202 std::string::const_iterator name_end;
203 std::string::const_iterator value_begin;
204 std::string::const_iterator value_end;
205
206 // Write a representation of this object into a tracing proto.
WriteIntoTracenet::HttpResponseHeaders::ParsedHeader207 void WriteIntoTrace(perfetto::TracedValue context) const {
208 auto dict = std::move(context).WriteDictionary();
209 dict.Add("name", base::MakeStringPiece(name_begin, name_end));
210 dict.Add("value", base::MakeStringPiece(value_begin, value_end));
211 }
212 };
213
214 //-----------------------------------------------------------------------------
215
Builder(HttpVersion version,std::string_view status)216 HttpResponseHeaders::Builder::Builder(HttpVersion version,
217 std::string_view status)
218 : version_(version), status_(status) {
219 DCHECK(version == HttpVersion(1, 0) || version == HttpVersion(1, 1) ||
220 version == HttpVersion(2, 0));
221 }
222
223 HttpResponseHeaders::Builder::~Builder() = default;
224
Build()225 scoped_refptr<HttpResponseHeaders> HttpResponseHeaders::Builder::Build() {
226 return base::MakeRefCounted<HttpResponseHeaders>(BuilderPassKey(), version_,
227 status_, headers_);
228 }
229
HttpResponseHeaders(const std::string & raw_input)230 HttpResponseHeaders::HttpResponseHeaders(const std::string& raw_input)
231 : response_code_(-1) {
232 Parse(raw_input);
233
234 // The most important thing to do with this histogram is find out
235 // the existence of unusual HTTP status codes. As it happens
236 // right now, there aren't double-constructions of response headers
237 // using this constructor, so our counts should also be accurate,
238 // without instantiating the histogram in two places. It is also
239 // important that this histogram not collect data in the other
240 // constructor, which rebuilds an histogram from a pickle, since
241 // that would actually create a double call between the original
242 // HttpResponseHeader that was serialized, and initialization of the
243 // new object from that pickle.
244 UMA_HISTOGRAM_CUSTOM_ENUMERATION(
245 "Net.HttpResponseCode",
246 HttpUtil::MapStatusCodeForHistogram(response_code_),
247 // Note the third argument is only
248 // evaluated once, see macro
249 // definition for details.
250 HttpUtil::GetStatusCodesForHistogram());
251 }
252
HttpResponseHeaders(base::PickleIterator * iter)253 HttpResponseHeaders::HttpResponseHeaders(base::PickleIterator* iter)
254 : response_code_(-1) {
255 std::string raw_input;
256 if (iter->ReadString(&raw_input))
257 Parse(raw_input);
258 }
259
HttpResponseHeaders(BuilderPassKey,HttpVersion version,std::string_view status,base::span<const std::pair<std::string_view,std::string_view>> headers)260 HttpResponseHeaders::HttpResponseHeaders(
261 BuilderPassKey,
262 HttpVersion version,
263 std::string_view status,
264 base::span<const std::pair<std::string_view, std::string_view>> headers)
265 : http_version_(version) {
266 // This must match the behaviour of Parse(). We don't use Parse() because
267 // avoiding the overhead of parsing is the point of this constructor.
268
269 std::string formatted_status;
270 formatted_status.reserve(status.size() + 1); // ParseStatus() may add a space
271 response_code_ = ParseStatus(status, formatted_status);
272
273 // First calculate how big the output will be so that we can allocate the
274 // right amount of memory.
275 size_t expected_size = 8; // "HTTP/x.x"
276 expected_size += formatted_status.size();
277 expected_size += 1; // "\0"
278 size_t expected_parsed_size = 0;
279
280 // Track which headers (by index) have a comma in the value. Since bools are
281 // only 1 byte, we can afford to put 100 of them on the stack and avoid
282 // allocating more memory 99.9% of the time.
283 absl::InlinedVector<bool, 100> header_contains_comma;
284 for (const auto& [key, value] : headers) {
285 expected_size += key.size();
286 expected_size += 1; // ":"
287 expected_size += value.size();
288 expected_size += 1; // "\0"
289 // It's okay if we over-estimate the size of `parsed_`, so treat all ','
290 // characters as if they might split the value to avoid parsing the value
291 // carefully here.
292 const size_t comma_count = base::ranges::count(value, ',') + 1;
293 expected_parsed_size += comma_count;
294 header_contains_comma.push_back(comma_count);
295 }
296 expected_size += 1; // "\0"
297 raw_headers_.reserve(expected_size);
298 parsed_.reserve(expected_parsed_size);
299
300 // Now fill in the output.
301 const uint16_t major = version.major_value();
302 const uint16_t minor = version.minor_value();
303 CHECK_LE(major, 9);
304 CHECK_LE(minor, 9);
305 raw_headers_.append("HTTP/");
306 raw_headers_.push_back('0' + major);
307 raw_headers_.push_back('.');
308 raw_headers_.push_back('0' + minor);
309 raw_headers_.append(formatted_status);
310 raw_headers_.push_back('\0');
311 // It is vital that `raw_headers_` iterators are not invalidated after this
312 // point.
313 const char* const data_at_start = raw_headers_.data();
314 size_t index = 0;
315 for (const auto& [key, value] : headers) {
316 CheckDoesNotHaveEmbeddedNulls(key);
317 CheckDoesNotHaveEmbeddedNulls(value);
318 // Because std::string iterators are random-access, end() has to point to
319 // the position where the next character will be appended.
320 const auto name_begin = raw_headers_.cend();
321 raw_headers_.append(key);
322 const auto name_end = raw_headers_.cend();
323 raw_headers_.push_back(':');
324 auto values_begin = raw_headers_.cend();
325 raw_headers_.append(value);
326 auto values_end = raw_headers_.cend();
327 raw_headers_.push_back('\0');
328 // The HTTP/2 standard disallows header values starting or ending with
329 // whitespace (RFC 9113 8.2.1). Hopefully the same is also true of HTTP/3.
330 // TODO(https://crbug.com/1485670): Validate that our implementations
331 // actually enforce this constraint and change this TrimLWS() to a DCHECK.
332 HttpUtil::TrimLWS(&values_begin, &values_end);
333 AddHeader(name_begin, name_end, values_begin, values_end,
334 header_contains_comma[index] ? ContainsCommas::kYes
335 : ContainsCommas::kNo);
336 ++index;
337 }
338 raw_headers_.push_back('\0');
339 CHECK_EQ(expected_size, raw_headers_.size());
340 CHECK_EQ(data_at_start, raw_headers_.data());
341 DCHECK_LE(parsed_.size(), expected_parsed_size);
342
343 DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 2]);
344 DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 1]);
345 }
346
TryToCreate(std::string_view headers)347 scoped_refptr<HttpResponseHeaders> HttpResponseHeaders::TryToCreate(
348 std::string_view headers) {
349 // Reject strings with nulls.
350 if (HasEmbeddedNulls(headers) ||
351 headers.size() > std::numeric_limits<int>::max()) {
352 return nullptr;
353 }
354
355 return base::MakeRefCounted<HttpResponseHeaders>(
356 HttpUtil::AssembleRawHeaders(headers));
357 }
358
Persist(base::Pickle * pickle,PersistOptions options)359 void HttpResponseHeaders::Persist(base::Pickle* pickle,
360 PersistOptions options) {
361 if (options == PERSIST_RAW) {
362 pickle->WriteString(raw_headers_);
363 return; // Done.
364 }
365
366 HeaderSet filter_headers;
367
368 // Construct set of headers to filter out based on options.
369 if ((options & PERSIST_SANS_NON_CACHEABLE) == PERSIST_SANS_NON_CACHEABLE)
370 AddNonCacheableHeaders(&filter_headers);
371
372 if ((options & PERSIST_SANS_COOKIES) == PERSIST_SANS_COOKIES)
373 AddCookieHeaders(&filter_headers);
374
375 if ((options & PERSIST_SANS_CHALLENGES) == PERSIST_SANS_CHALLENGES)
376 AddChallengeHeaders(&filter_headers);
377
378 if ((options & PERSIST_SANS_HOP_BY_HOP) == PERSIST_SANS_HOP_BY_HOP)
379 AddHopByHopHeaders(&filter_headers);
380
381 if ((options & PERSIST_SANS_RANGES) == PERSIST_SANS_RANGES)
382 AddHopContentRangeHeaders(&filter_headers);
383
384 if ((options & PERSIST_SANS_SECURITY_STATE) == PERSIST_SANS_SECURITY_STATE)
385 AddSecurityStateHeaders(&filter_headers);
386
387 std::string blob;
388 blob.reserve(raw_headers_.size());
389
390 // This copies the status line w/ terminator null.
391 // Note raw_headers_ has embedded nulls instead of \n,
392 // so this just copies the first header line.
393 blob.assign(raw_headers_.c_str(), strlen(raw_headers_.c_str()) + 1);
394
395 for (size_t i = 0; i < parsed_.size(); ++i) {
396 DCHECK(!parsed_[i].is_continuation());
397
398 // Locate the start of the next header.
399 size_t k = i;
400 while (++k < parsed_.size() && parsed_[k].is_continuation()) {}
401 --k;
402
403 std::string header_name = base::ToLowerASCII(
404 base::MakeStringPiece(parsed_[i].name_begin, parsed_[i].name_end));
405 if (filter_headers.find(header_name) == filter_headers.end()) {
406 // Make sure there is a null after the value.
407 blob.append(parsed_[i].name_begin, parsed_[k].value_end);
408 blob.push_back('\0');
409 }
410
411 i = k;
412 }
413 blob.push_back('\0');
414
415 pickle->WriteString(blob);
416 }
417
Update(const HttpResponseHeaders & new_headers)418 void HttpResponseHeaders::Update(const HttpResponseHeaders& new_headers) {
419 DCHECK(new_headers.response_code() == net::HTTP_NOT_MODIFIED ||
420 new_headers.response_code() == net::HTTP_PARTIAL_CONTENT);
421
422 // Copy up to the null byte. This just copies the status line.
423 std::string new_raw_headers(raw_headers_.c_str());
424 new_raw_headers.push_back('\0');
425
426 HeaderSet updated_headers;
427
428 // NOTE: we write the new headers then the old headers for convenience. The
429 // order should not matter.
430
431 // Figure out which headers we want to take from new_headers:
432 for (size_t i = 0; i < new_headers.parsed_.size(); ++i) {
433 const HeaderList& new_parsed = new_headers.parsed_;
434
435 DCHECK(!new_parsed[i].is_continuation());
436
437 // Locate the start of the next header.
438 size_t k = i;
439 while (++k < new_parsed.size() && new_parsed[k].is_continuation()) {}
440 --k;
441
442 auto name =
443 base::MakeStringPiece(new_parsed[i].name_begin, new_parsed[i].name_end);
444 if (ShouldUpdateHeader(name)) {
445 std::string name_lower = base::ToLowerASCII(name);
446 updated_headers.insert(name_lower);
447
448 // Preserve this header line in the merged result, making sure there is
449 // a null after the value.
450 new_raw_headers.append(new_parsed[i].name_begin, new_parsed[k].value_end);
451 new_raw_headers.push_back('\0');
452 }
453
454 i = k;
455 }
456
457 // Now, build the new raw headers.
458 MergeWithHeaders(std::move(new_raw_headers), updated_headers);
459 }
460
MergeWithHeaders(std::string raw_headers,const HeaderSet & headers_to_remove)461 void HttpResponseHeaders::MergeWithHeaders(std::string raw_headers,
462 const HeaderSet& headers_to_remove) {
463 for (size_t i = 0; i < parsed_.size(); ++i) {
464 DCHECK(!parsed_[i].is_continuation());
465
466 // Locate the start of the next header.
467 size_t k = i;
468 while (++k < parsed_.size() && parsed_[k].is_continuation()) {}
469 --k;
470
471 std::string name = base::ToLowerASCII(
472 base::MakeStringPiece(parsed_[i].name_begin, parsed_[i].name_end));
473 if (headers_to_remove.find(name) == headers_to_remove.end()) {
474 // It's ok to preserve this header in the final result.
475 raw_headers.append(parsed_[i].name_begin, parsed_[k].value_end);
476 raw_headers.push_back('\0');
477 }
478
479 i = k;
480 }
481 raw_headers.push_back('\0');
482
483 // Make this object hold the new data.
484 raw_headers_.clear();
485 parsed_.clear();
486 Parse(raw_headers);
487 }
488
RemoveHeader(std::string_view name)489 void HttpResponseHeaders::RemoveHeader(std::string_view name) {
490 // Copy up to the null byte. This just copies the status line.
491 std::string new_raw_headers(raw_headers_.c_str());
492 new_raw_headers.push_back('\0');
493
494 HeaderSet to_remove;
495 to_remove.insert(base::ToLowerASCII(name));
496 MergeWithHeaders(std::move(new_raw_headers), to_remove);
497 }
498
RemoveHeaders(const std::unordered_set<std::string> & header_names)499 void HttpResponseHeaders::RemoveHeaders(
500 const std::unordered_set<std::string>& header_names) {
501 // Copy up to the null byte. This just copies the status line.
502 std::string new_raw_headers(raw_headers_.c_str());
503 new_raw_headers.push_back('\0');
504
505 HeaderSet to_remove;
506 for (const auto& header_name : header_names) {
507 to_remove.insert(base::ToLowerASCII(header_name));
508 }
509 MergeWithHeaders(std::move(new_raw_headers), to_remove);
510 }
511
RemoveHeaderLine(const std::string & name,const std::string & value)512 void HttpResponseHeaders::RemoveHeaderLine(const std::string& name,
513 const std::string& value) {
514 std::string name_lowercase = base::ToLowerASCII(name);
515
516 std::string new_raw_headers(GetStatusLine());
517 new_raw_headers.push_back('\0');
518
519 new_raw_headers.reserve(raw_headers_.size());
520
521 size_t iter = 0;
522 std::string old_header_name;
523 std::string old_header_value;
524 while (EnumerateHeaderLines(&iter, &old_header_name, &old_header_value)) {
525 std::string old_header_name_lowercase = base::ToLowerASCII(old_header_name);
526 if (name_lowercase == old_header_name_lowercase &&
527 value == old_header_value)
528 continue;
529
530 new_raw_headers.append(old_header_name);
531 new_raw_headers.push_back(':');
532 new_raw_headers.push_back(' ');
533 new_raw_headers.append(old_header_value);
534 new_raw_headers.push_back('\0');
535 }
536 new_raw_headers.push_back('\0');
537
538 // Make this object hold the new data.
539 raw_headers_.clear();
540 parsed_.clear();
541 Parse(new_raw_headers);
542 }
543
AddHeader(std::string_view name,std::string_view value)544 void HttpResponseHeaders::AddHeader(std::string_view name,
545 std::string_view value) {
546 DCHECK(HttpUtil::IsValidHeaderName(name));
547 DCHECK(HttpUtil::IsValidHeaderValue(value));
548
549 // Don't copy the last null.
550 std::string new_raw_headers(raw_headers_, 0, raw_headers_.size() - 1);
551 new_raw_headers.append(name.begin(), name.end());
552 new_raw_headers.append(": ");
553 new_raw_headers.append(value.begin(), value.end());
554 new_raw_headers.push_back('\0');
555 new_raw_headers.push_back('\0');
556
557 // Make this object hold the new data.
558 raw_headers_.clear();
559 parsed_.clear();
560 Parse(new_raw_headers);
561 }
562
SetHeader(std::string_view name,std::string_view value)563 void HttpResponseHeaders::SetHeader(std::string_view name,
564 std::string_view value) {
565 RemoveHeader(name);
566 AddHeader(name, value);
567 }
568
AddCookie(const std::string & cookie_string)569 void HttpResponseHeaders::AddCookie(const std::string& cookie_string) {
570 AddHeader("Set-Cookie", cookie_string);
571 }
572
ReplaceStatusLine(const std::string & new_status)573 void HttpResponseHeaders::ReplaceStatusLine(const std::string& new_status) {
574 CheckDoesNotHaveEmbeddedNulls(new_status);
575 // Copy up to the null byte. This just copies the status line.
576 std::string new_raw_headers(new_status);
577 new_raw_headers.push_back('\0');
578
579 HeaderSet empty_to_remove;
580 MergeWithHeaders(std::move(new_raw_headers), empty_to_remove);
581 }
582
UpdateWithNewRange(const HttpByteRange & byte_range,int64_t resource_size,bool replace_status_line)583 void HttpResponseHeaders::UpdateWithNewRange(const HttpByteRange& byte_range,
584 int64_t resource_size,
585 bool replace_status_line) {
586 DCHECK(byte_range.IsValid());
587 DCHECK(byte_range.HasFirstBytePosition());
588 DCHECK(byte_range.HasLastBytePosition());
589
590 const char kLengthHeader[] = "Content-Length";
591 const char kRangeHeader[] = "Content-Range";
592
593 RemoveHeader(kLengthHeader);
594 RemoveHeader(kRangeHeader);
595
596 int64_t start = byte_range.first_byte_position();
597 int64_t end = byte_range.last_byte_position();
598 int64_t range_len = end - start + 1;
599
600 if (replace_status_line)
601 ReplaceStatusLine("HTTP/1.1 206 Partial Content");
602
603 AddHeader(kRangeHeader,
604 base::StringPrintf("bytes %" PRId64 "-%" PRId64 "/%" PRId64, start,
605 end, resource_size));
606 AddHeader(kLengthHeader, base::StringPrintf("%" PRId64, range_len));
607 }
608
Parse(const std::string & raw_input)609 void HttpResponseHeaders::Parse(const std::string& raw_input) {
610 raw_headers_.reserve(raw_input.size());
611 // TODO(https://crbug.com/1470137): Call reserve() on `parsed_` with an
612 // appropriate value.
613
614 // ParseStatusLine adds a normalized status line to raw_headers_
615 std::string::const_iterator line_begin = raw_input.begin();
616 std::string::const_iterator line_end = base::ranges::find(raw_input, '\0');
617 // has_headers = true, if there is any data following the status line.
618 // Used by ParseStatusLine() to decide if a HTTP/0.9 is really a HTTP/1.0.
619 bool has_headers =
620 (line_end != raw_input.end() && (line_end + 1) != raw_input.end() &&
621 *(line_end + 1) != '\0');
622 ParseStatusLine(line_begin, line_end, has_headers);
623 raw_headers_.push_back('\0'); // Terminate status line with a null.
624
625 if (line_end == raw_input.end()) {
626 raw_headers_.push_back('\0'); // Ensure the headers end with a double null.
627
628 DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 2]);
629 DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 1]);
630 return;
631 }
632
633 // Including a terminating null byte.
634 size_t status_line_len = raw_headers_.size();
635
636 // Now, we add the rest of the raw headers to raw_headers_, and begin parsing
637 // it (to populate our parsed_ vector).
638 raw_headers_.append(line_end + 1, raw_input.end());
639
640 // Ensure the headers end with a double null.
641 while (raw_headers_.size() < 2 ||
642 raw_headers_[raw_headers_.size() - 2] != '\0' ||
643 raw_headers_[raw_headers_.size() - 1] != '\0') {
644 raw_headers_.push_back('\0');
645 }
646
647 // Adjust to point at the null byte following the status line
648 line_end = raw_headers_.begin() + status_line_len - 1;
649
650 HttpUtil::HeadersIterator headers(line_end + 1, raw_headers_.end(),
651 std::string(1, '\0'));
652 while (headers.GetNext()) {
653 AddHeader(headers.name_begin(), headers.name_end(), headers.values_begin(),
654 headers.values_end(), ContainsCommas::kMaybe);
655 }
656
657 DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 2]);
658 DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 1]);
659 }
660
GetNormalizedHeader(std::string_view name,std::string * value) const661 bool HttpResponseHeaders::GetNormalizedHeader(std::string_view name,
662 std::string* value) const {
663 // If you hit this assertion, please use EnumerateHeader instead!
664 DCHECK(!HttpUtil::IsNonCoalescingHeader(name));
665
666 value->clear();
667
668 bool found = false;
669 size_t i = 0;
670 while (i < parsed_.size()) {
671 i = FindHeader(i, name);
672 if (i == std::string::npos)
673 break;
674
675 if (found)
676 value->append(", ");
677
678 found = true;
679
680 std::string::const_iterator value_begin = parsed_[i].value_begin;
681 std::string::const_iterator value_end = parsed_[i].value_end;
682 while (++i < parsed_.size() && parsed_[i].is_continuation())
683 value_end = parsed_[i].value_end;
684 value->append(value_begin, value_end);
685 }
686
687 return found;
688 }
689
GetStatusLine() const690 std::string HttpResponseHeaders::GetStatusLine() const {
691 // copy up to the null byte.
692 return std::string(raw_headers_.c_str());
693 }
694
GetStatusText() const695 std::string HttpResponseHeaders::GetStatusText() const {
696 // GetStatusLine() is already normalized, so it has the format:
697 // '<http_version> SP <response_code>' or
698 // '<http_version> SP <response_code> SP <status_text>'.
699 std::string status_text = GetStatusLine();
700 // Seek to beginning of <response_code>.
701 std::string::const_iterator begin = base::ranges::find(status_text, ' ');
702 std::string::const_iterator end = status_text.end();
703 CHECK(begin != end);
704 ++begin;
705 CHECK(begin != end);
706 // See if there is another space.
707 begin = std::find(begin, end, ' ');
708 if (begin == end)
709 return std::string();
710 ++begin;
711 CHECK(begin != end);
712 return std::string(begin, end);
713 }
714
EnumerateHeaderLines(size_t * iter,std::string * name,std::string * value) const715 bool HttpResponseHeaders::EnumerateHeaderLines(size_t* iter,
716 std::string* name,
717 std::string* value) const {
718 size_t i = *iter;
719 if (i == parsed_.size())
720 return false;
721
722 DCHECK(!parsed_[i].is_continuation());
723
724 name->assign(parsed_[i].name_begin, parsed_[i].name_end);
725
726 std::string::const_iterator value_begin = parsed_[i].value_begin;
727 std::string::const_iterator value_end = parsed_[i].value_end;
728 while (++i < parsed_.size() && parsed_[i].is_continuation())
729 value_end = parsed_[i].value_end;
730
731 value->assign(value_begin, value_end);
732
733 *iter = i;
734 return true;
735 }
736
EnumerateHeader(size_t * iter,std::string_view name,std::string * value) const737 bool HttpResponseHeaders::EnumerateHeader(size_t* iter,
738 std::string_view name,
739 std::string* value) const {
740 size_t i;
741 if (!iter || !*iter) {
742 i = FindHeader(0, name);
743 } else {
744 i = *iter;
745 if (i >= parsed_.size()) {
746 i = std::string::npos;
747 } else if (!parsed_[i].is_continuation()) {
748 i = FindHeader(i, name);
749 }
750 }
751
752 if (i == std::string::npos) {
753 value->clear();
754 return false;
755 }
756
757 if (iter)
758 *iter = i + 1;
759 value->assign(parsed_[i].value_begin, parsed_[i].value_end);
760 return true;
761 }
762
HasHeaderValue(std::string_view name,std::string_view value) const763 bool HttpResponseHeaders::HasHeaderValue(std::string_view name,
764 std::string_view value) const {
765 // The value has to be an exact match. This is important since
766 // 'cache-control: no-cache' != 'cache-control: no-cache="foo"'
767 size_t iter = 0;
768 std::string temp;
769 while (EnumerateHeader(&iter, name, &temp)) {
770 if (base::EqualsCaseInsensitiveASCII(value, temp))
771 return true;
772 }
773 return false;
774 }
775
HasHeader(std::string_view name) const776 bool HttpResponseHeaders::HasHeader(std::string_view name) const {
777 return FindHeader(0, name) != std::string::npos;
778 }
779
780 HttpResponseHeaders::~HttpResponseHeaders() = default;
781
782 // Note: this implementation implicitly assumes that line_end points at a valid
783 // sentinel character (such as '\0').
784 // static
ParseVersion(std::string::const_iterator line_begin,std::string::const_iterator line_end)785 HttpVersion HttpResponseHeaders::ParseVersion(
786 std::string::const_iterator line_begin,
787 std::string::const_iterator line_end) {
788 std::string::const_iterator p = line_begin;
789
790 // RFC9112 Section 2.3:
791 // HTTP-version = HTTP-name "/" DIGIT "." DIGIT
792 // HTTP-name = %s"HTTP"
793
794 if (!base::StartsWith(base::MakeStringPiece(line_begin, line_end), "http",
795 base::CompareCase::INSENSITIVE_ASCII)) {
796 DVLOG(1) << "missing status line";
797 return HttpVersion();
798 }
799
800 p += 4;
801
802 if (p >= line_end || *p != '/') {
803 DVLOG(1) << "missing version";
804 return HttpVersion();
805 }
806
807 std::string::const_iterator dot = std::find(p, line_end, '.');
808 if (dot == line_end) {
809 DVLOG(1) << "malformed version";
810 return HttpVersion();
811 }
812
813 ++p; // from / to first digit.
814 ++dot; // from . to second digit.
815
816 if (!(base::IsAsciiDigit(*p) && base::IsAsciiDigit(*dot))) {
817 DVLOG(1) << "malformed version number";
818 return HttpVersion();
819 }
820
821 uint16_t major = *p - '0';
822 uint16_t minor = *dot - '0';
823
824 return HttpVersion(major, minor);
825 }
826
827 // Note: this implementation implicitly assumes that line_end points at a valid
828 // sentinel character (such as '\0').
ParseStatusLine(std::string::const_iterator line_begin,std::string::const_iterator line_end,bool has_headers)829 void HttpResponseHeaders::ParseStatusLine(
830 std::string::const_iterator line_begin,
831 std::string::const_iterator line_end,
832 bool has_headers) {
833 // Extract the version number
834 HttpVersion parsed_http_version = ParseVersion(line_begin, line_end);
835
836 // Clamp the version number to one of: {0.9, 1.0, 1.1, 2.0}
837 if (parsed_http_version == HttpVersion(0, 9) && !has_headers) {
838 http_version_ = HttpVersion(0, 9);
839 raw_headers_ = "HTTP/0.9";
840 } else if (parsed_http_version == HttpVersion(2, 0)) {
841 http_version_ = HttpVersion(2, 0);
842 raw_headers_ = "HTTP/2.0";
843 } else if (parsed_http_version >= HttpVersion(1, 1)) {
844 http_version_ = HttpVersion(1, 1);
845 raw_headers_ = "HTTP/1.1";
846 } else {
847 // Treat everything else like HTTP 1.0
848 http_version_ = HttpVersion(1, 0);
849 raw_headers_ = "HTTP/1.0";
850 }
851 if (parsed_http_version != http_version_) {
852 DVLOG(1) << "assuming HTTP/" << http_version_.major_value() << "."
853 << http_version_.minor_value();
854 }
855
856 // TODO(eroman): this doesn't make sense if ParseVersion failed.
857 std::string::const_iterator p = std::find(line_begin, line_end, ' ');
858
859 if (p == line_end) {
860 DVLOG(1) << "missing response status; assuming 200 OK";
861 raw_headers_.append(" 200 OK");
862 response_code_ = net::HTTP_OK;
863 return;
864 }
865
866 response_code_ =
867 ParseStatus(base::MakeStringPiece(p + 1, line_end), raw_headers_);
868 }
869
FindHeader(size_t from,std::string_view search) const870 size_t HttpResponseHeaders::FindHeader(size_t from,
871 std::string_view search) const {
872 for (size_t i = from; i < parsed_.size(); ++i) {
873 if (parsed_[i].is_continuation())
874 continue;
875 auto name =
876 base::MakeStringPiece(parsed_[i].name_begin, parsed_[i].name_end);
877 if (base::EqualsCaseInsensitiveASCII(search, name))
878 return i;
879 }
880
881 return std::string::npos;
882 }
883
GetCacheControlDirective(std::string_view directive,base::TimeDelta * result) const884 bool HttpResponseHeaders::GetCacheControlDirective(
885 std::string_view directive,
886 base::TimeDelta* result) const {
887 static constexpr std::string_view name("cache-control");
888 std::string value;
889
890 size_t directive_size = directive.size();
891
892 size_t iter = 0;
893 while (EnumerateHeader(&iter, name, &value)) {
894 if (!base::StartsWith(value, directive,
895 base::CompareCase::INSENSITIVE_ASCII)) {
896 continue;
897 }
898 if (value.size() == directive_size || value[directive_size] != '=')
899 continue;
900 // 1*DIGIT with leading and trailing spaces, as described at
901 // https://datatracker.ietf.org/doc/html/rfc7234#section-1.2.1.
902 auto start = value.cbegin() + directive_size + 1;
903 auto end = value.cend();
904 while (start < end && *start == ' ') {
905 // leading spaces
906 ++start;
907 }
908 while (start < end - 1 && *(end - 1) == ' ') {
909 // trailing spaces
910 --end;
911 }
912 if (start == end ||
913 !std::all_of(start, end, [](char c) { return '0' <= c && c <= '9'; })) {
914 continue;
915 }
916 int64_t seconds = 0;
917 base::StringToInt64(base::MakeStringPiece(start, end), &seconds);
918 // We ignore the return value because we've already checked the input
919 // string. For the overflow case we use
920 // base::TimeDelta::FiniteMax().InSeconds().
921 seconds = std::min(seconds, base::TimeDelta::FiniteMax().InSeconds());
922 *result = base::Seconds(seconds);
923 return true;
924 }
925
926 return false;
927 }
928
AddHeader(std::string::const_iterator name_begin,std::string::const_iterator name_end,std::string::const_iterator values_begin,std::string::const_iterator values_end,ContainsCommas contains_commas)929 void HttpResponseHeaders::AddHeader(std::string::const_iterator name_begin,
930 std::string::const_iterator name_end,
931 std::string::const_iterator values_begin,
932 std::string::const_iterator values_end,
933 ContainsCommas contains_commas) {
934 // If the header can be coalesced, then we should split it up.
935 if (values_begin == values_end ||
936 HttpUtil::IsNonCoalescingHeader(
937 base::MakeStringPiece(name_begin, name_end)) ||
938 contains_commas == ContainsCommas::kNo) {
939 AddToParsed(name_begin, name_end, values_begin, values_end);
940 } else {
941 HttpUtil::ValuesIterator it(values_begin, values_end, ',',
942 false /* ignore_empty_values */);
943 while (it.GetNext()) {
944 AddToParsed(name_begin, name_end, it.value_begin(), it.value_end());
945 // clobber these so that subsequent values are treated as continuations
946 name_begin = name_end = values_end;
947 }
948 }
949 }
950
AddToParsed(std::string::const_iterator name_begin,std::string::const_iterator name_end,std::string::const_iterator value_begin,std::string::const_iterator value_end)951 void HttpResponseHeaders::AddToParsed(std::string::const_iterator name_begin,
952 std::string::const_iterator name_end,
953 std::string::const_iterator value_begin,
954 std::string::const_iterator value_end) {
955 ParsedHeader header;
956 header.name_begin = name_begin;
957 header.name_end = name_end;
958 header.value_begin = value_begin;
959 header.value_end = value_end;
960 parsed_.push_back(header);
961 }
962
AddNonCacheableHeaders(HeaderSet * result) const963 void HttpResponseHeaders::AddNonCacheableHeaders(HeaderSet* result) const {
964 // Add server specified transients. Any 'cache-control: no-cache="foo,bar"'
965 // headers present in the response specify additional headers that we should
966 // not store in the cache.
967 const char kCacheControl[] = "cache-control";
968 const char kPrefix[] = "no-cache=\"";
969 const size_t kPrefixLen = sizeof(kPrefix) - 1;
970
971 std::string value;
972 size_t iter = 0;
973 while (EnumerateHeader(&iter, kCacheControl, &value)) {
974 // If the value is smaller than the prefix and a terminal quote, skip
975 // it.
976 if (value.size() <= kPrefixLen ||
977 value.compare(0, kPrefixLen, kPrefix) != 0) {
978 continue;
979 }
980 // if it doesn't end with a quote, then treat as malformed
981 if (value[value.size() - 1] != '\"')
982 continue;
983
984 // process the value as a comma-separated list of items. Each
985 // item can be wrapped by linear white space.
986 std::string::const_iterator item = value.begin() + kPrefixLen;
987 std::string::const_iterator end = value.end() - 1;
988 while (item != end) {
989 // Find the comma to compute the length of the current item,
990 // and the position of the next one.
991 std::string::const_iterator item_next = std::find(item, end, ',');
992 std::string::const_iterator item_end = end;
993 if (item_next != end) {
994 // Skip over comma for next position.
995 item_end = item_next;
996 item_next++;
997 }
998 // trim off leading and trailing whitespace in this item.
999 HttpUtil::TrimLWS(&item, &item_end);
1000
1001 // assuming the header is not empty, lowercase and insert into set
1002 if (item_end > item) {
1003 result->insert(
1004 base::ToLowerASCII(std::string_view(&*item, item_end - item)));
1005 }
1006
1007 // Continue to next item.
1008 item = item_next;
1009 }
1010 }
1011 }
1012
AddHopByHopHeaders(HeaderSet * result)1013 void HttpResponseHeaders::AddHopByHopHeaders(HeaderSet* result) {
1014 for (const auto* header : kHopByHopResponseHeaders)
1015 result->insert(std::string(header));
1016 }
1017
AddCookieHeaders(HeaderSet * result)1018 void HttpResponseHeaders::AddCookieHeaders(HeaderSet* result) {
1019 for (const auto* header : kCookieResponseHeaders)
1020 result->insert(std::string(header));
1021 }
1022
AddChallengeHeaders(HeaderSet * result)1023 void HttpResponseHeaders::AddChallengeHeaders(HeaderSet* result) {
1024 for (const auto* header : kChallengeResponseHeaders)
1025 result->insert(std::string(header));
1026 }
1027
AddHopContentRangeHeaders(HeaderSet * result)1028 void HttpResponseHeaders::AddHopContentRangeHeaders(HeaderSet* result) {
1029 result->insert(kContentRange);
1030 }
1031
AddSecurityStateHeaders(HeaderSet * result)1032 void HttpResponseHeaders::AddSecurityStateHeaders(HeaderSet* result) {
1033 for (const auto* header : kSecurityStateHeaders)
1034 result->insert(std::string(header));
1035 }
1036
GetMimeTypeAndCharset(std::string * mime_type,std::string * charset) const1037 void HttpResponseHeaders::GetMimeTypeAndCharset(std::string* mime_type,
1038 std::string* charset) const {
1039 mime_type->clear();
1040 charset->clear();
1041
1042 std::string name = "content-type";
1043 std::string value;
1044
1045 bool had_charset = false;
1046
1047 size_t iter = 0;
1048 while (EnumerateHeader(&iter, name, &value))
1049 HttpUtil::ParseContentType(value, mime_type, charset, &had_charset,
1050 nullptr);
1051 }
1052
GetMimeType(std::string * mime_type) const1053 bool HttpResponseHeaders::GetMimeType(std::string* mime_type) const {
1054 std::string unused;
1055 GetMimeTypeAndCharset(mime_type, &unused);
1056 return !mime_type->empty();
1057 }
1058
GetCharset(std::string * charset) const1059 bool HttpResponseHeaders::GetCharset(std::string* charset) const {
1060 std::string unused;
1061 GetMimeTypeAndCharset(&unused, charset);
1062 return !charset->empty();
1063 }
1064
IsRedirect(std::string * location) const1065 bool HttpResponseHeaders::IsRedirect(std::string* location) const {
1066 if (!IsRedirectResponseCode(response_code_))
1067 return false;
1068
1069 // If we lack a Location header, then we can't treat this as a redirect.
1070 // We assume that the first non-empty location value is the target URL that
1071 // we want to follow. TODO(darin): Is this consistent with other browsers?
1072 size_t i = std::string::npos;
1073 do {
1074 i = FindHeader(++i, "location");
1075 if (i == std::string::npos)
1076 return false;
1077 // If the location value is empty, then it doesn't count.
1078 } while (parsed_[i].value_begin == parsed_[i].value_end);
1079
1080 if (location) {
1081 auto location_strpiece =
1082 base::MakeStringPiece(parsed_[i].value_begin, parsed_[i].value_end);
1083 // Escape any non-ASCII characters to preserve them. The server should
1084 // only be returning ASCII here, but for compat we need to do this.
1085 //
1086 // The URL parser escapes things internally, but it expect the bytes to be
1087 // valid UTF-8, so encoding errors turn into replacement characters before
1088 // escaping. Escaping here preserves the bytes as-is. See
1089 // https://crbug.com/942073#c14.
1090 *location = base::EscapeNonASCII(location_strpiece);
1091 }
1092
1093 return true;
1094 }
1095
1096 // static
IsRedirectResponseCode(int response_code)1097 bool HttpResponseHeaders::IsRedirectResponseCode(int response_code) {
1098 // Users probably want to see 300 (multiple choice) pages, so we don't count
1099 // them as redirects that need to be followed.
1100 return (response_code == net::HTTP_MOVED_PERMANENTLY ||
1101 response_code == net::HTTP_FOUND ||
1102 response_code == net::HTTP_SEE_OTHER ||
1103 response_code == net::HTTP_TEMPORARY_REDIRECT ||
1104 response_code == net::HTTP_PERMANENT_REDIRECT);
1105 }
1106
1107 // From RFC 2616 section 13.2.4:
1108 //
1109 // The calculation to determine if a response has expired is quite simple:
1110 //
1111 // response_is_fresh = (freshness_lifetime > current_age)
1112 //
1113 // Of course, there are other factors that can force a response to always be
1114 // validated or re-fetched.
1115 //
1116 // From RFC 5861 section 3, a stale response may be used while revalidation is
1117 // performed in the background if
1118 //
1119 // freshness_lifetime + stale_while_revalidate > current_age
1120 //
RequiresValidation(const Time & request_time,const Time & response_time,const Time & current_time) const1121 ValidationType HttpResponseHeaders::RequiresValidation(
1122 const Time& request_time,
1123 const Time& response_time,
1124 const Time& current_time) const {
1125 FreshnessLifetimes lifetimes = GetFreshnessLifetimes(response_time);
1126 if (lifetimes.freshness.is_zero() && lifetimes.staleness.is_zero())
1127 return VALIDATION_SYNCHRONOUS;
1128
1129 base::TimeDelta age =
1130 GetCurrentAge(request_time, response_time, current_time);
1131
1132 if (lifetimes.freshness > age)
1133 return VALIDATION_NONE;
1134
1135 if (lifetimes.freshness + lifetimes.staleness > age)
1136 return VALIDATION_ASYNCHRONOUS;
1137
1138 return VALIDATION_SYNCHRONOUS;
1139 }
1140
1141 // From RFC 2616 section 13.2.4:
1142 //
1143 // The max-age directive takes priority over Expires, so if max-age is present
1144 // in a response, the calculation is simply:
1145 //
1146 // freshness_lifetime = max_age_value
1147 //
1148 // Otherwise, if Expires is present in the response, the calculation is:
1149 //
1150 // freshness_lifetime = expires_value - date_value
1151 //
1152 // Note that neither of these calculations is vulnerable to clock skew, since
1153 // all of the information comes from the origin server.
1154 //
1155 // Also, if the response does have a Last-Modified time, the heuristic
1156 // expiration value SHOULD be no more than some fraction of the interval since
1157 // that time. A typical setting of this fraction might be 10%:
1158 //
1159 // freshness_lifetime = (date_value - last_modified_value) * 0.10
1160 //
// If the stale-while-revalidate directive is present, then it is used to set
// the |staleness| time, unless it is overridden by another directive.
1163 //
1164 HttpResponseHeaders::FreshnessLifetimes
GetFreshnessLifetimes(const Time & response_time) const1165 HttpResponseHeaders::GetFreshnessLifetimes(const Time& response_time) const {
1166 FreshnessLifetimes lifetimes;
1167 // Check for headers that force a response to never be fresh. For backwards
1168 // compat, we treat "Pragma: no-cache" as a synonym for "Cache-Control:
1169 // no-cache" even though RFC 2616 does not specify it.
1170 if (HasHeaderValue("cache-control", "no-cache") ||
1171 HasHeaderValue("cache-control", "no-store") ||
1172 HasHeaderValue("pragma", "no-cache")) {
1173 return lifetimes;
1174 }
1175
1176 // Cache-Control directive must_revalidate overrides stale-while-revalidate.
1177 bool must_revalidate = HasHeaderValue("cache-control", "must-revalidate");
1178
1179 if (must_revalidate || !GetStaleWhileRevalidateValue(&lifetimes.staleness)) {
1180 DCHECK_EQ(base::TimeDelta(), lifetimes.staleness);
1181 }
1182
1183 // NOTE: "Cache-Control: max-age" overrides Expires, so we only check the
1184 // Expires header after checking for max-age in GetFreshnessLifetimes. This
1185 // is important since "Expires: <date in the past>" means not fresh, but
1186 // it should not trump a max-age value.
1187 if (GetMaxAgeValue(&lifetimes.freshness))
1188 return lifetimes;
1189
1190 // If there is no Date header, then assume that the server response was
1191 // generated at the time when we received the response.
1192 Time date_value;
1193 if (!GetDateValue(&date_value))
1194 date_value = response_time;
1195
1196 Time expires_value;
1197 if (GetExpiresValue(&expires_value)) {
1198 // The expires value can be a date in the past!
1199 if (expires_value > date_value) {
1200 lifetimes.freshness = expires_value - date_value;
1201 return lifetimes;
1202 }
1203
1204 DCHECK_EQ(base::TimeDelta(), lifetimes.freshness);
1205 return lifetimes;
1206 }
1207
1208 // From RFC 2616 section 13.4:
1209 //
1210 // A response received with a status code of 200, 203, 206, 300, 301 or 410
1211 // MAY be stored by a cache and used in reply to a subsequent request,
1212 // subject to the expiration mechanism, unless a cache-control directive
1213 // prohibits caching.
1214 // ...
1215 // A response received with any other status code (e.g. status codes 302
1216 // and 307) MUST NOT be returned in a reply to a subsequent request unless
1217 // there are cache-control directives or another header(s) that explicitly
1218 // allow it.
1219 //
1220 // From RFC 2616 section 14.9.4:
1221 //
1222 // When the must-revalidate directive is present in a response received by
1223 // a cache, that cache MUST NOT use the entry after it becomes stale to
1224 // respond to a subsequent request without first revalidating it with the
1225 // origin server. (I.e., the cache MUST do an end-to-end revalidation every
1226 // time, if, based solely on the origin server's Expires or max-age value,
1227 // the cached response is stale.)
1228 //
1229 // https://datatracker.ietf.org/doc/draft-reschke-http-status-308/ is an
1230 // experimental RFC that adds 308 permanent redirect as well, for which "any
1231 // future references ... SHOULD use one of the returned URIs."
1232 if ((response_code_ == net::HTTP_OK ||
1233 response_code_ == net::HTTP_NON_AUTHORITATIVE_INFORMATION ||
1234 response_code_ == net::HTTP_PARTIAL_CONTENT) &&
1235 !must_revalidate) {
1236 // TODO(darin): Implement a smarter heuristic.
1237 Time last_modified_value;
1238 if (GetLastModifiedValue(&last_modified_value)) {
1239 // The last-modified value can be a date in the future!
1240 if (last_modified_value <= date_value) {
1241 lifetimes.freshness = (date_value - last_modified_value) / 10;
1242 return lifetimes;
1243 }
1244 }
1245 }
1246
1247 // These responses are implicitly fresh (unless otherwise overruled):
1248 if (response_code_ == net::HTTP_MULTIPLE_CHOICES ||
1249 response_code_ == net::HTTP_MOVED_PERMANENTLY ||
1250 response_code_ == net::HTTP_PERMANENT_REDIRECT ||
1251 response_code_ == net::HTTP_GONE) {
1252 lifetimes.freshness = base::TimeDelta::Max();
1253 lifetimes.staleness = base::TimeDelta(); // It should never be stale.
1254 return lifetimes;
1255 }
1256
1257 // Our heuristic freshness estimate for this resource is 0 seconds, in
1258 // accordance with common browser behaviour. However, stale-while-revalidate
1259 // may still apply.
1260 DCHECK_EQ(base::TimeDelta(), lifetimes.freshness);
1261 return lifetimes;
1262 }
1263
1264 // From RFC 7234 section 4.2.3:
1265 //
1266 // The following data is used for the age calculation:
1267 //
1268 // age_value
1269 //
1270 // The term "age_value" denotes the value of the Age header field
1271 // (Section 5.1), in a form appropriate for arithmetic operation; or
1272 // 0, if not available.
1273 //
1274 // date_value
1275 //
1276 // The term "date_value" denotes the value of the Date header field,
1277 // in a form appropriate for arithmetic operations. See Section
1278 // 7.1.1.2 of [RFC7231] for the definition of the Date header field,
1279 // and for requirements regarding responses without it.
1280 //
1281 // now
1282 //
1283 // The term "now" means "the current value of the clock at the host
1284 // performing the calculation". A host ought to use NTP ([RFC5905])
1285 // or some similar protocol to synchronize its clocks to Coordinated
1286 // Universal Time.
1287 //
1288 // request_time
1289 //
1290 // The current value of the clock at the host at the time the request
1291 // resulting in the stored response was made.
1292 //
1293 // response_time
1294 //
1295 // The current value of the clock at the host at the time the
1296 // response was received.
1297 //
1298 // The age is then calculated as
1299 //
1300 // apparent_age = max(0, response_time - date_value);
1301 // response_delay = response_time - request_time;
1302 // corrected_age_value = age_value + response_delay;
1303 // corrected_initial_age = max(apparent_age, corrected_age_value);
1304 // resident_time = now - response_time;
1305 // current_age = corrected_initial_age + resident_time;
1306 //
GetCurrentAge(const Time & request_time,const Time & response_time,const Time & current_time) const1307 base::TimeDelta HttpResponseHeaders::GetCurrentAge(
1308 const Time& request_time,
1309 const Time& response_time,
1310 const Time& current_time) const {
1311 // If there is no Date header, then assume that the server response was
1312 // generated at the time when we received the response.
1313 Time date_value;
1314 if (!GetDateValue(&date_value))
1315 date_value = response_time;
1316
1317 // If there is no Age header, then assume age is zero. GetAgeValue does not
1318 // modify its out param if the value does not exist.
1319 base::TimeDelta age_value;
1320 GetAgeValue(&age_value);
1321
1322 base::TimeDelta apparent_age =
1323 std::max(base::TimeDelta(), response_time - date_value);
1324 base::TimeDelta response_delay = response_time - request_time;
1325 base::TimeDelta corrected_age_value = age_value + response_delay;
1326 base::TimeDelta corrected_initial_age =
1327 std::max(apparent_age, corrected_age_value);
1328 base::TimeDelta resident_time = current_time - response_time;
1329 base::TimeDelta current_age = corrected_initial_age + resident_time;
1330
1331 return current_age;
1332 }
1333
GetMaxAgeValue(base::TimeDelta * result) const1334 bool HttpResponseHeaders::GetMaxAgeValue(base::TimeDelta* result) const {
1335 return GetCacheControlDirective("max-age", result);
1336 }
1337
GetAgeValue(base::TimeDelta * result) const1338 bool HttpResponseHeaders::GetAgeValue(base::TimeDelta* result) const {
1339 std::string value;
1340 if (!EnumerateHeader(nullptr, "Age", &value))
1341 return false;
1342
1343 // Parse the delta-seconds as 1*DIGIT.
1344 uint32_t seconds;
1345 ParseIntError error;
1346 if (!ParseUint32(value, ParseIntFormat::NON_NEGATIVE, &seconds, &error)) {
1347 if (error == ParseIntError::FAILED_OVERFLOW) {
1348 // If the Age value cannot fit in a uint32_t, saturate it to a maximum
1349 // value. This is similar to what RFC 2616 says in section 14.6 for how
1350 // caches should transmit values that overflow.
1351 seconds = std::numeric_limits<decltype(seconds)>::max();
1352 } else {
1353 return false;
1354 }
1355 }
1356
1357 *result = base::Seconds(seconds);
1358 return true;
1359 }
1360
GetDateValue(Time * result) const1361 bool HttpResponseHeaders::GetDateValue(Time* result) const {
1362 return GetTimeValuedHeader("Date", result);
1363 }
1364
GetLastModifiedValue(Time * result) const1365 bool HttpResponseHeaders::GetLastModifiedValue(Time* result) const {
1366 return GetTimeValuedHeader("Last-Modified", result);
1367 }
1368
GetExpiresValue(Time * result) const1369 bool HttpResponseHeaders::GetExpiresValue(Time* result) const {
1370 return GetTimeValuedHeader("Expires", result);
1371 }
1372
GetStaleWhileRevalidateValue(base::TimeDelta * result) const1373 bool HttpResponseHeaders::GetStaleWhileRevalidateValue(
1374 base::TimeDelta* result) const {
1375 return GetCacheControlDirective("stale-while-revalidate", result);
1376 }
1377
GetTimeValuedHeader(const std::string & name,Time * result) const1378 bool HttpResponseHeaders::GetTimeValuedHeader(const std::string& name,
1379 Time* result) const {
1380 std::string value;
1381 if (!EnumerateHeader(nullptr, name, &value))
1382 return false;
1383
1384 // In case of parsing the Expires header value, an invalid string 0 should be
1385 // treated as expired according to the RFC 9111 section 5.3 as below:
1386 //
1387 // > A cache recipient MUST interpret invalid date formats, especially the
1388 // > value "0", as representing a time in the past (i.e., "already expired").
1389 if (base::FeatureList::IsEnabled(
1390 features::kTreatHTTPExpiresHeaderValueZeroAsExpired) &&
1391 name == "Expires" && value == "0") {
1392 *result = Time::Min();
1393 return true;
1394 }
1395
1396 // When parsing HTTP dates it's beneficial to default to GMT because:
1397 // 1. RFC2616 3.3.1 says times should always be specified in GMT
1398 // 2. Only counter-example incorrectly appended "UTC" (crbug.com/153759)
1399 // 3. When adjusting cookie expiration times for clock skew
1400 // (crbug.com/135131) this better matches our cookie expiration
1401 // time parser which ignores timezone specifiers and assumes GMT.
1402 // 4. This is exactly what Firefox does.
1403 // TODO(pauljensen): The ideal solution would be to return false if the
1404 // timezone could not be understood so as to avoid makeing other calculations
1405 // based on an incorrect time. This would require modifying the time
1406 // library or duplicating the code. (http://crbug.com/158327)
1407 return Time::FromUTCString(value.c_str(), result);
1408 }
1409
1410 // We accept the first value of "close" or "keep-alive" in a Connection or
1411 // Proxy-Connection header, in that order. Obeying "keep-alive" in HTTP/1.1 or
1412 // "close" in 1.0 is not strictly standards-compliant, but we'd like to
1413 // avoid looking at the Proxy-Connection header whenever it is reasonable to do
1414 // so.
1415 // TODO(ricea): Measure real-world usage of the "Proxy-Connection" header,
1416 // with a view to reducing support for it in order to make our Connection header
1417 // handling more RFC 7230 compliant.
IsKeepAlive() const1418 bool HttpResponseHeaders::IsKeepAlive() const {
1419 // NOTE: It is perhaps risky to assume that a Proxy-Connection header is
1420 // meaningful when we don't know that this response was from a proxy, but
1421 // Mozilla also does this, so we'll do the same.
1422 static const char* const kConnectionHeaders[] = {"connection",
1423 "proxy-connection"};
1424 struct KeepAliveToken {
1425 const char* const token;
1426 bool keep_alive;
1427 };
1428 static const KeepAliveToken kKeepAliveTokens[] = {{"keep-alive", true},
1429 {"close", false}};
1430
1431 if (http_version_ < HttpVersion(1, 0))
1432 return false;
1433
1434 for (const char* header : kConnectionHeaders) {
1435 size_t iterator = 0;
1436 std::string token;
1437 while (EnumerateHeader(&iterator, header, &token)) {
1438 for (const KeepAliveToken& keep_alive_token : kKeepAliveTokens) {
1439 if (base::EqualsCaseInsensitiveASCII(token, keep_alive_token.token))
1440 return keep_alive_token.keep_alive;
1441 }
1442 }
1443 }
1444 return http_version_ != HttpVersion(1, 0);
1445 }
1446
HasStrongValidators() const1447 bool HttpResponseHeaders::HasStrongValidators() const {
1448 std::string etag_header;
1449 EnumerateHeader(nullptr, "etag", &etag_header);
1450 std::string last_modified_header;
1451 EnumerateHeader(nullptr, "Last-Modified", &last_modified_header);
1452 std::string date_header;
1453 EnumerateHeader(nullptr, "Date", &date_header);
1454 return HttpUtil::HasStrongValidators(GetHttpVersion(), etag_header,
1455 last_modified_header, date_header);
1456 }
1457
HasValidators() const1458 bool HttpResponseHeaders::HasValidators() const {
1459 std::string etag_header;
1460 EnumerateHeader(nullptr, "etag", &etag_header);
1461 std::string last_modified_header;
1462 EnumerateHeader(nullptr, "Last-Modified", &last_modified_header);
1463 return HttpUtil::HasValidators(GetHttpVersion(), etag_header,
1464 last_modified_header);
1465 }
1466
1467 // From RFC 2616:
1468 // Content-Length = "Content-Length" ":" 1*DIGIT
GetContentLength() const1469 int64_t HttpResponseHeaders::GetContentLength() const {
1470 return GetInt64HeaderValue("content-length");
1471 }
1472
GetInt64HeaderValue(const std::string & header) const1473 int64_t HttpResponseHeaders::GetInt64HeaderValue(
1474 const std::string& header) const {
1475 size_t iter = 0;
1476 std::string content_length_val;
1477 if (!EnumerateHeader(&iter, header, &content_length_val))
1478 return -1;
1479
1480 if (content_length_val.empty())
1481 return -1;
1482
1483 if (content_length_val[0] == '+')
1484 return -1;
1485
1486 int64_t result;
1487 bool ok = base::StringToInt64(content_length_val, &result);
1488 if (!ok || result < 0)
1489 return -1;
1490
1491 return result;
1492 }
1493
GetContentRangeFor206(int64_t * first_byte_position,int64_t * last_byte_position,int64_t * instance_length) const1494 bool HttpResponseHeaders::GetContentRangeFor206(
1495 int64_t* first_byte_position,
1496 int64_t* last_byte_position,
1497 int64_t* instance_length) const {
1498 size_t iter = 0;
1499 std::string content_range_spec;
1500 if (!EnumerateHeader(&iter, kContentRange, &content_range_spec)) {
1501 *first_byte_position = *last_byte_position = *instance_length = -1;
1502 return false;
1503 }
1504
1505 return HttpUtil::ParseContentRangeHeaderFor206(
1506 content_range_spec, first_byte_position, last_byte_position,
1507 instance_length);
1508 }
1509
NetLogParams(NetLogCaptureMode capture_mode) const1510 base::Value::Dict HttpResponseHeaders::NetLogParams(
1511 NetLogCaptureMode capture_mode) const {
1512 base::Value::Dict dict;
1513 base::Value::List headers;
1514 headers.Append(NetLogStringValue(GetStatusLine()));
1515 size_t iterator = 0;
1516 std::string name;
1517 std::string value;
1518 while (EnumerateHeaderLines(&iterator, &name, &value)) {
1519 std::string log_value =
1520 ElideHeaderValueForNetLog(capture_mode, name, value);
1521 headers.Append(NetLogStringValue(base::StrCat({name, ": ", log_value})));
1522 }
1523 dict.Set("headers", std::move(headers));
1524 return dict;
1525 }
1526
IsChunkEncoded() const1527 bool HttpResponseHeaders::IsChunkEncoded() const {
1528 // Ignore spurious chunked responses from HTTP/1.0 servers and proxies.
1529 return GetHttpVersion() >= HttpVersion(1, 1) &&
1530 HasHeaderValue("Transfer-Encoding", "chunked");
1531 }
1532
IsCookieResponseHeader(std::string_view name)1533 bool HttpResponseHeaders::IsCookieResponseHeader(std::string_view name) {
1534 for (const char* cookie_header : kCookieResponseHeaders) {
1535 if (base::EqualsCaseInsensitiveASCII(cookie_header, name))
1536 return true;
1537 }
1538 return false;
1539 }
1540
WriteIntoTrace(perfetto::TracedValue context) const1541 void HttpResponseHeaders::WriteIntoTrace(perfetto::TracedValue context) const {
1542 perfetto::TracedDictionary dict = std::move(context).WriteDictionary();
1543 dict.Add("response_code", response_code_);
1544 dict.Add("headers", parsed_);
1545 }
1546
StrictlyEquals(const HttpResponseHeaders & other) const1547 bool HttpResponseHeaders::StrictlyEquals(
1548 const HttpResponseHeaders& other) const {
1549 if (http_version_ != other.http_version_ ||
1550 response_code_ != other.response_code_ ||
1551 raw_headers_ != other.raw_headers_ ||
1552 parsed_.size() != other.parsed_.size()) {
1553 return false;
1554 }
1555
1556 auto offsets_match = [&](std::string::const_iterator this_offset,
1557 std::string::const_iterator other_offset) {
1558 return this_offset - raw_headers_.begin() ==
1559 other_offset - other.raw_headers_.begin();
1560 };
1561 return std::mismatch(parsed_.begin(), parsed_.end(), other.parsed_.begin(),
1562 [&](const ParsedHeader& lhs, const ParsedHeader& rhs) {
1563 return offsets_match(lhs.name_begin, rhs.name_begin) &&
1564 offsets_match(lhs.name_end, rhs.name_end) &&
1565 offsets_match(lhs.value_begin,
1566 rhs.value_begin) &&
1567 offsets_match(lhs.value_end, rhs.value_end);
1568 }) == std::pair(parsed_.end(), other.parsed_.end());
1569 }
1570
1571 } // namespace net
1572