// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker
5*6777b538SAndroid Build Coastguard Worker #include "net/dns/dns_hosts.h"
6*6777b538SAndroid Build Coastguard Worker
7*6777b538SAndroid Build Coastguard Worker #include <string>
8*6777b538SAndroid Build Coastguard Worker #include <utility>
9*6777b538SAndroid Build Coastguard Worker
10*6777b538SAndroid Build Coastguard Worker #include "base/check.h"
11*6777b538SAndroid Build Coastguard Worker #include "base/files/file_path.h"
12*6777b538SAndroid Build Coastguard Worker #include "base/files/file_util.h"
13*6777b538SAndroid Build Coastguard Worker #include "base/metrics/histogram_functions.h"
14*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_piece.h"
15*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_util.h"
16*6777b538SAndroid Build Coastguard Worker #include "base/trace_event/memory_usage_estimator.h"
17*6777b538SAndroid Build Coastguard Worker #include "build/build_config.h"
18*6777b538SAndroid Build Coastguard Worker #include "net/base/cronet_buildflags.h"
19*6777b538SAndroid Build Coastguard Worker #include "net/base/url_util.h"
20*6777b538SAndroid Build Coastguard Worker #include "net/dns/dns_util.h"
21*6777b538SAndroid Build Coastguard Worker #include "url/url_canon.h"
22*6777b538SAndroid Build Coastguard Worker
23*6777b538SAndroid Build Coastguard Worker using base::StringPiece;
24*6777b538SAndroid Build Coastguard Worker
25*6777b538SAndroid Build Coastguard Worker namespace net {
26*6777b538SAndroid Build Coastguard Worker
27*6777b538SAndroid Build Coastguard Worker namespace {
28*6777b538SAndroid Build Coastguard Worker
29*6777b538SAndroid Build Coastguard Worker // Parses the contents of a hosts file. Returns one token (IP or hostname) at
30*6777b538SAndroid Build Coastguard Worker // a time. Doesn't copy anything; accepts the file as a StringPiece and
31*6777b538SAndroid Build Coastguard Worker // returns tokens as StringPieces.
32*6777b538SAndroid Build Coastguard Worker class HostsParser {
33*6777b538SAndroid Build Coastguard Worker public:
HostsParser(const StringPiece & text,ParseHostsCommaMode comma_mode)34*6777b538SAndroid Build Coastguard Worker explicit HostsParser(const StringPiece& text, ParseHostsCommaMode comma_mode)
35*6777b538SAndroid Build Coastguard Worker : text_(text),
36*6777b538SAndroid Build Coastguard Worker data_(text.data()),
37*6777b538SAndroid Build Coastguard Worker end_(text.size()),
38*6777b538SAndroid Build Coastguard Worker comma_mode_(comma_mode) {}
39*6777b538SAndroid Build Coastguard Worker
40*6777b538SAndroid Build Coastguard Worker HostsParser(const HostsParser&) = delete;
41*6777b538SAndroid Build Coastguard Worker HostsParser& operator=(const HostsParser&) = delete;
42*6777b538SAndroid Build Coastguard Worker
43*6777b538SAndroid Build Coastguard Worker // Advances to the next token (IP or hostname). Returns whether another
44*6777b538SAndroid Build Coastguard Worker // token was available. |token_is_ip| and |token| can be used to find out
45*6777b538SAndroid Build Coastguard Worker // the type and text of the token.
Advance()46*6777b538SAndroid Build Coastguard Worker bool Advance() {
47*6777b538SAndroid Build Coastguard Worker bool next_is_ip = (pos_ == 0);
48*6777b538SAndroid Build Coastguard Worker while (pos_ < end_ && pos_ != std::string::npos) {
49*6777b538SAndroid Build Coastguard Worker switch (text_[pos_]) {
50*6777b538SAndroid Build Coastguard Worker case ' ':
51*6777b538SAndroid Build Coastguard Worker case '\t':
52*6777b538SAndroid Build Coastguard Worker SkipWhitespace();
53*6777b538SAndroid Build Coastguard Worker break;
54*6777b538SAndroid Build Coastguard Worker
55*6777b538SAndroid Build Coastguard Worker case '\r':
56*6777b538SAndroid Build Coastguard Worker case '\n':
57*6777b538SAndroid Build Coastguard Worker next_is_ip = true;
58*6777b538SAndroid Build Coastguard Worker pos_++;
59*6777b538SAndroid Build Coastguard Worker break;
60*6777b538SAndroid Build Coastguard Worker
61*6777b538SAndroid Build Coastguard Worker case '#':
62*6777b538SAndroid Build Coastguard Worker SkipRestOfLine();
63*6777b538SAndroid Build Coastguard Worker break;
64*6777b538SAndroid Build Coastguard Worker
65*6777b538SAndroid Build Coastguard Worker case ',':
66*6777b538SAndroid Build Coastguard Worker if (comma_mode_ == PARSE_HOSTS_COMMA_IS_WHITESPACE) {
67*6777b538SAndroid Build Coastguard Worker SkipWhitespace();
68*6777b538SAndroid Build Coastguard Worker break;
69*6777b538SAndroid Build Coastguard Worker }
70*6777b538SAndroid Build Coastguard Worker
71*6777b538SAndroid Build Coastguard Worker // If comma_mode_ is COMMA_IS_TOKEN, fall through:
72*6777b538SAndroid Build Coastguard Worker [[fallthrough]];
73*6777b538SAndroid Build Coastguard Worker
74*6777b538SAndroid Build Coastguard Worker default: {
75*6777b538SAndroid Build Coastguard Worker size_t token_start = pos_;
76*6777b538SAndroid Build Coastguard Worker SkipToken();
77*6777b538SAndroid Build Coastguard Worker size_t token_end = (pos_ == std::string::npos) ? end_ : pos_;
78*6777b538SAndroid Build Coastguard Worker
79*6777b538SAndroid Build Coastguard Worker token_ = StringPiece(data_ + token_start, token_end - token_start);
80*6777b538SAndroid Build Coastguard Worker token_is_ip_ = next_is_ip;
81*6777b538SAndroid Build Coastguard Worker
82*6777b538SAndroid Build Coastguard Worker return true;
83*6777b538SAndroid Build Coastguard Worker }
84*6777b538SAndroid Build Coastguard Worker }
85*6777b538SAndroid Build Coastguard Worker }
86*6777b538SAndroid Build Coastguard Worker
87*6777b538SAndroid Build Coastguard Worker return false;
88*6777b538SAndroid Build Coastguard Worker }
89*6777b538SAndroid Build Coastguard Worker
90*6777b538SAndroid Build Coastguard Worker // Fast-forwards the parser to the next line. Should be called if an IP
91*6777b538SAndroid Build Coastguard Worker // address doesn't parse, to avoid wasting time tokenizing hostnames that
92*6777b538SAndroid Build Coastguard Worker // will be ignored.
SkipRestOfLine()93*6777b538SAndroid Build Coastguard Worker void SkipRestOfLine() { pos_ = text_.find("\n", pos_); }
94*6777b538SAndroid Build Coastguard Worker
95*6777b538SAndroid Build Coastguard Worker // Returns whether the last-parsed token is an IP address (true) or a
96*6777b538SAndroid Build Coastguard Worker // hostname (false).
token_is_ip()97*6777b538SAndroid Build Coastguard Worker bool token_is_ip() { return token_is_ip_; }
98*6777b538SAndroid Build Coastguard Worker
99*6777b538SAndroid Build Coastguard Worker // Returns the text of the last-parsed token as a StringPiece referencing
100*6777b538SAndroid Build Coastguard Worker // the same underlying memory as the StringPiece passed to the constructor.
101*6777b538SAndroid Build Coastguard Worker // Returns an empty StringPiece if no token has been parsed or the end of
102*6777b538SAndroid Build Coastguard Worker // the input string has been reached.
token()103*6777b538SAndroid Build Coastguard Worker const StringPiece& token() { return token_; }
104*6777b538SAndroid Build Coastguard Worker
105*6777b538SAndroid Build Coastguard Worker private:
SkipToken()106*6777b538SAndroid Build Coastguard Worker void SkipToken() {
107*6777b538SAndroid Build Coastguard Worker switch (comma_mode_) {
108*6777b538SAndroid Build Coastguard Worker case PARSE_HOSTS_COMMA_IS_TOKEN:
109*6777b538SAndroid Build Coastguard Worker pos_ = text_.find_first_of(" \t\n\r#", pos_);
110*6777b538SAndroid Build Coastguard Worker break;
111*6777b538SAndroid Build Coastguard Worker case PARSE_HOSTS_COMMA_IS_WHITESPACE:
112*6777b538SAndroid Build Coastguard Worker pos_ = text_.find_first_of(" ,\t\n\r#", pos_);
113*6777b538SAndroid Build Coastguard Worker break;
114*6777b538SAndroid Build Coastguard Worker }
115*6777b538SAndroid Build Coastguard Worker }
116*6777b538SAndroid Build Coastguard Worker
SkipWhitespace()117*6777b538SAndroid Build Coastguard Worker void SkipWhitespace() {
118*6777b538SAndroid Build Coastguard Worker switch (comma_mode_) {
119*6777b538SAndroid Build Coastguard Worker case PARSE_HOSTS_COMMA_IS_TOKEN:
120*6777b538SAndroid Build Coastguard Worker pos_ = text_.find_first_not_of(" \t", pos_);
121*6777b538SAndroid Build Coastguard Worker break;
122*6777b538SAndroid Build Coastguard Worker case PARSE_HOSTS_COMMA_IS_WHITESPACE:
123*6777b538SAndroid Build Coastguard Worker pos_ = text_.find_first_not_of(" ,\t", pos_);
124*6777b538SAndroid Build Coastguard Worker break;
125*6777b538SAndroid Build Coastguard Worker }
126*6777b538SAndroid Build Coastguard Worker }
127*6777b538SAndroid Build Coastguard Worker
128*6777b538SAndroid Build Coastguard Worker const StringPiece text_;
129*6777b538SAndroid Build Coastguard Worker const char* data_;
130*6777b538SAndroid Build Coastguard Worker const size_t end_;
131*6777b538SAndroid Build Coastguard Worker
132*6777b538SAndroid Build Coastguard Worker size_t pos_ = 0;
133*6777b538SAndroid Build Coastguard Worker StringPiece token_;
134*6777b538SAndroid Build Coastguard Worker bool token_is_ip_ = false;
135*6777b538SAndroid Build Coastguard Worker
136*6777b538SAndroid Build Coastguard Worker const ParseHostsCommaMode comma_mode_;
137*6777b538SAndroid Build Coastguard Worker };
138*6777b538SAndroid Build Coastguard Worker
ParseHostsWithCommaMode(const std::string & contents,DnsHosts * dns_hosts,ParseHostsCommaMode comma_mode)139*6777b538SAndroid Build Coastguard Worker void ParseHostsWithCommaMode(const std::string& contents,
140*6777b538SAndroid Build Coastguard Worker DnsHosts* dns_hosts,
141*6777b538SAndroid Build Coastguard Worker ParseHostsCommaMode comma_mode) {
142*6777b538SAndroid Build Coastguard Worker CHECK(dns_hosts);
143*6777b538SAndroid Build Coastguard Worker
144*6777b538SAndroid Build Coastguard Worker StringPiece ip_text;
145*6777b538SAndroid Build Coastguard Worker IPAddress ip;
146*6777b538SAndroid Build Coastguard Worker AddressFamily family = ADDRESS_FAMILY_IPV4;
147*6777b538SAndroid Build Coastguard Worker HostsParser parser(contents, comma_mode);
148*6777b538SAndroid Build Coastguard Worker while (parser.Advance()) {
149*6777b538SAndroid Build Coastguard Worker if (parser.token_is_ip()) {
150*6777b538SAndroid Build Coastguard Worker StringPiece new_ip_text = parser.token();
151*6777b538SAndroid Build Coastguard Worker // Some ad-blocking hosts files contain thousands of entries pointing to
152*6777b538SAndroid Build Coastguard Worker // the same IP address (usually 127.0.0.1). Don't bother parsing the IP
153*6777b538SAndroid Build Coastguard Worker // again if it's the same as the one above it.
154*6777b538SAndroid Build Coastguard Worker if (new_ip_text != ip_text) {
155*6777b538SAndroid Build Coastguard Worker IPAddress new_ip;
156*6777b538SAndroid Build Coastguard Worker if (new_ip.AssignFromIPLiteral(parser.token())) {
157*6777b538SAndroid Build Coastguard Worker ip_text = new_ip_text;
158*6777b538SAndroid Build Coastguard Worker ip = new_ip;
159*6777b538SAndroid Build Coastguard Worker family = (ip.IsIPv4()) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6;
160*6777b538SAndroid Build Coastguard Worker } else {
161*6777b538SAndroid Build Coastguard Worker parser.SkipRestOfLine();
162*6777b538SAndroid Build Coastguard Worker }
163*6777b538SAndroid Build Coastguard Worker }
164*6777b538SAndroid Build Coastguard Worker } else {
165*6777b538SAndroid Build Coastguard Worker url::CanonHostInfo canonicalization_info;
166*6777b538SAndroid Build Coastguard Worker std::string canonicalized_host =
167*6777b538SAndroid Build Coastguard Worker CanonicalizeHost(parser.token(), &canonicalization_info);
168*6777b538SAndroid Build Coastguard Worker
169*6777b538SAndroid Build Coastguard Worker // Skip if token is invalid for host canonicalization, or if it
170*6777b538SAndroid Build Coastguard Worker // canonicalizes as an IP address.
171*6777b538SAndroid Build Coastguard Worker if (canonicalization_info.family != url::CanonHostInfo::NEUTRAL)
172*6777b538SAndroid Build Coastguard Worker continue;
173*6777b538SAndroid Build Coastguard Worker
174*6777b538SAndroid Build Coastguard Worker DnsHostsKey key(std::move(canonicalized_host), family);
175*6777b538SAndroid Build Coastguard Worker if (!IsCanonicalizedHostCompliant(key.first))
176*6777b538SAndroid Build Coastguard Worker continue;
177*6777b538SAndroid Build Coastguard Worker IPAddress* mapped_ip = &(*dns_hosts)[key];
178*6777b538SAndroid Build Coastguard Worker if (mapped_ip->empty())
179*6777b538SAndroid Build Coastguard Worker *mapped_ip = ip;
180*6777b538SAndroid Build Coastguard Worker // else ignore this entry (first hit counts)
181*6777b538SAndroid Build Coastguard Worker }
182*6777b538SAndroid Build Coastguard Worker }
183*6777b538SAndroid Build Coastguard Worker }
184*6777b538SAndroid Build Coastguard Worker
185*6777b538SAndroid Build Coastguard Worker } // namespace
186*6777b538SAndroid Build Coastguard Worker
ParseHostsWithCommaModeForTesting(const std::string & contents,DnsHosts * dns_hosts,ParseHostsCommaMode comma_mode)187*6777b538SAndroid Build Coastguard Worker void ParseHostsWithCommaModeForTesting(const std::string& contents,
188*6777b538SAndroid Build Coastguard Worker DnsHosts* dns_hosts,
189*6777b538SAndroid Build Coastguard Worker ParseHostsCommaMode comma_mode) {
190*6777b538SAndroid Build Coastguard Worker ParseHostsWithCommaMode(contents, dns_hosts, comma_mode);
191*6777b538SAndroid Build Coastguard Worker }
192*6777b538SAndroid Build Coastguard Worker
ParseHosts(const std::string & contents,DnsHosts * dns_hosts)193*6777b538SAndroid Build Coastguard Worker void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) {
194*6777b538SAndroid Build Coastguard Worker ParseHostsCommaMode comma_mode;
195*6777b538SAndroid Build Coastguard Worker #if BUILDFLAG(IS_APPLE)
196*6777b538SAndroid Build Coastguard Worker // Mac OS X allows commas to separate hostnames.
197*6777b538SAndroid Build Coastguard Worker comma_mode = PARSE_HOSTS_COMMA_IS_WHITESPACE;
198*6777b538SAndroid Build Coastguard Worker #else
199*6777b538SAndroid Build Coastguard Worker // Linux allows commas in hostnames.
200*6777b538SAndroid Build Coastguard Worker comma_mode = PARSE_HOSTS_COMMA_IS_TOKEN;
201*6777b538SAndroid Build Coastguard Worker #endif
202*6777b538SAndroid Build Coastguard Worker
203*6777b538SAndroid Build Coastguard Worker ParseHostsWithCommaMode(contents, dns_hosts, comma_mode);
204*6777b538SAndroid Build Coastguard Worker
205*6777b538SAndroid Build Coastguard Worker // TODO(crbug.com/1377305): Remove this when we have enough data.
206*6777b538SAndroid Build Coastguard Worker base::UmaHistogramCounts100000("Net.DNS.DnsHosts.Count", dns_hosts->size());
207*6777b538SAndroid Build Coastguard Worker
208*6777b538SAndroid Build Coastguard Worker #if !BUILDFLAG(CRONET_BUILD)
209*6777b538SAndroid Build Coastguard Worker // Cronet disables tracing and doesn't provide an implementation of
210*6777b538SAndroid Build Coastguard Worker // base::trace_event::EstimateMemoryUsage for DnsHosts. Having this
211*6777b538SAndroid Build Coastguard Worker // conditional is preferred over a fake implementation to avoid reporting fake
212*6777b538SAndroid Build Coastguard Worker // metrics.
213*6777b538SAndroid Build Coastguard Worker base::UmaHistogramMemoryKB(
214*6777b538SAndroid Build Coastguard Worker "Net.DNS.DnsHosts.EstimateMemoryUsage",
215*6777b538SAndroid Build Coastguard Worker base::trace_event::EstimateMemoryUsage(*dns_hosts));
216*6777b538SAndroid Build Coastguard Worker #endif // !BUILDFLAG(CRONET_BUILD)
217*6777b538SAndroid Build Coastguard Worker }
218*6777b538SAndroid Build Coastguard Worker
219*6777b538SAndroid Build Coastguard Worker DnsHostsParser::~DnsHostsParser() = default;
220*6777b538SAndroid Build Coastguard Worker
DnsHostsFileParser(base::FilePath hosts_file_path)221*6777b538SAndroid Build Coastguard Worker DnsHostsFileParser::DnsHostsFileParser(base::FilePath hosts_file_path)
222*6777b538SAndroid Build Coastguard Worker : hosts_file_path_(std::move(hosts_file_path)) {}
223*6777b538SAndroid Build Coastguard Worker
224*6777b538SAndroid Build Coastguard Worker DnsHostsFileParser::~DnsHostsFileParser() = default;
225*6777b538SAndroid Build Coastguard Worker
ParseHosts(DnsHosts * dns_hosts) const226*6777b538SAndroid Build Coastguard Worker bool DnsHostsFileParser::ParseHosts(DnsHosts* dns_hosts) const {
227*6777b538SAndroid Build Coastguard Worker dns_hosts->clear();
228*6777b538SAndroid Build Coastguard Worker // Missing file indicates empty HOSTS.
229*6777b538SAndroid Build Coastguard Worker if (!base::PathExists(hosts_file_path_))
230*6777b538SAndroid Build Coastguard Worker return true;
231*6777b538SAndroid Build Coastguard Worker
232*6777b538SAndroid Build Coastguard Worker int64_t size;
233*6777b538SAndroid Build Coastguard Worker if (!base::GetFileSize(hosts_file_path_, &size))
234*6777b538SAndroid Build Coastguard Worker return false;
235*6777b538SAndroid Build Coastguard Worker
236*6777b538SAndroid Build Coastguard Worker // Reject HOSTS files larger than |kMaxHostsSize| bytes.
237*6777b538SAndroid Build Coastguard Worker const int64_t kMaxHostsSize = 1 << 25; // 32MB
238*6777b538SAndroid Build Coastguard Worker
239*6777b538SAndroid Build Coastguard Worker // TODO(crbug.com/1377305): Remove this when we have enough data.
240*6777b538SAndroid Build Coastguard Worker base::UmaHistogramCustomCounts("Net.DNS.DnsHosts.FileSize", size, 1,
241*6777b538SAndroid Build Coastguard Worker kMaxHostsSize * 2, 50);
242*6777b538SAndroid Build Coastguard Worker if (size > kMaxHostsSize)
243*6777b538SAndroid Build Coastguard Worker return false;
244*6777b538SAndroid Build Coastguard Worker
245*6777b538SAndroid Build Coastguard Worker std::string contents;
246*6777b538SAndroid Build Coastguard Worker if (!base::ReadFileToString(hosts_file_path_, &contents))
247*6777b538SAndroid Build Coastguard Worker return false;
248*6777b538SAndroid Build Coastguard Worker
249*6777b538SAndroid Build Coastguard Worker net::ParseHosts(contents, dns_hosts);
250*6777b538SAndroid Build Coastguard Worker return true;
251*6777b538SAndroid Build Coastguard Worker }
252*6777b538SAndroid Build Coastguard Worker
253*6777b538SAndroid Build Coastguard Worker } // namespace net
254