xref: /aosp_15_r20/external/cronet/net/dns/dns_hosts.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1*6777b538SAndroid Build Coastguard Worker // Copyright 2012 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
5*6777b538SAndroid Build Coastguard Worker #include "net/dns/dns_hosts.h"
6*6777b538SAndroid Build Coastguard Worker 
7*6777b538SAndroid Build Coastguard Worker #include <string>
8*6777b538SAndroid Build Coastguard Worker #include <utility>
9*6777b538SAndroid Build Coastguard Worker 
10*6777b538SAndroid Build Coastguard Worker #include "base/check.h"
11*6777b538SAndroid Build Coastguard Worker #include "base/files/file_path.h"
12*6777b538SAndroid Build Coastguard Worker #include "base/files/file_util.h"
13*6777b538SAndroid Build Coastguard Worker #include "base/metrics/histogram_functions.h"
14*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_piece.h"
15*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_util.h"
16*6777b538SAndroid Build Coastguard Worker #include "base/trace_event/memory_usage_estimator.h"
17*6777b538SAndroid Build Coastguard Worker #include "build/build_config.h"
18*6777b538SAndroid Build Coastguard Worker #include "net/base/cronet_buildflags.h"
19*6777b538SAndroid Build Coastguard Worker #include "net/base/url_util.h"
20*6777b538SAndroid Build Coastguard Worker #include "net/dns/dns_util.h"
21*6777b538SAndroid Build Coastguard Worker #include "url/url_canon.h"
22*6777b538SAndroid Build Coastguard Worker 
23*6777b538SAndroid Build Coastguard Worker using base::StringPiece;
24*6777b538SAndroid Build Coastguard Worker 
25*6777b538SAndroid Build Coastguard Worker namespace net {
26*6777b538SAndroid Build Coastguard Worker 
27*6777b538SAndroid Build Coastguard Worker namespace {
28*6777b538SAndroid Build Coastguard Worker 
29*6777b538SAndroid Build Coastguard Worker // Parses the contents of a hosts file.  Returns one token (IP or hostname) at
30*6777b538SAndroid Build Coastguard Worker // a time.  Doesn't copy anything; accepts the file as a StringPiece and
31*6777b538SAndroid Build Coastguard Worker // returns tokens as StringPieces.
32*6777b538SAndroid Build Coastguard Worker class HostsParser {
33*6777b538SAndroid Build Coastguard Worker  public:
HostsParser(const StringPiece & text,ParseHostsCommaMode comma_mode)34*6777b538SAndroid Build Coastguard Worker   explicit HostsParser(const StringPiece& text, ParseHostsCommaMode comma_mode)
35*6777b538SAndroid Build Coastguard Worker       : text_(text),
36*6777b538SAndroid Build Coastguard Worker         data_(text.data()),
37*6777b538SAndroid Build Coastguard Worker         end_(text.size()),
38*6777b538SAndroid Build Coastguard Worker         comma_mode_(comma_mode) {}
39*6777b538SAndroid Build Coastguard Worker 
40*6777b538SAndroid Build Coastguard Worker   HostsParser(const HostsParser&) = delete;
41*6777b538SAndroid Build Coastguard Worker   HostsParser& operator=(const HostsParser&) = delete;
42*6777b538SAndroid Build Coastguard Worker 
43*6777b538SAndroid Build Coastguard Worker   // Advances to the next token (IP or hostname).  Returns whether another
44*6777b538SAndroid Build Coastguard Worker   // token was available.  |token_is_ip| and |token| can be used to find out
45*6777b538SAndroid Build Coastguard Worker   // the type and text of the token.
Advance()46*6777b538SAndroid Build Coastguard Worker   bool Advance() {
47*6777b538SAndroid Build Coastguard Worker     bool next_is_ip = (pos_ == 0);
48*6777b538SAndroid Build Coastguard Worker     while (pos_ < end_ && pos_ != std::string::npos) {
49*6777b538SAndroid Build Coastguard Worker       switch (text_[pos_]) {
50*6777b538SAndroid Build Coastguard Worker         case ' ':
51*6777b538SAndroid Build Coastguard Worker         case '\t':
52*6777b538SAndroid Build Coastguard Worker           SkipWhitespace();
53*6777b538SAndroid Build Coastguard Worker           break;
54*6777b538SAndroid Build Coastguard Worker 
55*6777b538SAndroid Build Coastguard Worker         case '\r':
56*6777b538SAndroid Build Coastguard Worker         case '\n':
57*6777b538SAndroid Build Coastguard Worker           next_is_ip = true;
58*6777b538SAndroid Build Coastguard Worker           pos_++;
59*6777b538SAndroid Build Coastguard Worker           break;
60*6777b538SAndroid Build Coastguard Worker 
61*6777b538SAndroid Build Coastguard Worker         case '#':
62*6777b538SAndroid Build Coastguard Worker           SkipRestOfLine();
63*6777b538SAndroid Build Coastguard Worker           break;
64*6777b538SAndroid Build Coastguard Worker 
65*6777b538SAndroid Build Coastguard Worker         case ',':
66*6777b538SAndroid Build Coastguard Worker           if (comma_mode_ == PARSE_HOSTS_COMMA_IS_WHITESPACE) {
67*6777b538SAndroid Build Coastguard Worker             SkipWhitespace();
68*6777b538SAndroid Build Coastguard Worker             break;
69*6777b538SAndroid Build Coastguard Worker           }
70*6777b538SAndroid Build Coastguard Worker 
71*6777b538SAndroid Build Coastguard Worker           // If comma_mode_ is COMMA_IS_TOKEN, fall through:
72*6777b538SAndroid Build Coastguard Worker           [[fallthrough]];
73*6777b538SAndroid Build Coastguard Worker 
74*6777b538SAndroid Build Coastguard Worker         default: {
75*6777b538SAndroid Build Coastguard Worker           size_t token_start = pos_;
76*6777b538SAndroid Build Coastguard Worker           SkipToken();
77*6777b538SAndroid Build Coastguard Worker           size_t token_end = (pos_ == std::string::npos) ? end_ : pos_;
78*6777b538SAndroid Build Coastguard Worker 
79*6777b538SAndroid Build Coastguard Worker           token_ = StringPiece(data_ + token_start, token_end - token_start);
80*6777b538SAndroid Build Coastguard Worker           token_is_ip_ = next_is_ip;
81*6777b538SAndroid Build Coastguard Worker 
82*6777b538SAndroid Build Coastguard Worker           return true;
83*6777b538SAndroid Build Coastguard Worker         }
84*6777b538SAndroid Build Coastguard Worker       }
85*6777b538SAndroid Build Coastguard Worker     }
86*6777b538SAndroid Build Coastguard Worker 
87*6777b538SAndroid Build Coastguard Worker     return false;
88*6777b538SAndroid Build Coastguard Worker   }
89*6777b538SAndroid Build Coastguard Worker 
90*6777b538SAndroid Build Coastguard Worker   // Fast-forwards the parser to the next line.  Should be called if an IP
91*6777b538SAndroid Build Coastguard Worker   // address doesn't parse, to avoid wasting time tokenizing hostnames that
92*6777b538SAndroid Build Coastguard Worker   // will be ignored.
SkipRestOfLine()93*6777b538SAndroid Build Coastguard Worker   void SkipRestOfLine() { pos_ = text_.find("\n", pos_); }
94*6777b538SAndroid Build Coastguard Worker 
95*6777b538SAndroid Build Coastguard Worker   // Returns whether the last-parsed token is an IP address (true) or a
96*6777b538SAndroid Build Coastguard Worker   // hostname (false).
token_is_ip()97*6777b538SAndroid Build Coastguard Worker   bool token_is_ip() { return token_is_ip_; }
98*6777b538SAndroid Build Coastguard Worker 
99*6777b538SAndroid Build Coastguard Worker   // Returns the text of the last-parsed token as a StringPiece referencing
100*6777b538SAndroid Build Coastguard Worker   // the same underlying memory as the StringPiece passed to the constructor.
101*6777b538SAndroid Build Coastguard Worker   // Returns an empty StringPiece if no token has been parsed or the end of
102*6777b538SAndroid Build Coastguard Worker   // the input string has been reached.
token()103*6777b538SAndroid Build Coastguard Worker   const StringPiece& token() { return token_; }
104*6777b538SAndroid Build Coastguard Worker 
105*6777b538SAndroid Build Coastguard Worker  private:
SkipToken()106*6777b538SAndroid Build Coastguard Worker   void SkipToken() {
107*6777b538SAndroid Build Coastguard Worker     switch (comma_mode_) {
108*6777b538SAndroid Build Coastguard Worker       case PARSE_HOSTS_COMMA_IS_TOKEN:
109*6777b538SAndroid Build Coastguard Worker         pos_ = text_.find_first_of(" \t\n\r#", pos_);
110*6777b538SAndroid Build Coastguard Worker         break;
111*6777b538SAndroid Build Coastguard Worker       case PARSE_HOSTS_COMMA_IS_WHITESPACE:
112*6777b538SAndroid Build Coastguard Worker         pos_ = text_.find_first_of(" ,\t\n\r#", pos_);
113*6777b538SAndroid Build Coastguard Worker         break;
114*6777b538SAndroid Build Coastguard Worker     }
115*6777b538SAndroid Build Coastguard Worker   }
116*6777b538SAndroid Build Coastguard Worker 
SkipWhitespace()117*6777b538SAndroid Build Coastguard Worker   void SkipWhitespace() {
118*6777b538SAndroid Build Coastguard Worker     switch (comma_mode_) {
119*6777b538SAndroid Build Coastguard Worker       case PARSE_HOSTS_COMMA_IS_TOKEN:
120*6777b538SAndroid Build Coastguard Worker         pos_ = text_.find_first_not_of(" \t", pos_);
121*6777b538SAndroid Build Coastguard Worker         break;
122*6777b538SAndroid Build Coastguard Worker       case PARSE_HOSTS_COMMA_IS_WHITESPACE:
123*6777b538SAndroid Build Coastguard Worker         pos_ = text_.find_first_not_of(" ,\t", pos_);
124*6777b538SAndroid Build Coastguard Worker         break;
125*6777b538SAndroid Build Coastguard Worker     }
126*6777b538SAndroid Build Coastguard Worker   }
127*6777b538SAndroid Build Coastguard Worker 
128*6777b538SAndroid Build Coastguard Worker   const StringPiece text_;
129*6777b538SAndroid Build Coastguard Worker   const char* data_;
130*6777b538SAndroid Build Coastguard Worker   const size_t end_;
131*6777b538SAndroid Build Coastguard Worker 
132*6777b538SAndroid Build Coastguard Worker   size_t pos_ = 0;
133*6777b538SAndroid Build Coastguard Worker   StringPiece token_;
134*6777b538SAndroid Build Coastguard Worker   bool token_is_ip_ = false;
135*6777b538SAndroid Build Coastguard Worker 
136*6777b538SAndroid Build Coastguard Worker   const ParseHostsCommaMode comma_mode_;
137*6777b538SAndroid Build Coastguard Worker };
138*6777b538SAndroid Build Coastguard Worker 
ParseHostsWithCommaMode(const std::string & contents,DnsHosts * dns_hosts,ParseHostsCommaMode comma_mode)139*6777b538SAndroid Build Coastguard Worker void ParseHostsWithCommaMode(const std::string& contents,
140*6777b538SAndroid Build Coastguard Worker                              DnsHosts* dns_hosts,
141*6777b538SAndroid Build Coastguard Worker                              ParseHostsCommaMode comma_mode) {
142*6777b538SAndroid Build Coastguard Worker   CHECK(dns_hosts);
143*6777b538SAndroid Build Coastguard Worker 
144*6777b538SAndroid Build Coastguard Worker   StringPiece ip_text;
145*6777b538SAndroid Build Coastguard Worker   IPAddress ip;
146*6777b538SAndroid Build Coastguard Worker   AddressFamily family = ADDRESS_FAMILY_IPV4;
147*6777b538SAndroid Build Coastguard Worker   HostsParser parser(contents, comma_mode);
148*6777b538SAndroid Build Coastguard Worker   while (parser.Advance()) {
149*6777b538SAndroid Build Coastguard Worker     if (parser.token_is_ip()) {
150*6777b538SAndroid Build Coastguard Worker       StringPiece new_ip_text = parser.token();
151*6777b538SAndroid Build Coastguard Worker       // Some ad-blocking hosts files contain thousands of entries pointing to
152*6777b538SAndroid Build Coastguard Worker       // the same IP address (usually 127.0.0.1).  Don't bother parsing the IP
153*6777b538SAndroid Build Coastguard Worker       // again if it's the same as the one above it.
154*6777b538SAndroid Build Coastguard Worker       if (new_ip_text != ip_text) {
155*6777b538SAndroid Build Coastguard Worker         IPAddress new_ip;
156*6777b538SAndroid Build Coastguard Worker         if (new_ip.AssignFromIPLiteral(parser.token())) {
157*6777b538SAndroid Build Coastguard Worker           ip_text = new_ip_text;
158*6777b538SAndroid Build Coastguard Worker           ip = new_ip;
159*6777b538SAndroid Build Coastguard Worker           family = (ip.IsIPv4()) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6;
160*6777b538SAndroid Build Coastguard Worker         } else {
161*6777b538SAndroid Build Coastguard Worker           parser.SkipRestOfLine();
162*6777b538SAndroid Build Coastguard Worker         }
163*6777b538SAndroid Build Coastguard Worker       }
164*6777b538SAndroid Build Coastguard Worker     } else {
165*6777b538SAndroid Build Coastguard Worker       url::CanonHostInfo canonicalization_info;
166*6777b538SAndroid Build Coastguard Worker       std::string canonicalized_host =
167*6777b538SAndroid Build Coastguard Worker           CanonicalizeHost(parser.token(), &canonicalization_info);
168*6777b538SAndroid Build Coastguard Worker 
169*6777b538SAndroid Build Coastguard Worker       // Skip if token is invalid for host canonicalization, or if it
170*6777b538SAndroid Build Coastguard Worker       // canonicalizes as an IP address.
171*6777b538SAndroid Build Coastguard Worker       if (canonicalization_info.family != url::CanonHostInfo::NEUTRAL)
172*6777b538SAndroid Build Coastguard Worker         continue;
173*6777b538SAndroid Build Coastguard Worker 
174*6777b538SAndroid Build Coastguard Worker       DnsHostsKey key(std::move(canonicalized_host), family);
175*6777b538SAndroid Build Coastguard Worker       if (!IsCanonicalizedHostCompliant(key.first))
176*6777b538SAndroid Build Coastguard Worker         continue;
177*6777b538SAndroid Build Coastguard Worker       IPAddress* mapped_ip = &(*dns_hosts)[key];
178*6777b538SAndroid Build Coastguard Worker       if (mapped_ip->empty())
179*6777b538SAndroid Build Coastguard Worker         *mapped_ip = ip;
180*6777b538SAndroid Build Coastguard Worker       // else ignore this entry (first hit counts)
181*6777b538SAndroid Build Coastguard Worker     }
182*6777b538SAndroid Build Coastguard Worker   }
183*6777b538SAndroid Build Coastguard Worker }
184*6777b538SAndroid Build Coastguard Worker 
185*6777b538SAndroid Build Coastguard Worker }  // namespace
186*6777b538SAndroid Build Coastguard Worker 
ParseHostsWithCommaModeForTesting(const std::string & contents,DnsHosts * dns_hosts,ParseHostsCommaMode comma_mode)187*6777b538SAndroid Build Coastguard Worker void ParseHostsWithCommaModeForTesting(const std::string& contents,
188*6777b538SAndroid Build Coastguard Worker                                        DnsHosts* dns_hosts,
189*6777b538SAndroid Build Coastguard Worker                                        ParseHostsCommaMode comma_mode) {
190*6777b538SAndroid Build Coastguard Worker   ParseHostsWithCommaMode(contents, dns_hosts, comma_mode);
191*6777b538SAndroid Build Coastguard Worker }
192*6777b538SAndroid Build Coastguard Worker 
ParseHosts(const std::string & contents,DnsHosts * dns_hosts)193*6777b538SAndroid Build Coastguard Worker void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) {
194*6777b538SAndroid Build Coastguard Worker   ParseHostsCommaMode comma_mode;
195*6777b538SAndroid Build Coastguard Worker #if BUILDFLAG(IS_APPLE)
196*6777b538SAndroid Build Coastguard Worker   // Mac OS X allows commas to separate hostnames.
197*6777b538SAndroid Build Coastguard Worker   comma_mode = PARSE_HOSTS_COMMA_IS_WHITESPACE;
198*6777b538SAndroid Build Coastguard Worker #else
199*6777b538SAndroid Build Coastguard Worker   // Linux allows commas in hostnames.
200*6777b538SAndroid Build Coastguard Worker   comma_mode = PARSE_HOSTS_COMMA_IS_TOKEN;
201*6777b538SAndroid Build Coastguard Worker #endif
202*6777b538SAndroid Build Coastguard Worker 
203*6777b538SAndroid Build Coastguard Worker   ParseHostsWithCommaMode(contents, dns_hosts, comma_mode);
204*6777b538SAndroid Build Coastguard Worker 
205*6777b538SAndroid Build Coastguard Worker   // TODO(crbug.com/1377305): Remove this when we have enough data.
206*6777b538SAndroid Build Coastguard Worker   base::UmaHistogramCounts100000("Net.DNS.DnsHosts.Count", dns_hosts->size());
207*6777b538SAndroid Build Coastguard Worker 
208*6777b538SAndroid Build Coastguard Worker #if !BUILDFLAG(CRONET_BUILD)
209*6777b538SAndroid Build Coastguard Worker   // Cronet disables tracing and doesn't provide an implementation of
210*6777b538SAndroid Build Coastguard Worker   // base::trace_event::EstimateMemoryUsage for DnsHosts. Having this
211*6777b538SAndroid Build Coastguard Worker   // conditional is preferred over a fake implementation to avoid reporting fake
212*6777b538SAndroid Build Coastguard Worker   // metrics.
213*6777b538SAndroid Build Coastguard Worker   base::UmaHistogramMemoryKB(
214*6777b538SAndroid Build Coastguard Worker       "Net.DNS.DnsHosts.EstimateMemoryUsage",
215*6777b538SAndroid Build Coastguard Worker       base::trace_event::EstimateMemoryUsage(*dns_hosts));
216*6777b538SAndroid Build Coastguard Worker #endif  // !BUILDFLAG(CRONET_BUILD)
217*6777b538SAndroid Build Coastguard Worker }
218*6777b538SAndroid Build Coastguard Worker 
219*6777b538SAndroid Build Coastguard Worker DnsHostsParser::~DnsHostsParser() = default;
220*6777b538SAndroid Build Coastguard Worker 
DnsHostsFileParser(base::FilePath hosts_file_path)221*6777b538SAndroid Build Coastguard Worker DnsHostsFileParser::DnsHostsFileParser(base::FilePath hosts_file_path)
222*6777b538SAndroid Build Coastguard Worker     : hosts_file_path_(std::move(hosts_file_path)) {}
223*6777b538SAndroid Build Coastguard Worker 
224*6777b538SAndroid Build Coastguard Worker DnsHostsFileParser::~DnsHostsFileParser() = default;
225*6777b538SAndroid Build Coastguard Worker 
ParseHosts(DnsHosts * dns_hosts) const226*6777b538SAndroid Build Coastguard Worker bool DnsHostsFileParser::ParseHosts(DnsHosts* dns_hosts) const {
227*6777b538SAndroid Build Coastguard Worker   dns_hosts->clear();
228*6777b538SAndroid Build Coastguard Worker   // Missing file indicates empty HOSTS.
229*6777b538SAndroid Build Coastguard Worker   if (!base::PathExists(hosts_file_path_))
230*6777b538SAndroid Build Coastguard Worker     return true;
231*6777b538SAndroid Build Coastguard Worker 
232*6777b538SAndroid Build Coastguard Worker   int64_t size;
233*6777b538SAndroid Build Coastguard Worker   if (!base::GetFileSize(hosts_file_path_, &size))
234*6777b538SAndroid Build Coastguard Worker     return false;
235*6777b538SAndroid Build Coastguard Worker 
236*6777b538SAndroid Build Coastguard Worker   // Reject HOSTS files larger than |kMaxHostsSize| bytes.
237*6777b538SAndroid Build Coastguard Worker   const int64_t kMaxHostsSize = 1 << 25;  // 32MB
238*6777b538SAndroid Build Coastguard Worker 
239*6777b538SAndroid Build Coastguard Worker   // TODO(crbug.com/1377305): Remove this when we have enough data.
240*6777b538SAndroid Build Coastguard Worker   base::UmaHistogramCustomCounts("Net.DNS.DnsHosts.FileSize", size, 1,
241*6777b538SAndroid Build Coastguard Worker                                  kMaxHostsSize * 2, 50);
242*6777b538SAndroid Build Coastguard Worker   if (size > kMaxHostsSize)
243*6777b538SAndroid Build Coastguard Worker     return false;
244*6777b538SAndroid Build Coastguard Worker 
245*6777b538SAndroid Build Coastguard Worker   std::string contents;
246*6777b538SAndroid Build Coastguard Worker   if (!base::ReadFileToString(hosts_file_path_, &contents))
247*6777b538SAndroid Build Coastguard Worker     return false;
248*6777b538SAndroid Build Coastguard Worker 
249*6777b538SAndroid Build Coastguard Worker   net::ParseHosts(contents, dns_hosts);
250*6777b538SAndroid Build Coastguard Worker   return true;
251*6777b538SAndroid Build Coastguard Worker }
252*6777b538SAndroid Build Coastguard Worker 
253*6777b538SAndroid Build Coastguard Worker }  // namespace net
254