1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/proxy_resolution/pac_file_fetcher_impl.h"
6
7 #include <string_view>
8
9 #include "base/compiler_specific.h"
10 #include "base/functional/bind.h"
11 #include "base/location.h"
12 #include "base/logging.h"
13 #include "base/memory/ptr_util.h"
14 #include "base/metrics/histogram_macros.h"
15 #include "base/ranges/algorithm.h"
16 #include "base/strings/string_util.h"
17 #include "base/task/single_thread_task_runner.h"
18 #include "net/base/data_url.h"
19 #include "net/base/io_buffer.h"
20 #include "net/base/load_flags.h"
21 #include "net/base/net_errors.h"
22 #include "net/base/net_string_util.h"
23 #include "net/base/request_priority.h"
24 #include "net/cert/cert_status_flags.h"
25 #include "net/http/http_response_headers.h"
26 #include "net/url_request/redirect_info.h"
27 #include "net/url_request/url_request_context.h"
28
29 // TODO(eroman):
30 // - Support auth-prompts (http://crbug.com/77366)
31
32 namespace net {
33
34 namespace {
35
// Upper bound, in bytes, on the size of a PAC script. A response larger than
// this fails with ERR_FILE_TOO_BIG.
constexpr int kDefaultMaxResponseBytes = 1024 * 1024;  // 1 megabyte
39
40 // The maximum duration (in milliseconds) allowed for fetching the PAC script.
41 // Responses exceeding this will fail with ERR_TIMED_OUT.
42 //
43 // This timeout applies to both scripts fetched in the course of WPAD, as well
44 // as explicitly configured ones.
45 //
46 // If the default timeout is too high, auto-detect can stall for a long time,
47 // and if it is too low then slow loading scripts may be skipped.
48 //
49 // 30 seconds is a compromise between those competing goals. This value also
50 // appears to match Microsoft Edge (based on testing).
51 constexpr base::TimeDelta kDefaultMaxDuration = base::Seconds(30);
52
53 // Returns true if |mime_type| is one of the known PAC mime type.
IsPacMimeType(std::string_view mime_type)54 constexpr bool IsPacMimeType(std::string_view mime_type) {
55 constexpr std::string_view kSupportedPacMimeTypes[] = {
56 "application/x-ns-proxy-autoconfig",
57 "application/x-javascript-config",
58 };
59 return base::ranges::any_of(kSupportedPacMimeTypes, [&](auto pac_mime_type) {
60 return base::EqualsCaseInsensitiveASCII(pac_mime_type, mime_type);
61 });
62 }
63
// Associates a byte-order mark with the charset it identifies.
struct BomMapping {
  // The raw BOM bytes expected at the very start of the data.
  std::string_view prefix;
  // Name of the charset to use when |prefix| is present.
  const char* charset;
};

// BOMs recognized when a response does not declare its charset.
constexpr BomMapping kBomMappings[] = {
    {"\xFE\xFF", "utf-16be"},
    {"\xFF\xFE", "utf-16le"},
    {"\xEF\xBB\xBF", "utf-8"},
};
74
75 // Converts |bytes| (which is encoded by |charset|) to UTF16, saving the resul
76 // to |*utf16|.
77 // If |charset| is empty, then we don't know what it was and guess.
ConvertResponseToUTF16(const std::string & charset,const std::string & bytes,std::u16string * utf16)78 void ConvertResponseToUTF16(const std::string& charset,
79 const std::string& bytes,
80 std::u16string* utf16) {
81 if (charset.empty()) {
82 // Guess the charset by looking at the BOM.
83 std::string_view bytes_str(bytes);
84 for (const auto& bom : kBomMappings) {
85 if (bytes_str.starts_with(bom.prefix)) {
86 return ConvertResponseToUTF16(
87 bom.charset,
88 // Strip the BOM in the converted response.
89 bytes.substr(bom.prefix.size()), utf16);
90 }
91 }
92
93 // Otherwise assume ISO-8859-1 if no charset was specified.
94 return ConvertResponseToUTF16(kCharsetLatin1, bytes, utf16);
95 }
96
97 DCHECK(!charset.empty());
98
99 // Be generous in the conversion -- if any characters lie outside of |charset|
100 // (i.e. invalid), then substitute them with U+FFFD rather than failing.
101 ConvertToUTF16WithSubstitutions(bytes, charset.c_str(), utf16);
102 }
103
104 } // namespace
105
Create(URLRequestContext * url_request_context)106 std::unique_ptr<PacFileFetcherImpl> PacFileFetcherImpl::Create(
107 URLRequestContext* url_request_context) {
108 return base::WrapUnique(new PacFileFetcherImpl(url_request_context));
109 }
110
~PacFileFetcherImpl()111 PacFileFetcherImpl::~PacFileFetcherImpl() {
112 // The URLRequest's destructor will cancel the outstanding request, and
113 // ensure that the delegate (this) is not called again.
114 }
115
SetTimeoutConstraint(base::TimeDelta timeout)116 base::TimeDelta PacFileFetcherImpl::SetTimeoutConstraint(
117 base::TimeDelta timeout) {
118 base::TimeDelta prev = max_duration_;
119 max_duration_ = timeout;
120 return prev;
121 }
122
SetSizeConstraint(size_t size_bytes)123 size_t PacFileFetcherImpl::SetSizeConstraint(size_t size_bytes) {
124 size_t prev = max_response_bytes_;
125 max_response_bytes_ = size_bytes;
126 return prev;
127 }
128
OnResponseCompleted(URLRequest * request,int net_error)129 void PacFileFetcherImpl::OnResponseCompleted(URLRequest* request,
130 int net_error) {
131 DCHECK_EQ(request, cur_request_.get());
132
133 // Use |result_code_| as the request's error if we have already set it to
134 // something specific.
135 if (result_code_ == OK && net_error != OK)
136 result_code_ = net_error;
137
138 FetchCompleted();
139 }
140
Fetch(const GURL & url,std::u16string * text,CompletionOnceCallback callback,const NetworkTrafficAnnotationTag traffic_annotation)141 int PacFileFetcherImpl::Fetch(
142 const GURL& url,
143 std::u16string* text,
144 CompletionOnceCallback callback,
145 const NetworkTrafficAnnotationTag traffic_annotation) {
146 // It is invalid to call Fetch() while a request is already in progress.
147 DCHECK(!cur_request_.get());
148 DCHECK(!callback.is_null());
149 DCHECK(text);
150
151 if (!url_request_context_)
152 return ERR_CONTEXT_SHUT_DOWN;
153
154 if (!IsUrlSchemeAllowed(url))
155 return ERR_DISALLOWED_URL_SCHEME;
156
157 // Handle base-64 encoded data-urls that contain custom PAC scripts.
158 if (url.SchemeIs("data")) {
159 std::string mime_type;
160 std::string charset;
161 std::string data;
162 if (!DataURL::Parse(url, &mime_type, &charset, &data))
163 return ERR_FAILED;
164
165 ConvertResponseToUTF16(charset, data, text);
166 return OK;
167 }
168
169 DCHECK(fetch_start_time_.is_null());
170 fetch_start_time_ = base::TimeTicks::Now();
171
172 // Use highest priority, so if socket pools are being used for other types of
173 // requests, PAC requests are aren't blocked on them.
174 cur_request_ = url_request_context_->CreateRequest(url, MAXIMUM_PRIORITY,
175 this, traffic_annotation);
176
177 cur_request_->set_isolation_info(isolation_info());
178
179 // Make sure that the PAC script is downloaded using a direct connection,
180 // to avoid circular dependencies (fetching is a part of proxy resolution).
181 // Also disable the use of the disk cache. The cache is disabled so that if
182 // the user switches networks we don't potentially use the cached response
183 // from old network when we should in fact be re-fetching on the new network.
184 // If the PAC script is hosted on an HTTPS server we bypass revocation
185 // checking in order to avoid a circular dependency when attempting to fetch
186 // the OCSP response or CRL. We could make the revocation check go direct but
187 // the proxy might be the only way to the outside world. IGNORE_LIMITS is
188 // used to avoid blocking proxy resolution on other network requests.
189 cur_request_->SetLoadFlags(LOAD_BYPASS_PROXY | LOAD_DISABLE_CACHE |
190 LOAD_DISABLE_CERT_NETWORK_FETCHES |
191 LOAD_IGNORE_LIMITS);
192
193 // Save the caller's info for notification on completion.
194 callback_ = std::move(callback);
195 result_text_ = text;
196
197 bytes_read_so_far_.clear();
198
199 // Post a task to timeout this request if it takes too long.
200 cur_request_id_ = ++next_id_;
201
202 base::SingleThreadTaskRunner::GetCurrentDefault()->PostDelayedTask(
203 FROM_HERE,
204 base::BindOnce(&PacFileFetcherImpl::OnTimeout, weak_factory_.GetWeakPtr(),
205 cur_request_id_),
206 max_duration_);
207
208 // Start the request.
209 cur_request_->Start();
210 return ERR_IO_PENDING;
211 }
212
Cancel()213 void PacFileFetcherImpl::Cancel() {
214 // ResetCurRequestState will free the URLRequest, which will cause
215 // cancellation.
216 ResetCurRequestState();
217 }
218
GetRequestContext() const219 URLRequestContext* PacFileFetcherImpl::GetRequestContext() const {
220 return url_request_context_;
221 }
222
OnShutdown()223 void PacFileFetcherImpl::OnShutdown() {
224 url_request_context_ = nullptr;
225
226 if (cur_request_) {
227 result_code_ = ERR_CONTEXT_SHUT_DOWN;
228 FetchCompleted();
229 }
230 }
231
OnReceivedRedirect(URLRequest * request,const RedirectInfo & redirect_info,bool * defer_redirect)232 void PacFileFetcherImpl::OnReceivedRedirect(URLRequest* request,
233 const RedirectInfo& redirect_info,
234 bool* defer_redirect) {
235 int error = OK;
236
237 // Redirection to file:// is never OK. Ordinarily this is handled lower in the
238 // stack (|FileProtocolHandler::IsSafeRedirectTarget|), but this is reachable
239 // when built without file:// suppport. Return the same error for consistency.
240 if (redirect_info.new_url.SchemeIsFile()) {
241 error = ERR_UNSAFE_REDIRECT;
242 } else if (!IsUrlSchemeAllowed(redirect_info.new_url)) {
243 error = ERR_DISALLOWED_URL_SCHEME;
244 }
245
246 if (error != OK) {
247 // Fail the redirect.
248 request->CancelWithError(error);
249 OnResponseCompleted(request, error);
250 }
251 }
252
OnAuthRequired(URLRequest * request,const AuthChallengeInfo & auth_info)253 void PacFileFetcherImpl::OnAuthRequired(URLRequest* request,
254 const AuthChallengeInfo& auth_info) {
255 DCHECK_EQ(request, cur_request_.get());
256 // TODO(eroman): http://crbug.com/77366
257 LOG(WARNING) << "Auth required to fetch PAC script, aborting.";
258 result_code_ = ERR_NOT_IMPLEMENTED;
259 request->CancelAuth();
260 }
261
OnSSLCertificateError(URLRequest * request,int net_error,const SSLInfo & ssl_info,bool fatal)262 void PacFileFetcherImpl::OnSSLCertificateError(URLRequest* request,
263 int net_error,
264 const SSLInfo& ssl_info,
265 bool fatal) {
266 DCHECK_EQ(request, cur_request_.get());
267 LOG(WARNING) << "SSL certificate error when fetching PAC script, aborting.";
268 // Certificate errors are in same space as net errors.
269 result_code_ = net_error;
270 request->Cancel();
271 }
272
OnResponseStarted(URLRequest * request,int net_error)273 void PacFileFetcherImpl::OnResponseStarted(URLRequest* request, int net_error) {
274 DCHECK_EQ(request, cur_request_.get());
275 DCHECK_NE(ERR_IO_PENDING, net_error);
276
277 if (net_error != OK) {
278 OnResponseCompleted(request, net_error);
279 return;
280 }
281
282 // Require HTTP responses to have a success status code.
283 if (request->url().SchemeIsHTTPOrHTTPS()) {
284 // NOTE about status codes: We are like Firefox 3 in this respect.
285 // {IE 7, Safari 3, Opera 9.5} do not care about the status code.
286 if (request->GetResponseCode() != 200) {
287 VLOG(1) << "Fetched PAC script had (bad) status line: "
288 << request->response_headers()->GetStatusLine();
289 result_code_ = ERR_HTTP_RESPONSE_CODE_FAILURE;
290 request->Cancel();
291 return;
292 }
293
294 // NOTE about mime types: We do not enforce mime types on PAC files.
295 // This is for compatibility with {IE 7, Firefox 3, Opera 9.5}. We will
296 // however log mismatches to help with debugging.
297 std::string mime_type;
298 cur_request_->GetMimeType(&mime_type);
299 if (!IsPacMimeType(mime_type)) {
300 VLOG(1) << "Fetched PAC script does not have a proper mime type: "
301 << mime_type;
302 }
303 }
304
305 ReadBody(request);
306 }
307
OnReadCompleted(URLRequest * request,int num_bytes)308 void PacFileFetcherImpl::OnReadCompleted(URLRequest* request, int num_bytes) {
309 DCHECK_NE(ERR_IO_PENDING, num_bytes);
310
311 DCHECK_EQ(request, cur_request_.get());
312 if (ConsumeBytesRead(request, num_bytes)) {
313 // Keep reading.
314 ReadBody(request);
315 }
316 }
317
PacFileFetcherImpl(URLRequestContext * url_request_context)318 PacFileFetcherImpl::PacFileFetcherImpl(URLRequestContext* url_request_context)
319 : url_request_context_(url_request_context),
320 buf_(base::MakeRefCounted<IOBufferWithSize>(kBufSize)),
321 max_response_bytes_(kDefaultMaxResponseBytes),
322 max_duration_(kDefaultMaxDuration) {
323 DCHECK(url_request_context);
324 }
325
IsUrlSchemeAllowed(const GURL & url) const326 bool PacFileFetcherImpl::IsUrlSchemeAllowed(const GURL& url) const {
327 // Always allow http://, https://, and data:.
328 if (url.SchemeIsHTTPOrHTTPS() || url.SchemeIs("data"))
329 return true;
330
331 // Disallow any other URL scheme.
332 return false;
333 }
334
ReadBody(URLRequest * request)335 void PacFileFetcherImpl::ReadBody(URLRequest* request) {
336 // Read as many bytes as are available synchronously.
337 while (true) {
338 int num_bytes = request->Read(buf_.get(), kBufSize);
339 if (num_bytes == ERR_IO_PENDING)
340 return;
341
342 if (num_bytes < 0) {
343 OnResponseCompleted(request, num_bytes);
344 return;
345 }
346
347 if (!ConsumeBytesRead(request, num_bytes))
348 return;
349 }
350 }
351
ConsumeBytesRead(URLRequest * request,int num_bytes)352 bool PacFileFetcherImpl::ConsumeBytesRead(URLRequest* request, int num_bytes) {
353 if (fetch_time_to_first_byte_.is_null())
354 fetch_time_to_first_byte_ = base::TimeTicks::Now();
355
356 if (num_bytes <= 0) {
357 // Error while reading, or EOF.
358 OnResponseCompleted(request, num_bytes);
359 return false;
360 }
361
362 // Enforce maximum size bound.
363 if (num_bytes + bytes_read_so_far_.size() >
364 static_cast<size_t>(max_response_bytes_)) {
365 result_code_ = ERR_FILE_TOO_BIG;
366 request->Cancel();
367 return false;
368 }
369
370 bytes_read_so_far_.append(buf_->data(), num_bytes);
371 return true;
372 }
373
FetchCompleted()374 void PacFileFetcherImpl::FetchCompleted() {
375 if (result_code_ == OK) {
376 // Calculate duration of time for PAC file fetch to complete.
377 DCHECK(!fetch_start_time_.is_null());
378 DCHECK(!fetch_time_to_first_byte_.is_null());
379 UMA_HISTOGRAM_MEDIUM_TIMES("Net.ProxyScriptFetcher.FirstByteDuration",
380 fetch_time_to_first_byte_ - fetch_start_time_);
381
382 // The caller expects the response to be encoded as UTF16.
383 std::string charset;
384 cur_request_->GetCharset(&charset);
385 ConvertResponseToUTF16(charset, bytes_read_so_far_, result_text_);
386 } else {
387 // On error, the caller expects empty string for bytes.
388 result_text_->clear();
389 }
390
391 int result_code = result_code_;
392 CompletionOnceCallback callback = std::move(callback_);
393
394 ResetCurRequestState();
395
396 std::move(callback).Run(result_code);
397 }
398
ResetCurRequestState()399 void PacFileFetcherImpl::ResetCurRequestState() {
400 cur_request_.reset();
401 cur_request_id_ = 0;
402 callback_.Reset();
403 result_code_ = OK;
404 result_text_ = nullptr;
405 fetch_start_time_ = base::TimeTicks();
406 fetch_time_to_first_byte_ = base::TimeTicks();
407 }
408
OnTimeout(int id)409 void PacFileFetcherImpl::OnTimeout(int id) {
410 // Timeout tasks may outlive the URLRequest they reference. Make sure it
411 // is still applicable.
412 if (cur_request_id_ != id)
413 return;
414
415 DCHECK(cur_request_.get());
416 result_code_ = ERR_TIMED_OUT;
417 FetchCompleted();
418 }
419
420 } // namespace net
421