xref: /aosp_15_r20/external/cronet/net/proxy_resolution/pac_file_fetcher_impl.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/proxy_resolution/pac_file_fetcher_impl.h"
6 
7 #include <string_view>
8 
9 #include "base/compiler_specific.h"
10 #include "base/functional/bind.h"
11 #include "base/location.h"
12 #include "base/logging.h"
13 #include "base/memory/ptr_util.h"
14 #include "base/metrics/histogram_macros.h"
15 #include "base/ranges/algorithm.h"
16 #include "base/strings/string_util.h"
17 #include "base/task/single_thread_task_runner.h"
18 #include "net/base/data_url.h"
19 #include "net/base/io_buffer.h"
20 #include "net/base/load_flags.h"
21 #include "net/base/net_errors.h"
22 #include "net/base/net_string_util.h"
23 #include "net/base/request_priority.h"
24 #include "net/cert/cert_status_flags.h"
25 #include "net/http/http_response_headers.h"
26 #include "net/url_request/redirect_info.h"
27 #include "net/url_request/url_request_context.h"
28 
29 // TODO(eroman):
30 //   - Support auth-prompts (http://crbug.com/77366)
31 
32 namespace net {
33 
34 namespace {
35 
// Default upper bound, in bytes, on the size of a fetched PAC script. A
// response that would grow past this limit fails with ERR_FILE_TOO_BIG.
constexpr int kDefaultMaxResponseBytes = 1024 * 1024;  // 1 megabyte
39 
40 // The maximum duration (in milliseconds) allowed for fetching the PAC script.
41 // Responses exceeding this will fail with ERR_TIMED_OUT.
42 //
43 // This timeout applies to both scripts fetched in the course of WPAD, as well
44 // as explicitly configured ones.
45 //
46 // If the default timeout is too high, auto-detect can stall for a long time,
47 // and if it is too low then slow loading scripts may be skipped.
48 //
49 // 30 seconds is a compromise between those competing goals. This value also
50 // appears to match Microsoft Edge (based on testing).
51 constexpr base::TimeDelta kDefaultMaxDuration = base::Seconds(30);
52 
53 // Returns true if |mime_type| is one of the known PAC mime type.
IsPacMimeType(std::string_view mime_type)54 constexpr bool IsPacMimeType(std::string_view mime_type) {
55   constexpr std::string_view kSupportedPacMimeTypes[] = {
56       "application/x-ns-proxy-autoconfig",
57       "application/x-javascript-config",
58   };
59   return base::ranges::any_of(kSupportedPacMimeTypes, [&](auto pac_mime_type) {
60     return base::EqualsCaseInsensitiveASCII(pac_mime_type, mime_type);
61   });
62 }
63 
// Associates a byte-order mark (BOM) with the charset it identifies.
struct BomMapping {
  // The raw BOM bytes, expected at the very start of the data.
  std::string_view prefix;
  // Name of the charset to use when the data starts with |prefix|.
  const char* charset;
};

// BOMs that are recognized when a response does not declare a charset.
// Entries are tried in order and the first matching prefix wins.
//
// Declared constexpr so the table is guaranteed to be constant-initialized
// (no static initializer) and is usable in constant expressions.
constexpr BomMapping kBomMappings[] = {
    {"\xFE\xFF", "utf-16be"},
    {"\xFF\xFE", "utf-16le"},
    {"\xEF\xBB\xBF", "utf-8"},
};
74 
75 // Converts |bytes| (which is encoded by |charset|) to UTF16, saving the resul
76 // to |*utf16|.
77 // If |charset| is empty, then we don't know what it was and guess.
ConvertResponseToUTF16(const std::string & charset,const std::string & bytes,std::u16string * utf16)78 void ConvertResponseToUTF16(const std::string& charset,
79                             const std::string& bytes,
80                             std::u16string* utf16) {
81   if (charset.empty()) {
82     // Guess the charset by looking at the BOM.
83     std::string_view bytes_str(bytes);
84     for (const auto& bom : kBomMappings) {
85       if (bytes_str.starts_with(bom.prefix)) {
86         return ConvertResponseToUTF16(
87             bom.charset,
88             // Strip the BOM in the converted response.
89             bytes.substr(bom.prefix.size()), utf16);
90       }
91     }
92 
93     // Otherwise assume ISO-8859-1 if no charset was specified.
94     return ConvertResponseToUTF16(kCharsetLatin1, bytes, utf16);
95   }
96 
97   DCHECK(!charset.empty());
98 
99   // Be generous in the conversion -- if any characters lie outside of |charset|
100   // (i.e. invalid), then substitute them with U+FFFD rather than failing.
101   ConvertToUTF16WithSubstitutions(bytes, charset.c_str(), utf16);
102 }
103 
104 }  // namespace
105 
Create(URLRequestContext * url_request_context)106 std::unique_ptr<PacFileFetcherImpl> PacFileFetcherImpl::Create(
107     URLRequestContext* url_request_context) {
108   return base::WrapUnique(new PacFileFetcherImpl(url_request_context));
109 }
110 
~PacFileFetcherImpl()111 PacFileFetcherImpl::~PacFileFetcherImpl() {
112   // The URLRequest's destructor will cancel the outstanding request, and
113   // ensure that the delegate (this) is not called again.
114 }
115 
SetTimeoutConstraint(base::TimeDelta timeout)116 base::TimeDelta PacFileFetcherImpl::SetTimeoutConstraint(
117     base::TimeDelta timeout) {
118   base::TimeDelta prev = max_duration_;
119   max_duration_ = timeout;
120   return prev;
121 }
122 
SetSizeConstraint(size_t size_bytes)123 size_t PacFileFetcherImpl::SetSizeConstraint(size_t size_bytes) {
124   size_t prev = max_response_bytes_;
125   max_response_bytes_ = size_bytes;
126   return prev;
127 }
128 
OnResponseCompleted(URLRequest * request,int net_error)129 void PacFileFetcherImpl::OnResponseCompleted(URLRequest* request,
130                                              int net_error) {
131   DCHECK_EQ(request, cur_request_.get());
132 
133   // Use |result_code_| as the request's error if we have already set it to
134   // something specific.
135   if (result_code_ == OK && net_error != OK)
136     result_code_ = net_error;
137 
138   FetchCompleted();
139 }
140 
Fetch(const GURL & url,std::u16string * text,CompletionOnceCallback callback,const NetworkTrafficAnnotationTag traffic_annotation)141 int PacFileFetcherImpl::Fetch(
142     const GURL& url,
143     std::u16string* text,
144     CompletionOnceCallback callback,
145     const NetworkTrafficAnnotationTag traffic_annotation) {
146   // It is invalid to call Fetch() while a request is already in progress.
147   DCHECK(!cur_request_.get());
148   DCHECK(!callback.is_null());
149   DCHECK(text);
150 
151   if (!url_request_context_)
152     return ERR_CONTEXT_SHUT_DOWN;
153 
154   if (!IsUrlSchemeAllowed(url))
155     return ERR_DISALLOWED_URL_SCHEME;
156 
157   // Handle base-64 encoded data-urls that contain custom PAC scripts.
158   if (url.SchemeIs("data")) {
159     std::string mime_type;
160     std::string charset;
161     std::string data;
162     if (!DataURL::Parse(url, &mime_type, &charset, &data))
163       return ERR_FAILED;
164 
165     ConvertResponseToUTF16(charset, data, text);
166     return OK;
167   }
168 
169   DCHECK(fetch_start_time_.is_null());
170   fetch_start_time_ = base::TimeTicks::Now();
171 
172   // Use highest priority, so if socket pools are being used for other types of
173   // requests, PAC requests are aren't blocked on them.
174   cur_request_ = url_request_context_->CreateRequest(url, MAXIMUM_PRIORITY,
175                                                      this, traffic_annotation);
176 
177   cur_request_->set_isolation_info(isolation_info());
178 
179   // Make sure that the PAC script is downloaded using a direct connection,
180   // to avoid circular dependencies (fetching is a part of proxy resolution).
181   // Also disable the use of the disk cache. The cache is disabled so that if
182   // the user switches networks we don't potentially use the cached response
183   // from old network when we should in fact be re-fetching on the new network.
184   // If the PAC script is hosted on an HTTPS server we bypass revocation
185   // checking in order to avoid a circular dependency when attempting to fetch
186   // the OCSP response or CRL. We could make the revocation check go direct but
187   // the proxy might be the only way to the outside world.  IGNORE_LIMITS is
188   // used to avoid blocking proxy resolution on other network requests.
189   cur_request_->SetLoadFlags(LOAD_BYPASS_PROXY | LOAD_DISABLE_CACHE |
190                              LOAD_DISABLE_CERT_NETWORK_FETCHES |
191                              LOAD_IGNORE_LIMITS);
192 
193   // Save the caller's info for notification on completion.
194   callback_ = std::move(callback);
195   result_text_ = text;
196 
197   bytes_read_so_far_.clear();
198 
199   // Post a task to timeout this request if it takes too long.
200   cur_request_id_ = ++next_id_;
201 
202   base::SingleThreadTaskRunner::GetCurrentDefault()->PostDelayedTask(
203       FROM_HERE,
204       base::BindOnce(&PacFileFetcherImpl::OnTimeout, weak_factory_.GetWeakPtr(),
205                      cur_request_id_),
206       max_duration_);
207 
208   // Start the request.
209   cur_request_->Start();
210   return ERR_IO_PENDING;
211 }
212 
Cancel()213 void PacFileFetcherImpl::Cancel() {
214   // ResetCurRequestState will free the URLRequest, which will cause
215   // cancellation.
216   ResetCurRequestState();
217 }
218 
GetRequestContext() const219 URLRequestContext* PacFileFetcherImpl::GetRequestContext() const {
220   return url_request_context_;
221 }
222 
OnShutdown()223 void PacFileFetcherImpl::OnShutdown() {
224   url_request_context_ = nullptr;
225 
226   if (cur_request_) {
227     result_code_ = ERR_CONTEXT_SHUT_DOWN;
228     FetchCompleted();
229   }
230 }
231 
OnReceivedRedirect(URLRequest * request,const RedirectInfo & redirect_info,bool * defer_redirect)232 void PacFileFetcherImpl::OnReceivedRedirect(URLRequest* request,
233                                             const RedirectInfo& redirect_info,
234                                             bool* defer_redirect) {
235   int error = OK;
236 
237   // Redirection to file:// is never OK. Ordinarily this is handled lower in the
238   // stack (|FileProtocolHandler::IsSafeRedirectTarget|), but this is reachable
239   // when built without file:// suppport. Return the same error for consistency.
240   if (redirect_info.new_url.SchemeIsFile()) {
241     error = ERR_UNSAFE_REDIRECT;
242   } else if (!IsUrlSchemeAllowed(redirect_info.new_url)) {
243     error = ERR_DISALLOWED_URL_SCHEME;
244   }
245 
246   if (error != OK) {
247     // Fail the redirect.
248     request->CancelWithError(error);
249     OnResponseCompleted(request, error);
250   }
251 }
252 
OnAuthRequired(URLRequest * request,const AuthChallengeInfo & auth_info)253 void PacFileFetcherImpl::OnAuthRequired(URLRequest* request,
254                                         const AuthChallengeInfo& auth_info) {
255   DCHECK_EQ(request, cur_request_.get());
256   // TODO(eroman): http://crbug.com/77366
257   LOG(WARNING) << "Auth required to fetch PAC script, aborting.";
258   result_code_ = ERR_NOT_IMPLEMENTED;
259   request->CancelAuth();
260 }
261 
OnSSLCertificateError(URLRequest * request,int net_error,const SSLInfo & ssl_info,bool fatal)262 void PacFileFetcherImpl::OnSSLCertificateError(URLRequest* request,
263                                                int net_error,
264                                                const SSLInfo& ssl_info,
265                                                bool fatal) {
266   DCHECK_EQ(request, cur_request_.get());
267   LOG(WARNING) << "SSL certificate error when fetching PAC script, aborting.";
268   // Certificate errors are in same space as net errors.
269   result_code_ = net_error;
270   request->Cancel();
271 }
272 
OnResponseStarted(URLRequest * request,int net_error)273 void PacFileFetcherImpl::OnResponseStarted(URLRequest* request, int net_error) {
274   DCHECK_EQ(request, cur_request_.get());
275   DCHECK_NE(ERR_IO_PENDING, net_error);
276 
277   if (net_error != OK) {
278     OnResponseCompleted(request, net_error);
279     return;
280   }
281 
282   // Require HTTP responses to have a success status code.
283   if (request->url().SchemeIsHTTPOrHTTPS()) {
284     // NOTE about status codes: We are like Firefox 3 in this respect.
285     // {IE 7, Safari 3, Opera 9.5} do not care about the status code.
286     if (request->GetResponseCode() != 200) {
287       VLOG(1) << "Fetched PAC script had (bad) status line: "
288               << request->response_headers()->GetStatusLine();
289       result_code_ = ERR_HTTP_RESPONSE_CODE_FAILURE;
290       request->Cancel();
291       return;
292     }
293 
294     // NOTE about mime types: We do not enforce mime types on PAC files.
295     // This is for compatibility with {IE 7, Firefox 3, Opera 9.5}. We will
296     // however log mismatches to help with debugging.
297     std::string mime_type;
298     cur_request_->GetMimeType(&mime_type);
299     if (!IsPacMimeType(mime_type)) {
300       VLOG(1) << "Fetched PAC script does not have a proper mime type: "
301               << mime_type;
302     }
303   }
304 
305   ReadBody(request);
306 }
307 
OnReadCompleted(URLRequest * request,int num_bytes)308 void PacFileFetcherImpl::OnReadCompleted(URLRequest* request, int num_bytes) {
309   DCHECK_NE(ERR_IO_PENDING, num_bytes);
310 
311   DCHECK_EQ(request, cur_request_.get());
312   if (ConsumeBytesRead(request, num_bytes)) {
313     // Keep reading.
314     ReadBody(request);
315   }
316 }
317 
PacFileFetcherImpl(URLRequestContext * url_request_context)318 PacFileFetcherImpl::PacFileFetcherImpl(URLRequestContext* url_request_context)
319     : url_request_context_(url_request_context),
320       buf_(base::MakeRefCounted<IOBufferWithSize>(kBufSize)),
321       max_response_bytes_(kDefaultMaxResponseBytes),
322       max_duration_(kDefaultMaxDuration) {
323   DCHECK(url_request_context);
324 }
325 
IsUrlSchemeAllowed(const GURL & url) const326 bool PacFileFetcherImpl::IsUrlSchemeAllowed(const GURL& url) const {
327   // Always allow http://, https://, and data:.
328   if (url.SchemeIsHTTPOrHTTPS() || url.SchemeIs("data"))
329     return true;
330 
331   // Disallow any other URL scheme.
332   return false;
333 }
334 
ReadBody(URLRequest * request)335 void PacFileFetcherImpl::ReadBody(URLRequest* request) {
336   // Read as many bytes as are available synchronously.
337   while (true) {
338     int num_bytes = request->Read(buf_.get(), kBufSize);
339     if (num_bytes == ERR_IO_PENDING)
340       return;
341 
342     if (num_bytes < 0) {
343       OnResponseCompleted(request, num_bytes);
344       return;
345     }
346 
347     if (!ConsumeBytesRead(request, num_bytes))
348       return;
349   }
350 }
351 
ConsumeBytesRead(URLRequest * request,int num_bytes)352 bool PacFileFetcherImpl::ConsumeBytesRead(URLRequest* request, int num_bytes) {
353   if (fetch_time_to_first_byte_.is_null())
354     fetch_time_to_first_byte_ = base::TimeTicks::Now();
355 
356   if (num_bytes <= 0) {
357     // Error while reading, or EOF.
358     OnResponseCompleted(request, num_bytes);
359     return false;
360   }
361 
362   // Enforce maximum size bound.
363   if (num_bytes + bytes_read_so_far_.size() >
364       static_cast<size_t>(max_response_bytes_)) {
365     result_code_ = ERR_FILE_TOO_BIG;
366     request->Cancel();
367     return false;
368   }
369 
370   bytes_read_so_far_.append(buf_->data(), num_bytes);
371   return true;
372 }
373 
FetchCompleted()374 void PacFileFetcherImpl::FetchCompleted() {
375   if (result_code_ == OK) {
376     // Calculate duration of time for PAC file fetch to complete.
377     DCHECK(!fetch_start_time_.is_null());
378     DCHECK(!fetch_time_to_first_byte_.is_null());
379     UMA_HISTOGRAM_MEDIUM_TIMES("Net.ProxyScriptFetcher.FirstByteDuration",
380                                fetch_time_to_first_byte_ - fetch_start_time_);
381 
382     // The caller expects the response to be encoded as UTF16.
383     std::string charset;
384     cur_request_->GetCharset(&charset);
385     ConvertResponseToUTF16(charset, bytes_read_so_far_, result_text_);
386   } else {
387     // On error, the caller expects empty string for bytes.
388     result_text_->clear();
389   }
390 
391   int result_code = result_code_;
392   CompletionOnceCallback callback = std::move(callback_);
393 
394   ResetCurRequestState();
395 
396   std::move(callback).Run(result_code);
397 }
398 
ResetCurRequestState()399 void PacFileFetcherImpl::ResetCurRequestState() {
400   cur_request_.reset();
401   cur_request_id_ = 0;
402   callback_.Reset();
403   result_code_ = OK;
404   result_text_ = nullptr;
405   fetch_start_time_ = base::TimeTicks();
406   fetch_time_to_first_byte_ = base::TimeTicks();
407 }
408 
OnTimeout(int id)409 void PacFileFetcherImpl::OnTimeout(int id) {
410   // Timeout tasks may outlive the URLRequest they reference. Make sure it
411   // is still applicable.
412   if (cur_request_id_ != id)
413     return;
414 
415   DCHECK(cur_request_.get());
416   result_code_ = ERR_TIMED_OUT;
417   FetchCompleted();
418 }
419 
420 }  // namespace net
421