xref: /aosp_15_r20/external/cronet/url/url_canon_fileurl.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1*6777b538SAndroid Build Coastguard Worker // Copyright 2013 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
5*6777b538SAndroid Build Coastguard Worker // Functions for canonicalizing "file:" URLs.
6*6777b538SAndroid Build Coastguard Worker 
7*6777b538SAndroid Build Coastguard Worker #include <string_view>
8*6777b538SAndroid Build Coastguard Worker 
9*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_util.h"
10*6777b538SAndroid Build Coastguard Worker #include "url/url_canon.h"
11*6777b538SAndroid Build Coastguard Worker #include "url/url_canon_internal.h"
12*6777b538SAndroid Build Coastguard Worker #include "url/url_file.h"
13*6777b538SAndroid Build Coastguard Worker #include "url/url_parse_internal.h"
14*6777b538SAndroid Build Coastguard Worker 
15*6777b538SAndroid Build Coastguard Worker namespace url {
16*6777b538SAndroid Build Coastguard Worker 
17*6777b538SAndroid Build Coastguard Worker namespace {
18*6777b538SAndroid Build Coastguard Worker 
// Reports whether the characters of |spec| in the half-open index range
// [begin, end) spell exactly "localhost" (the comparison is case-sensitive).
// An inverted range (begin > end) never matches.
bool IsLocalhost(const char* spec, int begin, int end) {
  if (end < begin) {
    return false;
  }
  const std::string_view candidate(spec + begin, end - begin);
  return candidate == "localhost";
}
24*6777b538SAndroid Build Coastguard Worker 
// UTF-16 counterpart: reports whether the code units of |spec| in the
// half-open index range [begin, end) spell exactly u"localhost" (the
// comparison is case-sensitive). An inverted range (begin > end) never
// matches.
bool IsLocalhost(const char16_t* spec, int begin, int end) {
  if (end < begin) {
    return false;
  }
  const std::u16string_view candidate(spec + begin, end - begin);
  return candidate == u"localhost";
}
30*6777b538SAndroid Build Coastguard Worker 
31*6777b538SAndroid Build Coastguard Worker template <typename CHAR>
DoFindWindowsDriveLetter(const CHAR * spec,int begin,int end)32*6777b538SAndroid Build Coastguard Worker int DoFindWindowsDriveLetter(const CHAR* spec, int begin, int end) {
33*6777b538SAndroid Build Coastguard Worker   if (begin > end)
34*6777b538SAndroid Build Coastguard Worker     return -1;
35*6777b538SAndroid Build Coastguard Worker 
36*6777b538SAndroid Build Coastguard Worker   // First guess the beginning of the drive letter.
37*6777b538SAndroid Build Coastguard Worker   // If there is something that looks like a drive letter in the spec between
38*6777b538SAndroid Build Coastguard Worker   // begin and end, store its position in drive_letter_pos.
39*6777b538SAndroid Build Coastguard Worker   int drive_letter_pos =
40*6777b538SAndroid Build Coastguard Worker       DoesContainWindowsDriveSpecUntil(spec, begin, end, end);
41*6777b538SAndroid Build Coastguard Worker   if (drive_letter_pos < begin)
42*6777b538SAndroid Build Coastguard Worker     return -1;
43*6777b538SAndroid Build Coastguard Worker 
44*6777b538SAndroid Build Coastguard Worker   // Check if the path up to the drive letter candidate can be canonicalized as
45*6777b538SAndroid Build Coastguard Worker   // "/".
46*6777b538SAndroid Build Coastguard Worker   Component sub_path = MakeRange(begin, drive_letter_pos);
47*6777b538SAndroid Build Coastguard Worker   RawCanonOutput<1024> output;
48*6777b538SAndroid Build Coastguard Worker   Component output_path;
49*6777b538SAndroid Build Coastguard Worker   bool success = CanonicalizePath(spec, sub_path, &output, &output_path);
50*6777b538SAndroid Build Coastguard Worker   if (!success || output_path.len != 1 || output.at(output_path.begin) != '/') {
51*6777b538SAndroid Build Coastguard Worker     return -1;
52*6777b538SAndroid Build Coastguard Worker   }
53*6777b538SAndroid Build Coastguard Worker 
54*6777b538SAndroid Build Coastguard Worker   return drive_letter_pos;
55*6777b538SAndroid Build Coastguard Worker }
56*6777b538SAndroid Build Coastguard Worker 
57*6777b538SAndroid Build Coastguard Worker #ifdef WIN32
58*6777b538SAndroid Build Coastguard Worker 
59*6777b538SAndroid Build Coastguard Worker // Given a pointer into the spec, this copies and canonicalizes the drive
60*6777b538SAndroid Build Coastguard Worker // letter and colon to the output, if one is found. If there is not a drive
61*6777b538SAndroid Build Coastguard Worker // spec, it won't do anything. The index of the next character in the input
62*6777b538SAndroid Build Coastguard Worker // spec is returned (after the colon when a drive spec is found, the begin
63*6777b538SAndroid Build Coastguard Worker // offset if one is not).
64*6777b538SAndroid Build Coastguard Worker template <typename CHAR>
FileDoDriveSpec(const CHAR * spec,int begin,int end,CanonOutput * output)65*6777b538SAndroid Build Coastguard Worker int FileDoDriveSpec(const CHAR* spec, int begin, int end, CanonOutput* output) {
66*6777b538SAndroid Build Coastguard Worker   int drive_letter_pos = FindWindowsDriveLetter(spec, begin, end);
67*6777b538SAndroid Build Coastguard Worker   if (drive_letter_pos < begin)
68*6777b538SAndroid Build Coastguard Worker     return begin;
69*6777b538SAndroid Build Coastguard Worker 
70*6777b538SAndroid Build Coastguard Worker   // By now, a valid drive letter is confirmed at position drive_letter_pos,
71*6777b538SAndroid Build Coastguard Worker   // followed by a valid drive letter separator (a colon or a pipe).
72*6777b538SAndroid Build Coastguard Worker 
73*6777b538SAndroid Build Coastguard Worker   output->push_back('/');
74*6777b538SAndroid Build Coastguard Worker 
75*6777b538SAndroid Build Coastguard Worker   // Normalize Windows drive letters to uppercase.
76*6777b538SAndroid Build Coastguard Worker   if (base::IsAsciiLower(spec[drive_letter_pos]))
77*6777b538SAndroid Build Coastguard Worker     output->push_back(static_cast<char>(spec[drive_letter_pos] - 'a' + 'A'));
78*6777b538SAndroid Build Coastguard Worker   else
79*6777b538SAndroid Build Coastguard Worker     output->push_back(static_cast<char>(spec[drive_letter_pos]));
80*6777b538SAndroid Build Coastguard Worker 
81*6777b538SAndroid Build Coastguard Worker   // Normalize the character following it to a colon rather than pipe.
82*6777b538SAndroid Build Coastguard Worker   output->push_back(':');
83*6777b538SAndroid Build Coastguard Worker   return drive_letter_pos + 2;
84*6777b538SAndroid Build Coastguard Worker }
85*6777b538SAndroid Build Coastguard Worker 
86*6777b538SAndroid Build Coastguard Worker #endif  // WIN32
87*6777b538SAndroid Build Coastguard Worker 
88*6777b538SAndroid Build Coastguard Worker template<typename CHAR, typename UCHAR>
DoFileCanonicalizePath(const CHAR * spec,const Component & path,CanonOutput * output,Component * out_path)89*6777b538SAndroid Build Coastguard Worker bool DoFileCanonicalizePath(const CHAR* spec,
90*6777b538SAndroid Build Coastguard Worker                             const Component& path,
91*6777b538SAndroid Build Coastguard Worker                             CanonOutput* output,
92*6777b538SAndroid Build Coastguard Worker                             Component* out_path) {
93*6777b538SAndroid Build Coastguard Worker   // Copies and normalizes the "c:" at the beginning, if present.
94*6777b538SAndroid Build Coastguard Worker   out_path->begin = output->length();
95*6777b538SAndroid Build Coastguard Worker   int after_drive;
96*6777b538SAndroid Build Coastguard Worker #ifdef WIN32
97*6777b538SAndroid Build Coastguard Worker   after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output);
98*6777b538SAndroid Build Coastguard Worker #else
99*6777b538SAndroid Build Coastguard Worker   after_drive = path.begin;
100*6777b538SAndroid Build Coastguard Worker #endif
101*6777b538SAndroid Build Coastguard Worker 
102*6777b538SAndroid Build Coastguard Worker   // Copies the rest of the path, starting from the slash following the
103*6777b538SAndroid Build Coastguard Worker   // drive colon (if any, Windows only), or the first slash of the path.
104*6777b538SAndroid Build Coastguard Worker   bool success = true;
105*6777b538SAndroid Build Coastguard Worker   if (after_drive < path.end()) {
106*6777b538SAndroid Build Coastguard Worker     // Use the regular path canonicalizer to canonicalize the rest of the path
107*6777b538SAndroid Build Coastguard Worker     // after the drive.
108*6777b538SAndroid Build Coastguard Worker     //
109*6777b538SAndroid Build Coastguard Worker     // Give it a fake output component to write into, since we will be
110*6777b538SAndroid Build Coastguard Worker     // calculating the out_path ourselves (consisting of both the drive and the
111*6777b538SAndroid Build Coastguard Worker     // path we canonicalize here).
112*6777b538SAndroid Build Coastguard Worker     Component sub_path = MakeRange(after_drive, path.end());
113*6777b538SAndroid Build Coastguard Worker     Component fake_output_path;
114*6777b538SAndroid Build Coastguard Worker     success = CanonicalizePath(spec, sub_path, output, &fake_output_path);
115*6777b538SAndroid Build Coastguard Worker   } else if (after_drive == path.begin) {
116*6777b538SAndroid Build Coastguard Worker     // No input path and no drive spec, canonicalize to a slash.
117*6777b538SAndroid Build Coastguard Worker     output->push_back('/');
118*6777b538SAndroid Build Coastguard Worker   }
119*6777b538SAndroid Build Coastguard Worker 
120*6777b538SAndroid Build Coastguard Worker   out_path->len = output->length() - out_path->begin;
121*6777b538SAndroid Build Coastguard Worker   return success;
122*6777b538SAndroid Build Coastguard Worker }
123*6777b538SAndroid Build Coastguard Worker 
124*6777b538SAndroid Build Coastguard Worker template<typename CHAR, typename UCHAR>
DoCanonicalizeFileURL(const URLComponentSource<CHAR> & source,const Parsed & parsed,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)125*6777b538SAndroid Build Coastguard Worker bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
126*6777b538SAndroid Build Coastguard Worker                            const Parsed& parsed,
127*6777b538SAndroid Build Coastguard Worker                            CharsetConverter* query_converter,
128*6777b538SAndroid Build Coastguard Worker                            CanonOutput* output,
129*6777b538SAndroid Build Coastguard Worker                            Parsed* new_parsed) {
130*6777b538SAndroid Build Coastguard Worker   DCHECK(!parsed.has_opaque_path);
131*6777b538SAndroid Build Coastguard Worker 
132*6777b538SAndroid Build Coastguard Worker   // Things we don't set in file: URLs.
133*6777b538SAndroid Build Coastguard Worker   new_parsed->username = Component();
134*6777b538SAndroid Build Coastguard Worker   new_parsed->password = Component();
135*6777b538SAndroid Build Coastguard Worker   new_parsed->port = Component();
136*6777b538SAndroid Build Coastguard Worker 
137*6777b538SAndroid Build Coastguard Worker   // Scheme (known, so we don't bother running it through the more
138*6777b538SAndroid Build Coastguard Worker   // complicated scheme canonicalizer).
139*6777b538SAndroid Build Coastguard Worker   new_parsed->scheme.begin = output->length();
140*6777b538SAndroid Build Coastguard Worker   output->Append("file://");
141*6777b538SAndroid Build Coastguard Worker   new_parsed->scheme.len = 4;
142*6777b538SAndroid Build Coastguard Worker 
143*6777b538SAndroid Build Coastguard Worker   // If the host is localhost, and the path starts with a Windows drive letter,
144*6777b538SAndroid Build Coastguard Worker   // remove the host component. This does the following transformation:
145*6777b538SAndroid Build Coastguard Worker   //     file://localhost/C:/hello.txt -> file:///C:/hello.txt
146*6777b538SAndroid Build Coastguard Worker   //
147*6777b538SAndroid Build Coastguard Worker   // Note: we do this on every platform per URL Standard, not just Windows.
148*6777b538SAndroid Build Coastguard Worker   //
149*6777b538SAndroid Build Coastguard Worker   // TODO(https://crbug.com/688961): According to the latest URL spec, this
150*6777b538SAndroid Build Coastguard Worker   // transformation should be done regardless of the path.
151*6777b538SAndroid Build Coastguard Worker   Component host_range = parsed.host;
152*6777b538SAndroid Build Coastguard Worker   if (IsLocalhost(source.host, host_range.begin, host_range.end()) &&
153*6777b538SAndroid Build Coastguard Worker       FindWindowsDriveLetter(source.path, parsed.path.begin,
154*6777b538SAndroid Build Coastguard Worker                              parsed.path.end()) >= parsed.path.begin) {
155*6777b538SAndroid Build Coastguard Worker     host_range.reset();
156*6777b538SAndroid Build Coastguard Worker   }
157*6777b538SAndroid Build Coastguard Worker 
158*6777b538SAndroid Build Coastguard Worker   // Append the host. For many file URLs, this will be empty. For UNC, this
159*6777b538SAndroid Build Coastguard Worker   // will be present.
160*6777b538SAndroid Build Coastguard Worker   // TODO(brettw) This doesn't do any checking for host name validity. We
161*6777b538SAndroid Build Coastguard Worker   // should probably handle validity checking of UNC hosts differently than
162*6777b538SAndroid Build Coastguard Worker   // for regular IP hosts.
163*6777b538SAndroid Build Coastguard Worker   bool success =
164*6777b538SAndroid Build Coastguard Worker       CanonicalizeHost(source.host, host_range, output, &new_parsed->host);
165*6777b538SAndroid Build Coastguard Worker   success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path,
166*6777b538SAndroid Build Coastguard Worker                                     output, &new_parsed->path);
167*6777b538SAndroid Build Coastguard Worker 
168*6777b538SAndroid Build Coastguard Worker   CanonicalizeQuery(source.query, parsed.query, query_converter,
169*6777b538SAndroid Build Coastguard Worker                     output, &new_parsed->query);
170*6777b538SAndroid Build Coastguard Worker   CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
171*6777b538SAndroid Build Coastguard Worker 
172*6777b538SAndroid Build Coastguard Worker   return success;
173*6777b538SAndroid Build Coastguard Worker }
174*6777b538SAndroid Build Coastguard Worker 
175*6777b538SAndroid Build Coastguard Worker } // namespace
176*6777b538SAndroid Build Coastguard Worker 
FindWindowsDriveLetter(const char * spec,int begin,int end)177*6777b538SAndroid Build Coastguard Worker int FindWindowsDriveLetter(const char* spec, int begin, int end) {
178*6777b538SAndroid Build Coastguard Worker   return DoFindWindowsDriveLetter(spec, begin, end);
179*6777b538SAndroid Build Coastguard Worker }
180*6777b538SAndroid Build Coastguard Worker 
FindWindowsDriveLetter(const char16_t * spec,int begin,int end)181*6777b538SAndroid Build Coastguard Worker int FindWindowsDriveLetter(const char16_t* spec, int begin, int end) {
182*6777b538SAndroid Build Coastguard Worker   return DoFindWindowsDriveLetter(spec, begin, end);
183*6777b538SAndroid Build Coastguard Worker }
184*6777b538SAndroid Build Coastguard Worker 
CanonicalizeFileURL(const char * spec,int spec_len,const Parsed & parsed,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)185*6777b538SAndroid Build Coastguard Worker bool CanonicalizeFileURL(const char* spec,
186*6777b538SAndroid Build Coastguard Worker                          int spec_len,
187*6777b538SAndroid Build Coastguard Worker                          const Parsed& parsed,
188*6777b538SAndroid Build Coastguard Worker                          CharsetConverter* query_converter,
189*6777b538SAndroid Build Coastguard Worker                          CanonOutput* output,
190*6777b538SAndroid Build Coastguard Worker                          Parsed* new_parsed) {
191*6777b538SAndroid Build Coastguard Worker   return DoCanonicalizeFileURL<char, unsigned char>(
192*6777b538SAndroid Build Coastguard Worker       URLComponentSource<char>(spec), parsed, query_converter,
193*6777b538SAndroid Build Coastguard Worker       output, new_parsed);
194*6777b538SAndroid Build Coastguard Worker }
195*6777b538SAndroid Build Coastguard Worker 
CanonicalizeFileURL(const char16_t * spec,int spec_len,const Parsed & parsed,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)196*6777b538SAndroid Build Coastguard Worker bool CanonicalizeFileURL(const char16_t* spec,
197*6777b538SAndroid Build Coastguard Worker                          int spec_len,
198*6777b538SAndroid Build Coastguard Worker                          const Parsed& parsed,
199*6777b538SAndroid Build Coastguard Worker                          CharsetConverter* query_converter,
200*6777b538SAndroid Build Coastguard Worker                          CanonOutput* output,
201*6777b538SAndroid Build Coastguard Worker                          Parsed* new_parsed) {
202*6777b538SAndroid Build Coastguard Worker   return DoCanonicalizeFileURL<char16_t, char16_t>(
203*6777b538SAndroid Build Coastguard Worker       URLComponentSource<char16_t>(spec), parsed, query_converter, output,
204*6777b538SAndroid Build Coastguard Worker       new_parsed);
205*6777b538SAndroid Build Coastguard Worker }
206*6777b538SAndroid Build Coastguard Worker 
FileCanonicalizePath(const char * spec,const Component & path,CanonOutput * output,Component * out_path)207*6777b538SAndroid Build Coastguard Worker bool FileCanonicalizePath(const char* spec,
208*6777b538SAndroid Build Coastguard Worker                           const Component& path,
209*6777b538SAndroid Build Coastguard Worker                           CanonOutput* output,
210*6777b538SAndroid Build Coastguard Worker                           Component* out_path) {
211*6777b538SAndroid Build Coastguard Worker   return DoFileCanonicalizePath<char, unsigned char>(spec, path,
212*6777b538SAndroid Build Coastguard Worker                                                      output, out_path);
213*6777b538SAndroid Build Coastguard Worker }
214*6777b538SAndroid Build Coastguard Worker 
FileCanonicalizePath(const char16_t * spec,const Component & path,CanonOutput * output,Component * out_path)215*6777b538SAndroid Build Coastguard Worker bool FileCanonicalizePath(const char16_t* spec,
216*6777b538SAndroid Build Coastguard Worker                           const Component& path,
217*6777b538SAndroid Build Coastguard Worker                           CanonOutput* output,
218*6777b538SAndroid Build Coastguard Worker                           Component* out_path) {
219*6777b538SAndroid Build Coastguard Worker   return DoFileCanonicalizePath<char16_t, char16_t>(spec, path, output,
220*6777b538SAndroid Build Coastguard Worker                                                     out_path);
221*6777b538SAndroid Build Coastguard Worker }
222*6777b538SAndroid Build Coastguard Worker 
ReplaceFileURL(const char * base,const Parsed & base_parsed,const Replacements<char> & replacements,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)223*6777b538SAndroid Build Coastguard Worker bool ReplaceFileURL(const char* base,
224*6777b538SAndroid Build Coastguard Worker                     const Parsed& base_parsed,
225*6777b538SAndroid Build Coastguard Worker                     const Replacements<char>& replacements,
226*6777b538SAndroid Build Coastguard Worker                     CharsetConverter* query_converter,
227*6777b538SAndroid Build Coastguard Worker                     CanonOutput* output,
228*6777b538SAndroid Build Coastguard Worker                     Parsed* new_parsed) {
229*6777b538SAndroid Build Coastguard Worker   URLComponentSource<char> source(base);
230*6777b538SAndroid Build Coastguard Worker   Parsed parsed(base_parsed);
231*6777b538SAndroid Build Coastguard Worker   SetupOverrideComponents(base, replacements, &source, &parsed);
232*6777b538SAndroid Build Coastguard Worker   return DoCanonicalizeFileURL<char, unsigned char>(
233*6777b538SAndroid Build Coastguard Worker       source, parsed, query_converter, output, new_parsed);
234*6777b538SAndroid Build Coastguard Worker }
235*6777b538SAndroid Build Coastguard Worker 
ReplaceFileURL(const char * base,const Parsed & base_parsed,const Replacements<char16_t> & replacements,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)236*6777b538SAndroid Build Coastguard Worker bool ReplaceFileURL(const char* base,
237*6777b538SAndroid Build Coastguard Worker                     const Parsed& base_parsed,
238*6777b538SAndroid Build Coastguard Worker                     const Replacements<char16_t>& replacements,
239*6777b538SAndroid Build Coastguard Worker                     CharsetConverter* query_converter,
240*6777b538SAndroid Build Coastguard Worker                     CanonOutput* output,
241*6777b538SAndroid Build Coastguard Worker                     Parsed* new_parsed) {
242*6777b538SAndroid Build Coastguard Worker   RawCanonOutput<1024> utf8;
243*6777b538SAndroid Build Coastguard Worker   URLComponentSource<char> source(base);
244*6777b538SAndroid Build Coastguard Worker   Parsed parsed(base_parsed);
245*6777b538SAndroid Build Coastguard Worker   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
246*6777b538SAndroid Build Coastguard Worker   return DoCanonicalizeFileURL<char, unsigned char>(
247*6777b538SAndroid Build Coastguard Worker       source, parsed, query_converter, output, new_parsed);
248*6777b538SAndroid Build Coastguard Worker }
249*6777b538SAndroid Build Coastguard Worker 
250*6777b538SAndroid Build Coastguard Worker }  // namespace url
251