1*6777b538SAndroid Build Coastguard Worker // Copyright 2013 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker
5*6777b538SAndroid Build Coastguard Worker // Functions for canonicalizing "file:" URLs.
6*6777b538SAndroid Build Coastguard Worker
7*6777b538SAndroid Build Coastguard Worker #include <string_view>
8*6777b538SAndroid Build Coastguard Worker
9*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_util.h"
10*6777b538SAndroid Build Coastguard Worker #include "url/url_canon.h"
11*6777b538SAndroid Build Coastguard Worker #include "url/url_canon_internal.h"
12*6777b538SAndroid Build Coastguard Worker #include "url/url_file.h"
13*6777b538SAndroid Build Coastguard Worker #include "url/url_parse_internal.h"
14*6777b538SAndroid Build Coastguard Worker
15*6777b538SAndroid Build Coastguard Worker namespace url {
16*6777b538SAndroid Build Coastguard Worker
17*6777b538SAndroid Build Coastguard Worker namespace {
18*6777b538SAndroid Build Coastguard Worker
// Returns true when the characters of |spec| in the half-open range
// [begin, end) spell exactly "localhost". The comparison is case-sensitive,
// and an inverted range (begin > end) never matches.
bool IsLocalhost(const char* spec, int begin, int end) {
  constexpr std::string_view kLocalhost = "localhost";

  // An inverted range has no characters to compare.
  if (end < begin) {
    return false;
  }

  const std::string_view candidate(spec + begin, end - begin);
  return candidate == kLocalhost;
}
24*6777b538SAndroid Build Coastguard Worker
// UTF-16 overload: returns true when the code units of |spec| in the
// half-open range [begin, end) spell exactly u"localhost". The comparison is
// case-sensitive, and an inverted range (begin > end) never matches.
bool IsLocalhost(const char16_t* spec, int begin, int end) {
  constexpr std::u16string_view kLocalhost = u"localhost";

  // An inverted range has no characters to compare.
  if (end < begin) {
    return false;
  }

  const std::u16string_view candidate(spec + begin, end - begin);
  return candidate == kLocalhost;
}
30*6777b538SAndroid Build Coastguard Worker
31*6777b538SAndroid Build Coastguard Worker template <typename CHAR>
DoFindWindowsDriveLetter(const CHAR * spec,int begin,int end)32*6777b538SAndroid Build Coastguard Worker int DoFindWindowsDriveLetter(const CHAR* spec, int begin, int end) {
33*6777b538SAndroid Build Coastguard Worker if (begin > end)
34*6777b538SAndroid Build Coastguard Worker return -1;
35*6777b538SAndroid Build Coastguard Worker
36*6777b538SAndroid Build Coastguard Worker // First guess the beginning of the drive letter.
37*6777b538SAndroid Build Coastguard Worker // If there is something that looks like a drive letter in the spec between
38*6777b538SAndroid Build Coastguard Worker // begin and end, store its position in drive_letter_pos.
39*6777b538SAndroid Build Coastguard Worker int drive_letter_pos =
40*6777b538SAndroid Build Coastguard Worker DoesContainWindowsDriveSpecUntil(spec, begin, end, end);
41*6777b538SAndroid Build Coastguard Worker if (drive_letter_pos < begin)
42*6777b538SAndroid Build Coastguard Worker return -1;
43*6777b538SAndroid Build Coastguard Worker
44*6777b538SAndroid Build Coastguard Worker // Check if the path up to the drive letter candidate can be canonicalized as
45*6777b538SAndroid Build Coastguard Worker // "/".
46*6777b538SAndroid Build Coastguard Worker Component sub_path = MakeRange(begin, drive_letter_pos);
47*6777b538SAndroid Build Coastguard Worker RawCanonOutput<1024> output;
48*6777b538SAndroid Build Coastguard Worker Component output_path;
49*6777b538SAndroid Build Coastguard Worker bool success = CanonicalizePath(spec, sub_path, &output, &output_path);
50*6777b538SAndroid Build Coastguard Worker if (!success || output_path.len != 1 || output.at(output_path.begin) != '/') {
51*6777b538SAndroid Build Coastguard Worker return -1;
52*6777b538SAndroid Build Coastguard Worker }
53*6777b538SAndroid Build Coastguard Worker
54*6777b538SAndroid Build Coastguard Worker return drive_letter_pos;
55*6777b538SAndroid Build Coastguard Worker }
56*6777b538SAndroid Build Coastguard Worker
57*6777b538SAndroid Build Coastguard Worker #ifdef WIN32
58*6777b538SAndroid Build Coastguard Worker
59*6777b538SAndroid Build Coastguard Worker // Given a pointer into the spec, this copies and canonicalizes the drive
60*6777b538SAndroid Build Coastguard Worker // letter and colon to the output, if one is found. If there is not a drive
61*6777b538SAndroid Build Coastguard Worker // spec, it won't do anything. The index of the next character in the input
62*6777b538SAndroid Build Coastguard Worker // spec is returned (after the colon when a drive spec is found, the begin
63*6777b538SAndroid Build Coastguard Worker // offset if one is not).
64*6777b538SAndroid Build Coastguard Worker template <typename CHAR>
FileDoDriveSpec(const CHAR * spec,int begin,int end,CanonOutput * output)65*6777b538SAndroid Build Coastguard Worker int FileDoDriveSpec(const CHAR* spec, int begin, int end, CanonOutput* output) {
66*6777b538SAndroid Build Coastguard Worker int drive_letter_pos = FindWindowsDriveLetter(spec, begin, end);
67*6777b538SAndroid Build Coastguard Worker if (drive_letter_pos < begin)
68*6777b538SAndroid Build Coastguard Worker return begin;
69*6777b538SAndroid Build Coastguard Worker
70*6777b538SAndroid Build Coastguard Worker // By now, a valid drive letter is confirmed at position drive_letter_pos,
71*6777b538SAndroid Build Coastguard Worker // followed by a valid drive letter separator (a colon or a pipe).
72*6777b538SAndroid Build Coastguard Worker
73*6777b538SAndroid Build Coastguard Worker output->push_back('/');
74*6777b538SAndroid Build Coastguard Worker
75*6777b538SAndroid Build Coastguard Worker // Normalize Windows drive letters to uppercase.
76*6777b538SAndroid Build Coastguard Worker if (base::IsAsciiLower(spec[drive_letter_pos]))
77*6777b538SAndroid Build Coastguard Worker output->push_back(static_cast<char>(spec[drive_letter_pos] - 'a' + 'A'));
78*6777b538SAndroid Build Coastguard Worker else
79*6777b538SAndroid Build Coastguard Worker output->push_back(static_cast<char>(spec[drive_letter_pos]));
80*6777b538SAndroid Build Coastguard Worker
81*6777b538SAndroid Build Coastguard Worker // Normalize the character following it to a colon rather than pipe.
82*6777b538SAndroid Build Coastguard Worker output->push_back(':');
83*6777b538SAndroid Build Coastguard Worker return drive_letter_pos + 2;
84*6777b538SAndroid Build Coastguard Worker }
85*6777b538SAndroid Build Coastguard Worker
86*6777b538SAndroid Build Coastguard Worker #endif // WIN32
87*6777b538SAndroid Build Coastguard Worker
88*6777b538SAndroid Build Coastguard Worker template<typename CHAR, typename UCHAR>
DoFileCanonicalizePath(const CHAR * spec,const Component & path,CanonOutput * output,Component * out_path)89*6777b538SAndroid Build Coastguard Worker bool DoFileCanonicalizePath(const CHAR* spec,
90*6777b538SAndroid Build Coastguard Worker const Component& path,
91*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
92*6777b538SAndroid Build Coastguard Worker Component* out_path) {
93*6777b538SAndroid Build Coastguard Worker // Copies and normalizes the "c:" at the beginning, if present.
94*6777b538SAndroid Build Coastguard Worker out_path->begin = output->length();
95*6777b538SAndroid Build Coastguard Worker int after_drive;
96*6777b538SAndroid Build Coastguard Worker #ifdef WIN32
97*6777b538SAndroid Build Coastguard Worker after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output);
98*6777b538SAndroid Build Coastguard Worker #else
99*6777b538SAndroid Build Coastguard Worker after_drive = path.begin;
100*6777b538SAndroid Build Coastguard Worker #endif
101*6777b538SAndroid Build Coastguard Worker
102*6777b538SAndroid Build Coastguard Worker // Copies the rest of the path, starting from the slash following the
103*6777b538SAndroid Build Coastguard Worker // drive colon (if any, Windows only), or the first slash of the path.
104*6777b538SAndroid Build Coastguard Worker bool success = true;
105*6777b538SAndroid Build Coastguard Worker if (after_drive < path.end()) {
106*6777b538SAndroid Build Coastguard Worker // Use the regular path canonicalizer to canonicalize the rest of the path
107*6777b538SAndroid Build Coastguard Worker // after the drive.
108*6777b538SAndroid Build Coastguard Worker //
109*6777b538SAndroid Build Coastguard Worker // Give it a fake output component to write into, since we will be
110*6777b538SAndroid Build Coastguard Worker // calculating the out_path ourselves (consisting of both the drive and the
111*6777b538SAndroid Build Coastguard Worker // path we canonicalize here).
112*6777b538SAndroid Build Coastguard Worker Component sub_path = MakeRange(after_drive, path.end());
113*6777b538SAndroid Build Coastguard Worker Component fake_output_path;
114*6777b538SAndroid Build Coastguard Worker success = CanonicalizePath(spec, sub_path, output, &fake_output_path);
115*6777b538SAndroid Build Coastguard Worker } else if (after_drive == path.begin) {
116*6777b538SAndroid Build Coastguard Worker // No input path and no drive spec, canonicalize to a slash.
117*6777b538SAndroid Build Coastguard Worker output->push_back('/');
118*6777b538SAndroid Build Coastguard Worker }
119*6777b538SAndroid Build Coastguard Worker
120*6777b538SAndroid Build Coastguard Worker out_path->len = output->length() - out_path->begin;
121*6777b538SAndroid Build Coastguard Worker return success;
122*6777b538SAndroid Build Coastguard Worker }
123*6777b538SAndroid Build Coastguard Worker
124*6777b538SAndroid Build Coastguard Worker template<typename CHAR, typename UCHAR>
DoCanonicalizeFileURL(const URLComponentSource<CHAR> & source,const Parsed & parsed,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)125*6777b538SAndroid Build Coastguard Worker bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
126*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
127*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
128*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
129*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed) {
130*6777b538SAndroid Build Coastguard Worker DCHECK(!parsed.has_opaque_path);
131*6777b538SAndroid Build Coastguard Worker
132*6777b538SAndroid Build Coastguard Worker // Things we don't set in file: URLs.
133*6777b538SAndroid Build Coastguard Worker new_parsed->username = Component();
134*6777b538SAndroid Build Coastguard Worker new_parsed->password = Component();
135*6777b538SAndroid Build Coastguard Worker new_parsed->port = Component();
136*6777b538SAndroid Build Coastguard Worker
137*6777b538SAndroid Build Coastguard Worker // Scheme (known, so we don't bother running it through the more
138*6777b538SAndroid Build Coastguard Worker // complicated scheme canonicalizer).
139*6777b538SAndroid Build Coastguard Worker new_parsed->scheme.begin = output->length();
140*6777b538SAndroid Build Coastguard Worker output->Append("file://");
141*6777b538SAndroid Build Coastguard Worker new_parsed->scheme.len = 4;
142*6777b538SAndroid Build Coastguard Worker
143*6777b538SAndroid Build Coastguard Worker // If the host is localhost, and the path starts with a Windows drive letter,
144*6777b538SAndroid Build Coastguard Worker // remove the host component. This does the following transformation:
145*6777b538SAndroid Build Coastguard Worker // file://localhost/C:/hello.txt -> file:///C:/hello.txt
146*6777b538SAndroid Build Coastguard Worker //
147*6777b538SAndroid Build Coastguard Worker // Note: we do this on every platform per URL Standard, not just Windows.
148*6777b538SAndroid Build Coastguard Worker //
149*6777b538SAndroid Build Coastguard Worker // TODO(https://crbug.com/688961): According to the latest URL spec, this
150*6777b538SAndroid Build Coastguard Worker // transformation should be done regardless of the path.
151*6777b538SAndroid Build Coastguard Worker Component host_range = parsed.host;
152*6777b538SAndroid Build Coastguard Worker if (IsLocalhost(source.host, host_range.begin, host_range.end()) &&
153*6777b538SAndroid Build Coastguard Worker FindWindowsDriveLetter(source.path, parsed.path.begin,
154*6777b538SAndroid Build Coastguard Worker parsed.path.end()) >= parsed.path.begin) {
155*6777b538SAndroid Build Coastguard Worker host_range.reset();
156*6777b538SAndroid Build Coastguard Worker }
157*6777b538SAndroid Build Coastguard Worker
158*6777b538SAndroid Build Coastguard Worker // Append the host. For many file URLs, this will be empty. For UNC, this
159*6777b538SAndroid Build Coastguard Worker // will be present.
160*6777b538SAndroid Build Coastguard Worker // TODO(brettw) This doesn't do any checking for host name validity. We
161*6777b538SAndroid Build Coastguard Worker // should probably handle validity checking of UNC hosts differently than
162*6777b538SAndroid Build Coastguard Worker // for regular IP hosts.
163*6777b538SAndroid Build Coastguard Worker bool success =
164*6777b538SAndroid Build Coastguard Worker CanonicalizeHost(source.host, host_range, output, &new_parsed->host);
165*6777b538SAndroid Build Coastguard Worker success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path,
166*6777b538SAndroid Build Coastguard Worker output, &new_parsed->path);
167*6777b538SAndroid Build Coastguard Worker
168*6777b538SAndroid Build Coastguard Worker CanonicalizeQuery(source.query, parsed.query, query_converter,
169*6777b538SAndroid Build Coastguard Worker output, &new_parsed->query);
170*6777b538SAndroid Build Coastguard Worker CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
171*6777b538SAndroid Build Coastguard Worker
172*6777b538SAndroid Build Coastguard Worker return success;
173*6777b538SAndroid Build Coastguard Worker }
174*6777b538SAndroid Build Coastguard Worker
175*6777b538SAndroid Build Coastguard Worker } // namespace
176*6777b538SAndroid Build Coastguard Worker
FindWindowsDriveLetter(const char * spec,int begin,int end)177*6777b538SAndroid Build Coastguard Worker int FindWindowsDriveLetter(const char* spec, int begin, int end) {
178*6777b538SAndroid Build Coastguard Worker return DoFindWindowsDriveLetter(spec, begin, end);
179*6777b538SAndroid Build Coastguard Worker }
180*6777b538SAndroid Build Coastguard Worker
FindWindowsDriveLetter(const char16_t * spec,int begin,int end)181*6777b538SAndroid Build Coastguard Worker int FindWindowsDriveLetter(const char16_t* spec, int begin, int end) {
182*6777b538SAndroid Build Coastguard Worker return DoFindWindowsDriveLetter(spec, begin, end);
183*6777b538SAndroid Build Coastguard Worker }
184*6777b538SAndroid Build Coastguard Worker
CanonicalizeFileURL(const char * spec,int spec_len,const Parsed & parsed,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)185*6777b538SAndroid Build Coastguard Worker bool CanonicalizeFileURL(const char* spec,
186*6777b538SAndroid Build Coastguard Worker int spec_len,
187*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
188*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
189*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
190*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed) {
191*6777b538SAndroid Build Coastguard Worker return DoCanonicalizeFileURL<char, unsigned char>(
192*6777b538SAndroid Build Coastguard Worker URLComponentSource<char>(spec), parsed, query_converter,
193*6777b538SAndroid Build Coastguard Worker output, new_parsed);
194*6777b538SAndroid Build Coastguard Worker }
195*6777b538SAndroid Build Coastguard Worker
CanonicalizeFileURL(const char16_t * spec,int spec_len,const Parsed & parsed,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)196*6777b538SAndroid Build Coastguard Worker bool CanonicalizeFileURL(const char16_t* spec,
197*6777b538SAndroid Build Coastguard Worker int spec_len,
198*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
199*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
200*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
201*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed) {
202*6777b538SAndroid Build Coastguard Worker return DoCanonicalizeFileURL<char16_t, char16_t>(
203*6777b538SAndroid Build Coastguard Worker URLComponentSource<char16_t>(spec), parsed, query_converter, output,
204*6777b538SAndroid Build Coastguard Worker new_parsed);
205*6777b538SAndroid Build Coastguard Worker }
206*6777b538SAndroid Build Coastguard Worker
FileCanonicalizePath(const char * spec,const Component & path,CanonOutput * output,Component * out_path)207*6777b538SAndroid Build Coastguard Worker bool FileCanonicalizePath(const char* spec,
208*6777b538SAndroid Build Coastguard Worker const Component& path,
209*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
210*6777b538SAndroid Build Coastguard Worker Component* out_path) {
211*6777b538SAndroid Build Coastguard Worker return DoFileCanonicalizePath<char, unsigned char>(spec, path,
212*6777b538SAndroid Build Coastguard Worker output, out_path);
213*6777b538SAndroid Build Coastguard Worker }
214*6777b538SAndroid Build Coastguard Worker
FileCanonicalizePath(const char16_t * spec,const Component & path,CanonOutput * output,Component * out_path)215*6777b538SAndroid Build Coastguard Worker bool FileCanonicalizePath(const char16_t* spec,
216*6777b538SAndroid Build Coastguard Worker const Component& path,
217*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
218*6777b538SAndroid Build Coastguard Worker Component* out_path) {
219*6777b538SAndroid Build Coastguard Worker return DoFileCanonicalizePath<char16_t, char16_t>(spec, path, output,
220*6777b538SAndroid Build Coastguard Worker out_path);
221*6777b538SAndroid Build Coastguard Worker }
222*6777b538SAndroid Build Coastguard Worker
ReplaceFileURL(const char * base,const Parsed & base_parsed,const Replacements<char> & replacements,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)223*6777b538SAndroid Build Coastguard Worker bool ReplaceFileURL(const char* base,
224*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
225*6777b538SAndroid Build Coastguard Worker const Replacements<char>& replacements,
226*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
227*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
228*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed) {
229*6777b538SAndroid Build Coastguard Worker URLComponentSource<char> source(base);
230*6777b538SAndroid Build Coastguard Worker Parsed parsed(base_parsed);
231*6777b538SAndroid Build Coastguard Worker SetupOverrideComponents(base, replacements, &source, &parsed);
232*6777b538SAndroid Build Coastguard Worker return DoCanonicalizeFileURL<char, unsigned char>(
233*6777b538SAndroid Build Coastguard Worker source, parsed, query_converter, output, new_parsed);
234*6777b538SAndroid Build Coastguard Worker }
235*6777b538SAndroid Build Coastguard Worker
ReplaceFileURL(const char * base,const Parsed & base_parsed,const Replacements<char16_t> & replacements,CharsetConverter * query_converter,CanonOutput * output,Parsed * new_parsed)236*6777b538SAndroid Build Coastguard Worker bool ReplaceFileURL(const char* base,
237*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
238*6777b538SAndroid Build Coastguard Worker const Replacements<char16_t>& replacements,
239*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
240*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
241*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed) {
242*6777b538SAndroid Build Coastguard Worker RawCanonOutput<1024> utf8;
243*6777b538SAndroid Build Coastguard Worker URLComponentSource<char> source(base);
244*6777b538SAndroid Build Coastguard Worker Parsed parsed(base_parsed);
245*6777b538SAndroid Build Coastguard Worker SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
246*6777b538SAndroid Build Coastguard Worker return DoCanonicalizeFileURL<char, unsigned char>(
247*6777b538SAndroid Build Coastguard Worker source, parsed, query_converter, output, new_parsed);
248*6777b538SAndroid Build Coastguard Worker }
249*6777b538SAndroid Build Coastguard Worker
250*6777b538SAndroid Build Coastguard Worker } // namespace url
251