/* Based on nsURLParsers.cc from Mozilla
 * -------------------------------------
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Darin Fisher (original author)
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
36*3f982cf4SFabien Sanglard
37*3f982cf4SFabien Sanglard #include "third_party/mozilla/url_parse.h"
38*3f982cf4SFabien Sanglard
39*3f982cf4SFabien Sanglard #include <assert.h>
40*3f982cf4SFabien Sanglard #include <ctype.h>
41*3f982cf4SFabien Sanglard #include <stdlib.h>
42*3f982cf4SFabien Sanglard
43*3f982cf4SFabien Sanglard #include "third_party/mozilla/url_parse_internal.h"
44*3f982cf4SFabien Sanglard
45*3f982cf4SFabien Sanglard namespace openscreen {
46*3f982cf4SFabien Sanglard namespace {
47*3f982cf4SFabien Sanglard
// Reports whether |ch| is one of the ASCII decimal digits '0' through '9',
// which are the only characters accepted as port digits.
bool IsPortDigit(char ch) {
  if (ch < '0')
    return false;
  return ch <= '9';
}
52*3f982cf4SFabien Sanglard
53*3f982cf4SFabien Sanglard // Returns the offset of the next authority terminator in the input starting
54*3f982cf4SFabien Sanglard // from start_offset. If no terminator is found, the return value will be equal
55*3f982cf4SFabien Sanglard // to spec_len.
FindNextAuthorityTerminator(const char * spec,int start_offset,int spec_len)56*3f982cf4SFabien Sanglard int FindNextAuthorityTerminator(const char* spec,
57*3f982cf4SFabien Sanglard int start_offset,
58*3f982cf4SFabien Sanglard int spec_len) {
59*3f982cf4SFabien Sanglard for (int i = start_offset; i < spec_len; i++) {
60*3f982cf4SFabien Sanglard if (IsAuthorityTerminator(spec[i]))
61*3f982cf4SFabien Sanglard return i;
62*3f982cf4SFabien Sanglard }
63*3f982cf4SFabien Sanglard return spec_len; // Not found.
64*3f982cf4SFabien Sanglard }
65*3f982cf4SFabien Sanglard
ParseUserInfo(const char * spec,const Component & user,Component * username,Component * password)66*3f982cf4SFabien Sanglard void ParseUserInfo(const char* spec,
67*3f982cf4SFabien Sanglard const Component& user,
68*3f982cf4SFabien Sanglard Component* username,
69*3f982cf4SFabien Sanglard Component* password) {
70*3f982cf4SFabien Sanglard // Find the first colon in the user section, which separates the username and
71*3f982cf4SFabien Sanglard // password.
72*3f982cf4SFabien Sanglard int colon_offset = 0;
73*3f982cf4SFabien Sanglard while (colon_offset < user.len && spec[user.begin + colon_offset] != ':')
74*3f982cf4SFabien Sanglard colon_offset++;
75*3f982cf4SFabien Sanglard
76*3f982cf4SFabien Sanglard if (colon_offset < user.len) {
77*3f982cf4SFabien Sanglard // Found separator: <username>:<password>
78*3f982cf4SFabien Sanglard *username = Component(user.begin, colon_offset);
79*3f982cf4SFabien Sanglard *password = MakeRange(user.begin + colon_offset + 1, user.begin + user.len);
80*3f982cf4SFabien Sanglard } else {
81*3f982cf4SFabien Sanglard // No separator, treat everything as the username
82*3f982cf4SFabien Sanglard *username = user;
83*3f982cf4SFabien Sanglard *password = Component();
84*3f982cf4SFabien Sanglard }
85*3f982cf4SFabien Sanglard }
86*3f982cf4SFabien Sanglard
ParseServerInfo(const char * spec,const Component & serverinfo,Component * hostname,Component * port_num)87*3f982cf4SFabien Sanglard void ParseServerInfo(const char* spec,
88*3f982cf4SFabien Sanglard const Component& serverinfo,
89*3f982cf4SFabien Sanglard Component* hostname,
90*3f982cf4SFabien Sanglard Component* port_num) {
91*3f982cf4SFabien Sanglard if (serverinfo.len == 0) {
92*3f982cf4SFabien Sanglard // No server info, host name is empty.
93*3f982cf4SFabien Sanglard hostname->reset();
94*3f982cf4SFabien Sanglard port_num->reset();
95*3f982cf4SFabien Sanglard return;
96*3f982cf4SFabien Sanglard }
97*3f982cf4SFabien Sanglard
98*3f982cf4SFabien Sanglard // If the host starts with a left-bracket, assume the entire host is an
99*3f982cf4SFabien Sanglard // IPv6 literal. Otherwise, assume none of the host is an IPv6 literal.
100*3f982cf4SFabien Sanglard // This assumption will be overridden if we find a right-bracket.
101*3f982cf4SFabien Sanglard //
102*3f982cf4SFabien Sanglard // Our IPv6 address canonicalization code requires both brackets to exist,
103*3f982cf4SFabien Sanglard // but the ability to locate an incomplete address can still be useful.
104*3f982cf4SFabien Sanglard int ipv6_terminator = spec[serverinfo.begin] == '[' ? serverinfo.end() : -1;
105*3f982cf4SFabien Sanglard int colon = -1;
106*3f982cf4SFabien Sanglard
107*3f982cf4SFabien Sanglard // Find the last right-bracket, and the last colon.
108*3f982cf4SFabien Sanglard for (int i = serverinfo.begin; i < serverinfo.end(); i++) {
109*3f982cf4SFabien Sanglard switch (spec[i]) {
110*3f982cf4SFabien Sanglard case ']':
111*3f982cf4SFabien Sanglard ipv6_terminator = i;
112*3f982cf4SFabien Sanglard break;
113*3f982cf4SFabien Sanglard case ':':
114*3f982cf4SFabien Sanglard colon = i;
115*3f982cf4SFabien Sanglard break;
116*3f982cf4SFabien Sanglard }
117*3f982cf4SFabien Sanglard }
118*3f982cf4SFabien Sanglard
119*3f982cf4SFabien Sanglard if (colon > ipv6_terminator) {
120*3f982cf4SFabien Sanglard // Found a port number: <hostname>:<port>
121*3f982cf4SFabien Sanglard *hostname = MakeRange(serverinfo.begin, colon);
122*3f982cf4SFabien Sanglard if (hostname->len == 0)
123*3f982cf4SFabien Sanglard hostname->reset();
124*3f982cf4SFabien Sanglard *port_num = MakeRange(colon + 1, serverinfo.end());
125*3f982cf4SFabien Sanglard } else {
126*3f982cf4SFabien Sanglard // No port: <hostname>
127*3f982cf4SFabien Sanglard *hostname = serverinfo;
128*3f982cf4SFabien Sanglard port_num->reset();
129*3f982cf4SFabien Sanglard }
130*3f982cf4SFabien Sanglard }
131*3f982cf4SFabien Sanglard
132*3f982cf4SFabien Sanglard // Given an already-identified auth section, breaks it into its consituent
133*3f982cf4SFabien Sanglard // parts. The port number will be parsed and the resulting integer will be
134*3f982cf4SFabien Sanglard // filled into the given *port variable, or -1 if there is no port number or it
135*3f982cf4SFabien Sanglard // is invalid.
DoParseAuthority(const char * spec,const Component & auth,Component * username,Component * password,Component * hostname,Component * port_num)136*3f982cf4SFabien Sanglard void DoParseAuthority(const char* spec,
137*3f982cf4SFabien Sanglard const Component& auth,
138*3f982cf4SFabien Sanglard Component* username,
139*3f982cf4SFabien Sanglard Component* password,
140*3f982cf4SFabien Sanglard Component* hostname,
141*3f982cf4SFabien Sanglard Component* port_num) {
142*3f982cf4SFabien Sanglard assert(auth.is_valid());
143*3f982cf4SFabien Sanglard if (auth.len == 0) {
144*3f982cf4SFabien Sanglard username->reset();
145*3f982cf4SFabien Sanglard password->reset();
146*3f982cf4SFabien Sanglard hostname->reset();
147*3f982cf4SFabien Sanglard port_num->reset();
148*3f982cf4SFabien Sanglard return;
149*3f982cf4SFabien Sanglard }
150*3f982cf4SFabien Sanglard
151*3f982cf4SFabien Sanglard // Search backwards for @, which is the separator between the user info and
152*3f982cf4SFabien Sanglard // the server info.
153*3f982cf4SFabien Sanglard int i = auth.begin + auth.len - 1;
154*3f982cf4SFabien Sanglard while (i > auth.begin && spec[i] != '@')
155*3f982cf4SFabien Sanglard i--;
156*3f982cf4SFabien Sanglard
157*3f982cf4SFabien Sanglard if (spec[i] == '@') {
158*3f982cf4SFabien Sanglard // Found user info: <user-info>@<server-info>
159*3f982cf4SFabien Sanglard ParseUserInfo(spec, Component(auth.begin, i - auth.begin), username,
160*3f982cf4SFabien Sanglard password);
161*3f982cf4SFabien Sanglard ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len), hostname,
162*3f982cf4SFabien Sanglard port_num);
163*3f982cf4SFabien Sanglard } else {
164*3f982cf4SFabien Sanglard // No user info, everything is server info.
165*3f982cf4SFabien Sanglard username->reset();
166*3f982cf4SFabien Sanglard password->reset();
167*3f982cf4SFabien Sanglard ParseServerInfo(spec, auth, hostname, port_num);
168*3f982cf4SFabien Sanglard }
169*3f982cf4SFabien Sanglard }
170*3f982cf4SFabien Sanglard
FindQueryAndRefParts(const char * spec,const Component & path,int * query_separator,int * ref_separator)171*3f982cf4SFabien Sanglard inline void FindQueryAndRefParts(const char* spec,
172*3f982cf4SFabien Sanglard const Component& path,
173*3f982cf4SFabien Sanglard int* query_separator,
174*3f982cf4SFabien Sanglard int* ref_separator) {
175*3f982cf4SFabien Sanglard int path_end = path.begin + path.len;
176*3f982cf4SFabien Sanglard for (int i = path.begin; i < path_end; i++) {
177*3f982cf4SFabien Sanglard switch (spec[i]) {
178*3f982cf4SFabien Sanglard case '?':
179*3f982cf4SFabien Sanglard // Only match the query string if it precedes the reference fragment
180*3f982cf4SFabien Sanglard // and when we haven't found one already.
181*3f982cf4SFabien Sanglard if (*query_separator < 0)
182*3f982cf4SFabien Sanglard *query_separator = i;
183*3f982cf4SFabien Sanglard break;
184*3f982cf4SFabien Sanglard case '#':
185*3f982cf4SFabien Sanglard // Record the first # sign only.
186*3f982cf4SFabien Sanglard if (*ref_separator < 0) {
187*3f982cf4SFabien Sanglard *ref_separator = i;
188*3f982cf4SFabien Sanglard return;
189*3f982cf4SFabien Sanglard }
190*3f982cf4SFabien Sanglard break;
191*3f982cf4SFabien Sanglard }
192*3f982cf4SFabien Sanglard }
193*3f982cf4SFabien Sanglard }
194*3f982cf4SFabien Sanglard
ParsePath(const char * spec,const Component & path,Component * filepath,Component * query,Component * ref)195*3f982cf4SFabien Sanglard void ParsePath(const char* spec,
196*3f982cf4SFabien Sanglard const Component& path,
197*3f982cf4SFabien Sanglard Component* filepath,
198*3f982cf4SFabien Sanglard Component* query,
199*3f982cf4SFabien Sanglard Component* ref) {
200*3f982cf4SFabien Sanglard // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<ref>
201*3f982cf4SFabien Sanglard
202*3f982cf4SFabien Sanglard // Special case when there is no path.
203*3f982cf4SFabien Sanglard if (path.len == -1) {
204*3f982cf4SFabien Sanglard filepath->reset();
205*3f982cf4SFabien Sanglard query->reset();
206*3f982cf4SFabien Sanglard ref->reset();
207*3f982cf4SFabien Sanglard return;
208*3f982cf4SFabien Sanglard }
209*3f982cf4SFabien Sanglard assert(path.len > 0);
210*3f982cf4SFabien Sanglard
211*3f982cf4SFabien Sanglard // Search for first occurrence of either ? or #.
212*3f982cf4SFabien Sanglard int query_separator = -1; // Index of the '?'
213*3f982cf4SFabien Sanglard int ref_separator = -1; // Index of the '#'
214*3f982cf4SFabien Sanglard FindQueryAndRefParts(spec, path, &query_separator, &ref_separator);
215*3f982cf4SFabien Sanglard
216*3f982cf4SFabien Sanglard // Markers pointing to the character after each of these corresponding
217*3f982cf4SFabien Sanglard // components. The code below words from the end back to the beginning,
218*3f982cf4SFabien Sanglard // and will update these indices as it finds components that exist.
219*3f982cf4SFabien Sanglard int file_end, query_end;
220*3f982cf4SFabien Sanglard
221*3f982cf4SFabien Sanglard // Ref fragment: from the # to the end of the path.
222*3f982cf4SFabien Sanglard int path_end = path.begin + path.len;
223*3f982cf4SFabien Sanglard if (ref_separator >= 0) {
224*3f982cf4SFabien Sanglard file_end = query_end = ref_separator;
225*3f982cf4SFabien Sanglard *ref = MakeRange(ref_separator + 1, path_end);
226*3f982cf4SFabien Sanglard } else {
227*3f982cf4SFabien Sanglard file_end = query_end = path_end;
228*3f982cf4SFabien Sanglard ref->reset();
229*3f982cf4SFabien Sanglard }
230*3f982cf4SFabien Sanglard
231*3f982cf4SFabien Sanglard // Query fragment: everything from the ? to the next boundary (either the end
232*3f982cf4SFabien Sanglard // of the path or the ref fragment).
233*3f982cf4SFabien Sanglard if (query_separator >= 0) {
234*3f982cf4SFabien Sanglard file_end = query_separator;
235*3f982cf4SFabien Sanglard *query = MakeRange(query_separator + 1, query_end);
236*3f982cf4SFabien Sanglard } else {
237*3f982cf4SFabien Sanglard query->reset();
238*3f982cf4SFabien Sanglard }
239*3f982cf4SFabien Sanglard
240*3f982cf4SFabien Sanglard // File path: treat an empty file path as no file path.
241*3f982cf4SFabien Sanglard if (file_end != path.begin)
242*3f982cf4SFabien Sanglard *filepath = MakeRange(path.begin, file_end);
243*3f982cf4SFabien Sanglard else
244*3f982cf4SFabien Sanglard filepath->reset();
245*3f982cf4SFabien Sanglard }
246*3f982cf4SFabien Sanglard
DoExtractScheme(const char * url,int url_len,Component * scheme)247*3f982cf4SFabien Sanglard bool DoExtractScheme(const char* url, int url_len, Component* scheme) {
248*3f982cf4SFabien Sanglard // Skip leading whitespace and control characters.
249*3f982cf4SFabien Sanglard int begin = 0;
250*3f982cf4SFabien Sanglard while (begin < url_len && ShouldTrimFromURL(url[begin]))
251*3f982cf4SFabien Sanglard begin++;
252*3f982cf4SFabien Sanglard if (begin == url_len)
253*3f982cf4SFabien Sanglard return false; // Input is empty or all whitespace.
254*3f982cf4SFabien Sanglard
255*3f982cf4SFabien Sanglard // Find the first colon character.
256*3f982cf4SFabien Sanglard for (int i = begin; i < url_len; i++) {
257*3f982cf4SFabien Sanglard if (url[i] == ':') {
258*3f982cf4SFabien Sanglard *scheme = MakeRange(begin, i);
259*3f982cf4SFabien Sanglard return true;
260*3f982cf4SFabien Sanglard }
261*3f982cf4SFabien Sanglard }
262*3f982cf4SFabien Sanglard return false; // No colon found: no scheme
263*3f982cf4SFabien Sanglard }
264*3f982cf4SFabien Sanglard
265*3f982cf4SFabien Sanglard // Fills in all members of the Parsed structure except for the scheme.
266*3f982cf4SFabien Sanglard //
267*3f982cf4SFabien Sanglard // |spec| is the full spec being parsed, of length |spec_len|.
268*3f982cf4SFabien Sanglard // |after_scheme| is the character immediately following the scheme (after the
269*3f982cf4SFabien Sanglard // colon) where we'll begin parsing.
270*3f982cf4SFabien Sanglard //
271*3f982cf4SFabien Sanglard // Compatability data points. I list "host", "path" extracted:
272*3f982cf4SFabien Sanglard // Input IE6 Firefox Us
273*3f982cf4SFabien Sanglard // ----- -------------- -------------- --------------
274*3f982cf4SFabien Sanglard // http://foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"
275*3f982cf4SFabien Sanglard // http:foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"
276*3f982cf4SFabien Sanglard // http:/foo.com/ fail(*) "foo.com", "/" "foo.com", "/"
277*3f982cf4SFabien Sanglard // http:\foo.com/ fail(*) "\foo.com", "/"(fail) "foo.com", "/"
278*3f982cf4SFabien Sanglard // http:////foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"
279*3f982cf4SFabien Sanglard //
280*3f982cf4SFabien Sanglard // (*) Interestingly, although IE fails to load these URLs, its history
281*3f982cf4SFabien Sanglard // canonicalizer handles them, meaning if you've been to the corresponding
282*3f982cf4SFabien Sanglard // "http://foo.com/" link, it will be colored.
DoParseAfterScheme(const char * spec,int spec_len,int after_scheme,Parsed * parsed)283*3f982cf4SFabien Sanglard void DoParseAfterScheme(const char* spec,
284*3f982cf4SFabien Sanglard int spec_len,
285*3f982cf4SFabien Sanglard int after_scheme,
286*3f982cf4SFabien Sanglard Parsed* parsed) {
287*3f982cf4SFabien Sanglard int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
288*3f982cf4SFabien Sanglard int after_slashes = after_scheme + num_slashes;
289*3f982cf4SFabien Sanglard
290*3f982cf4SFabien Sanglard // First split into two main parts, the authority (username, password, host,
291*3f982cf4SFabien Sanglard // and port) and the full path (path, query, and reference).
292*3f982cf4SFabien Sanglard Component authority;
293*3f982cf4SFabien Sanglard Component full_path;
294*3f982cf4SFabien Sanglard
295*3f982cf4SFabien Sanglard // Found "//<some data>", looks like an authority section. Treat everything
296*3f982cf4SFabien Sanglard // from there to the next slash (or end of spec) to be the authority. Note
297*3f982cf4SFabien Sanglard // that we ignore the number of slashes and treat it as the authority.
298*3f982cf4SFabien Sanglard int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len);
299*3f982cf4SFabien Sanglard authority = Component(after_slashes, end_auth - after_slashes);
300*3f982cf4SFabien Sanglard
301*3f982cf4SFabien Sanglard if (end_auth == spec_len) // No beginning of path found.
302*3f982cf4SFabien Sanglard full_path = Component();
303*3f982cf4SFabien Sanglard else // Everything starting from the slash to the end is the path.
304*3f982cf4SFabien Sanglard full_path = Component(end_auth, spec_len - end_auth);
305*3f982cf4SFabien Sanglard
306*3f982cf4SFabien Sanglard // Now parse those two sub-parts.
307*3f982cf4SFabien Sanglard DoParseAuthority(spec, authority, &parsed->username, &parsed->password,
308*3f982cf4SFabien Sanglard &parsed->host, &parsed->port);
309*3f982cf4SFabien Sanglard ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref);
310*3f982cf4SFabien Sanglard }
311*3f982cf4SFabien Sanglard
312*3f982cf4SFabien Sanglard // The main parsing function for standard URLs. Standard URLs have a scheme,
313*3f982cf4SFabien Sanglard // host, path, etc.
DoParseStandardURL(const char * spec,int spec_len,Parsed * parsed)314*3f982cf4SFabien Sanglard void DoParseStandardURL(const char* spec, int spec_len, Parsed* parsed) {
315*3f982cf4SFabien Sanglard assert(spec_len >= 0);
316*3f982cf4SFabien Sanglard
317*3f982cf4SFabien Sanglard // Strip leading & trailing spaces and control characters.
318*3f982cf4SFabien Sanglard int begin = 0;
319*3f982cf4SFabien Sanglard TrimURL(spec, &begin, &spec_len);
320*3f982cf4SFabien Sanglard
321*3f982cf4SFabien Sanglard int after_scheme;
322*3f982cf4SFabien Sanglard if (DoExtractScheme(spec, spec_len, &parsed->scheme)) {
323*3f982cf4SFabien Sanglard after_scheme = parsed->scheme.end() + 1; // Skip past the colon.
324*3f982cf4SFabien Sanglard } else {
325*3f982cf4SFabien Sanglard // Say there's no scheme when there is no colon. We could also say that
326*3f982cf4SFabien Sanglard // everything is the scheme. Both would produce an invalid URL, but this way
327*3f982cf4SFabien Sanglard // seems less wrong in more cases.
328*3f982cf4SFabien Sanglard parsed->scheme.reset();
329*3f982cf4SFabien Sanglard after_scheme = begin;
330*3f982cf4SFabien Sanglard }
331*3f982cf4SFabien Sanglard DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
332*3f982cf4SFabien Sanglard }
333*3f982cf4SFabien Sanglard
DoParseFileSystemURL(const char * spec,int spec_len,Parsed * parsed)334*3f982cf4SFabien Sanglard void DoParseFileSystemURL(const char* spec, int spec_len, Parsed* parsed) {
335*3f982cf4SFabien Sanglard assert(spec_len >= 0);
336*3f982cf4SFabien Sanglard
337*3f982cf4SFabien Sanglard // Get the unused parts of the URL out of the way.
338*3f982cf4SFabien Sanglard parsed->username.reset();
339*3f982cf4SFabien Sanglard parsed->password.reset();
340*3f982cf4SFabien Sanglard parsed->host.reset();
341*3f982cf4SFabien Sanglard parsed->port.reset();
342*3f982cf4SFabien Sanglard parsed->path.reset(); // May use this; reset for convenience.
343*3f982cf4SFabien Sanglard parsed->ref.reset(); // May use this; reset for convenience.
344*3f982cf4SFabien Sanglard parsed->query.reset(); // May use this; reset for convenience.
345*3f982cf4SFabien Sanglard parsed->clear_inner_parsed(); // May use this; reset for convenience.
346*3f982cf4SFabien Sanglard
347*3f982cf4SFabien Sanglard // Strip leading & trailing spaces and control characters.
348*3f982cf4SFabien Sanglard int begin = 0;
349*3f982cf4SFabien Sanglard TrimURL(spec, &begin, &spec_len);
350*3f982cf4SFabien Sanglard
351*3f982cf4SFabien Sanglard // Handle empty specs or ones that contain only whitespace or control chars.
352*3f982cf4SFabien Sanglard if (begin == spec_len) {
353*3f982cf4SFabien Sanglard parsed->scheme.reset();
354*3f982cf4SFabien Sanglard return;
355*3f982cf4SFabien Sanglard }
356*3f982cf4SFabien Sanglard
357*3f982cf4SFabien Sanglard int inner_start = -1;
358*3f982cf4SFabien Sanglard
359*3f982cf4SFabien Sanglard // Extract the scheme. We also handle the case where there is no scheme.
360*3f982cf4SFabien Sanglard if (DoExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
361*3f982cf4SFabien Sanglard // Offset the results since we gave ExtractScheme a substring.
362*3f982cf4SFabien Sanglard parsed->scheme.begin += begin;
363*3f982cf4SFabien Sanglard
364*3f982cf4SFabien Sanglard if (parsed->scheme.end() == spec_len - 1)
365*3f982cf4SFabien Sanglard return;
366*3f982cf4SFabien Sanglard
367*3f982cf4SFabien Sanglard inner_start = parsed->scheme.end() + 1;
368*3f982cf4SFabien Sanglard } else {
369*3f982cf4SFabien Sanglard // No scheme found; that's not valid for filesystem URLs.
370*3f982cf4SFabien Sanglard parsed->scheme.reset();
371*3f982cf4SFabien Sanglard return;
372*3f982cf4SFabien Sanglard }
373*3f982cf4SFabien Sanglard
374*3f982cf4SFabien Sanglard Component inner_scheme;
375*3f982cf4SFabien Sanglard const char* inner_spec = &spec[inner_start];
376*3f982cf4SFabien Sanglard int inner_spec_len = spec_len - inner_start;
377*3f982cf4SFabien Sanglard
378*3f982cf4SFabien Sanglard if (DoExtractScheme(inner_spec, inner_spec_len, &inner_scheme)) {
379*3f982cf4SFabien Sanglard // Offset the results since we gave ExtractScheme a substring.
380*3f982cf4SFabien Sanglard inner_scheme.begin += inner_start;
381*3f982cf4SFabien Sanglard
382*3f982cf4SFabien Sanglard if (inner_scheme.end() == spec_len - 1)
383*3f982cf4SFabien Sanglard return;
384*3f982cf4SFabien Sanglard } else {
385*3f982cf4SFabien Sanglard // No scheme found; that's not valid for filesystem URLs.
386*3f982cf4SFabien Sanglard // The best we can do is return "filesystem://".
387*3f982cf4SFabien Sanglard return;
388*3f982cf4SFabien Sanglard }
389*3f982cf4SFabien Sanglard
390*3f982cf4SFabien Sanglard Parsed inner_parsed;
391*3f982cf4SFabien Sanglard
392*3f982cf4SFabien Sanglard if (CompareSchemeComponent(spec, inner_scheme, kFileScheme)) {
393*3f982cf4SFabien Sanglard // File URLs are special.
394*3f982cf4SFabien Sanglard ParseFileURL(inner_spec, inner_spec_len, &inner_parsed);
395*3f982cf4SFabien Sanglard } else if (CompareSchemeComponent(spec, inner_scheme, kFileSystemScheme)) {
396*3f982cf4SFabien Sanglard // Filesystem URLs don't nest.
397*3f982cf4SFabien Sanglard return;
398*3f982cf4SFabien Sanglard } else if (IsStandard(spec, inner_scheme)) {
399*3f982cf4SFabien Sanglard // All "normal" URLs.
400*3f982cf4SFabien Sanglard DoParseStandardURL(inner_spec, inner_spec_len, &inner_parsed);
401*3f982cf4SFabien Sanglard } else {
402*3f982cf4SFabien Sanglard return;
403*3f982cf4SFabien Sanglard }
404*3f982cf4SFabien Sanglard
405*3f982cf4SFabien Sanglard // All members of inner_parsed need to be offset by inner_start.
406*3f982cf4SFabien Sanglard // If we had any scheme that supported nesting more than one level deep,
407*3f982cf4SFabien Sanglard // we'd have to recurse into the inner_parsed's inner_parsed when
408*3f982cf4SFabien Sanglard // adjusting by inner_start.
409*3f982cf4SFabien Sanglard inner_parsed.scheme.begin += inner_start;
410*3f982cf4SFabien Sanglard inner_parsed.username.begin += inner_start;
411*3f982cf4SFabien Sanglard inner_parsed.password.begin += inner_start;
412*3f982cf4SFabien Sanglard inner_parsed.host.begin += inner_start;
413*3f982cf4SFabien Sanglard inner_parsed.port.begin += inner_start;
414*3f982cf4SFabien Sanglard inner_parsed.query.begin += inner_start;
415*3f982cf4SFabien Sanglard inner_parsed.ref.begin += inner_start;
416*3f982cf4SFabien Sanglard inner_parsed.path.begin += inner_start;
417*3f982cf4SFabien Sanglard
418*3f982cf4SFabien Sanglard // Query and ref move from inner_parsed to parsed.
419*3f982cf4SFabien Sanglard parsed->query = inner_parsed.query;
420*3f982cf4SFabien Sanglard inner_parsed.query.reset();
421*3f982cf4SFabien Sanglard parsed->ref = inner_parsed.ref;
422*3f982cf4SFabien Sanglard inner_parsed.ref.reset();
423*3f982cf4SFabien Sanglard
424*3f982cf4SFabien Sanglard parsed->set_inner_parsed(inner_parsed);
425*3f982cf4SFabien Sanglard if (!inner_parsed.scheme.is_valid() || !inner_parsed.path.is_valid() ||
426*3f982cf4SFabien Sanglard inner_parsed.inner_parsed()) {
427*3f982cf4SFabien Sanglard return;
428*3f982cf4SFabien Sanglard }
429*3f982cf4SFabien Sanglard
430*3f982cf4SFabien Sanglard // The path in inner_parsed should start with a slash, then have a filesystem
431*3f982cf4SFabien Sanglard // type followed by a slash. From the first slash up to but excluding the
432*3f982cf4SFabien Sanglard // second should be what it keeps; the rest goes to parsed. If the path ends
433*3f982cf4SFabien Sanglard // before the second slash, it's still pretty clear what the user meant, so
434*3f982cf4SFabien Sanglard // we'll let that through.
435*3f982cf4SFabien Sanglard if (!IsURLSlash(spec[inner_parsed.path.begin])) {
436*3f982cf4SFabien Sanglard return;
437*3f982cf4SFabien Sanglard }
438*3f982cf4SFabien Sanglard int inner_path_end = inner_parsed.path.begin + 1; // skip the leading slash
439*3f982cf4SFabien Sanglard while (inner_path_end < spec_len && !IsURLSlash(spec[inner_path_end]))
440*3f982cf4SFabien Sanglard ++inner_path_end;
441*3f982cf4SFabien Sanglard parsed->path.begin = inner_path_end;
442*3f982cf4SFabien Sanglard int new_inner_path_length = inner_path_end - inner_parsed.path.begin;
443*3f982cf4SFabien Sanglard parsed->path.len = inner_parsed.path.len - new_inner_path_length;
444*3f982cf4SFabien Sanglard parsed->inner_parsed()->path.len = new_inner_path_length;
445*3f982cf4SFabien Sanglard }
446*3f982cf4SFabien Sanglard
447*3f982cf4SFabien Sanglard // Initializes a path URL which is merely a scheme followed by a path. Examples
448*3f982cf4SFabien Sanglard // include "about:foo" and "javascript:alert('bar');"
DoParsePathURL(const char * spec,int spec_len,bool trim_path_end,Parsed * parsed)449*3f982cf4SFabien Sanglard void DoParsePathURL(const char* spec,
450*3f982cf4SFabien Sanglard int spec_len,
451*3f982cf4SFabien Sanglard bool trim_path_end,
452*3f982cf4SFabien Sanglard Parsed* parsed) {
453*3f982cf4SFabien Sanglard // Get the non-path and non-scheme parts of the URL out of the way, we never
454*3f982cf4SFabien Sanglard // use them.
455*3f982cf4SFabien Sanglard parsed->username.reset();
456*3f982cf4SFabien Sanglard parsed->password.reset();
457*3f982cf4SFabien Sanglard parsed->host.reset();
458*3f982cf4SFabien Sanglard parsed->port.reset();
459*3f982cf4SFabien Sanglard parsed->path.reset();
460*3f982cf4SFabien Sanglard parsed->query.reset();
461*3f982cf4SFabien Sanglard parsed->ref.reset();
462*3f982cf4SFabien Sanglard
463*3f982cf4SFabien Sanglard // Strip leading & trailing spaces and control characters.
464*3f982cf4SFabien Sanglard int scheme_begin = 0;
465*3f982cf4SFabien Sanglard TrimURL(spec, &scheme_begin, &spec_len, trim_path_end);
466*3f982cf4SFabien Sanglard
467*3f982cf4SFabien Sanglard // Handle empty specs or ones that contain only whitespace or control chars.
468*3f982cf4SFabien Sanglard if (scheme_begin == spec_len) {
469*3f982cf4SFabien Sanglard parsed->scheme.reset();
470*3f982cf4SFabien Sanglard parsed->path.reset();
471*3f982cf4SFabien Sanglard return;
472*3f982cf4SFabien Sanglard }
473*3f982cf4SFabien Sanglard
474*3f982cf4SFabien Sanglard int path_begin;
475*3f982cf4SFabien Sanglard // Extract the scheme, with the path being everything following. We also
476*3f982cf4SFabien Sanglard // handle the case where there is no scheme.
477*3f982cf4SFabien Sanglard if (ExtractScheme(&spec[scheme_begin], spec_len - scheme_begin,
478*3f982cf4SFabien Sanglard &parsed->scheme)) {
479*3f982cf4SFabien Sanglard // Offset the results since we gave ExtractScheme a substring.
480*3f982cf4SFabien Sanglard parsed->scheme.begin += scheme_begin;
481*3f982cf4SFabien Sanglard path_begin = parsed->scheme.end() + 1;
482*3f982cf4SFabien Sanglard } else {
483*3f982cf4SFabien Sanglard // No scheme case.
484*3f982cf4SFabien Sanglard parsed->scheme.reset();
485*3f982cf4SFabien Sanglard path_begin = scheme_begin;
486*3f982cf4SFabien Sanglard }
487*3f982cf4SFabien Sanglard
488*3f982cf4SFabien Sanglard if (path_begin == spec_len)
489*3f982cf4SFabien Sanglard return;
490*3f982cf4SFabien Sanglard assert(path_begin < spec_len);
491*3f982cf4SFabien Sanglard
492*3f982cf4SFabien Sanglard ParsePath(spec, MakeRange(path_begin, spec_len), &parsed->path,
493*3f982cf4SFabien Sanglard &parsed->query, &parsed->ref);
494*3f982cf4SFabien Sanglard }
495*3f982cf4SFabien Sanglard
DoParseMailtoURL(const char * spec,int spec_len,Parsed * parsed)496*3f982cf4SFabien Sanglard void DoParseMailtoURL(const char* spec, int spec_len, Parsed* parsed) {
497*3f982cf4SFabien Sanglard assert(spec_len >= 0);
498*3f982cf4SFabien Sanglard
499*3f982cf4SFabien Sanglard // Get the non-path and non-scheme parts of the URL out of the way, we never
500*3f982cf4SFabien Sanglard // use them.
501*3f982cf4SFabien Sanglard parsed->username.reset();
502*3f982cf4SFabien Sanglard parsed->password.reset();
503*3f982cf4SFabien Sanglard parsed->host.reset();
504*3f982cf4SFabien Sanglard parsed->port.reset();
505*3f982cf4SFabien Sanglard parsed->ref.reset();
506*3f982cf4SFabien Sanglard parsed->query.reset(); // May use this; reset for convenience.
507*3f982cf4SFabien Sanglard
508*3f982cf4SFabien Sanglard // Strip leading & trailing spaces and control characters.
509*3f982cf4SFabien Sanglard int begin = 0;
510*3f982cf4SFabien Sanglard TrimURL(spec, &begin, &spec_len);
511*3f982cf4SFabien Sanglard
512*3f982cf4SFabien Sanglard // Handle empty specs or ones that contain only whitespace or control chars.
513*3f982cf4SFabien Sanglard if (begin == spec_len) {
514*3f982cf4SFabien Sanglard parsed->scheme.reset();
515*3f982cf4SFabien Sanglard parsed->path.reset();
516*3f982cf4SFabien Sanglard return;
517*3f982cf4SFabien Sanglard }
518*3f982cf4SFabien Sanglard
519*3f982cf4SFabien Sanglard int path_begin = -1;
520*3f982cf4SFabien Sanglard int path_end = -1;
521*3f982cf4SFabien Sanglard
522*3f982cf4SFabien Sanglard // Extract the scheme, with the path being everything following. We also
523*3f982cf4SFabien Sanglard // handle the case where there is no scheme.
524*3f982cf4SFabien Sanglard if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
525*3f982cf4SFabien Sanglard // Offset the results since we gave ExtractScheme a substring.
526*3f982cf4SFabien Sanglard parsed->scheme.begin += begin;
527*3f982cf4SFabien Sanglard
528*3f982cf4SFabien Sanglard if (parsed->scheme.end() != spec_len - 1) {
529*3f982cf4SFabien Sanglard path_begin = parsed->scheme.end() + 1;
530*3f982cf4SFabien Sanglard path_end = spec_len;
531*3f982cf4SFabien Sanglard }
532*3f982cf4SFabien Sanglard } else {
533*3f982cf4SFabien Sanglard // No scheme found, just path.
534*3f982cf4SFabien Sanglard parsed->scheme.reset();
535*3f982cf4SFabien Sanglard path_begin = begin;
536*3f982cf4SFabien Sanglard path_end = spec_len;
537*3f982cf4SFabien Sanglard }
538*3f982cf4SFabien Sanglard
539*3f982cf4SFabien Sanglard // Split [path_begin, path_end) into a path + query.
540*3f982cf4SFabien Sanglard for (int i = path_begin; i < path_end; ++i) {
541*3f982cf4SFabien Sanglard if (spec[i] == '?') {
542*3f982cf4SFabien Sanglard parsed->query = MakeRange(i + 1, path_end);
543*3f982cf4SFabien Sanglard path_end = i;
544*3f982cf4SFabien Sanglard break;
545*3f982cf4SFabien Sanglard }
546*3f982cf4SFabien Sanglard }
547*3f982cf4SFabien Sanglard
548*3f982cf4SFabien Sanglard // For compatability with the standard URL parser, treat no path as
549*3f982cf4SFabien Sanglard // -1, rather than having a length of 0
550*3f982cf4SFabien Sanglard if (path_begin == path_end) {
551*3f982cf4SFabien Sanglard parsed->path.reset();
552*3f982cf4SFabien Sanglard } else {
553*3f982cf4SFabien Sanglard parsed->path = MakeRange(path_begin, path_end);
554*3f982cf4SFabien Sanglard }
555*3f982cf4SFabien Sanglard }
556*3f982cf4SFabien Sanglard
557*3f982cf4SFabien Sanglard // Converts a port number in a string to an integer. We'd like to just call
558*3f982cf4SFabien Sanglard // sscanf but our input is not NULL-terminated, which sscanf requires. Instead,
559*3f982cf4SFabien Sanglard // we copy the digits to a small stack buffer (since we know the maximum number
560*3f982cf4SFabien Sanglard // of digits in a valid port number) that we can NULL terminate.
DoParsePort(const char * spec,const Component & component)561*3f982cf4SFabien Sanglard int DoParsePort(const char* spec, const Component& component) {
562*3f982cf4SFabien Sanglard // Easy success case when there is no port.
563*3f982cf4SFabien Sanglard const int kMaxDigits = 5;
564*3f982cf4SFabien Sanglard if (!component.is_nonempty())
565*3f982cf4SFabien Sanglard return PORT_UNSPECIFIED;
566*3f982cf4SFabien Sanglard
567*3f982cf4SFabien Sanglard // Skip over any leading 0s.
568*3f982cf4SFabien Sanglard Component digits_comp(component.end(), 0);
569*3f982cf4SFabien Sanglard for (int i = 0; i < component.len; i++) {
570*3f982cf4SFabien Sanglard if (spec[component.begin + i] != '0') {
571*3f982cf4SFabien Sanglard digits_comp = MakeRange(component.begin + i, component.end());
572*3f982cf4SFabien Sanglard break;
573*3f982cf4SFabien Sanglard }
574*3f982cf4SFabien Sanglard }
575*3f982cf4SFabien Sanglard if (digits_comp.len == 0)
576*3f982cf4SFabien Sanglard return 0; // All digits were 0.
577*3f982cf4SFabien Sanglard
578*3f982cf4SFabien Sanglard // Verify we don't have too many digits (we'll be copying to our buffer so
579*3f982cf4SFabien Sanglard // we need to double-check).
580*3f982cf4SFabien Sanglard if (digits_comp.len > kMaxDigits)
581*3f982cf4SFabien Sanglard return PORT_INVALID;
582*3f982cf4SFabien Sanglard
583*3f982cf4SFabien Sanglard // Copy valid digits to the buffer.
584*3f982cf4SFabien Sanglard char digits[kMaxDigits + 1]; // +1 for null terminator
585*3f982cf4SFabien Sanglard for (int i = 0; i < digits_comp.len; i++) {
586*3f982cf4SFabien Sanglard char ch = spec[digits_comp.begin + i];
587*3f982cf4SFabien Sanglard if (!IsPortDigit(ch)) {
588*3f982cf4SFabien Sanglard // Invalid port digit, fail.
589*3f982cf4SFabien Sanglard return PORT_INVALID;
590*3f982cf4SFabien Sanglard }
591*3f982cf4SFabien Sanglard digits[i] = static_cast<char>(ch);
592*3f982cf4SFabien Sanglard }
593*3f982cf4SFabien Sanglard
594*3f982cf4SFabien Sanglard // Null-terminate the string and convert to integer. Since we guarantee
595*3f982cf4SFabien Sanglard // only digits, atoi's lack of error handling is OK.
596*3f982cf4SFabien Sanglard digits[digits_comp.len] = 0;
597*3f982cf4SFabien Sanglard int port = atoi(digits);
598*3f982cf4SFabien Sanglard if (port > 65535)
599*3f982cf4SFabien Sanglard return PORT_INVALID; // Out of range.
600*3f982cf4SFabien Sanglard return port;
601*3f982cf4SFabien Sanglard }
602*3f982cf4SFabien Sanglard
DoExtractFileName(const char * spec,const Component & path,Component * file_name)603*3f982cf4SFabien Sanglard void DoExtractFileName(const char* spec,
604*3f982cf4SFabien Sanglard const Component& path,
605*3f982cf4SFabien Sanglard Component* file_name) {
606*3f982cf4SFabien Sanglard // Handle empty paths: they have no file names.
607*3f982cf4SFabien Sanglard if (!path.is_nonempty()) {
608*3f982cf4SFabien Sanglard file_name->reset();
609*3f982cf4SFabien Sanglard return;
610*3f982cf4SFabien Sanglard }
611*3f982cf4SFabien Sanglard
612*3f982cf4SFabien Sanglard // Extract the filename range from the path which is between
613*3f982cf4SFabien Sanglard // the last slash and the following semicolon.
614*3f982cf4SFabien Sanglard int file_end = path.end();
615*3f982cf4SFabien Sanglard for (int i = path.end() - 1; i >= path.begin; i--) {
616*3f982cf4SFabien Sanglard if (spec[i] == ';') {
617*3f982cf4SFabien Sanglard file_end = i;
618*3f982cf4SFabien Sanglard } else if (IsURLSlash(spec[i])) {
619*3f982cf4SFabien Sanglard // File name is everything following this character to the end
620*3f982cf4SFabien Sanglard *file_name = MakeRange(i + 1, file_end);
621*3f982cf4SFabien Sanglard return;
622*3f982cf4SFabien Sanglard }
623*3f982cf4SFabien Sanglard }
624*3f982cf4SFabien Sanglard
625*3f982cf4SFabien Sanglard // No slash found, this means the input was degenerate (generally paths
626*3f982cf4SFabien Sanglard // will start with a slash). Let's call everything the file name.
627*3f982cf4SFabien Sanglard *file_name = MakeRange(path.begin, file_end);
628*3f982cf4SFabien Sanglard return;
629*3f982cf4SFabien Sanglard }
630*3f982cf4SFabien Sanglard
DoExtractQueryKeyValue(const char * spec,Component * query,Component * key,Component * value)631*3f982cf4SFabien Sanglard bool DoExtractQueryKeyValue(const char* spec,
632*3f982cf4SFabien Sanglard Component* query,
633*3f982cf4SFabien Sanglard Component* key,
634*3f982cf4SFabien Sanglard Component* value) {
635*3f982cf4SFabien Sanglard if (!query->is_nonempty())
636*3f982cf4SFabien Sanglard return false;
637*3f982cf4SFabien Sanglard
638*3f982cf4SFabien Sanglard int start = query->begin;
639*3f982cf4SFabien Sanglard int cur = start;
640*3f982cf4SFabien Sanglard int end = query->end();
641*3f982cf4SFabien Sanglard
642*3f982cf4SFabien Sanglard // We assume the beginning of the input is the beginning of the "key" and we
643*3f982cf4SFabien Sanglard // skip to the end of it.
644*3f982cf4SFabien Sanglard key->begin = cur;
645*3f982cf4SFabien Sanglard while (cur < end && spec[cur] != '&' && spec[cur] != '=')
646*3f982cf4SFabien Sanglard cur++;
647*3f982cf4SFabien Sanglard key->len = cur - key->begin;
648*3f982cf4SFabien Sanglard
649*3f982cf4SFabien Sanglard // Skip the separator after the key (if any).
650*3f982cf4SFabien Sanglard if (cur < end && spec[cur] == '=')
651*3f982cf4SFabien Sanglard cur++;
652*3f982cf4SFabien Sanglard
653*3f982cf4SFabien Sanglard // Find the value part.
654*3f982cf4SFabien Sanglard value->begin = cur;
655*3f982cf4SFabien Sanglard while (cur < end && spec[cur] != '&')
656*3f982cf4SFabien Sanglard cur++;
657*3f982cf4SFabien Sanglard value->len = cur - value->begin;
658*3f982cf4SFabien Sanglard
659*3f982cf4SFabien Sanglard // Finally skip the next separator if any
660*3f982cf4SFabien Sanglard if (cur < end && spec[cur] == '&')
661*3f982cf4SFabien Sanglard cur++;
662*3f982cf4SFabien Sanglard
663*3f982cf4SFabien Sanglard // Save the new query
664*3f982cf4SFabien Sanglard *query = MakeRange(cur, end);
665*3f982cf4SFabien Sanglard return true;
666*3f982cf4SFabien Sanglard }
667*3f982cf4SFabien Sanglard
668*3f982cf4SFabien Sanglard } // namespace
669*3f982cf4SFabien Sanglard
Parsed()670*3f982cf4SFabien Sanglard Parsed::Parsed() : potentially_dangling_markup(false), inner_parsed_(NULL) {}
671*3f982cf4SFabien Sanglard
// Copy constructor: copies every component and the dangling-markup flag from
// |other|. The inner Parsed is not shared by pointer; it starts out NULL and,
// if |other| has one, is installed through set_inner_parsed() using the
// pointed-to object.
Parsed::Parsed(const Parsed& other)
    : scheme(other.scheme),
      username(other.username),
      password(other.password),
      host(other.host),
      port(other.port),
      path(other.path),
      query(other.query),
      ref(other.ref),
      potentially_dangling_markup(other.potentially_dangling_markup),
      inner_parsed_(NULL) {
  if (other.inner_parsed_ != NULL) {
    set_inner_parsed(*other.inner_parsed_);
  }
}
686*3f982cf4SFabien Sanglard
// Copy assignment: mirrors the copy constructor. Self-assignment is a no-op.
// The inner Parsed is updated through set_inner_parsed() when |other| has one
// and cleared through clear_inner_parsed() when it does not.
Parsed& Parsed::operator=(const Parsed& other) {
  if (this == &other)
    return *this;

  scheme = other.scheme;
  username = other.username;
  password = other.password;
  host = other.host;
  port = other.port;
  path = other.path;
  query = other.query;
  ref = other.ref;
  potentially_dangling_markup = other.potentially_dangling_markup;

  if (!other.inner_parsed_) {
    clear_inner_parsed();
  } else {
    set_inner_parsed(*other.inner_parsed_);
  }
  return *this;
}
705*3f982cf4SFabien Sanglard
~Parsed()706*3f982cf4SFabien Sanglard Parsed::~Parsed() {
707*3f982cf4SFabien Sanglard delete inner_parsed_;
708*3f982cf4SFabien Sanglard }
709*3f982cf4SFabien Sanglard
Length() const710*3f982cf4SFabien Sanglard int Parsed::Length() const {
711*3f982cf4SFabien Sanglard if (ref.is_valid())
712*3f982cf4SFabien Sanglard return ref.end();
713*3f982cf4SFabien Sanglard return CountCharactersBefore(REF, false);
714*3f982cf4SFabien Sanglard }
715*3f982cf4SFabien Sanglard
CountCharactersBefore(ComponentType type,bool include_delimiter) const716*3f982cf4SFabien Sanglard int Parsed::CountCharactersBefore(ComponentType type,
717*3f982cf4SFabien Sanglard bool include_delimiter) const {
718*3f982cf4SFabien Sanglard if (type == SCHEME)
719*3f982cf4SFabien Sanglard return scheme.begin;
720*3f982cf4SFabien Sanglard
721*3f982cf4SFabien Sanglard // There will be some characters after the scheme like "://" and we don't
722*3f982cf4SFabien Sanglard // know how many. Search forwards for the next thing until we find one.
723*3f982cf4SFabien Sanglard int cur = 0;
724*3f982cf4SFabien Sanglard if (scheme.is_valid())
725*3f982cf4SFabien Sanglard cur = scheme.end() + 1; // Advance over the ':' at the end of the scheme.
726*3f982cf4SFabien Sanglard
727*3f982cf4SFabien Sanglard if (username.is_valid()) {
728*3f982cf4SFabien Sanglard if (type <= USERNAME)
729*3f982cf4SFabien Sanglard return username.begin;
730*3f982cf4SFabien Sanglard cur = username.end() + 1; // Advance over the '@' or ':' at the end.
731*3f982cf4SFabien Sanglard }
732*3f982cf4SFabien Sanglard
733*3f982cf4SFabien Sanglard if (password.is_valid()) {
734*3f982cf4SFabien Sanglard if (type <= PASSWORD)
735*3f982cf4SFabien Sanglard return password.begin;
736*3f982cf4SFabien Sanglard cur = password.end() + 1; // Advance over the '@' at the end.
737*3f982cf4SFabien Sanglard }
738*3f982cf4SFabien Sanglard
739*3f982cf4SFabien Sanglard if (host.is_valid()) {
740*3f982cf4SFabien Sanglard if (type <= HOST)
741*3f982cf4SFabien Sanglard return host.begin;
742*3f982cf4SFabien Sanglard cur = host.end();
743*3f982cf4SFabien Sanglard }
744*3f982cf4SFabien Sanglard
745*3f982cf4SFabien Sanglard if (port.is_valid()) {
746*3f982cf4SFabien Sanglard if (type < PORT || (type == PORT && include_delimiter))
747*3f982cf4SFabien Sanglard return port.begin - 1; // Back over delimiter.
748*3f982cf4SFabien Sanglard if (type == PORT)
749*3f982cf4SFabien Sanglard return port.begin; // Don't want delimiter counted.
750*3f982cf4SFabien Sanglard cur = port.end();
751*3f982cf4SFabien Sanglard }
752*3f982cf4SFabien Sanglard
753*3f982cf4SFabien Sanglard if (path.is_valid()) {
754*3f982cf4SFabien Sanglard if (type <= PATH)
755*3f982cf4SFabien Sanglard return path.begin;
756*3f982cf4SFabien Sanglard cur = path.end();
757*3f982cf4SFabien Sanglard }
758*3f982cf4SFabien Sanglard
759*3f982cf4SFabien Sanglard if (query.is_valid()) {
760*3f982cf4SFabien Sanglard if (type < QUERY || (type == QUERY && include_delimiter))
761*3f982cf4SFabien Sanglard return query.begin - 1; // Back over delimiter.
762*3f982cf4SFabien Sanglard if (type == QUERY)
763*3f982cf4SFabien Sanglard return query.begin; // Don't want delimiter counted.
764*3f982cf4SFabien Sanglard cur = query.end();
765*3f982cf4SFabien Sanglard }
766*3f982cf4SFabien Sanglard
767*3f982cf4SFabien Sanglard if (ref.is_valid()) {
768*3f982cf4SFabien Sanglard if (type == REF && !include_delimiter)
769*3f982cf4SFabien Sanglard return ref.begin; // Back over delimiter.
770*3f982cf4SFabien Sanglard
771*3f982cf4SFabien Sanglard // When there is a ref and we get here, the component we wanted was before
772*3f982cf4SFabien Sanglard // this and not found, so we always know the beginning of the ref is right.
773*3f982cf4SFabien Sanglard return ref.begin - 1; // Don't want delimiter counted.
774*3f982cf4SFabien Sanglard }
775*3f982cf4SFabien Sanglard
776*3f982cf4SFabien Sanglard return cur;
777*3f982cf4SFabien Sanglard }
778*3f982cf4SFabien Sanglard
GetContent() const779*3f982cf4SFabien Sanglard Component Parsed::GetContent() const {
780*3f982cf4SFabien Sanglard const int begin = CountCharactersBefore(USERNAME, false);
781*3f982cf4SFabien Sanglard const int len = Length() - begin;
782*3f982cf4SFabien Sanglard // For compatability with the standard URL parser, we treat no content as
783*3f982cf4SFabien Sanglard // -1, rather than having a length of 0 (we normally wouldn't care so
784*3f982cf4SFabien Sanglard // much for these non-standard URLs).
785*3f982cf4SFabien Sanglard return len ? Component(begin, len) : Component();
786*3f982cf4SFabien Sanglard }
787*3f982cf4SFabien Sanglard
ExtractScheme(const char * url,int url_len,Component * scheme)788*3f982cf4SFabien Sanglard bool ExtractScheme(const char* url, int url_len, Component* scheme) {
789*3f982cf4SFabien Sanglard return DoExtractScheme(url, url_len, scheme);
790*3f982cf4SFabien Sanglard }
791*3f982cf4SFabien Sanglard
792*3f982cf4SFabien Sanglard // This handles everything that may be an authority terminator, including
793*3f982cf4SFabien Sanglard // backslash. For special backslash handling see DoParseAfterScheme.
IsAuthorityTerminator(char ch)794*3f982cf4SFabien Sanglard bool IsAuthorityTerminator(char ch) {
795*3f982cf4SFabien Sanglard return IsURLSlash(ch) || ch == '?' || ch == '#';
796*3f982cf4SFabien Sanglard }
797*3f982cf4SFabien Sanglard
ExtractFileName(const char * url,const Component & path,Component * file_name)798*3f982cf4SFabien Sanglard void ExtractFileName(const char* url,
799*3f982cf4SFabien Sanglard const Component& path,
800*3f982cf4SFabien Sanglard Component* file_name) {
801*3f982cf4SFabien Sanglard DoExtractFileName(url, path, file_name);
802*3f982cf4SFabien Sanglard }
803*3f982cf4SFabien Sanglard
ExtractQueryKeyValue(const char * url,Component * query,Component * key,Component * value)804*3f982cf4SFabien Sanglard bool ExtractQueryKeyValue(const char* url,
805*3f982cf4SFabien Sanglard Component* query,
806*3f982cf4SFabien Sanglard Component* key,
807*3f982cf4SFabien Sanglard Component* value) {
808*3f982cf4SFabien Sanglard return DoExtractQueryKeyValue(url, query, key, value);
809*3f982cf4SFabien Sanglard }
810*3f982cf4SFabien Sanglard
ParseAuthority(const char * spec,const Component & auth,Component * username,Component * password,Component * hostname,Component * port_num)811*3f982cf4SFabien Sanglard void ParseAuthority(const char* spec,
812*3f982cf4SFabien Sanglard const Component& auth,
813*3f982cf4SFabien Sanglard Component* username,
814*3f982cf4SFabien Sanglard Component* password,
815*3f982cf4SFabien Sanglard Component* hostname,
816*3f982cf4SFabien Sanglard Component* port_num) {
817*3f982cf4SFabien Sanglard DoParseAuthority(spec, auth, username, password, hostname, port_num);
818*3f982cf4SFabien Sanglard }
819*3f982cf4SFabien Sanglard
ParsePort(const char * url,const Component & port)820*3f982cf4SFabien Sanglard int ParsePort(const char* url, const Component& port) {
821*3f982cf4SFabien Sanglard return DoParsePort(url, port);
822*3f982cf4SFabien Sanglard }
823*3f982cf4SFabien Sanglard
ParseStandardURL(const char * url,int url_len,Parsed * parsed)824*3f982cf4SFabien Sanglard void ParseStandardURL(const char* url, int url_len, Parsed* parsed) {
825*3f982cf4SFabien Sanglard DoParseStandardURL(url, url_len, parsed);
826*3f982cf4SFabien Sanglard }
827*3f982cf4SFabien Sanglard
ParsePathURL(const char * url,int url_len,bool trim_path_end,Parsed * parsed)828*3f982cf4SFabien Sanglard void ParsePathURL(const char* url,
829*3f982cf4SFabien Sanglard int url_len,
830*3f982cf4SFabien Sanglard bool trim_path_end,
831*3f982cf4SFabien Sanglard Parsed* parsed) {
832*3f982cf4SFabien Sanglard DoParsePathURL(url, url_len, trim_path_end, parsed);
833*3f982cf4SFabien Sanglard }
834*3f982cf4SFabien Sanglard
ParseFileSystemURL(const char * url,int url_len,Parsed * parsed)835*3f982cf4SFabien Sanglard void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed) {
836*3f982cf4SFabien Sanglard DoParseFileSystemURL(url, url_len, parsed);
837*3f982cf4SFabien Sanglard }
838*3f982cf4SFabien Sanglard
ParseMailtoURL(const char * url,int url_len,Parsed * parsed)839*3f982cf4SFabien Sanglard void ParseMailtoURL(const char* url, int url_len, Parsed* parsed) {
840*3f982cf4SFabien Sanglard DoParseMailtoURL(url, url_len, parsed);
841*3f982cf4SFabien Sanglard }
842*3f982cf4SFabien Sanglard
ParsePathInternal(const char * spec,const Component & path,Component * filepath,Component * query,Component * ref)843*3f982cf4SFabien Sanglard void ParsePathInternal(const char* spec,
844*3f982cf4SFabien Sanglard const Component& path,
845*3f982cf4SFabien Sanglard Component* filepath,
846*3f982cf4SFabien Sanglard Component* query,
847*3f982cf4SFabien Sanglard Component* ref) {
848*3f982cf4SFabien Sanglard ParsePath(spec, path, filepath, query, ref);
849*3f982cf4SFabien Sanglard }
850*3f982cf4SFabien Sanglard
ParseAfterScheme(const char * spec,int spec_len,int after_scheme,Parsed * parsed)851*3f982cf4SFabien Sanglard void ParseAfterScheme(const char* spec,
852*3f982cf4SFabien Sanglard int spec_len,
853*3f982cf4SFabien Sanglard int after_scheme,
854*3f982cf4SFabien Sanglard Parsed* parsed) {
855*3f982cf4SFabien Sanglard DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
856*3f982cf4SFabien Sanglard }
857*3f982cf4SFabien Sanglard
858*3f982cf4SFabien Sanglard } // namespace openscreen
859