xref: /aosp_15_r20/external/openscreen/third_party/mozilla/url_parse.cc (revision 3f982cf4871df8771c9d4abe6e9a6f8d829b2736)
1*3f982cf4SFabien Sanglard /* Based on nsURLParsers.cc from Mozilla
2*3f982cf4SFabien Sanglard  * -------------------------------------
3*3f982cf4SFabien Sanglard  * The contents of this file are subject to the Mozilla Public License Version
4*3f982cf4SFabien Sanglard  * 1.1 (the "License"); you may not use this file except in compliance with
5*3f982cf4SFabien Sanglard  * the License. You may obtain a copy of the License at
6*3f982cf4SFabien Sanglard  * http://www.mozilla.org/MPL/
7*3f982cf4SFabien Sanglard  *
8*3f982cf4SFabien Sanglard  * Software distributed under the License is distributed on an "AS IS" basis,
9*3f982cf4SFabien Sanglard  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10*3f982cf4SFabien Sanglard  * for the specific language governing rights and limitations under the
11*3f982cf4SFabien Sanglard  * License.
12*3f982cf4SFabien Sanglard  *
13*3f982cf4SFabien Sanglard  * The Original Code is mozilla.org code.
14*3f982cf4SFabien Sanglard  *
15*3f982cf4SFabien Sanglard  * The Initial Developer of the Original Code is
16*3f982cf4SFabien Sanglard  * Netscape Communications Corporation.
17*3f982cf4SFabien Sanglard  * Portions created by the Initial Developer are Copyright (C) 1998
18*3f982cf4SFabien Sanglard  * the Initial Developer. All Rights Reserved.
19*3f982cf4SFabien Sanglard  *
20*3f982cf4SFabien Sanglard  * Contributor(s):
21*3f982cf4SFabien Sanglard  *   Darin Fisher (original author)
22*3f982cf4SFabien Sanglard  *
23*3f982cf4SFabien Sanglard  * Alternatively, the contents of this file may be used under the terms of
24*3f982cf4SFabien Sanglard  * either the GNU General Public License Version 2 or later (the "GPL"), or
25*3f982cf4SFabien Sanglard  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26*3f982cf4SFabien Sanglard  * in which case the provisions of the GPL or the LGPL are applicable instead
27*3f982cf4SFabien Sanglard  * of those above. If you wish to allow use of your version of this file only
28*3f982cf4SFabien Sanglard  * under the terms of either the GPL or the LGPL, and not to allow others to
29*3f982cf4SFabien Sanglard  * use your version of this file under the terms of the MPL, indicate your
30*3f982cf4SFabien Sanglard  * decision by deleting the provisions above and replace them with the notice
31*3f982cf4SFabien Sanglard  * and other provisions required by the GPL or the LGPL. If you do not delete
32*3f982cf4SFabien Sanglard  * the provisions above, a recipient may use your version of this file under
33*3f982cf4SFabien Sanglard  * the terms of any one of the MPL, the GPL or the LGPL.
34*3f982cf4SFabien Sanglard  *
35*3f982cf4SFabien Sanglard  * ***** END LICENSE BLOCK ***** */
36*3f982cf4SFabien Sanglard 
37*3f982cf4SFabien Sanglard #include "third_party/mozilla/url_parse.h"
38*3f982cf4SFabien Sanglard 
39*3f982cf4SFabien Sanglard #include <assert.h>
40*3f982cf4SFabien Sanglard #include <ctype.h>
41*3f982cf4SFabien Sanglard #include <stdlib.h>
42*3f982cf4SFabien Sanglard 
43*3f982cf4SFabien Sanglard #include "third_party/mozilla/url_parse_internal.h"
44*3f982cf4SFabien Sanglard 
45*3f982cf4SFabien Sanglard namespace openscreen {
46*3f982cf4SFabien Sanglard namespace {
47*3f982cf4SFabien Sanglard 
// Reports whether |ch| is an ASCII decimal digit ('0' through '9'), i.e. a
// character that may appear in a port number.
bool IsPortDigit(char ch) {
  return '0' <= ch && ch <= '9';
}
52*3f982cf4SFabien Sanglard 
53*3f982cf4SFabien Sanglard // Returns the offset of the next authority terminator in the input starting
54*3f982cf4SFabien Sanglard // from start_offset. If no terminator is found, the return value will be equal
55*3f982cf4SFabien Sanglard // to spec_len.
FindNextAuthorityTerminator(const char * spec,int start_offset,int spec_len)56*3f982cf4SFabien Sanglard int FindNextAuthorityTerminator(const char* spec,
57*3f982cf4SFabien Sanglard                                 int start_offset,
58*3f982cf4SFabien Sanglard                                 int spec_len) {
59*3f982cf4SFabien Sanglard   for (int i = start_offset; i < spec_len; i++) {
60*3f982cf4SFabien Sanglard     if (IsAuthorityTerminator(spec[i]))
61*3f982cf4SFabien Sanglard       return i;
62*3f982cf4SFabien Sanglard   }
63*3f982cf4SFabien Sanglard   return spec_len;  // Not found.
64*3f982cf4SFabien Sanglard }
65*3f982cf4SFabien Sanglard 
ParseUserInfo(const char * spec,const Component & user,Component * username,Component * password)66*3f982cf4SFabien Sanglard void ParseUserInfo(const char* spec,
67*3f982cf4SFabien Sanglard                    const Component& user,
68*3f982cf4SFabien Sanglard                    Component* username,
69*3f982cf4SFabien Sanglard                    Component* password) {
70*3f982cf4SFabien Sanglard   // Find the first colon in the user section, which separates the username and
71*3f982cf4SFabien Sanglard   // password.
72*3f982cf4SFabien Sanglard   int colon_offset = 0;
73*3f982cf4SFabien Sanglard   while (colon_offset < user.len && spec[user.begin + colon_offset] != ':')
74*3f982cf4SFabien Sanglard     colon_offset++;
75*3f982cf4SFabien Sanglard 
76*3f982cf4SFabien Sanglard   if (colon_offset < user.len) {
77*3f982cf4SFabien Sanglard     // Found separator: <username>:<password>
78*3f982cf4SFabien Sanglard     *username = Component(user.begin, colon_offset);
79*3f982cf4SFabien Sanglard     *password = MakeRange(user.begin + colon_offset + 1, user.begin + user.len);
80*3f982cf4SFabien Sanglard   } else {
81*3f982cf4SFabien Sanglard     // No separator, treat everything as the username
82*3f982cf4SFabien Sanglard     *username = user;
83*3f982cf4SFabien Sanglard     *password = Component();
84*3f982cf4SFabien Sanglard   }
85*3f982cf4SFabien Sanglard }
86*3f982cf4SFabien Sanglard 
ParseServerInfo(const char * spec,const Component & serverinfo,Component * hostname,Component * port_num)87*3f982cf4SFabien Sanglard void ParseServerInfo(const char* spec,
88*3f982cf4SFabien Sanglard                      const Component& serverinfo,
89*3f982cf4SFabien Sanglard                      Component* hostname,
90*3f982cf4SFabien Sanglard                      Component* port_num) {
91*3f982cf4SFabien Sanglard   if (serverinfo.len == 0) {
92*3f982cf4SFabien Sanglard     // No server info, host name is empty.
93*3f982cf4SFabien Sanglard     hostname->reset();
94*3f982cf4SFabien Sanglard     port_num->reset();
95*3f982cf4SFabien Sanglard     return;
96*3f982cf4SFabien Sanglard   }
97*3f982cf4SFabien Sanglard 
98*3f982cf4SFabien Sanglard   // If the host starts with a left-bracket, assume the entire host is an
99*3f982cf4SFabien Sanglard   // IPv6 literal.  Otherwise, assume none of the host is an IPv6 literal.
100*3f982cf4SFabien Sanglard   // This assumption will be overridden if we find a right-bracket.
101*3f982cf4SFabien Sanglard   //
102*3f982cf4SFabien Sanglard   // Our IPv6 address canonicalization code requires both brackets to exist,
103*3f982cf4SFabien Sanglard   // but the ability to locate an incomplete address can still be useful.
104*3f982cf4SFabien Sanglard   int ipv6_terminator = spec[serverinfo.begin] == '[' ? serverinfo.end() : -1;
105*3f982cf4SFabien Sanglard   int colon = -1;
106*3f982cf4SFabien Sanglard 
107*3f982cf4SFabien Sanglard   // Find the last right-bracket, and the last colon.
108*3f982cf4SFabien Sanglard   for (int i = serverinfo.begin; i < serverinfo.end(); i++) {
109*3f982cf4SFabien Sanglard     switch (spec[i]) {
110*3f982cf4SFabien Sanglard       case ']':
111*3f982cf4SFabien Sanglard         ipv6_terminator = i;
112*3f982cf4SFabien Sanglard         break;
113*3f982cf4SFabien Sanglard       case ':':
114*3f982cf4SFabien Sanglard         colon = i;
115*3f982cf4SFabien Sanglard         break;
116*3f982cf4SFabien Sanglard     }
117*3f982cf4SFabien Sanglard   }
118*3f982cf4SFabien Sanglard 
119*3f982cf4SFabien Sanglard   if (colon > ipv6_terminator) {
120*3f982cf4SFabien Sanglard     // Found a port number: <hostname>:<port>
121*3f982cf4SFabien Sanglard     *hostname = MakeRange(serverinfo.begin, colon);
122*3f982cf4SFabien Sanglard     if (hostname->len == 0)
123*3f982cf4SFabien Sanglard       hostname->reset();
124*3f982cf4SFabien Sanglard     *port_num = MakeRange(colon + 1, serverinfo.end());
125*3f982cf4SFabien Sanglard   } else {
126*3f982cf4SFabien Sanglard     // No port: <hostname>
127*3f982cf4SFabien Sanglard     *hostname = serverinfo;
128*3f982cf4SFabien Sanglard     port_num->reset();
129*3f982cf4SFabien Sanglard   }
130*3f982cf4SFabien Sanglard }
131*3f982cf4SFabien Sanglard 
132*3f982cf4SFabien Sanglard // Given an already-identified auth section, breaks it into its consituent
133*3f982cf4SFabien Sanglard // parts. The port number will be parsed and the resulting integer will be
134*3f982cf4SFabien Sanglard // filled into the given *port variable, or -1 if there is no port number or it
135*3f982cf4SFabien Sanglard // is invalid.
DoParseAuthority(const char * spec,const Component & auth,Component * username,Component * password,Component * hostname,Component * port_num)136*3f982cf4SFabien Sanglard void DoParseAuthority(const char* spec,
137*3f982cf4SFabien Sanglard                       const Component& auth,
138*3f982cf4SFabien Sanglard                       Component* username,
139*3f982cf4SFabien Sanglard                       Component* password,
140*3f982cf4SFabien Sanglard                       Component* hostname,
141*3f982cf4SFabien Sanglard                       Component* port_num) {
142*3f982cf4SFabien Sanglard   assert(auth.is_valid());
143*3f982cf4SFabien Sanglard   if (auth.len == 0) {
144*3f982cf4SFabien Sanglard     username->reset();
145*3f982cf4SFabien Sanglard     password->reset();
146*3f982cf4SFabien Sanglard     hostname->reset();
147*3f982cf4SFabien Sanglard     port_num->reset();
148*3f982cf4SFabien Sanglard     return;
149*3f982cf4SFabien Sanglard   }
150*3f982cf4SFabien Sanglard 
151*3f982cf4SFabien Sanglard   // Search backwards for @, which is the separator between the user info and
152*3f982cf4SFabien Sanglard   // the server info.
153*3f982cf4SFabien Sanglard   int i = auth.begin + auth.len - 1;
154*3f982cf4SFabien Sanglard   while (i > auth.begin && spec[i] != '@')
155*3f982cf4SFabien Sanglard     i--;
156*3f982cf4SFabien Sanglard 
157*3f982cf4SFabien Sanglard   if (spec[i] == '@') {
158*3f982cf4SFabien Sanglard     // Found user info: <user-info>@<server-info>
159*3f982cf4SFabien Sanglard     ParseUserInfo(spec, Component(auth.begin, i - auth.begin), username,
160*3f982cf4SFabien Sanglard                   password);
161*3f982cf4SFabien Sanglard     ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len), hostname,
162*3f982cf4SFabien Sanglard                     port_num);
163*3f982cf4SFabien Sanglard   } else {
164*3f982cf4SFabien Sanglard     // No user info, everything is server info.
165*3f982cf4SFabien Sanglard     username->reset();
166*3f982cf4SFabien Sanglard     password->reset();
167*3f982cf4SFabien Sanglard     ParseServerInfo(spec, auth, hostname, port_num);
168*3f982cf4SFabien Sanglard   }
169*3f982cf4SFabien Sanglard }
170*3f982cf4SFabien Sanglard 
FindQueryAndRefParts(const char * spec,const Component & path,int * query_separator,int * ref_separator)171*3f982cf4SFabien Sanglard inline void FindQueryAndRefParts(const char* spec,
172*3f982cf4SFabien Sanglard                                  const Component& path,
173*3f982cf4SFabien Sanglard                                  int* query_separator,
174*3f982cf4SFabien Sanglard                                  int* ref_separator) {
175*3f982cf4SFabien Sanglard   int path_end = path.begin + path.len;
176*3f982cf4SFabien Sanglard   for (int i = path.begin; i < path_end; i++) {
177*3f982cf4SFabien Sanglard     switch (spec[i]) {
178*3f982cf4SFabien Sanglard       case '?':
179*3f982cf4SFabien Sanglard         // Only match the query string if it precedes the reference fragment
180*3f982cf4SFabien Sanglard         // and when we haven't found one already.
181*3f982cf4SFabien Sanglard         if (*query_separator < 0)
182*3f982cf4SFabien Sanglard           *query_separator = i;
183*3f982cf4SFabien Sanglard         break;
184*3f982cf4SFabien Sanglard       case '#':
185*3f982cf4SFabien Sanglard         // Record the first # sign only.
186*3f982cf4SFabien Sanglard         if (*ref_separator < 0) {
187*3f982cf4SFabien Sanglard           *ref_separator = i;
188*3f982cf4SFabien Sanglard           return;
189*3f982cf4SFabien Sanglard         }
190*3f982cf4SFabien Sanglard         break;
191*3f982cf4SFabien Sanglard     }
192*3f982cf4SFabien Sanglard   }
193*3f982cf4SFabien Sanglard }
194*3f982cf4SFabien Sanglard 
ParsePath(const char * spec,const Component & path,Component * filepath,Component * query,Component * ref)195*3f982cf4SFabien Sanglard void ParsePath(const char* spec,
196*3f982cf4SFabien Sanglard                const Component& path,
197*3f982cf4SFabien Sanglard                Component* filepath,
198*3f982cf4SFabien Sanglard                Component* query,
199*3f982cf4SFabien Sanglard                Component* ref) {
200*3f982cf4SFabien Sanglard   // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<ref>
201*3f982cf4SFabien Sanglard 
202*3f982cf4SFabien Sanglard   // Special case when there is no path.
203*3f982cf4SFabien Sanglard   if (path.len == -1) {
204*3f982cf4SFabien Sanglard     filepath->reset();
205*3f982cf4SFabien Sanglard     query->reset();
206*3f982cf4SFabien Sanglard     ref->reset();
207*3f982cf4SFabien Sanglard     return;
208*3f982cf4SFabien Sanglard   }
209*3f982cf4SFabien Sanglard   assert(path.len > 0);
210*3f982cf4SFabien Sanglard 
211*3f982cf4SFabien Sanglard   // Search for first occurrence of either ? or #.
212*3f982cf4SFabien Sanglard   int query_separator = -1;  // Index of the '?'
213*3f982cf4SFabien Sanglard   int ref_separator = -1;    // Index of the '#'
214*3f982cf4SFabien Sanglard   FindQueryAndRefParts(spec, path, &query_separator, &ref_separator);
215*3f982cf4SFabien Sanglard 
216*3f982cf4SFabien Sanglard   // Markers pointing to the character after each of these corresponding
217*3f982cf4SFabien Sanglard   // components. The code below words from the end back to the beginning,
218*3f982cf4SFabien Sanglard   // and will update these indices as it finds components that exist.
219*3f982cf4SFabien Sanglard   int file_end, query_end;
220*3f982cf4SFabien Sanglard 
221*3f982cf4SFabien Sanglard   // Ref fragment: from the # to the end of the path.
222*3f982cf4SFabien Sanglard   int path_end = path.begin + path.len;
223*3f982cf4SFabien Sanglard   if (ref_separator >= 0) {
224*3f982cf4SFabien Sanglard     file_end = query_end = ref_separator;
225*3f982cf4SFabien Sanglard     *ref = MakeRange(ref_separator + 1, path_end);
226*3f982cf4SFabien Sanglard   } else {
227*3f982cf4SFabien Sanglard     file_end = query_end = path_end;
228*3f982cf4SFabien Sanglard     ref->reset();
229*3f982cf4SFabien Sanglard   }
230*3f982cf4SFabien Sanglard 
231*3f982cf4SFabien Sanglard   // Query fragment: everything from the ? to the next boundary (either the end
232*3f982cf4SFabien Sanglard   // of the path or the ref fragment).
233*3f982cf4SFabien Sanglard   if (query_separator >= 0) {
234*3f982cf4SFabien Sanglard     file_end = query_separator;
235*3f982cf4SFabien Sanglard     *query = MakeRange(query_separator + 1, query_end);
236*3f982cf4SFabien Sanglard   } else {
237*3f982cf4SFabien Sanglard     query->reset();
238*3f982cf4SFabien Sanglard   }
239*3f982cf4SFabien Sanglard 
240*3f982cf4SFabien Sanglard   // File path: treat an empty file path as no file path.
241*3f982cf4SFabien Sanglard   if (file_end != path.begin)
242*3f982cf4SFabien Sanglard     *filepath = MakeRange(path.begin, file_end);
243*3f982cf4SFabien Sanglard   else
244*3f982cf4SFabien Sanglard     filepath->reset();
245*3f982cf4SFabien Sanglard }
246*3f982cf4SFabien Sanglard 
DoExtractScheme(const char * url,int url_len,Component * scheme)247*3f982cf4SFabien Sanglard bool DoExtractScheme(const char* url, int url_len, Component* scheme) {
248*3f982cf4SFabien Sanglard   // Skip leading whitespace and control characters.
249*3f982cf4SFabien Sanglard   int begin = 0;
250*3f982cf4SFabien Sanglard   while (begin < url_len && ShouldTrimFromURL(url[begin]))
251*3f982cf4SFabien Sanglard     begin++;
252*3f982cf4SFabien Sanglard   if (begin == url_len)
253*3f982cf4SFabien Sanglard     return false;  // Input is empty or all whitespace.
254*3f982cf4SFabien Sanglard 
255*3f982cf4SFabien Sanglard   // Find the first colon character.
256*3f982cf4SFabien Sanglard   for (int i = begin; i < url_len; i++) {
257*3f982cf4SFabien Sanglard     if (url[i] == ':') {
258*3f982cf4SFabien Sanglard       *scheme = MakeRange(begin, i);
259*3f982cf4SFabien Sanglard       return true;
260*3f982cf4SFabien Sanglard     }
261*3f982cf4SFabien Sanglard   }
262*3f982cf4SFabien Sanglard   return false;  // No colon found: no scheme
263*3f982cf4SFabien Sanglard }
264*3f982cf4SFabien Sanglard 
265*3f982cf4SFabien Sanglard // Fills in all members of the Parsed structure except for the scheme.
266*3f982cf4SFabien Sanglard //
267*3f982cf4SFabien Sanglard // |spec| is the full spec being parsed, of length |spec_len|.
268*3f982cf4SFabien Sanglard // |after_scheme| is the character immediately following the scheme (after the
269*3f982cf4SFabien Sanglard //   colon) where we'll begin parsing.
270*3f982cf4SFabien Sanglard //
271*3f982cf4SFabien Sanglard // Compatability data points. I list "host", "path" extracted:
272*3f982cf4SFabien Sanglard // Input                IE6             Firefox                Us
273*3f982cf4SFabien Sanglard // -----                --------------  --------------         --------------
274*3f982cf4SFabien Sanglard // http://foo.com/      "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
275*3f982cf4SFabien Sanglard // http:foo.com/        "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
276*3f982cf4SFabien Sanglard // http:/foo.com/       fail(*)         "foo.com", "/"         "foo.com", "/"
277*3f982cf4SFabien Sanglard // http:\foo.com/       fail(*)         "\foo.com", "/"(fail)  "foo.com", "/"
278*3f982cf4SFabien Sanglard // http:////foo.com/    "foo.com", "/"  "foo.com", "/"         "foo.com", "/"
279*3f982cf4SFabien Sanglard //
280*3f982cf4SFabien Sanglard // (*) Interestingly, although IE fails to load these URLs, its history
281*3f982cf4SFabien Sanglard // canonicalizer handles them, meaning if you've been to the corresponding
282*3f982cf4SFabien Sanglard // "http://foo.com/" link, it will be colored.
DoParseAfterScheme(const char * spec,int spec_len,int after_scheme,Parsed * parsed)283*3f982cf4SFabien Sanglard void DoParseAfterScheme(const char* spec,
284*3f982cf4SFabien Sanglard                         int spec_len,
285*3f982cf4SFabien Sanglard                         int after_scheme,
286*3f982cf4SFabien Sanglard                         Parsed* parsed) {
287*3f982cf4SFabien Sanglard   int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
288*3f982cf4SFabien Sanglard   int after_slashes = after_scheme + num_slashes;
289*3f982cf4SFabien Sanglard 
290*3f982cf4SFabien Sanglard   // First split into two main parts, the authority (username, password, host,
291*3f982cf4SFabien Sanglard   // and port) and the full path (path, query, and reference).
292*3f982cf4SFabien Sanglard   Component authority;
293*3f982cf4SFabien Sanglard   Component full_path;
294*3f982cf4SFabien Sanglard 
295*3f982cf4SFabien Sanglard   // Found "//<some data>", looks like an authority section. Treat everything
296*3f982cf4SFabien Sanglard   // from there to the next slash (or end of spec) to be the authority. Note
297*3f982cf4SFabien Sanglard   // that we ignore the number of slashes and treat it as the authority.
298*3f982cf4SFabien Sanglard   int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len);
299*3f982cf4SFabien Sanglard   authority = Component(after_slashes, end_auth - after_slashes);
300*3f982cf4SFabien Sanglard 
301*3f982cf4SFabien Sanglard   if (end_auth == spec_len)  // No beginning of path found.
302*3f982cf4SFabien Sanglard     full_path = Component();
303*3f982cf4SFabien Sanglard   else  // Everything starting from the slash to the end is the path.
304*3f982cf4SFabien Sanglard     full_path = Component(end_auth, spec_len - end_auth);
305*3f982cf4SFabien Sanglard 
306*3f982cf4SFabien Sanglard   // Now parse those two sub-parts.
307*3f982cf4SFabien Sanglard   DoParseAuthority(spec, authority, &parsed->username, &parsed->password,
308*3f982cf4SFabien Sanglard                    &parsed->host, &parsed->port);
309*3f982cf4SFabien Sanglard   ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref);
310*3f982cf4SFabien Sanglard }
311*3f982cf4SFabien Sanglard 
312*3f982cf4SFabien Sanglard // The main parsing function for standard URLs. Standard URLs have a scheme,
313*3f982cf4SFabien Sanglard // host, path, etc.
DoParseStandardURL(const char * spec,int spec_len,Parsed * parsed)314*3f982cf4SFabien Sanglard void DoParseStandardURL(const char* spec, int spec_len, Parsed* parsed) {
315*3f982cf4SFabien Sanglard   assert(spec_len >= 0);
316*3f982cf4SFabien Sanglard 
317*3f982cf4SFabien Sanglard   // Strip leading & trailing spaces and control characters.
318*3f982cf4SFabien Sanglard   int begin = 0;
319*3f982cf4SFabien Sanglard   TrimURL(spec, &begin, &spec_len);
320*3f982cf4SFabien Sanglard 
321*3f982cf4SFabien Sanglard   int after_scheme;
322*3f982cf4SFabien Sanglard   if (DoExtractScheme(spec, spec_len, &parsed->scheme)) {
323*3f982cf4SFabien Sanglard     after_scheme = parsed->scheme.end() + 1;  // Skip past the colon.
324*3f982cf4SFabien Sanglard   } else {
325*3f982cf4SFabien Sanglard     // Say there's no scheme when there is no colon. We could also say that
326*3f982cf4SFabien Sanglard     // everything is the scheme. Both would produce an invalid URL, but this way
327*3f982cf4SFabien Sanglard     // seems less wrong in more cases.
328*3f982cf4SFabien Sanglard     parsed->scheme.reset();
329*3f982cf4SFabien Sanglard     after_scheme = begin;
330*3f982cf4SFabien Sanglard   }
331*3f982cf4SFabien Sanglard   DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
332*3f982cf4SFabien Sanglard }
333*3f982cf4SFabien Sanglard 
DoParseFileSystemURL(const char * spec,int spec_len,Parsed * parsed)334*3f982cf4SFabien Sanglard void DoParseFileSystemURL(const char* spec, int spec_len, Parsed* parsed) {
335*3f982cf4SFabien Sanglard   assert(spec_len >= 0);
336*3f982cf4SFabien Sanglard 
337*3f982cf4SFabien Sanglard   // Get the unused parts of the URL out of the way.
338*3f982cf4SFabien Sanglard   parsed->username.reset();
339*3f982cf4SFabien Sanglard   parsed->password.reset();
340*3f982cf4SFabien Sanglard   parsed->host.reset();
341*3f982cf4SFabien Sanglard   parsed->port.reset();
342*3f982cf4SFabien Sanglard   parsed->path.reset();          // May use this; reset for convenience.
343*3f982cf4SFabien Sanglard   parsed->ref.reset();           // May use this; reset for convenience.
344*3f982cf4SFabien Sanglard   parsed->query.reset();         // May use this; reset for convenience.
345*3f982cf4SFabien Sanglard   parsed->clear_inner_parsed();  // May use this; reset for convenience.
346*3f982cf4SFabien Sanglard 
347*3f982cf4SFabien Sanglard   // Strip leading & trailing spaces and control characters.
348*3f982cf4SFabien Sanglard   int begin = 0;
349*3f982cf4SFabien Sanglard   TrimURL(spec, &begin, &spec_len);
350*3f982cf4SFabien Sanglard 
351*3f982cf4SFabien Sanglard   // Handle empty specs or ones that contain only whitespace or control chars.
352*3f982cf4SFabien Sanglard   if (begin == spec_len) {
353*3f982cf4SFabien Sanglard     parsed->scheme.reset();
354*3f982cf4SFabien Sanglard     return;
355*3f982cf4SFabien Sanglard   }
356*3f982cf4SFabien Sanglard 
357*3f982cf4SFabien Sanglard   int inner_start = -1;
358*3f982cf4SFabien Sanglard 
359*3f982cf4SFabien Sanglard   // Extract the scheme.  We also handle the case where there is no scheme.
360*3f982cf4SFabien Sanglard   if (DoExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
361*3f982cf4SFabien Sanglard     // Offset the results since we gave ExtractScheme a substring.
362*3f982cf4SFabien Sanglard     parsed->scheme.begin += begin;
363*3f982cf4SFabien Sanglard 
364*3f982cf4SFabien Sanglard     if (parsed->scheme.end() == spec_len - 1)
365*3f982cf4SFabien Sanglard       return;
366*3f982cf4SFabien Sanglard 
367*3f982cf4SFabien Sanglard     inner_start = parsed->scheme.end() + 1;
368*3f982cf4SFabien Sanglard   } else {
369*3f982cf4SFabien Sanglard     // No scheme found; that's not valid for filesystem URLs.
370*3f982cf4SFabien Sanglard     parsed->scheme.reset();
371*3f982cf4SFabien Sanglard     return;
372*3f982cf4SFabien Sanglard   }
373*3f982cf4SFabien Sanglard 
374*3f982cf4SFabien Sanglard   Component inner_scheme;
375*3f982cf4SFabien Sanglard   const char* inner_spec = &spec[inner_start];
376*3f982cf4SFabien Sanglard   int inner_spec_len = spec_len - inner_start;
377*3f982cf4SFabien Sanglard 
378*3f982cf4SFabien Sanglard   if (DoExtractScheme(inner_spec, inner_spec_len, &inner_scheme)) {
379*3f982cf4SFabien Sanglard     // Offset the results since we gave ExtractScheme a substring.
380*3f982cf4SFabien Sanglard     inner_scheme.begin += inner_start;
381*3f982cf4SFabien Sanglard 
382*3f982cf4SFabien Sanglard     if (inner_scheme.end() == spec_len - 1)
383*3f982cf4SFabien Sanglard       return;
384*3f982cf4SFabien Sanglard   } else {
385*3f982cf4SFabien Sanglard     // No scheme found; that's not valid for filesystem URLs.
386*3f982cf4SFabien Sanglard     // The best we can do is return "filesystem://".
387*3f982cf4SFabien Sanglard     return;
388*3f982cf4SFabien Sanglard   }
389*3f982cf4SFabien Sanglard 
390*3f982cf4SFabien Sanglard   Parsed inner_parsed;
391*3f982cf4SFabien Sanglard 
392*3f982cf4SFabien Sanglard   if (CompareSchemeComponent(spec, inner_scheme, kFileScheme)) {
393*3f982cf4SFabien Sanglard     // File URLs are special.
394*3f982cf4SFabien Sanglard     ParseFileURL(inner_spec, inner_spec_len, &inner_parsed);
395*3f982cf4SFabien Sanglard   } else if (CompareSchemeComponent(spec, inner_scheme, kFileSystemScheme)) {
396*3f982cf4SFabien Sanglard     // Filesystem URLs don't nest.
397*3f982cf4SFabien Sanglard     return;
398*3f982cf4SFabien Sanglard   } else if (IsStandard(spec, inner_scheme)) {
399*3f982cf4SFabien Sanglard     // All "normal" URLs.
400*3f982cf4SFabien Sanglard     DoParseStandardURL(inner_spec, inner_spec_len, &inner_parsed);
401*3f982cf4SFabien Sanglard   } else {
402*3f982cf4SFabien Sanglard     return;
403*3f982cf4SFabien Sanglard   }
404*3f982cf4SFabien Sanglard 
405*3f982cf4SFabien Sanglard   // All members of inner_parsed need to be offset by inner_start.
406*3f982cf4SFabien Sanglard   // If we had any scheme that supported nesting more than one level deep,
407*3f982cf4SFabien Sanglard   // we'd have to recurse into the inner_parsed's inner_parsed when
408*3f982cf4SFabien Sanglard   // adjusting by inner_start.
409*3f982cf4SFabien Sanglard   inner_parsed.scheme.begin += inner_start;
410*3f982cf4SFabien Sanglard   inner_parsed.username.begin += inner_start;
411*3f982cf4SFabien Sanglard   inner_parsed.password.begin += inner_start;
412*3f982cf4SFabien Sanglard   inner_parsed.host.begin += inner_start;
413*3f982cf4SFabien Sanglard   inner_parsed.port.begin += inner_start;
414*3f982cf4SFabien Sanglard   inner_parsed.query.begin += inner_start;
415*3f982cf4SFabien Sanglard   inner_parsed.ref.begin += inner_start;
416*3f982cf4SFabien Sanglard   inner_parsed.path.begin += inner_start;
417*3f982cf4SFabien Sanglard 
418*3f982cf4SFabien Sanglard   // Query and ref move from inner_parsed to parsed.
419*3f982cf4SFabien Sanglard   parsed->query = inner_parsed.query;
420*3f982cf4SFabien Sanglard   inner_parsed.query.reset();
421*3f982cf4SFabien Sanglard   parsed->ref = inner_parsed.ref;
422*3f982cf4SFabien Sanglard   inner_parsed.ref.reset();
423*3f982cf4SFabien Sanglard 
424*3f982cf4SFabien Sanglard   parsed->set_inner_parsed(inner_parsed);
425*3f982cf4SFabien Sanglard   if (!inner_parsed.scheme.is_valid() || !inner_parsed.path.is_valid() ||
426*3f982cf4SFabien Sanglard       inner_parsed.inner_parsed()) {
427*3f982cf4SFabien Sanglard     return;
428*3f982cf4SFabien Sanglard   }
429*3f982cf4SFabien Sanglard 
430*3f982cf4SFabien Sanglard   // The path in inner_parsed should start with a slash, then have a filesystem
431*3f982cf4SFabien Sanglard   // type followed by a slash.  From the first slash up to but excluding the
432*3f982cf4SFabien Sanglard   // second should be what it keeps; the rest goes to parsed.  If the path ends
433*3f982cf4SFabien Sanglard   // before the second slash, it's still pretty clear what the user meant, so
434*3f982cf4SFabien Sanglard   // we'll let that through.
435*3f982cf4SFabien Sanglard   if (!IsURLSlash(spec[inner_parsed.path.begin])) {
436*3f982cf4SFabien Sanglard     return;
437*3f982cf4SFabien Sanglard   }
438*3f982cf4SFabien Sanglard   int inner_path_end = inner_parsed.path.begin + 1;  // skip the leading slash
439*3f982cf4SFabien Sanglard   while (inner_path_end < spec_len && !IsURLSlash(spec[inner_path_end]))
440*3f982cf4SFabien Sanglard     ++inner_path_end;
441*3f982cf4SFabien Sanglard   parsed->path.begin = inner_path_end;
442*3f982cf4SFabien Sanglard   int new_inner_path_length = inner_path_end - inner_parsed.path.begin;
443*3f982cf4SFabien Sanglard   parsed->path.len = inner_parsed.path.len - new_inner_path_length;
444*3f982cf4SFabien Sanglard   parsed->inner_parsed()->path.len = new_inner_path_length;
445*3f982cf4SFabien Sanglard }
446*3f982cf4SFabien Sanglard 
447*3f982cf4SFabien Sanglard // Initializes a path URL which is merely a scheme followed by a path. Examples
448*3f982cf4SFabien Sanglard // include "about:foo" and "javascript:alert('bar');"
DoParsePathURL(const char * spec,int spec_len,bool trim_path_end,Parsed * parsed)449*3f982cf4SFabien Sanglard void DoParsePathURL(const char* spec,
450*3f982cf4SFabien Sanglard                     int spec_len,
451*3f982cf4SFabien Sanglard                     bool trim_path_end,
452*3f982cf4SFabien Sanglard                     Parsed* parsed) {
453*3f982cf4SFabien Sanglard   // Get the non-path and non-scheme parts of the URL out of the way, we never
454*3f982cf4SFabien Sanglard   // use them.
455*3f982cf4SFabien Sanglard   parsed->username.reset();
456*3f982cf4SFabien Sanglard   parsed->password.reset();
457*3f982cf4SFabien Sanglard   parsed->host.reset();
458*3f982cf4SFabien Sanglard   parsed->port.reset();
459*3f982cf4SFabien Sanglard   parsed->path.reset();
460*3f982cf4SFabien Sanglard   parsed->query.reset();
461*3f982cf4SFabien Sanglard   parsed->ref.reset();
462*3f982cf4SFabien Sanglard 
463*3f982cf4SFabien Sanglard   // Strip leading & trailing spaces and control characters.
464*3f982cf4SFabien Sanglard   int scheme_begin = 0;
465*3f982cf4SFabien Sanglard   TrimURL(spec, &scheme_begin, &spec_len, trim_path_end);
466*3f982cf4SFabien Sanglard 
467*3f982cf4SFabien Sanglard   // Handle empty specs or ones that contain only whitespace or control chars.
468*3f982cf4SFabien Sanglard   if (scheme_begin == spec_len) {
469*3f982cf4SFabien Sanglard     parsed->scheme.reset();
470*3f982cf4SFabien Sanglard     parsed->path.reset();
471*3f982cf4SFabien Sanglard     return;
472*3f982cf4SFabien Sanglard   }
473*3f982cf4SFabien Sanglard 
474*3f982cf4SFabien Sanglard   int path_begin;
475*3f982cf4SFabien Sanglard   // Extract the scheme, with the path being everything following. We also
476*3f982cf4SFabien Sanglard   // handle the case where there is no scheme.
477*3f982cf4SFabien Sanglard   if (ExtractScheme(&spec[scheme_begin], spec_len - scheme_begin,
478*3f982cf4SFabien Sanglard                     &parsed->scheme)) {
479*3f982cf4SFabien Sanglard     // Offset the results since we gave ExtractScheme a substring.
480*3f982cf4SFabien Sanglard     parsed->scheme.begin += scheme_begin;
481*3f982cf4SFabien Sanglard     path_begin = parsed->scheme.end() + 1;
482*3f982cf4SFabien Sanglard   } else {
483*3f982cf4SFabien Sanglard     // No scheme case.
484*3f982cf4SFabien Sanglard     parsed->scheme.reset();
485*3f982cf4SFabien Sanglard     path_begin = scheme_begin;
486*3f982cf4SFabien Sanglard   }
487*3f982cf4SFabien Sanglard 
488*3f982cf4SFabien Sanglard   if (path_begin == spec_len)
489*3f982cf4SFabien Sanglard     return;
490*3f982cf4SFabien Sanglard   assert(path_begin < spec_len);
491*3f982cf4SFabien Sanglard 
492*3f982cf4SFabien Sanglard   ParsePath(spec, MakeRange(path_begin, spec_len), &parsed->path,
493*3f982cf4SFabien Sanglard             &parsed->query, &parsed->ref);
494*3f982cf4SFabien Sanglard }
495*3f982cf4SFabien Sanglard 
DoParseMailtoURL(const char * spec,int spec_len,Parsed * parsed)496*3f982cf4SFabien Sanglard void DoParseMailtoURL(const char* spec, int spec_len, Parsed* parsed) {
497*3f982cf4SFabien Sanglard   assert(spec_len >= 0);
498*3f982cf4SFabien Sanglard 
499*3f982cf4SFabien Sanglard   // Get the non-path and non-scheme parts of the URL out of the way, we never
500*3f982cf4SFabien Sanglard   // use them.
501*3f982cf4SFabien Sanglard   parsed->username.reset();
502*3f982cf4SFabien Sanglard   parsed->password.reset();
503*3f982cf4SFabien Sanglard   parsed->host.reset();
504*3f982cf4SFabien Sanglard   parsed->port.reset();
505*3f982cf4SFabien Sanglard   parsed->ref.reset();
506*3f982cf4SFabien Sanglard   parsed->query.reset();  // May use this; reset for convenience.
507*3f982cf4SFabien Sanglard 
508*3f982cf4SFabien Sanglard   // Strip leading & trailing spaces and control characters.
509*3f982cf4SFabien Sanglard   int begin = 0;
510*3f982cf4SFabien Sanglard   TrimURL(spec, &begin, &spec_len);
511*3f982cf4SFabien Sanglard 
512*3f982cf4SFabien Sanglard   // Handle empty specs or ones that contain only whitespace or control chars.
513*3f982cf4SFabien Sanglard   if (begin == spec_len) {
514*3f982cf4SFabien Sanglard     parsed->scheme.reset();
515*3f982cf4SFabien Sanglard     parsed->path.reset();
516*3f982cf4SFabien Sanglard     return;
517*3f982cf4SFabien Sanglard   }
518*3f982cf4SFabien Sanglard 
519*3f982cf4SFabien Sanglard   int path_begin = -1;
520*3f982cf4SFabien Sanglard   int path_end = -1;
521*3f982cf4SFabien Sanglard 
522*3f982cf4SFabien Sanglard   // Extract the scheme, with the path being everything following. We also
523*3f982cf4SFabien Sanglard   // handle the case where there is no scheme.
524*3f982cf4SFabien Sanglard   if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
525*3f982cf4SFabien Sanglard     // Offset the results since we gave ExtractScheme a substring.
526*3f982cf4SFabien Sanglard     parsed->scheme.begin += begin;
527*3f982cf4SFabien Sanglard 
528*3f982cf4SFabien Sanglard     if (parsed->scheme.end() != spec_len - 1) {
529*3f982cf4SFabien Sanglard       path_begin = parsed->scheme.end() + 1;
530*3f982cf4SFabien Sanglard       path_end = spec_len;
531*3f982cf4SFabien Sanglard     }
532*3f982cf4SFabien Sanglard   } else {
533*3f982cf4SFabien Sanglard     // No scheme found, just path.
534*3f982cf4SFabien Sanglard     parsed->scheme.reset();
535*3f982cf4SFabien Sanglard     path_begin = begin;
536*3f982cf4SFabien Sanglard     path_end = spec_len;
537*3f982cf4SFabien Sanglard   }
538*3f982cf4SFabien Sanglard 
539*3f982cf4SFabien Sanglard   // Split [path_begin, path_end) into a path + query.
540*3f982cf4SFabien Sanglard   for (int i = path_begin; i < path_end; ++i) {
541*3f982cf4SFabien Sanglard     if (spec[i] == '?') {
542*3f982cf4SFabien Sanglard       parsed->query = MakeRange(i + 1, path_end);
543*3f982cf4SFabien Sanglard       path_end = i;
544*3f982cf4SFabien Sanglard       break;
545*3f982cf4SFabien Sanglard     }
546*3f982cf4SFabien Sanglard   }
547*3f982cf4SFabien Sanglard 
548*3f982cf4SFabien Sanglard   // For compatability with the standard URL parser, treat no path as
549*3f982cf4SFabien Sanglard   // -1, rather than having a length of 0
550*3f982cf4SFabien Sanglard   if (path_begin == path_end) {
551*3f982cf4SFabien Sanglard     parsed->path.reset();
552*3f982cf4SFabien Sanglard   } else {
553*3f982cf4SFabien Sanglard     parsed->path = MakeRange(path_begin, path_end);
554*3f982cf4SFabien Sanglard   }
555*3f982cf4SFabien Sanglard }
556*3f982cf4SFabien Sanglard 
557*3f982cf4SFabien Sanglard // Converts a port number in a string to an integer. We'd like to just call
558*3f982cf4SFabien Sanglard // sscanf but our input is not NULL-terminated, which sscanf requires. Instead,
559*3f982cf4SFabien Sanglard // we copy the digits to a small stack buffer (since we know the maximum number
560*3f982cf4SFabien Sanglard // of digits in a valid port number) that we can NULL terminate.
DoParsePort(const char * spec,const Component & component)561*3f982cf4SFabien Sanglard int DoParsePort(const char* spec, const Component& component) {
562*3f982cf4SFabien Sanglard   // Easy success case when there is no port.
563*3f982cf4SFabien Sanglard   const int kMaxDigits = 5;
564*3f982cf4SFabien Sanglard   if (!component.is_nonempty())
565*3f982cf4SFabien Sanglard     return PORT_UNSPECIFIED;
566*3f982cf4SFabien Sanglard 
567*3f982cf4SFabien Sanglard   // Skip over any leading 0s.
568*3f982cf4SFabien Sanglard   Component digits_comp(component.end(), 0);
569*3f982cf4SFabien Sanglard   for (int i = 0; i < component.len; i++) {
570*3f982cf4SFabien Sanglard     if (spec[component.begin + i] != '0') {
571*3f982cf4SFabien Sanglard       digits_comp = MakeRange(component.begin + i, component.end());
572*3f982cf4SFabien Sanglard       break;
573*3f982cf4SFabien Sanglard     }
574*3f982cf4SFabien Sanglard   }
575*3f982cf4SFabien Sanglard   if (digits_comp.len == 0)
576*3f982cf4SFabien Sanglard     return 0;  // All digits were 0.
577*3f982cf4SFabien Sanglard 
578*3f982cf4SFabien Sanglard   // Verify we don't have too many digits (we'll be copying to our buffer so
579*3f982cf4SFabien Sanglard   // we need to double-check).
580*3f982cf4SFabien Sanglard   if (digits_comp.len > kMaxDigits)
581*3f982cf4SFabien Sanglard     return PORT_INVALID;
582*3f982cf4SFabien Sanglard 
583*3f982cf4SFabien Sanglard   // Copy valid digits to the buffer.
584*3f982cf4SFabien Sanglard   char digits[kMaxDigits + 1];  // +1 for null terminator
585*3f982cf4SFabien Sanglard   for (int i = 0; i < digits_comp.len; i++) {
586*3f982cf4SFabien Sanglard     char ch = spec[digits_comp.begin + i];
587*3f982cf4SFabien Sanglard     if (!IsPortDigit(ch)) {
588*3f982cf4SFabien Sanglard       // Invalid port digit, fail.
589*3f982cf4SFabien Sanglard       return PORT_INVALID;
590*3f982cf4SFabien Sanglard     }
591*3f982cf4SFabien Sanglard     digits[i] = static_cast<char>(ch);
592*3f982cf4SFabien Sanglard   }
593*3f982cf4SFabien Sanglard 
594*3f982cf4SFabien Sanglard   // Null-terminate the string and convert to integer. Since we guarantee
595*3f982cf4SFabien Sanglard   // only digits, atoi's lack of error handling is OK.
596*3f982cf4SFabien Sanglard   digits[digits_comp.len] = 0;
597*3f982cf4SFabien Sanglard   int port = atoi(digits);
598*3f982cf4SFabien Sanglard   if (port > 65535)
599*3f982cf4SFabien Sanglard     return PORT_INVALID;  // Out of range.
600*3f982cf4SFabien Sanglard   return port;
601*3f982cf4SFabien Sanglard }
602*3f982cf4SFabien Sanglard 
DoExtractFileName(const char * spec,const Component & path,Component * file_name)603*3f982cf4SFabien Sanglard void DoExtractFileName(const char* spec,
604*3f982cf4SFabien Sanglard                        const Component& path,
605*3f982cf4SFabien Sanglard                        Component* file_name) {
606*3f982cf4SFabien Sanglard   // Handle empty paths: they have no file names.
607*3f982cf4SFabien Sanglard   if (!path.is_nonempty()) {
608*3f982cf4SFabien Sanglard     file_name->reset();
609*3f982cf4SFabien Sanglard     return;
610*3f982cf4SFabien Sanglard   }
611*3f982cf4SFabien Sanglard 
612*3f982cf4SFabien Sanglard   // Extract the filename range from the path which is between
613*3f982cf4SFabien Sanglard   // the last slash and the following semicolon.
614*3f982cf4SFabien Sanglard   int file_end = path.end();
615*3f982cf4SFabien Sanglard   for (int i = path.end() - 1; i >= path.begin; i--) {
616*3f982cf4SFabien Sanglard     if (spec[i] == ';') {
617*3f982cf4SFabien Sanglard       file_end = i;
618*3f982cf4SFabien Sanglard     } else if (IsURLSlash(spec[i])) {
619*3f982cf4SFabien Sanglard       // File name is everything following this character to the end
620*3f982cf4SFabien Sanglard       *file_name = MakeRange(i + 1, file_end);
621*3f982cf4SFabien Sanglard       return;
622*3f982cf4SFabien Sanglard     }
623*3f982cf4SFabien Sanglard   }
624*3f982cf4SFabien Sanglard 
625*3f982cf4SFabien Sanglard   // No slash found, this means the input was degenerate (generally paths
626*3f982cf4SFabien Sanglard   // will start with a slash). Let's call everything the file name.
627*3f982cf4SFabien Sanglard   *file_name = MakeRange(path.begin, file_end);
628*3f982cf4SFabien Sanglard   return;
629*3f982cf4SFabien Sanglard }
630*3f982cf4SFabien Sanglard 
DoExtractQueryKeyValue(const char * spec,Component * query,Component * key,Component * value)631*3f982cf4SFabien Sanglard bool DoExtractQueryKeyValue(const char* spec,
632*3f982cf4SFabien Sanglard                             Component* query,
633*3f982cf4SFabien Sanglard                             Component* key,
634*3f982cf4SFabien Sanglard                             Component* value) {
635*3f982cf4SFabien Sanglard   if (!query->is_nonempty())
636*3f982cf4SFabien Sanglard     return false;
637*3f982cf4SFabien Sanglard 
638*3f982cf4SFabien Sanglard   int start = query->begin;
639*3f982cf4SFabien Sanglard   int cur = start;
640*3f982cf4SFabien Sanglard   int end = query->end();
641*3f982cf4SFabien Sanglard 
642*3f982cf4SFabien Sanglard   // We assume the beginning of the input is the beginning of the "key" and we
643*3f982cf4SFabien Sanglard   // skip to the end of it.
644*3f982cf4SFabien Sanglard   key->begin = cur;
645*3f982cf4SFabien Sanglard   while (cur < end && spec[cur] != '&' && spec[cur] != '=')
646*3f982cf4SFabien Sanglard     cur++;
647*3f982cf4SFabien Sanglard   key->len = cur - key->begin;
648*3f982cf4SFabien Sanglard 
649*3f982cf4SFabien Sanglard   // Skip the separator after the key (if any).
650*3f982cf4SFabien Sanglard   if (cur < end && spec[cur] == '=')
651*3f982cf4SFabien Sanglard     cur++;
652*3f982cf4SFabien Sanglard 
653*3f982cf4SFabien Sanglard   // Find the value part.
654*3f982cf4SFabien Sanglard   value->begin = cur;
655*3f982cf4SFabien Sanglard   while (cur < end && spec[cur] != '&')
656*3f982cf4SFabien Sanglard     cur++;
657*3f982cf4SFabien Sanglard   value->len = cur - value->begin;
658*3f982cf4SFabien Sanglard 
659*3f982cf4SFabien Sanglard   // Finally skip the next separator if any
660*3f982cf4SFabien Sanglard   if (cur < end && spec[cur] == '&')
661*3f982cf4SFabien Sanglard     cur++;
662*3f982cf4SFabien Sanglard 
663*3f982cf4SFabien Sanglard   // Save the new query
664*3f982cf4SFabien Sanglard   *query = MakeRange(cur, end);
665*3f982cf4SFabien Sanglard   return true;
666*3f982cf4SFabien Sanglard }
667*3f982cf4SFabien Sanglard 
668*3f982cf4SFabien Sanglard }  // namespace
669*3f982cf4SFabien Sanglard 
Parsed()670*3f982cf4SFabien Sanglard Parsed::Parsed() : potentially_dangling_markup(false), inner_parsed_(NULL) {}
671*3f982cf4SFabien Sanglard 
// Copy constructor: copies every component and the dangling-markup flag.
// |inner_parsed_| starts out NULL and is then populated through
// set_inner_parsed() when |other| has one (presumably storing a copy rather
// than sharing the pointer -- confirm against set_inner_parsed()).
Parsed::Parsed(const Parsed& other)
    : scheme(other.scheme),
      username(other.username),
      password(other.password),
      host(other.host),
      port(other.port),
      path(other.path),
      query(other.query),
      ref(other.ref),
      potentially_dangling_markup(other.potentially_dangling_markup),
      inner_parsed_(NULL) {
  if (other.inner_parsed_ != NULL) {
    set_inner_parsed(*other.inner_parsed_);
  }
}
686*3f982cf4SFabien Sanglard 
// Copy assignment: mirrors the copy constructor, and additionally clears any
// inner Parsed this object holds when |other| has none.
Parsed& Parsed::operator=(const Parsed& other) {
  // Self-assignment leaves the object untouched.
  if (this == &other)
    return *this;

  scheme = other.scheme;
  username = other.username;
  password = other.password;
  host = other.host;
  port = other.port;
  path = other.path;
  query = other.query;
  ref = other.ref;
  potentially_dangling_markup = other.potentially_dangling_markup;

  if (other.inner_parsed_ == NULL)
    clear_inner_parsed();
  else
    set_inner_parsed(*other.inner_parsed_);

  return *this;
}
705*3f982cf4SFabien Sanglard 
~Parsed()706*3f982cf4SFabien Sanglard Parsed::~Parsed() {
707*3f982cf4SFabien Sanglard   delete inner_parsed_;
708*3f982cf4SFabien Sanglard }
709*3f982cf4SFabien Sanglard 
Length() const710*3f982cf4SFabien Sanglard int Parsed::Length() const {
711*3f982cf4SFabien Sanglard   if (ref.is_valid())
712*3f982cf4SFabien Sanglard     return ref.end();
713*3f982cf4SFabien Sanglard   return CountCharactersBefore(REF, false);
714*3f982cf4SFabien Sanglard }
715*3f982cf4SFabien Sanglard 
CountCharactersBefore(ComponentType type,bool include_delimiter) const716*3f982cf4SFabien Sanglard int Parsed::CountCharactersBefore(ComponentType type,
717*3f982cf4SFabien Sanglard                                   bool include_delimiter) const {
718*3f982cf4SFabien Sanglard   if (type == SCHEME)
719*3f982cf4SFabien Sanglard     return scheme.begin;
720*3f982cf4SFabien Sanglard 
721*3f982cf4SFabien Sanglard   // There will be some characters after the scheme like "://" and we don't
722*3f982cf4SFabien Sanglard   // know how many. Search forwards for the next thing until we find one.
723*3f982cf4SFabien Sanglard   int cur = 0;
724*3f982cf4SFabien Sanglard   if (scheme.is_valid())
725*3f982cf4SFabien Sanglard     cur = scheme.end() + 1;  // Advance over the ':' at the end of the scheme.
726*3f982cf4SFabien Sanglard 
727*3f982cf4SFabien Sanglard   if (username.is_valid()) {
728*3f982cf4SFabien Sanglard     if (type <= USERNAME)
729*3f982cf4SFabien Sanglard       return username.begin;
730*3f982cf4SFabien Sanglard     cur = username.end() + 1;  // Advance over the '@' or ':' at the end.
731*3f982cf4SFabien Sanglard   }
732*3f982cf4SFabien Sanglard 
733*3f982cf4SFabien Sanglard   if (password.is_valid()) {
734*3f982cf4SFabien Sanglard     if (type <= PASSWORD)
735*3f982cf4SFabien Sanglard       return password.begin;
736*3f982cf4SFabien Sanglard     cur = password.end() + 1;  // Advance over the '@' at the end.
737*3f982cf4SFabien Sanglard   }
738*3f982cf4SFabien Sanglard 
739*3f982cf4SFabien Sanglard   if (host.is_valid()) {
740*3f982cf4SFabien Sanglard     if (type <= HOST)
741*3f982cf4SFabien Sanglard       return host.begin;
742*3f982cf4SFabien Sanglard     cur = host.end();
743*3f982cf4SFabien Sanglard   }
744*3f982cf4SFabien Sanglard 
745*3f982cf4SFabien Sanglard   if (port.is_valid()) {
746*3f982cf4SFabien Sanglard     if (type < PORT || (type == PORT && include_delimiter))
747*3f982cf4SFabien Sanglard       return port.begin - 1;  // Back over delimiter.
748*3f982cf4SFabien Sanglard     if (type == PORT)
749*3f982cf4SFabien Sanglard       return port.begin;  // Don't want delimiter counted.
750*3f982cf4SFabien Sanglard     cur = port.end();
751*3f982cf4SFabien Sanglard   }
752*3f982cf4SFabien Sanglard 
753*3f982cf4SFabien Sanglard   if (path.is_valid()) {
754*3f982cf4SFabien Sanglard     if (type <= PATH)
755*3f982cf4SFabien Sanglard       return path.begin;
756*3f982cf4SFabien Sanglard     cur = path.end();
757*3f982cf4SFabien Sanglard   }
758*3f982cf4SFabien Sanglard 
759*3f982cf4SFabien Sanglard   if (query.is_valid()) {
760*3f982cf4SFabien Sanglard     if (type < QUERY || (type == QUERY && include_delimiter))
761*3f982cf4SFabien Sanglard       return query.begin - 1;  // Back over delimiter.
762*3f982cf4SFabien Sanglard     if (type == QUERY)
763*3f982cf4SFabien Sanglard       return query.begin;  // Don't want delimiter counted.
764*3f982cf4SFabien Sanglard     cur = query.end();
765*3f982cf4SFabien Sanglard   }
766*3f982cf4SFabien Sanglard 
767*3f982cf4SFabien Sanglard   if (ref.is_valid()) {
768*3f982cf4SFabien Sanglard     if (type == REF && !include_delimiter)
769*3f982cf4SFabien Sanglard       return ref.begin;  // Back over delimiter.
770*3f982cf4SFabien Sanglard 
771*3f982cf4SFabien Sanglard     // When there is a ref and we get here, the component we wanted was before
772*3f982cf4SFabien Sanglard     // this and not found, so we always know the beginning of the ref is right.
773*3f982cf4SFabien Sanglard     return ref.begin - 1;  // Don't want delimiter counted.
774*3f982cf4SFabien Sanglard   }
775*3f982cf4SFabien Sanglard 
776*3f982cf4SFabien Sanglard   return cur;
777*3f982cf4SFabien Sanglard }
778*3f982cf4SFabien Sanglard 
GetContent() const779*3f982cf4SFabien Sanglard Component Parsed::GetContent() const {
780*3f982cf4SFabien Sanglard   const int begin = CountCharactersBefore(USERNAME, false);
781*3f982cf4SFabien Sanglard   const int len = Length() - begin;
782*3f982cf4SFabien Sanglard   // For compatability with the standard URL parser, we treat no content as
783*3f982cf4SFabien Sanglard   // -1, rather than having a length of 0 (we normally wouldn't care so
784*3f982cf4SFabien Sanglard   // much for these non-standard URLs).
785*3f982cf4SFabien Sanglard   return len ? Component(begin, len) : Component();
786*3f982cf4SFabien Sanglard }
787*3f982cf4SFabien Sanglard 
ExtractScheme(const char * url,int url_len,Component * scheme)788*3f982cf4SFabien Sanglard bool ExtractScheme(const char* url, int url_len, Component* scheme) {
789*3f982cf4SFabien Sanglard   return DoExtractScheme(url, url_len, scheme);
790*3f982cf4SFabien Sanglard }
791*3f982cf4SFabien Sanglard 
792*3f982cf4SFabien Sanglard // This handles everything that may be an authority terminator, including
793*3f982cf4SFabien Sanglard // backslash. For special backslash handling see DoParseAfterScheme.
IsAuthorityTerminator(char ch)794*3f982cf4SFabien Sanglard bool IsAuthorityTerminator(char ch) {
795*3f982cf4SFabien Sanglard   return IsURLSlash(ch) || ch == '?' || ch == '#';
796*3f982cf4SFabien Sanglard }
797*3f982cf4SFabien Sanglard 
ExtractFileName(const char * url,const Component & path,Component * file_name)798*3f982cf4SFabien Sanglard void ExtractFileName(const char* url,
799*3f982cf4SFabien Sanglard                      const Component& path,
800*3f982cf4SFabien Sanglard                      Component* file_name) {
801*3f982cf4SFabien Sanglard   DoExtractFileName(url, path, file_name);
802*3f982cf4SFabien Sanglard }
803*3f982cf4SFabien Sanglard 
ExtractQueryKeyValue(const char * url,Component * query,Component * key,Component * value)804*3f982cf4SFabien Sanglard bool ExtractQueryKeyValue(const char* url,
805*3f982cf4SFabien Sanglard                           Component* query,
806*3f982cf4SFabien Sanglard                           Component* key,
807*3f982cf4SFabien Sanglard                           Component* value) {
808*3f982cf4SFabien Sanglard   return DoExtractQueryKeyValue(url, query, key, value);
809*3f982cf4SFabien Sanglard }
810*3f982cf4SFabien Sanglard 
ParseAuthority(const char * spec,const Component & auth,Component * username,Component * password,Component * hostname,Component * port_num)811*3f982cf4SFabien Sanglard void ParseAuthority(const char* spec,
812*3f982cf4SFabien Sanglard                     const Component& auth,
813*3f982cf4SFabien Sanglard                     Component* username,
814*3f982cf4SFabien Sanglard                     Component* password,
815*3f982cf4SFabien Sanglard                     Component* hostname,
816*3f982cf4SFabien Sanglard                     Component* port_num) {
817*3f982cf4SFabien Sanglard   DoParseAuthority(spec, auth, username, password, hostname, port_num);
818*3f982cf4SFabien Sanglard }
819*3f982cf4SFabien Sanglard 
ParsePort(const char * url,const Component & port)820*3f982cf4SFabien Sanglard int ParsePort(const char* url, const Component& port) {
821*3f982cf4SFabien Sanglard   return DoParsePort(url, port);
822*3f982cf4SFabien Sanglard }
823*3f982cf4SFabien Sanglard 
ParseStandardURL(const char * url,int url_len,Parsed * parsed)824*3f982cf4SFabien Sanglard void ParseStandardURL(const char* url, int url_len, Parsed* parsed) {
825*3f982cf4SFabien Sanglard   DoParseStandardURL(url, url_len, parsed);
826*3f982cf4SFabien Sanglard }
827*3f982cf4SFabien Sanglard 
ParsePathURL(const char * url,int url_len,bool trim_path_end,Parsed * parsed)828*3f982cf4SFabien Sanglard void ParsePathURL(const char* url,
829*3f982cf4SFabien Sanglard                   int url_len,
830*3f982cf4SFabien Sanglard                   bool trim_path_end,
831*3f982cf4SFabien Sanglard                   Parsed* parsed) {
832*3f982cf4SFabien Sanglard   DoParsePathURL(url, url_len, trim_path_end, parsed);
833*3f982cf4SFabien Sanglard }
834*3f982cf4SFabien Sanglard 
ParseFileSystemURL(const char * url,int url_len,Parsed * parsed)835*3f982cf4SFabien Sanglard void ParseFileSystemURL(const char* url, int url_len, Parsed* parsed) {
836*3f982cf4SFabien Sanglard   DoParseFileSystemURL(url, url_len, parsed);
837*3f982cf4SFabien Sanglard }
838*3f982cf4SFabien Sanglard 
ParseMailtoURL(const char * url,int url_len,Parsed * parsed)839*3f982cf4SFabien Sanglard void ParseMailtoURL(const char* url, int url_len, Parsed* parsed) {
840*3f982cf4SFabien Sanglard   DoParseMailtoURL(url, url_len, parsed);
841*3f982cf4SFabien Sanglard }
842*3f982cf4SFabien Sanglard 
ParsePathInternal(const char * spec,const Component & path,Component * filepath,Component * query,Component * ref)843*3f982cf4SFabien Sanglard void ParsePathInternal(const char* spec,
844*3f982cf4SFabien Sanglard                        const Component& path,
845*3f982cf4SFabien Sanglard                        Component* filepath,
846*3f982cf4SFabien Sanglard                        Component* query,
847*3f982cf4SFabien Sanglard                        Component* ref) {
848*3f982cf4SFabien Sanglard   ParsePath(spec, path, filepath, query, ref);
849*3f982cf4SFabien Sanglard }
850*3f982cf4SFabien Sanglard 
ParseAfterScheme(const char * spec,int spec_len,int after_scheme,Parsed * parsed)851*3f982cf4SFabien Sanglard void ParseAfterScheme(const char* spec,
852*3f982cf4SFabien Sanglard                       int spec_len,
853*3f982cf4SFabien Sanglard                       int after_scheme,
854*3f982cf4SFabien Sanglard                       Parsed* parsed) {
855*3f982cf4SFabien Sanglard   DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
856*3f982cf4SFabien Sanglard }
857*3f982cf4SFabien Sanglard 
858*3f982cf4SFabien Sanglard }  // namespace openscreen
859