1*6777b538SAndroid Build Coastguard Worker // Copyright 2013 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker
5*6777b538SAndroid Build Coastguard Worker #include <limits.h>
6*6777b538SAndroid Build Coastguard Worker
7*6777b538SAndroid Build Coastguard Worker #include <optional>
8*6777b538SAndroid Build Coastguard Worker
9*6777b538SAndroid Build Coastguard Worker #include "base/check.h"
10*6777b538SAndroid Build Coastguard Worker #include "base/check_op.h"
11*6777b538SAndroid Build Coastguard Worker #include "url/url_canon.h"
12*6777b538SAndroid Build Coastguard Worker #include "url/url_canon_internal.h"
13*6777b538SAndroid Build Coastguard Worker #include "url/url_features.h"
14*6777b538SAndroid Build Coastguard Worker #include "url/url_parse_internal.h"
15*6777b538SAndroid Build Coastguard Worker
16*6777b538SAndroid Build Coastguard Worker namespace url {
17*6777b538SAndroid Build Coastguard Worker
18*6777b538SAndroid Build Coastguard Worker namespace {
19*6777b538SAndroid Build Coastguard Worker
// Per-character handling flags stored in kPathCharLookup.
enum CharacterFlags {
  // No bits set: the character is copied to the output unchanged. Because the
  // value is zero it cannot be tested with a bitwise AND; it exists only to
  // make the lookup table readable.
  PASS = 0,

  // The character cannot simply be copied; DoPartialPathInternal must look at
  // it more closely. Testing this single bit first lets the common case of
  // ordinary characters take the fast path.
  SPECIAL = 1 << 0,

  // The character must be percent-escaped in the canonical output. This bit is
  // never stored in the table on its own: table entries use ESCAPE, which also
  // sets SPECIAL so that the slow path (where this bit is examined) is taken.
  ESCAPE_BIT = 1 << 1,
  ESCAPE = SPECIAL | ESCAPE_BIT,
};
38*6777b538SAndroid Build Coastguard Worker
39*6777b538SAndroid Build Coastguard Worker // This table contains one of the above flag values. Note some flags are more
40*6777b538SAndroid Build Coastguard Worker // than one bits because they also turn on the "special" flag. Special is the
41*6777b538SAndroid Build Coastguard Worker // only flag that may be combined with others.
42*6777b538SAndroid Build Coastguard Worker //
// This table was originally designed to match exactly what IE did with these
// characters; however, that behavior doesn't comply with the URL Standard as
// of Dec 2023. See https://crbug.com/1509295.
46*6777b538SAndroid Build Coastguard Worker //
47*6777b538SAndroid Build Coastguard Worker // Dot is even more special, and the escaped version is handled specially by
48*6777b538SAndroid Build Coastguard Worker // IsDot. Therefore, we don't need the "escape" flag. We just need the "special"
49*6777b538SAndroid Build Coastguard Worker // bit.
50*6777b538SAndroid Build Coastguard Worker //
51*6777b538SAndroid Build Coastguard Worker // clang-format off
52*6777b538SAndroid Build Coastguard Worker const unsigned char kPathCharLookup[0x100] = {
53*6777b538SAndroid Build Coastguard Worker // NULL control chars...
54*6777b538SAndroid Build Coastguard Worker ESCAPE , ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
55*6777b538SAndroid Build Coastguard Worker // control chars...
56*6777b538SAndroid Build Coastguard Worker ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
57*6777b538SAndroid Build Coastguard Worker // ' ' ! " # $ % & ' ( ) * + , - . /
58*6777b538SAndroid Build Coastguard Worker ESCAPE, PASS, ESCAPE, ESCAPE, PASS, ESCAPE, PASS, PASS, PASS, PASS, PASS, PASS, PASS, PASS ,SPECIAL, PASS,
59*6777b538SAndroid Build Coastguard Worker // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
60*6777b538SAndroid Build Coastguard Worker PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS, PASS, ESCAPE, PASS, ESCAPE, ESCAPE,
61*6777b538SAndroid Build Coastguard Worker // @ A B C D E F G H I J K L M N O
62*6777b538SAndroid Build Coastguard Worker PASS, PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,
63*6777b538SAndroid Build Coastguard Worker // P Q R S T U V W X Y Z [ \ ] ^ _
64*6777b538SAndroid Build Coastguard Worker PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS, ESCAPE, PASS, ESCAPE, PASS ,
65*6777b538SAndroid Build Coastguard Worker // ` a b c d e f g h i j k l m n o
66*6777b538SAndroid Build Coastguard Worker ESCAPE, PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,
67*6777b538SAndroid Build Coastguard Worker // p q r s t u v w x y z { | } ~ <NBSP>
68*6777b538SAndroid Build Coastguard Worker PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,PASS ,ESCAPE, ESCAPE, ESCAPE, PASS ,ESCAPE,
69*6777b538SAndroid Build Coastguard Worker // ...all the high-bit characters are escaped
70*6777b538SAndroid Build Coastguard Worker ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
71*6777b538SAndroid Build Coastguard Worker ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
72*6777b538SAndroid Build Coastguard Worker ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
73*6777b538SAndroid Build Coastguard Worker ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
74*6777b538SAndroid Build Coastguard Worker ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
75*6777b538SAndroid Build Coastguard Worker ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
76*6777b538SAndroid Build Coastguard Worker ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE,
77*6777b538SAndroid Build Coastguard Worker ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE, ESCAPE};
78*6777b538SAndroid Build Coastguard Worker // clang-format on
79*6777b538SAndroid Build Coastguard Worker
// Result of classifying a dot found at the start of a path segment.
enum DotDisposition {
  // An ordinary dot inside a file name; it carries no directory meaning.
  NOT_A_DIRECTORY = 0,

  // A lone "." segment, i.e. the current directory.
  DIRECTORY_CUR = 1,

  // The first dot of a ".." segment, i.e. go up one directory.
  DIRECTORY_UP = 2
};
90*6777b538SAndroid Build Coastguard Worker
91*6777b538SAndroid Build Coastguard Worker // When the path resolver finds a dot, this function is called with the
92*6777b538SAndroid Build Coastguard Worker // character following that dot to see what it is. The return value
93*6777b538SAndroid Build Coastguard Worker // indicates what type this dot is (see above). This code handles the case
94*6777b538SAndroid Build Coastguard Worker // where the dot is at the end of the input.
95*6777b538SAndroid Build Coastguard Worker //
96*6777b538SAndroid Build Coastguard Worker // |*consumed_len| will contain the number of characters in the input that
97*6777b538SAndroid Build Coastguard Worker // express what we found.
98*6777b538SAndroid Build Coastguard Worker //
99*6777b538SAndroid Build Coastguard Worker // If the input is "../foo", |after_dot| = 1, |end| = 6, and
100*6777b538SAndroid Build Coastguard Worker // at the end, |*consumed_len| = 2 for the "./" this function consumed. The
101*6777b538SAndroid Build Coastguard Worker // original dot length should be handled by the caller.
102*6777b538SAndroid Build Coastguard Worker template <typename CHAR>
ClassifyAfterDot(const CHAR * spec,size_t after_dot,size_t end,size_t * consumed_len)103*6777b538SAndroid Build Coastguard Worker DotDisposition ClassifyAfterDot(const CHAR* spec,
104*6777b538SAndroid Build Coastguard Worker size_t after_dot,
105*6777b538SAndroid Build Coastguard Worker size_t end,
106*6777b538SAndroid Build Coastguard Worker size_t* consumed_len) {
107*6777b538SAndroid Build Coastguard Worker if (after_dot == end) {
108*6777b538SAndroid Build Coastguard Worker // Single dot at the end.
109*6777b538SAndroid Build Coastguard Worker *consumed_len = 0;
110*6777b538SAndroid Build Coastguard Worker return DIRECTORY_CUR;
111*6777b538SAndroid Build Coastguard Worker }
112*6777b538SAndroid Build Coastguard Worker if (IsSlashOrBackslash(spec[after_dot])) {
113*6777b538SAndroid Build Coastguard Worker // Single dot followed by a slash.
114*6777b538SAndroid Build Coastguard Worker *consumed_len = 1; // Consume the slash
115*6777b538SAndroid Build Coastguard Worker return DIRECTORY_CUR;
116*6777b538SAndroid Build Coastguard Worker }
117*6777b538SAndroid Build Coastguard Worker
118*6777b538SAndroid Build Coastguard Worker size_t second_dot_len = IsDot(spec, after_dot, end);
119*6777b538SAndroid Build Coastguard Worker if (second_dot_len) {
120*6777b538SAndroid Build Coastguard Worker size_t after_second_dot = after_dot + second_dot_len;
121*6777b538SAndroid Build Coastguard Worker if (after_second_dot == end) {
122*6777b538SAndroid Build Coastguard Worker // Double dot at the end.
123*6777b538SAndroid Build Coastguard Worker *consumed_len = second_dot_len;
124*6777b538SAndroid Build Coastguard Worker return DIRECTORY_UP;
125*6777b538SAndroid Build Coastguard Worker }
126*6777b538SAndroid Build Coastguard Worker if (IsSlashOrBackslash(spec[after_second_dot])) {
127*6777b538SAndroid Build Coastguard Worker // Double dot followed by a slash.
128*6777b538SAndroid Build Coastguard Worker *consumed_len = second_dot_len + 1;
129*6777b538SAndroid Build Coastguard Worker return DIRECTORY_UP;
130*6777b538SAndroid Build Coastguard Worker }
131*6777b538SAndroid Build Coastguard Worker }
132*6777b538SAndroid Build Coastguard Worker
133*6777b538SAndroid Build Coastguard Worker // The dots are followed by something else, not a directory.
134*6777b538SAndroid Build Coastguard Worker *consumed_len = 0;
135*6777b538SAndroid Build Coastguard Worker return NOT_A_DIRECTORY;
136*6777b538SAndroid Build Coastguard Worker }
137*6777b538SAndroid Build Coastguard Worker
// Rewinds the output to the previous slash. It is assumed that the output
// ends with a slash and this doesn't count (we call this when we are
// appending directory paths, so the previous path component has an ending
// slash).
142*6777b538SAndroid Build Coastguard Worker //
// This will stop at the first slash (assumed to be at position
// |path_begin_in_output|) and not go any higher than that. Some web pages
// do ".." too many times, so we need to handle that brokenness.
146*6777b538SAndroid Build Coastguard Worker //
147*6777b538SAndroid Build Coastguard Worker // It searches for a literal slash rather than including a backslash as well
148*6777b538SAndroid Build Coastguard Worker // because it is run only on the canonical output.
149*6777b538SAndroid Build Coastguard Worker //
150*6777b538SAndroid Build Coastguard Worker // The output is guaranteed to end in a slash when this function completes.
BackUpToPreviousSlash(size_t path_begin_in_output,CanonOutput * output)151*6777b538SAndroid Build Coastguard Worker void BackUpToPreviousSlash(size_t path_begin_in_output, CanonOutput* output) {
152*6777b538SAndroid Build Coastguard Worker CHECK(output->length() > 0);
153*6777b538SAndroid Build Coastguard Worker CHECK(path_begin_in_output < output->length());
154*6777b538SAndroid Build Coastguard Worker
155*6777b538SAndroid Build Coastguard Worker size_t i = output->length() - 1;
156*6777b538SAndroid Build Coastguard Worker DCHECK(output->at(i) == '/');
157*6777b538SAndroid Build Coastguard Worker if (i == path_begin_in_output)
158*6777b538SAndroid Build Coastguard Worker return; // We're at the first slash, nothing to do.
159*6777b538SAndroid Build Coastguard Worker
160*6777b538SAndroid Build Coastguard Worker // Now back up (skipping the trailing slash) until we find another slash.
161*6777b538SAndroid Build Coastguard Worker do {
162*6777b538SAndroid Build Coastguard Worker --i;
163*6777b538SAndroid Build Coastguard Worker } while (output->at(i) != '/' && i > path_begin_in_output);
164*6777b538SAndroid Build Coastguard Worker
165*6777b538SAndroid Build Coastguard Worker // Now shrink the output to just include that last slash we found.
166*6777b538SAndroid Build Coastguard Worker output->set_length(i + 1);
167*6777b538SAndroid Build Coastguard Worker }
168*6777b538SAndroid Build Coastguard Worker
169*6777b538SAndroid Build Coastguard Worker // Canonicalizes and appends the given path to the output. It assumes that if
170*6777b538SAndroid Build Coastguard Worker // the input path starts with a slash, it should be copied to the output.
171*6777b538SAndroid Build Coastguard Worker //
172*6777b538SAndroid Build Coastguard Worker // If there are already path components (this mode is used when appending
173*6777b538SAndroid Build Coastguard Worker // relative paths for resolving), it assumes that the output already has
174*6777b538SAndroid Build Coastguard Worker // a trailing slash and that if the input begins with a slash, it should be
175*6777b538SAndroid Build Coastguard Worker // copied to the output.
176*6777b538SAndroid Build Coastguard Worker //
177*6777b538SAndroid Build Coastguard Worker // We do not collapse multiple slashes in a row to a single slash. It seems
178*6777b538SAndroid Build Coastguard Worker // no web browsers do this, and we don't want incompatibilities, even though
179*6777b538SAndroid Build Coastguard Worker // it would be correct for most systems.
180*6777b538SAndroid Build Coastguard Worker template <typename CHAR, typename UCHAR>
DoPartialPathInternal(const CHAR * spec,const Component & path,size_t path_begin_in_output,CanonMode canon_mode,CanonOutput * output)181*6777b538SAndroid Build Coastguard Worker bool DoPartialPathInternal(const CHAR* spec,
182*6777b538SAndroid Build Coastguard Worker const Component& path,
183*6777b538SAndroid Build Coastguard Worker size_t path_begin_in_output,
184*6777b538SAndroid Build Coastguard Worker CanonMode canon_mode,
185*6777b538SAndroid Build Coastguard Worker CanonOutput* output) {
186*6777b538SAndroid Build Coastguard Worker if (path.is_empty())
187*6777b538SAndroid Build Coastguard Worker return true;
188*6777b538SAndroid Build Coastguard Worker
189*6777b538SAndroid Build Coastguard Worker size_t end = static_cast<size_t>(path.end());
190*6777b538SAndroid Build Coastguard Worker
191*6777b538SAndroid Build Coastguard Worker bool success = true;
192*6777b538SAndroid Build Coastguard Worker for (size_t i = static_cast<size_t>(path.begin); i < end; i++) {
193*6777b538SAndroid Build Coastguard Worker UCHAR uch = static_cast<UCHAR>(spec[i]);
194*6777b538SAndroid Build Coastguard Worker if (sizeof(CHAR) > 1 && uch >= 0x80) {
195*6777b538SAndroid Build Coastguard Worker // We only need to test wide input for having non-ASCII characters. For
196*6777b538SAndroid Build Coastguard Worker // narrow input, we'll always just use the lookup table. We don't try to
197*6777b538SAndroid Build Coastguard Worker // do anything tricky with decoding/validating UTF-8. This function will
198*6777b538SAndroid Build Coastguard Worker // read one or two UTF-16 characters and append the output as UTF-8. This
199*6777b538SAndroid Build Coastguard Worker // call will be removed in 8-bit mode.
200*6777b538SAndroid Build Coastguard Worker success &= AppendUTF8EscapedChar(spec, &i, end, output);
201*6777b538SAndroid Build Coastguard Worker } else {
202*6777b538SAndroid Build Coastguard Worker // Normal ASCII character or 8-bit input, use the lookup table.
203*6777b538SAndroid Build Coastguard Worker unsigned char out_ch = static_cast<unsigned char>(uch);
204*6777b538SAndroid Build Coastguard Worker unsigned char flags = kPathCharLookup[out_ch];
205*6777b538SAndroid Build Coastguard Worker if (flags & SPECIAL) {
206*6777b538SAndroid Build Coastguard Worker // Needs special handling of some sort.
207*6777b538SAndroid Build Coastguard Worker size_t dotlen;
208*6777b538SAndroid Build Coastguard Worker if ((dotlen = IsDot(spec, i, end)) > 0) {
209*6777b538SAndroid Build Coastguard Worker // See if this dot was preceded by a slash in the output.
210*6777b538SAndroid Build Coastguard Worker //
211*6777b538SAndroid Build Coastguard Worker // Note that we check this in the case of dots so we don't have to
212*6777b538SAndroid Build Coastguard Worker // special case slashes. Since slashes are much more common than
213*6777b538SAndroid Build Coastguard Worker // dots, this actually increases performance measurably (though
214*6777b538SAndroid Build Coastguard Worker // slightly).
215*6777b538SAndroid Build Coastguard Worker if (output->length() > path_begin_in_output &&
216*6777b538SAndroid Build Coastguard Worker output->at(output->length() - 1) == '/') {
217*6777b538SAndroid Build Coastguard Worker // Slash followed by a dot, check to see if this is means relative
218*6777b538SAndroid Build Coastguard Worker size_t consumed_len;
219*6777b538SAndroid Build Coastguard Worker switch (ClassifyAfterDot<CHAR>(spec, i + dotlen, end,
220*6777b538SAndroid Build Coastguard Worker &consumed_len)) {
221*6777b538SAndroid Build Coastguard Worker case NOT_A_DIRECTORY:
222*6777b538SAndroid Build Coastguard Worker // Copy the dot to the output, it means nothing special.
223*6777b538SAndroid Build Coastguard Worker output->push_back('.');
224*6777b538SAndroid Build Coastguard Worker i += dotlen - 1;
225*6777b538SAndroid Build Coastguard Worker break;
226*6777b538SAndroid Build Coastguard Worker case DIRECTORY_CUR: // Current directory, just skip the input.
227*6777b538SAndroid Build Coastguard Worker i += dotlen + consumed_len - 1;
228*6777b538SAndroid Build Coastguard Worker break;
229*6777b538SAndroid Build Coastguard Worker case DIRECTORY_UP:
230*6777b538SAndroid Build Coastguard Worker BackUpToPreviousSlash(path_begin_in_output, output);
231*6777b538SAndroid Build Coastguard Worker i += dotlen + consumed_len - 1;
232*6777b538SAndroid Build Coastguard Worker break;
233*6777b538SAndroid Build Coastguard Worker }
234*6777b538SAndroid Build Coastguard Worker } else {
235*6777b538SAndroid Build Coastguard Worker // This dot is not preceded by a slash, it is just part of some
236*6777b538SAndroid Build Coastguard Worker // file name.
237*6777b538SAndroid Build Coastguard Worker output->push_back('.');
238*6777b538SAndroid Build Coastguard Worker i += dotlen - 1;
239*6777b538SAndroid Build Coastguard Worker }
240*6777b538SAndroid Build Coastguard Worker
241*6777b538SAndroid Build Coastguard Worker } else if (out_ch == '\\') {
242*6777b538SAndroid Build Coastguard Worker if (canon_mode == CanonMode::kSpecialURL) {
243*6777b538SAndroid Build Coastguard Worker // Backslashes are path separators in special URLs.
244*6777b538SAndroid Build Coastguard Worker //
245*6777b538SAndroid Build Coastguard Worker // URL Standard: https://url.spec.whatwg.org/#path-state
246*6777b538SAndroid Build Coastguard Worker // > 1. url is special and c is U+005C (\)
247*6777b538SAndroid Build Coastguard Worker //
248*6777b538SAndroid Build Coastguard Worker // Convert backslashes to forward slashes.
249*6777b538SAndroid Build Coastguard Worker output->push_back('/');
250*6777b538SAndroid Build Coastguard Worker } else {
251*6777b538SAndroid Build Coastguard Worker output->push_back(out_ch);
252*6777b538SAndroid Build Coastguard Worker }
253*6777b538SAndroid Build Coastguard Worker } else if (out_ch == '%') {
254*6777b538SAndroid Build Coastguard Worker // Handle escape sequences.
255*6777b538SAndroid Build Coastguard Worker unsigned char unused_unescaped_value;
256*6777b538SAndroid Build Coastguard Worker if (DecodeEscaped(spec, &i, end, &unused_unescaped_value)) {
257*6777b538SAndroid Build Coastguard Worker // Valid escape sequence. We should just copy it exactly.
258*6777b538SAndroid Build Coastguard Worker output->push_back('%');
259*6777b538SAndroid Build Coastguard Worker output->push_back(static_cast<char>(spec[i - 1]));
260*6777b538SAndroid Build Coastguard Worker output->push_back(static_cast<char>(spec[i]));
261*6777b538SAndroid Build Coastguard Worker } else {
262*6777b538SAndroid Build Coastguard Worker // Invalid escape sequence. IE7+ rejects any URLs with such
263*6777b538SAndroid Build Coastguard Worker // sequences, while other browsers pass them through unchanged. We
264*6777b538SAndroid Build Coastguard Worker // use the permissive behavior.
265*6777b538SAndroid Build Coastguard Worker // TODO(brettw): Consider testing IE's strict behavior, which would
266*6777b538SAndroid Build Coastguard Worker // allow removing the code to handle nested escapes above.
267*6777b538SAndroid Build Coastguard Worker output->push_back('%');
268*6777b538SAndroid Build Coastguard Worker }
269*6777b538SAndroid Build Coastguard Worker } else if (flags & ESCAPE_BIT) {
270*6777b538SAndroid Build Coastguard Worker // This character should be escaped.
271*6777b538SAndroid Build Coastguard Worker AppendEscapedChar(out_ch, output);
272*6777b538SAndroid Build Coastguard Worker }
273*6777b538SAndroid Build Coastguard Worker } else {
274*6777b538SAndroid Build Coastguard Worker // Nothing special about this character, just append it.
275*6777b538SAndroid Build Coastguard Worker output->push_back(out_ch);
276*6777b538SAndroid Build Coastguard Worker }
277*6777b538SAndroid Build Coastguard Worker }
278*6777b538SAndroid Build Coastguard Worker }
279*6777b538SAndroid Build Coastguard Worker return success;
280*6777b538SAndroid Build Coastguard Worker }
281*6777b538SAndroid Build Coastguard Worker
282*6777b538SAndroid Build Coastguard Worker // Perform the same logic as in DoPartialPathInternal(), but updates the
283*6777b538SAndroid Build Coastguard Worker // publicly exposed CanonOutput structure similar to DoPath(). Returns
284*6777b538SAndroid Build Coastguard Worker // true if successful.
285*6777b538SAndroid Build Coastguard Worker template <typename CHAR, typename UCHAR>
DoPartialPath(const CHAR * spec,const Component & path,CanonOutput * output,Component * out_path)286*6777b538SAndroid Build Coastguard Worker bool DoPartialPath(const CHAR* spec,
287*6777b538SAndroid Build Coastguard Worker const Component& path,
288*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
289*6777b538SAndroid Build Coastguard Worker Component* out_path) {
290*6777b538SAndroid Build Coastguard Worker out_path->begin = output->length();
291*6777b538SAndroid Build Coastguard Worker bool success = DoPartialPathInternal<CHAR, UCHAR>(
292*6777b538SAndroid Build Coastguard Worker spec, path, out_path->begin,
293*6777b538SAndroid Build Coastguard Worker // TODO(crbug.com/1416006): Support Non-special URLs.
294*6777b538SAndroid Build Coastguard Worker CanonMode::kSpecialURL, output);
295*6777b538SAndroid Build Coastguard Worker out_path->len = output->length() - out_path->begin;
296*6777b538SAndroid Build Coastguard Worker return success;
297*6777b538SAndroid Build Coastguard Worker }
298*6777b538SAndroid Build Coastguard Worker
299*6777b538SAndroid Build Coastguard Worker template <typename CHAR, typename UCHAR>
DoPath(const CHAR * spec,const Component & path,CanonMode canon_mode,CanonOutput * output,Component * out_path)300*6777b538SAndroid Build Coastguard Worker bool DoPath(const CHAR* spec,
301*6777b538SAndroid Build Coastguard Worker const Component& path,
302*6777b538SAndroid Build Coastguard Worker CanonMode canon_mode,
303*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
304*6777b538SAndroid Build Coastguard Worker Component* out_path) {
305*6777b538SAndroid Build Coastguard Worker // URL Standard:
306*6777b538SAndroid Build Coastguard Worker // - https://url.spec.whatwg.org/#path-start-state
307*6777b538SAndroid Build Coastguard Worker // - https://url.spec.whatwg.org/#path-state
308*6777b538SAndroid Build Coastguard Worker
309*6777b538SAndroid Build Coastguard Worker bool success = true;
310*6777b538SAndroid Build Coastguard Worker out_path->begin = output->length();
311*6777b538SAndroid Build Coastguard Worker if (path.is_nonempty()) {
312*6777b538SAndroid Build Coastguard Worker // Write out an initial slash if the input has none. If we just parse a URL
313*6777b538SAndroid Build Coastguard Worker // and then canonicalize it, it will of course have a slash already. This
314*6777b538SAndroid Build Coastguard Worker // check is for the replacement and relative URL resolving cases of file
315*6777b538SAndroid Build Coastguard Worker // URLs.
316*6777b538SAndroid Build Coastguard Worker if (!IsSlashOrBackslash(spec[path.begin])) {
317*6777b538SAndroid Build Coastguard Worker output->push_back('/');
318*6777b538SAndroid Build Coastguard Worker }
319*6777b538SAndroid Build Coastguard Worker
320*6777b538SAndroid Build Coastguard Worker success = DoPartialPathInternal<CHAR, UCHAR>(spec, path, out_path->begin,
321*6777b538SAndroid Build Coastguard Worker canon_mode, output);
322*6777b538SAndroid Build Coastguard Worker } else if (canon_mode == CanonMode::kSpecialURL) {
323*6777b538SAndroid Build Coastguard Worker // No input, canonical path is a slash for special URLs, but it is empty for
324*6777b538SAndroid Build Coastguard Worker // non-special URLs.
325*6777b538SAndroid Build Coastguard Worker //
326*6777b538SAndroid Build Coastguard Worker // Implementation note:
327*6777b538SAndroid Build Coastguard Worker //
328*6777b538SAndroid Build Coastguard Worker // According to the URL Standard, for non-special URLs whose parsed path is
329*6777b538SAndroid Build Coastguard Worker // empty, such as "git://host", the state-machine finishes in the
330*6777b538SAndroid Build Coastguard Worker // `path-start-state` without entering the `path-state`. As a result, the
331*6777b538SAndroid Build Coastguard Worker // url's path remains an empty array. Therefore, no slash should be
332*6777b538SAndroid Build Coastguard Worker // appended.
333*6777b538SAndroid Build Coastguard Worker output->push_back('/');
334*6777b538SAndroid Build Coastguard Worker }
335*6777b538SAndroid Build Coastguard Worker out_path->len = output->length() - out_path->begin;
336*6777b538SAndroid Build Coastguard Worker return success;
337*6777b538SAndroid Build Coastguard Worker }
338*6777b538SAndroid Build Coastguard Worker
339*6777b538SAndroid Build Coastguard Worker } // namespace
340*6777b538SAndroid Build Coastguard Worker
CanonicalizePath(const char * spec,const Component & path,CanonMode canon_mode,CanonOutput * output,Component * out_path)341*6777b538SAndroid Build Coastguard Worker bool CanonicalizePath(const char* spec,
342*6777b538SAndroid Build Coastguard Worker const Component& path,
343*6777b538SAndroid Build Coastguard Worker CanonMode canon_mode,
344*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
345*6777b538SAndroid Build Coastguard Worker Component* out_path) {
346*6777b538SAndroid Build Coastguard Worker return DoPath<char, unsigned char>(spec, path, canon_mode, output, out_path);
347*6777b538SAndroid Build Coastguard Worker }
348*6777b538SAndroid Build Coastguard Worker
CanonicalizePath(const char16_t * spec,const Component & path,CanonMode canon_mode,CanonOutput * output,Component * out_path)349*6777b538SAndroid Build Coastguard Worker bool CanonicalizePath(const char16_t* spec,
350*6777b538SAndroid Build Coastguard Worker const Component& path,
351*6777b538SAndroid Build Coastguard Worker CanonMode canon_mode,
352*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
353*6777b538SAndroid Build Coastguard Worker Component* out_path) {
354*6777b538SAndroid Build Coastguard Worker return DoPath<char16_t, char16_t>(spec, path, canon_mode, output, out_path);
355*6777b538SAndroid Build Coastguard Worker }
356*6777b538SAndroid Build Coastguard Worker
CanonicalizePath(const char * spec,const Component & path,CanonOutput * output,Component * out_path)357*6777b538SAndroid Build Coastguard Worker bool CanonicalizePath(const char* spec,
358*6777b538SAndroid Build Coastguard Worker const Component& path,
359*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
360*6777b538SAndroid Build Coastguard Worker Component* out_path) {
361*6777b538SAndroid Build Coastguard Worker return DoPath<char, unsigned char>(spec, path, CanonMode::kSpecialURL, output,
362*6777b538SAndroid Build Coastguard Worker out_path);
363*6777b538SAndroid Build Coastguard Worker }
364*6777b538SAndroid Build Coastguard Worker
CanonicalizePath(const char16_t * spec,const Component & path,CanonOutput * output,Component * out_path)365*6777b538SAndroid Build Coastguard Worker bool CanonicalizePath(const char16_t* spec,
366*6777b538SAndroid Build Coastguard Worker const Component& path,
367*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
368*6777b538SAndroid Build Coastguard Worker Component* out_path) {
369*6777b538SAndroid Build Coastguard Worker return DoPath<char16_t, char16_t>(spec, path, CanonMode::kSpecialURL, output,
370*6777b538SAndroid Build Coastguard Worker out_path);
371*6777b538SAndroid Build Coastguard Worker }
372*6777b538SAndroid Build Coastguard Worker
CanonicalizePartialPath(const char * spec,const Component & path,CanonOutput * output,Component * out_path)373*6777b538SAndroid Build Coastguard Worker bool CanonicalizePartialPath(const char* spec,
374*6777b538SAndroid Build Coastguard Worker const Component& path,
375*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
376*6777b538SAndroid Build Coastguard Worker Component* out_path) {
377*6777b538SAndroid Build Coastguard Worker return DoPartialPath<char, unsigned char>(spec, path, output, out_path);
378*6777b538SAndroid Build Coastguard Worker }
379*6777b538SAndroid Build Coastguard Worker
CanonicalizePartialPath(const char16_t * spec,const Component & path,CanonOutput * output,Component * out_path)380*6777b538SAndroid Build Coastguard Worker bool CanonicalizePartialPath(const char16_t* spec,
381*6777b538SAndroid Build Coastguard Worker const Component& path,
382*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
383*6777b538SAndroid Build Coastguard Worker Component* out_path) {
384*6777b538SAndroid Build Coastguard Worker return DoPartialPath<char16_t, char16_t>(spec, path, output, out_path);
385*6777b538SAndroid Build Coastguard Worker }
386*6777b538SAndroid Build Coastguard Worker
CanonicalizePartialPathInternal(const char * spec,const Component & path,size_t path_begin_in_output,CanonMode canon_mode,CanonOutput * output)387*6777b538SAndroid Build Coastguard Worker bool CanonicalizePartialPathInternal(const char* spec,
388*6777b538SAndroid Build Coastguard Worker const Component& path,
389*6777b538SAndroid Build Coastguard Worker size_t path_begin_in_output,
390*6777b538SAndroid Build Coastguard Worker CanonMode canon_mode,
391*6777b538SAndroid Build Coastguard Worker CanonOutput* output) {
392*6777b538SAndroid Build Coastguard Worker return DoPartialPathInternal<char, unsigned char>(
393*6777b538SAndroid Build Coastguard Worker spec, path, path_begin_in_output, canon_mode, output);
394*6777b538SAndroid Build Coastguard Worker }
395*6777b538SAndroid Build Coastguard Worker
CanonicalizePartialPathInternal(const char16_t * spec,const Component & path,size_t path_begin_in_output,CanonMode canon_mode,CanonOutput * output)396*6777b538SAndroid Build Coastguard Worker bool CanonicalizePartialPathInternal(const char16_t* spec,
397*6777b538SAndroid Build Coastguard Worker const Component& path,
398*6777b538SAndroid Build Coastguard Worker size_t path_begin_in_output,
399*6777b538SAndroid Build Coastguard Worker CanonMode canon_mode,
400*6777b538SAndroid Build Coastguard Worker CanonOutput* output) {
401*6777b538SAndroid Build Coastguard Worker return DoPartialPathInternal<char16_t, char16_t>(
402*6777b538SAndroid Build Coastguard Worker spec, path, path_begin_in_output, canon_mode, output);
403*6777b538SAndroid Build Coastguard Worker }
404*6777b538SAndroid Build Coastguard Worker
405*6777b538SAndroid Build Coastguard Worker } // namespace url
406