1*6777b538SAndroid Build Coastguard Worker // Copyright 2013 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker
5*6777b538SAndroid Build Coastguard Worker #ifndef URL_URL_CANON_H_
6*6777b538SAndroid Build Coastguard Worker #define URL_URL_CANON_H_
7*6777b538SAndroid Build Coastguard Worker
8*6777b538SAndroid Build Coastguard Worker #include <stdlib.h>
9*6777b538SAndroid Build Coastguard Worker #include <string.h>
10*6777b538SAndroid Build Coastguard Worker
11*6777b538SAndroid Build Coastguard Worker #include <string_view>
12*6777b538SAndroid Build Coastguard Worker
13*6777b538SAndroid Build Coastguard Worker #include "base/check_op.h"
14*6777b538SAndroid Build Coastguard Worker #include "base/component_export.h"
15*6777b538SAndroid Build Coastguard Worker #include "base/export_template.h"
16*6777b538SAndroid Build Coastguard Worker #include "base/memory/raw_ptr_exclusion.h"
17*6777b538SAndroid Build Coastguard Worker #include "base/numerics/clamped_math.h"
18*6777b538SAndroid Build Coastguard Worker #include "url/third_party/mozilla/url_parse.h"
19*6777b538SAndroid Build Coastguard Worker
20*6777b538SAndroid Build Coastguard Worker namespace url {
21*6777b538SAndroid Build Coastguard Worker
// Selects between the two canonicalization behaviors: the one for special
// URLs (https://url.spec.whatwg.org/#is-special) and the one for URLs which
// are not special.
//
// Examples:
// - Special URLs: "https://host/path", "ftp://host/path"
// - Non Special URLs: "about:blank", "data:xxx", "git://host/path"
enum class CanonMode {
  // Canonicalize as a special URL.
  kSpecialURL,
  // Canonicalize as a non-special URL.
  kNonSpecialURL,
};
30*6777b538SAndroid Build Coastguard Worker
31*6777b538SAndroid Build Coastguard Worker // Canonicalizer output
32*6777b538SAndroid Build Coastguard Worker // -------------------------------------------------------
33*6777b538SAndroid Build Coastguard Worker
34*6777b538SAndroid Build Coastguard Worker // Base class for the canonicalizer output, this maintains a buffer and
35*6777b538SAndroid Build Coastguard Worker // supports simple resizing and append operations on it.
36*6777b538SAndroid Build Coastguard Worker //
37*6777b538SAndroid Build Coastguard Worker // It is VERY IMPORTANT that no virtual function calls be made on the common
38*6777b538SAndroid Build Coastguard Worker // code path. We only have two virtual function calls, the destructor and a
39*6777b538SAndroid Build Coastguard Worker // resize function that is called when the existing buffer is not big enough.
40*6777b538SAndroid Build Coastguard Worker // The derived class is then in charge of setting up our buffer which we will
41*6777b538SAndroid Build Coastguard Worker // manage.
42*6777b538SAndroid Build Coastguard Worker template <typename T>
43*6777b538SAndroid Build Coastguard Worker class CanonOutputT {
44*6777b538SAndroid Build Coastguard Worker public:
45*6777b538SAndroid Build Coastguard Worker CanonOutputT() = default;
46*6777b538SAndroid Build Coastguard Worker virtual ~CanonOutputT() = default;
47*6777b538SAndroid Build Coastguard Worker
48*6777b538SAndroid Build Coastguard Worker // Implemented to resize the buffer. This function should update the buffer
49*6777b538SAndroid Build Coastguard Worker // pointer to point to the new buffer, and any old data up to |cur_len_| in
50*6777b538SAndroid Build Coastguard Worker // the buffer must be copied over.
51*6777b538SAndroid Build Coastguard Worker //
52*6777b538SAndroid Build Coastguard Worker // The new size |sz| must be larger than buffer_len_.
53*6777b538SAndroid Build Coastguard Worker virtual void Resize(size_t sz) = 0;
54*6777b538SAndroid Build Coastguard Worker
55*6777b538SAndroid Build Coastguard Worker // Accessor for returning a character at a given position. The input offset
56*6777b538SAndroid Build Coastguard Worker // must be in the valid range.
at(size_t offset)57*6777b538SAndroid Build Coastguard Worker inline T at(size_t offset) const { return buffer_[offset]; }
58*6777b538SAndroid Build Coastguard Worker
59*6777b538SAndroid Build Coastguard Worker // Sets the character at the given position. The given position MUST be less
60*6777b538SAndroid Build Coastguard Worker // than the length().
set(size_t offset,T ch)61*6777b538SAndroid Build Coastguard Worker inline void set(size_t offset, T ch) { buffer_[offset] = ch; }
62*6777b538SAndroid Build Coastguard Worker
63*6777b538SAndroid Build Coastguard Worker // Returns the number of characters currently in the buffer.
length()64*6777b538SAndroid Build Coastguard Worker inline size_t length() const { return cur_len_; }
65*6777b538SAndroid Build Coastguard Worker
66*6777b538SAndroid Build Coastguard Worker // Returns the current capacity of the buffer. The length() is the number of
67*6777b538SAndroid Build Coastguard Worker // characters that have been declared to be written, but the capacity() is
68*6777b538SAndroid Build Coastguard Worker // the number that can be written without reallocation. If the caller must
69*6777b538SAndroid Build Coastguard Worker // write many characters at once, it can make sure there is enough capacity,
70*6777b538SAndroid Build Coastguard Worker // write the data, then use set_size() to declare the new length().
capacity()71*6777b538SAndroid Build Coastguard Worker size_t capacity() const { return buffer_len_; }
72*6777b538SAndroid Build Coastguard Worker
73*6777b538SAndroid Build Coastguard Worker // Returns the contents of the buffer as a string_view.
view()74*6777b538SAndroid Build Coastguard Worker std::basic_string_view<T> view() const {
75*6777b538SAndroid Build Coastguard Worker return std::basic_string_view<T>(data(), length());
76*6777b538SAndroid Build Coastguard Worker }
77*6777b538SAndroid Build Coastguard Worker
78*6777b538SAndroid Build Coastguard Worker // Called by the user of this class to get the output. The output will NOT
79*6777b538SAndroid Build Coastguard Worker // be NULL-terminated. Call length() to get the
80*6777b538SAndroid Build Coastguard Worker // length.
data()81*6777b538SAndroid Build Coastguard Worker const T* data() const { return buffer_; }
data()82*6777b538SAndroid Build Coastguard Worker T* data() { return buffer_; }
83*6777b538SAndroid Build Coastguard Worker
84*6777b538SAndroid Build Coastguard Worker // Shortens the URL to the new length. Used for "backing up" when processing
85*6777b538SAndroid Build Coastguard Worker // relative paths. This can also be used if an external function writes a lot
86*6777b538SAndroid Build Coastguard Worker // of data to the buffer (when using the "Raw" version below) beyond the end,
87*6777b538SAndroid Build Coastguard Worker // to declare the new length.
88*6777b538SAndroid Build Coastguard Worker //
89*6777b538SAndroid Build Coastguard Worker // This MUST NOT be used to expand the size of the buffer beyond capacity().
set_length(size_t new_len)90*6777b538SAndroid Build Coastguard Worker void set_length(size_t new_len) { cur_len_ = new_len; }
91*6777b538SAndroid Build Coastguard Worker
92*6777b538SAndroid Build Coastguard Worker // This is the most performance critical function, since it is called for
93*6777b538SAndroid Build Coastguard Worker // every character.
push_back(T ch)94*6777b538SAndroid Build Coastguard Worker void push_back(T ch) {
95*6777b538SAndroid Build Coastguard Worker // In VC2005, putting this common case first speeds up execution
96*6777b538SAndroid Build Coastguard Worker // dramatically because this branch is predicted as taken.
97*6777b538SAndroid Build Coastguard Worker if (cur_len_ < buffer_len_) {
98*6777b538SAndroid Build Coastguard Worker buffer_[cur_len_] = ch;
99*6777b538SAndroid Build Coastguard Worker cur_len_++;
100*6777b538SAndroid Build Coastguard Worker return;
101*6777b538SAndroid Build Coastguard Worker }
102*6777b538SAndroid Build Coastguard Worker
103*6777b538SAndroid Build Coastguard Worker // Grow the buffer to hold at least one more item. Hopefully we won't have
104*6777b538SAndroid Build Coastguard Worker // to do this very often.
105*6777b538SAndroid Build Coastguard Worker if (!Grow(1))
106*6777b538SAndroid Build Coastguard Worker return;
107*6777b538SAndroid Build Coastguard Worker
108*6777b538SAndroid Build Coastguard Worker // Actually do the insertion.
109*6777b538SAndroid Build Coastguard Worker buffer_[cur_len_] = ch;
110*6777b538SAndroid Build Coastguard Worker cur_len_++;
111*6777b538SAndroid Build Coastguard Worker }
112*6777b538SAndroid Build Coastguard Worker
113*6777b538SAndroid Build Coastguard Worker // Appends the given string to the output.
Append(const T * str,size_t str_len)114*6777b538SAndroid Build Coastguard Worker void Append(const T* str, size_t str_len) {
115*6777b538SAndroid Build Coastguard Worker if (str_len > buffer_len_ - cur_len_) {
116*6777b538SAndroid Build Coastguard Worker if (!Grow(str_len - (buffer_len_ - cur_len_)))
117*6777b538SAndroid Build Coastguard Worker return;
118*6777b538SAndroid Build Coastguard Worker }
119*6777b538SAndroid Build Coastguard Worker memcpy(buffer_ + cur_len_, str, str_len * sizeof(T));
120*6777b538SAndroid Build Coastguard Worker cur_len_ += str_len;
121*6777b538SAndroid Build Coastguard Worker }
122*6777b538SAndroid Build Coastguard Worker
Append(std::basic_string_view<T> str)123*6777b538SAndroid Build Coastguard Worker void Append(std::basic_string_view<T> str) { Append(str.data(), str.size()); }
124*6777b538SAndroid Build Coastguard Worker
ReserveSizeIfNeeded(size_t estimated_size)125*6777b538SAndroid Build Coastguard Worker void ReserveSizeIfNeeded(size_t estimated_size) {
126*6777b538SAndroid Build Coastguard Worker // Reserve a bit extra to account for escaped chars.
127*6777b538SAndroid Build Coastguard Worker if (estimated_size > buffer_len_)
128*6777b538SAndroid Build Coastguard Worker Resize((base::ClampedNumeric<size_t>(estimated_size) + 8).RawValue());
129*6777b538SAndroid Build Coastguard Worker }
130*6777b538SAndroid Build Coastguard Worker
131*6777b538SAndroid Build Coastguard Worker // Insert `str` at `pos`. Used for post-processing non-special URL's pathname.
132*6777b538SAndroid Build Coastguard Worker // Since this takes O(N), don't use this unless there is a strong reason.
Insert(size_t pos,std::basic_string_view<T> str)133*6777b538SAndroid Build Coastguard Worker void Insert(size_t pos, std::basic_string_view<T> str) {
134*6777b538SAndroid Build Coastguard Worker DCHECK_LE(pos, cur_len_);
135*6777b538SAndroid Build Coastguard Worker std::basic_string<T> copy(view().substr(pos));
136*6777b538SAndroid Build Coastguard Worker set_length(pos);
137*6777b538SAndroid Build Coastguard Worker Append(str);
138*6777b538SAndroid Build Coastguard Worker Append(copy);
139*6777b538SAndroid Build Coastguard Worker }
140*6777b538SAndroid Build Coastguard Worker
141*6777b538SAndroid Build Coastguard Worker protected:
142*6777b538SAndroid Build Coastguard Worker // Grows the given buffer so that it can fit at least |min_additional|
143*6777b538SAndroid Build Coastguard Worker // characters. Returns true if the buffer could be resized, false on OOM.
Grow(size_t min_additional)144*6777b538SAndroid Build Coastguard Worker bool Grow(size_t min_additional) {
145*6777b538SAndroid Build Coastguard Worker static const size_t kMinBufferLen = 16;
146*6777b538SAndroid Build Coastguard Worker size_t new_len = (buffer_len_ == 0) ? kMinBufferLen : buffer_len_;
147*6777b538SAndroid Build Coastguard Worker do {
148*6777b538SAndroid Build Coastguard Worker if (new_len >= (1 << 30)) // Prevent overflow below.
149*6777b538SAndroid Build Coastguard Worker return false;
150*6777b538SAndroid Build Coastguard Worker new_len *= 2;
151*6777b538SAndroid Build Coastguard Worker } while (new_len < buffer_len_ + min_additional);
152*6777b538SAndroid Build Coastguard Worker Resize(new_len);
153*6777b538SAndroid Build Coastguard Worker return true;
154*6777b538SAndroid Build Coastguard Worker }
155*6777b538SAndroid Build Coastguard Worker
156*6777b538SAndroid Build Coastguard Worker // `buffer_` is not a raw_ptr<...> for performance reasons (based on analysis
157*6777b538SAndroid Build Coastguard Worker // of sampling profiler data).
158*6777b538SAndroid Build Coastguard Worker RAW_PTR_EXCLUSION T* buffer_ = nullptr;
159*6777b538SAndroid Build Coastguard Worker size_t buffer_len_ = 0;
160*6777b538SAndroid Build Coastguard Worker
161*6777b538SAndroid Build Coastguard Worker // Used characters in the buffer.
162*6777b538SAndroid Build Coastguard Worker size_t cur_len_ = 0;
163*6777b538SAndroid Build Coastguard Worker };
164*6777b538SAndroid Build Coastguard Worker
165*6777b538SAndroid Build Coastguard Worker // Simple implementation of the CanonOutput using new[]. This class
166*6777b538SAndroid Build Coastguard Worker // also supports a static buffer so if it is allocated on the stack, most
167*6777b538SAndroid Build Coastguard Worker // URLs can be canonicalized with no heap allocations.
168*6777b538SAndroid Build Coastguard Worker template <typename T, int fixed_capacity = 1024>
169*6777b538SAndroid Build Coastguard Worker class RawCanonOutputT : public CanonOutputT<T> {
170*6777b538SAndroid Build Coastguard Worker public:
RawCanonOutputT()171*6777b538SAndroid Build Coastguard Worker RawCanonOutputT() : CanonOutputT<T>() {
172*6777b538SAndroid Build Coastguard Worker this->buffer_ = fixed_buffer_;
173*6777b538SAndroid Build Coastguard Worker this->buffer_len_ = fixed_capacity;
174*6777b538SAndroid Build Coastguard Worker }
~RawCanonOutputT()175*6777b538SAndroid Build Coastguard Worker ~RawCanonOutputT() override {
176*6777b538SAndroid Build Coastguard Worker if (this->buffer_ != fixed_buffer_)
177*6777b538SAndroid Build Coastguard Worker delete[] this->buffer_;
178*6777b538SAndroid Build Coastguard Worker }
179*6777b538SAndroid Build Coastguard Worker
Resize(size_t sz)180*6777b538SAndroid Build Coastguard Worker void Resize(size_t sz) override {
181*6777b538SAndroid Build Coastguard Worker T* new_buf = new T[sz];
182*6777b538SAndroid Build Coastguard Worker memcpy(new_buf, this->buffer_,
183*6777b538SAndroid Build Coastguard Worker sizeof(T) * (this->cur_len_ < sz ? this->cur_len_ : sz));
184*6777b538SAndroid Build Coastguard Worker if (this->buffer_ != fixed_buffer_)
185*6777b538SAndroid Build Coastguard Worker delete[] this->buffer_;
186*6777b538SAndroid Build Coastguard Worker this->buffer_ = new_buf;
187*6777b538SAndroid Build Coastguard Worker this->buffer_len_ = sz;
188*6777b538SAndroid Build Coastguard Worker }
189*6777b538SAndroid Build Coastguard Worker
190*6777b538SAndroid Build Coastguard Worker protected:
191*6777b538SAndroid Build Coastguard Worker T fixed_buffer_[fixed_capacity];
192*6777b538SAndroid Build Coastguard Worker };
193*6777b538SAndroid Build Coastguard Worker
194*6777b538SAndroid Build Coastguard Worker // Explicitely instantiate commonly used instatiations.
195*6777b538SAndroid Build Coastguard Worker extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL))
196*6777b538SAndroid Build Coastguard Worker CanonOutputT<char>;
197*6777b538SAndroid Build Coastguard Worker extern template class EXPORT_TEMPLATE_DECLARE(COMPONENT_EXPORT(URL))
198*6777b538SAndroid Build Coastguard Worker CanonOutputT<char16_t>;
199*6777b538SAndroid Build Coastguard Worker
200*6777b538SAndroid Build Coastguard Worker // Normally, all canonicalization output is in narrow characters. We support
201*6777b538SAndroid Build Coastguard Worker // the templates so it can also be used internally if a wide buffer is
202*6777b538SAndroid Build Coastguard Worker // required.
203*6777b538SAndroid Build Coastguard Worker typedef CanonOutputT<char> CanonOutput;
204*6777b538SAndroid Build Coastguard Worker typedef CanonOutputT<char16_t> CanonOutputW;
205*6777b538SAndroid Build Coastguard Worker
206*6777b538SAndroid Build Coastguard Worker template <int fixed_capacity>
207*6777b538SAndroid Build Coastguard Worker class RawCanonOutput : public RawCanonOutputT<char, fixed_capacity> {};
208*6777b538SAndroid Build Coastguard Worker template <int fixed_capacity>
209*6777b538SAndroid Build Coastguard Worker class RawCanonOutputW : public RawCanonOutputT<char16_t, fixed_capacity> {};
210*6777b538SAndroid Build Coastguard Worker
211*6777b538SAndroid Build Coastguard Worker // Character set converter ----------------------------------------------------
212*6777b538SAndroid Build Coastguard Worker //
213*6777b538SAndroid Build Coastguard Worker // Converts query strings into a custom encoding. The embedder can supply an
214*6777b538SAndroid Build Coastguard Worker // implementation of this class to interface with their own character set
215*6777b538SAndroid Build Coastguard Worker // conversion libraries.
216*6777b538SAndroid Build Coastguard Worker //
217*6777b538SAndroid Build Coastguard Worker // Embedders will want to see the unit test for the ICU version.
218*6777b538SAndroid Build Coastguard Worker
COMPONENT_EXPORT(URL)219*6777b538SAndroid Build Coastguard Worker class COMPONENT_EXPORT(URL) CharsetConverter {
220*6777b538SAndroid Build Coastguard Worker public:
221*6777b538SAndroid Build Coastguard Worker CharsetConverter() {}
222*6777b538SAndroid Build Coastguard Worker virtual ~CharsetConverter() {}
223*6777b538SAndroid Build Coastguard Worker
224*6777b538SAndroid Build Coastguard Worker // Converts the given input string from UTF-16 to whatever output format the
225*6777b538SAndroid Build Coastguard Worker // converter supports. This is used only for the query encoding conversion,
226*6777b538SAndroid Build Coastguard Worker // which does not fail. Instead, the converter should insert "invalid
227*6777b538SAndroid Build Coastguard Worker // character" characters in the output for invalid sequences, and do the
228*6777b538SAndroid Build Coastguard Worker // best it can.
229*6777b538SAndroid Build Coastguard Worker //
230*6777b538SAndroid Build Coastguard Worker // If the input contains a character not representable in the output
231*6777b538SAndroid Build Coastguard Worker // character set, the converter should append the HTML entity sequence in
232*6777b538SAndroid Build Coastguard Worker // decimal, (such as "你") with escaping of the ampersand, number
233*6777b538SAndroid Build Coastguard Worker // sign, and semicolon (in the previous example it would be
234*6777b538SAndroid Build Coastguard Worker // "%26%2320320%3B"). This rule is based on what IE does in this situation.
235*6777b538SAndroid Build Coastguard Worker virtual void ConvertFromUTF16(const char16_t* input,
236*6777b538SAndroid Build Coastguard Worker int input_len,
237*6777b538SAndroid Build Coastguard Worker CanonOutput* output) = 0;
238*6777b538SAndroid Build Coastguard Worker };
239*6777b538SAndroid Build Coastguard Worker
// Schemes --------------------------------------------------------------------

// Types of a scheme representing the requirements on the data represented by
// the authority component of a URL with the scheme.
enum SchemeType {
  // The authority component of a URL with the scheme has the form
  // "username:password@host:port". The username and password entries are
  // optional; the host may not be empty. The default value of the port can be
  // omitted in serialization. This type occurs with network schemes like http,
  // https, and ftp.
  SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION,

  // The authority component of a URL with the scheme has the form "host:port",
  // and does not include username or password. The default value of the port
  // can be omitted in serialization. Used by inner URLs of filesystem URLs of
  // origins with network hosts, from which the username and password are
  // stripped.
  SCHEME_WITH_HOST_AND_PORT,

  // The authority component of a URL with the scheme has the form "host", and
  // does not include port, username, or password. Used when the hosts are not
  // network addresses; for example, schemes used internally by the browser.
  SCHEME_WITH_HOST,

  // A URL with the scheme doesn't have the authority component.
  SCHEME_WITHOUT_AUTHORITY,
};
264*6777b538SAndroid Build Coastguard Worker
265*6777b538SAndroid Build Coastguard Worker // Whitespace -----------------------------------------------------------------
266*6777b538SAndroid Build Coastguard Worker
267*6777b538SAndroid Build Coastguard Worker // Searches for whitespace that should be removed from the middle of URLs, and
268*6777b538SAndroid Build Coastguard Worker // removes it. Removed whitespace are tabs and newlines, but NOT spaces. Spaces
269*6777b538SAndroid Build Coastguard Worker // are preserved, which is what most browsers do. A pointer to the output will
270*6777b538SAndroid Build Coastguard Worker // be returned, and the length of that output will be in |output_len|.
271*6777b538SAndroid Build Coastguard Worker //
272*6777b538SAndroid Build Coastguard Worker // This should be called before parsing if whitespace removal is desired (which
273*6777b538SAndroid Build Coastguard Worker // it normally is when you are canonicalizing).
274*6777b538SAndroid Build Coastguard Worker //
275*6777b538SAndroid Build Coastguard Worker // If no whitespace is removed, this function will not use the buffer and will
276*6777b538SAndroid Build Coastguard Worker // return a pointer to the input, to avoid the extra copy. If modification is
277*6777b538SAndroid Build Coastguard Worker // required, the given |buffer| will be used and the returned pointer will
278*6777b538SAndroid Build Coastguard Worker // point to the beginning of the buffer.
279*6777b538SAndroid Build Coastguard Worker //
280*6777b538SAndroid Build Coastguard Worker // Therefore, callers should not use the buffer, since it may actually be empty,
281*6777b538SAndroid Build Coastguard Worker // use the computed pointer and |*output_len| instead.
282*6777b538SAndroid Build Coastguard Worker //
283*6777b538SAndroid Build Coastguard Worker // If |input| contained both removable whitespace and a raw `<` character,
284*6777b538SAndroid Build Coastguard Worker // |potentially_dangling_markup| will be set to `true`. Otherwise, it will be
285*6777b538SAndroid Build Coastguard Worker // left untouched.
286*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
287*6777b538SAndroid Build Coastguard Worker const char* RemoveURLWhitespace(const char* input,
288*6777b538SAndroid Build Coastguard Worker int input_len,
289*6777b538SAndroid Build Coastguard Worker CanonOutputT<char>* buffer,
290*6777b538SAndroid Build Coastguard Worker int* output_len,
291*6777b538SAndroid Build Coastguard Worker bool* potentially_dangling_markup);
292*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
293*6777b538SAndroid Build Coastguard Worker const char16_t* RemoveURLWhitespace(const char16_t* input,
294*6777b538SAndroid Build Coastguard Worker int input_len,
295*6777b538SAndroid Build Coastguard Worker CanonOutputT<char16_t>* buffer,
296*6777b538SAndroid Build Coastguard Worker int* output_len,
297*6777b538SAndroid Build Coastguard Worker bool* potentially_dangling_markup);
298*6777b538SAndroid Build Coastguard Worker
299*6777b538SAndroid Build Coastguard Worker // IDN ------------------------------------------------------------------------
300*6777b538SAndroid Build Coastguard Worker
301*6777b538SAndroid Build Coastguard Worker // Converts the Unicode input representing a hostname to ASCII using IDN rules.
302*6777b538SAndroid Build Coastguard Worker // The output must fall in the ASCII range, but will be encoded in UTF-16.
303*6777b538SAndroid Build Coastguard Worker //
304*6777b538SAndroid Build Coastguard Worker // On success, the output will be filled with the ASCII host name and it will
305*6777b538SAndroid Build Coastguard Worker // return true. Unlike most other canonicalization functions, this assumes that
306*6777b538SAndroid Build Coastguard Worker // the output is empty. The beginning of the host will be at offset 0, and
307*6777b538SAndroid Build Coastguard Worker // the length of the output will be set to the length of the new host name.
308*6777b538SAndroid Build Coastguard Worker //
309*6777b538SAndroid Build Coastguard Worker // On error, returns false. The output in this case is undefined.
310*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
311*6777b538SAndroid Build Coastguard Worker bool IDNToASCII(std::u16string_view src, CanonOutputW* output);
312*6777b538SAndroid Build Coastguard Worker
// Piece-by-piece canonicalizers ----------------------------------------------
//
// These individual canonicalizers append the canonicalized versions of the
// corresponding URL component to the given CanonOutput. The spec and the
// previously-identified range of that component are the input. The range of
// the canonicalized component will be written to the output component.
//
// These functions all append to the output so they can be chained. Make sure
// the output is empty when you start.
//
// These functions return boolean values indicating success. On failure, they
// will attempt to write something reasonable to the output so that, if
// displayed to the user, they will recognise it as something that's messed up.
// Nothing more should ever be done with these invalid URLs, however.
327*6777b538SAndroid Build Coastguard Worker
328*6777b538SAndroid Build Coastguard Worker // Scheme: Appends the scheme and colon to the URL. The output component will
329*6777b538SAndroid Build Coastguard Worker // indicate the range of characters up to but not including the colon.
330*6777b538SAndroid Build Coastguard Worker //
331*6777b538SAndroid Build Coastguard Worker // Canonical URLs always have a scheme. If the scheme is not present in the
332*6777b538SAndroid Build Coastguard Worker // input, this will just write the colon to indicate an empty scheme. Does not
333*6777b538SAndroid Build Coastguard Worker // append slashes which will be needed before any authority components for most
334*6777b538SAndroid Build Coastguard Worker // URLs.
335*6777b538SAndroid Build Coastguard Worker //
336*6777b538SAndroid Build Coastguard Worker // The 8-bit version requires UTF-8 encoding.
337*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
338*6777b538SAndroid Build Coastguard Worker bool CanonicalizeScheme(const char* spec,
339*6777b538SAndroid Build Coastguard Worker const Component& scheme,
340*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
341*6777b538SAndroid Build Coastguard Worker Component* out_scheme);
342*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
343*6777b538SAndroid Build Coastguard Worker bool CanonicalizeScheme(const char16_t* spec,
344*6777b538SAndroid Build Coastguard Worker const Component& scheme,
345*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
346*6777b538SAndroid Build Coastguard Worker Component* out_scheme);
347*6777b538SAndroid Build Coastguard Worker
348*6777b538SAndroid Build Coastguard Worker // User info: username/password. If present, this will add the delimiters so
349*6777b538SAndroid Build Coastguard Worker // the output will be "<username>:<password>@" or "<username>@". Empty
350*6777b538SAndroid Build Coastguard Worker // username/password pairs, or empty passwords, will get converted to
351*6777b538SAndroid Build Coastguard Worker // nonexistent in the canonical version.
352*6777b538SAndroid Build Coastguard Worker //
353*6777b538SAndroid Build Coastguard Worker // The components for the username and password refer to ranges in the
354*6777b538SAndroid Build Coastguard Worker // respective source strings. Usually, these will be the same string, which
355*6777b538SAndroid Build Coastguard Worker // is legal as long as the two components don't overlap.
356*6777b538SAndroid Build Coastguard Worker //
357*6777b538SAndroid Build Coastguard Worker // The 8-bit version requires UTF-8 encoding.
358*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
359*6777b538SAndroid Build Coastguard Worker bool CanonicalizeUserInfo(const char* username_source,
360*6777b538SAndroid Build Coastguard Worker const Component& username,
361*6777b538SAndroid Build Coastguard Worker const char* password_source,
362*6777b538SAndroid Build Coastguard Worker const Component& password,
363*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
364*6777b538SAndroid Build Coastguard Worker Component* out_username,
365*6777b538SAndroid Build Coastguard Worker Component* out_password);
366*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
367*6777b538SAndroid Build Coastguard Worker bool CanonicalizeUserInfo(const char16_t* username_source,
368*6777b538SAndroid Build Coastguard Worker const Component& username,
369*6777b538SAndroid Build Coastguard Worker const char16_t* password_source,
370*6777b538SAndroid Build Coastguard Worker const Component& password,
371*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
372*6777b538SAndroid Build Coastguard Worker Component* out_username,
373*6777b538SAndroid Build Coastguard Worker Component* out_password);
374*6777b538SAndroid Build Coastguard Worker
375*6777b538SAndroid Build Coastguard Worker // This structure holds detailed state exported from the IP/Host canonicalizers.
376*6777b538SAndroid Build Coastguard Worker // Additional fields may be added as callers require them.
377*6777b538SAndroid Build Coastguard Worker struct CanonHostInfo {
CanonHostInfoCanonHostInfo378*6777b538SAndroid Build Coastguard Worker CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {}
379*6777b538SAndroid Build Coastguard Worker
380*6777b538SAndroid Build Coastguard Worker // Convenience function to test if family is an IP address.
IsIPAddressCanonHostInfo381*6777b538SAndroid Build Coastguard Worker bool IsIPAddress() const { return family == IPV4 || family == IPV6; }
382*6777b538SAndroid Build Coastguard Worker
383*6777b538SAndroid Build Coastguard Worker // This field summarizes how the input was classified by the canonicalizer.
384*6777b538SAndroid Build Coastguard Worker enum Family {
385*6777b538SAndroid Build Coastguard Worker NEUTRAL, // - Doesn't resemble an IP address. As far as the IP
386*6777b538SAndroid Build Coastguard Worker // canonicalizer is concerned, it should be treated as a
387*6777b538SAndroid Build Coastguard Worker // hostname.
388*6777b538SAndroid Build Coastguard Worker BROKEN, // - Almost an IP, but was not canonicalized. This could be an
389*6777b538SAndroid Build Coastguard Worker // IPv4 address where truncation occurred, or something
390*6777b538SAndroid Build Coastguard Worker // containing the special characters :[] which did not parse
391*6777b538SAndroid Build Coastguard Worker // as an IPv6 address. Never attempt to connect to this
392*6777b538SAndroid Build Coastguard Worker // address, because it might actually succeed!
393*6777b538SAndroid Build Coastguard Worker IPV4, // - Successfully canonicalized as an IPv4 address.
394*6777b538SAndroid Build Coastguard Worker IPV6, // - Successfully canonicalized as an IPv6 address.
395*6777b538SAndroid Build Coastguard Worker };
396*6777b538SAndroid Build Coastguard Worker Family family;
397*6777b538SAndroid Build Coastguard Worker
398*6777b538SAndroid Build Coastguard Worker // If |family| is IPV4, then this is the number of nonempty dot-separated
399*6777b538SAndroid Build Coastguard Worker // components in the input text, from 1 to 4. If |family| is not IPV4,
400*6777b538SAndroid Build Coastguard Worker // this value is undefined.
401*6777b538SAndroid Build Coastguard Worker int num_ipv4_components;
402*6777b538SAndroid Build Coastguard Worker
403*6777b538SAndroid Build Coastguard Worker // Location of host within the canonicalized output.
404*6777b538SAndroid Build Coastguard Worker // CanonicalizeIPAddress() only sets this field if |family| is IPV4 or IPV6.
405*6777b538SAndroid Build Coastguard Worker // CanonicalizeHostVerbose() always sets it.
406*6777b538SAndroid Build Coastguard Worker Component out_host;
407*6777b538SAndroid Build Coastguard Worker
408*6777b538SAndroid Build Coastguard Worker // |address| contains the parsed IP Address (if any) in its first
409*6777b538SAndroid Build Coastguard Worker // AddressLength() bytes, in network order. If IsIPAddress() is false
410*6777b538SAndroid Build Coastguard Worker // AddressLength() will return zero and the content of |address| is undefined.
411*6777b538SAndroid Build Coastguard Worker unsigned char address[16];
412*6777b538SAndroid Build Coastguard Worker
413*6777b538SAndroid Build Coastguard Worker // Convenience function to calculate the length of an IP address corresponding
414*6777b538SAndroid Build Coastguard Worker // to the current IP version in |family|, if any. For use with |address|.
AddressLengthCanonHostInfo415*6777b538SAndroid Build Coastguard Worker int AddressLength() const {
416*6777b538SAndroid Build Coastguard Worker return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0);
417*6777b538SAndroid Build Coastguard Worker }
418*6777b538SAndroid Build Coastguard Worker };
419*6777b538SAndroid Build Coastguard Worker
420*6777b538SAndroid Build Coastguard Worker // Deprecated. Please call either CanonicalizeSpecialHost or
421*6777b538SAndroid Build Coastguard Worker // CanonicalizeNonSpecialHost.
422*6777b538SAndroid Build Coastguard Worker //
423*6777b538SAndroid Build Coastguard Worker // TODO(crbug.com/1416006): Check the callers of these functions.
424*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
425*6777b538SAndroid Build Coastguard Worker bool CanonicalizeHost(const char* spec,
426*6777b538SAndroid Build Coastguard Worker const Component& host,
427*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
428*6777b538SAndroid Build Coastguard Worker Component* out_host);
// 16-bit overload of the deprecated function above; takes the same parameters
// with |spec| as char16_t.
429*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
430*6777b538SAndroid Build Coastguard Worker bool CanonicalizeHost(const char16_t* spec,
431*6777b538SAndroid Build Coastguard Worker const Component& host,
432*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
433*6777b538SAndroid Build Coastguard Worker Component* out_host);
434*6777b538SAndroid Build Coastguard Worker
435*6777b538SAndroid Build Coastguard Worker // Host in special URLs.
436*6777b538SAndroid Build Coastguard Worker //
437*6777b538SAndroid Build Coastguard Worker // The 8-bit version requires UTF-8 encoding. Use this version when you only
438*6777b538SAndroid Build Coastguard Worker // need to know whether canonicalization succeeded.
// Use CanonicalizeSpecialHostVerbose instead when you also need to know whether
// the hostname was an IP address.
439*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
440*6777b538SAndroid Build Coastguard Worker bool CanonicalizeSpecialHost(const char* spec,
441*6777b538SAndroid Build Coastguard Worker const Component& host,
442*6777b538SAndroid Build Coastguard Worker CanonOutput& output,
443*6777b538SAndroid Build Coastguard Worker Component& out_host);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
444*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
445*6777b538SAndroid Build Coastguard Worker bool CanonicalizeSpecialHost(const char16_t* spec,
446*6777b538SAndroid Build Coastguard Worker const Component& host,
447*6777b538SAndroid Build Coastguard Worker CanonOutput& output,
448*6777b538SAndroid Build Coastguard Worker Component& out_host);
449*6777b538SAndroid Build Coastguard Worker
450*6777b538SAndroid Build Coastguard Worker // Deprecated. Please call either CanonicalizeSpecialHostVerbose or
451*6777b538SAndroid Build Coastguard Worker // CanonicalizeNonSpecialHostVerbose.
452*6777b538SAndroid Build Coastguard Worker //
453*6777b538SAndroid Build Coastguard Worker // TODO(crbug.com/1416006): Check the callers of these functions.
454*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
455*6777b538SAndroid Build Coastguard Worker void CanonicalizeHostVerbose(const char* spec,
456*6777b538SAndroid Build Coastguard Worker const Component& host,
457*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
458*6777b538SAndroid Build Coastguard Worker CanonHostInfo* host_info);
// 16-bit overload of the deprecated function above; takes the same parameters
// with |spec| as char16_t.
459*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
460*6777b538SAndroid Build Coastguard Worker void CanonicalizeHostVerbose(const char16_t* spec,
461*6777b538SAndroid Build Coastguard Worker const Component& host,
462*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
463*6777b538SAndroid Build Coastguard Worker CanonHostInfo* host_info);
464*6777b538SAndroid Build Coastguard Worker
465*6777b538SAndroid Build Coastguard Worker // Extended version of CanonicalizeSpecialHost, which returns additional
466*6777b538SAndroid Build Coastguard Worker // information. Use this when you need to know whether the hostname was an IP
467*6777b538SAndroid Build Coastguard Worker // address. A successful return is indicated by host_info.family != BROKEN. See
468*6777b538SAndroid Build Coastguard Worker // the definition of CanonHostInfo above for details.
469*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
470*6777b538SAndroid Build Coastguard Worker void CanonicalizeSpecialHostVerbose(const char* spec,
471*6777b538SAndroid Build Coastguard Worker const Component& host,
472*6777b538SAndroid Build Coastguard Worker CanonOutput& output,
473*6777b538SAndroid Build Coastguard Worker CanonHostInfo& host_info);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
474*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
475*6777b538SAndroid Build Coastguard Worker void CanonicalizeSpecialHostVerbose(const char16_t* spec,
476*6777b538SAndroid Build Coastguard Worker const Component& host,
477*6777b538SAndroid Build Coastguard Worker CanonOutput& output,
478*6777b538SAndroid Build Coastguard Worker CanonHostInfo& host_info);
479*6777b538SAndroid Build Coastguard Worker
480*6777b538SAndroid Build Coastguard Worker // Canonicalizes a string according to the host canonicalization rules. Unlike
481*6777b538SAndroid Build Coastguard Worker // CanonicalizeHost, this will not check for IP addresses, which can change the
482*6777b538SAndroid Build Coastguard Worker // meaning (and canonicalization) of the components. This means it is possible
483*6777b538SAndroid Build Coastguard Worker // to call this for sub-components of a host name without corruption.
484*6777b538SAndroid Build Coastguard Worker //
485*6777b538SAndroid Build Coastguard Worker // As an example, "01.02.03.04.com" is a canonical hostname. If you called
486*6777b538SAndroid Build Coastguard Worker // CanonicalizeHost on the substring "01.02.03.04", it would get "fixed" to
487*6777b538SAndroid Build Coastguard Worker // "1.2.3.4", which would produce an invalid host name when reassembled. This
488*6777b538SAndroid Build Coastguard Worker // can happen more than one might think because all numbers by themselves are
489*6777b538SAndroid Build Coastguard Worker // considered IP addresses; so "5" canonicalizes to "0.0.0.5".
490*6777b538SAndroid Build Coastguard Worker //
491*6777b538SAndroid Build Coastguard Worker // Be careful: Because Punycode works on each dot-separated substring as a
492*6777b538SAndroid Build Coastguard Worker // unit, you should only pass this function substrings that represent complete
493*6777b538SAndroid Build Coastguard Worker // dot-separated subcomponents of the original host. Even if you have ASCII
494*6777b538SAndroid Build Coastguard Worker // input, percent-escaped characters will have different meanings if split in
495*6777b538SAndroid Build Coastguard Worker // the middle.
496*6777b538SAndroid Build Coastguard Worker //
497*6777b538SAndroid Build Coastguard Worker // Returns true if the host was valid. This function will treat a 0-length
498*6777b538SAndroid Build Coastguard Worker // host as valid (because it's designed to be used for substrings) while the
499*6777b538SAndroid Build Coastguard Worker // full version above will mark empty hosts as broken.
500*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
501*6777b538SAndroid Build Coastguard Worker bool CanonicalizeHostSubstring(const char* spec,
502*6777b538SAndroid Build Coastguard Worker const Component& host,
503*6777b538SAndroid Build Coastguard Worker CanonOutput* output);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
504*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
505*6777b538SAndroid Build Coastguard Worker bool CanonicalizeHostSubstring(const char16_t* spec,
506*6777b538SAndroid Build Coastguard Worker const Component& host,
507*6777b538SAndroid Build Coastguard Worker CanonOutput* output);
508*6777b538SAndroid Build Coastguard Worker
509*6777b538SAndroid Build Coastguard Worker // Host in non-special URLs.
//
// NOTE(review): the bool result presumably reports whether canonicalization
// succeeded, mirroring CanonicalizeSpecialHost - confirm in the implementation.
510*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
511*6777b538SAndroid Build Coastguard Worker bool CanonicalizeNonSpecialHost(const char* spec,
512*6777b538SAndroid Build Coastguard Worker const Component& host,
513*6777b538SAndroid Build Coastguard Worker CanonOutput& output,
514*6777b538SAndroid Build Coastguard Worker Component& out_host);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
515*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
516*6777b538SAndroid Build Coastguard Worker bool CanonicalizeNonSpecialHost(const char16_t* spec,
517*6777b538SAndroid Build Coastguard Worker const Component& host,
518*6777b538SAndroid Build Coastguard Worker CanonOutput& output,
519*6777b538SAndroid Build Coastguard Worker Component& out_host);
520*6777b538SAndroid Build Coastguard Worker
521*6777b538SAndroid Build Coastguard Worker // Extended version of CanonicalizeNonSpecialHost, which returns additional
522*6777b538SAndroid Build Coastguard Worker // information. See CanonicalizeSpecialHostVerbose for details.
523*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
524*6777b538SAndroid Build Coastguard Worker void CanonicalizeNonSpecialHostVerbose(const char* spec,
525*6777b538SAndroid Build Coastguard Worker const Component& host,
526*6777b538SAndroid Build Coastguard Worker CanonOutput& output,
527*6777b538SAndroid Build Coastguard Worker CanonHostInfo& host_info);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
528*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
529*6777b538SAndroid Build Coastguard Worker void CanonicalizeNonSpecialHostVerbose(const char16_t* spec,
530*6777b538SAndroid Build Coastguard Worker const Component& host,
531*6777b538SAndroid Build Coastguard Worker CanonOutput& output,
532*6777b538SAndroid Build Coastguard Worker CanonHostInfo& host_info);
533*6777b538SAndroid Build Coastguard Worker
534*6777b538SAndroid Build Coastguard Worker // IP addresses.
535*6777b538SAndroid Build Coastguard Worker //
536*6777b538SAndroid Build Coastguard Worker // Tries to interpret the given host name as an IPv4 or IPv6 address. If it is
537*6777b538SAndroid Build Coastguard Worker // an IP address, it will canonicalize it as such, appending it to |output|.
538*6777b538SAndroid Build Coastguard Worker // Additional status information is returned via the |*host_info| parameter.
539*6777b538SAndroid Build Coastguard Worker // See the definition of CanonHostInfo above for details.
540*6777b538SAndroid Build Coastguard Worker //
541*6777b538SAndroid Build Coastguard Worker // This is called AUTOMATICALLY from the host canonicalizer, which ensures that
542*6777b538SAndroid Build Coastguard Worker // the input is unescaped and name-prepped, etc. It should not normally be
543*6777b538SAndroid Build Coastguard Worker // necessary or wise to call this directly.
544*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
545*6777b538SAndroid Build Coastguard Worker void CanonicalizeIPAddress(const char* spec,
546*6777b538SAndroid Build Coastguard Worker const Component& host,
547*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
548*6777b538SAndroid Build Coastguard Worker CanonHostInfo* host_info);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
549*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
550*6777b538SAndroid Build Coastguard Worker void CanonicalizeIPAddress(const char16_t* spec,
551*6777b538SAndroid Build Coastguard Worker const Component& host,
552*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
553*6777b538SAndroid Build Coastguard Worker CanonHostInfo* host_info);
554*6777b538SAndroid Build Coastguard Worker
555*6777b538SAndroid Build Coastguard Worker // Similar to CanonicalizeIPAddress, but supports only IPv6 addresses.
556*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
557*6777b538SAndroid Build Coastguard Worker void CanonicalizeIPv6Address(const char* spec,
558*6777b538SAndroid Build Coastguard Worker const Component& host,
559*6777b538SAndroid Build Coastguard Worker CanonOutput& output,
560*6777b538SAndroid Build Coastguard Worker CanonHostInfo& host_info);
561*6777b538SAndroid Build Coastguard Worker
// 16-bit overload; takes the same parameters with |spec| as char16_t.
562*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
563*6777b538SAndroid Build Coastguard Worker void CanonicalizeIPv6Address(const char16_t* spec,
564*6777b538SAndroid Build Coastguard Worker const Component& host,
565*6777b538SAndroid Build Coastguard Worker CanonOutput& output,
566*6777b538SAndroid Build Coastguard Worker CanonHostInfo& host_info);
567*6777b538SAndroid Build Coastguard Worker
568*6777b538SAndroid Build Coastguard Worker // Port: this function will add the colon for the port if a port is present.
569*6777b538SAndroid Build Coastguard Worker // The caller can pass PORT_UNSPECIFIED as the default_port_for_scheme argument
570*6777b538SAndroid Build Coastguard Worker // if there is no default port.
571*6777b538SAndroid Build Coastguard Worker //
572*6777b538SAndroid Build Coastguard Worker // The 8-bit version requires UTF-8 encoding.
573*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
574*6777b538SAndroid Build Coastguard Worker bool CanonicalizePort(const char* spec,
575*6777b538SAndroid Build Coastguard Worker const Component& port,
576*6777b538SAndroid Build Coastguard Worker int default_port_for_scheme,
577*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
578*6777b538SAndroid Build Coastguard Worker Component* out_port);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
579*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
580*6777b538SAndroid Build Coastguard Worker bool CanonicalizePort(const char16_t* spec,
581*6777b538SAndroid Build Coastguard Worker const Component& port,
582*6777b538SAndroid Build Coastguard Worker int default_port_for_scheme,
583*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
584*6777b538SAndroid Build Coastguard Worker Component* out_port);
585*6777b538SAndroid Build Coastguard Worker
586*6777b538SAndroid Build Coastguard Worker // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED
587*6777b538SAndroid Build Coastguard Worker // if the scheme is unknown. Based on https://url.spec.whatwg.org/#default-port
//
// NOTE(review): |scheme_len| is presumably the number of chars in |scheme|
// (which would then not need to be NUL-terminated) - confirm against callers.
588*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
589*6777b538SAndroid Build Coastguard Worker int DefaultPortForScheme(const char* scheme, int scheme_len);
590*6777b538SAndroid Build Coastguard Worker
591*6777b538SAndroid Build Coastguard Worker // Path. If the input does not begin with a slash (including if the input is
592*6777b538SAndroid Build Coastguard Worker // empty), we'll prepend a slash to the path to make it canonical.
593*6777b538SAndroid Build Coastguard Worker //
594*6777b538SAndroid Build Coastguard Worker // The 8-bit version assumes UTF-8 encoding, but does not verify the validity
595*6777b538SAndroid Build Coastguard Worker // of the UTF-8 (i.e., you can have invalid UTF-8 sequences, invalid
596*6777b538SAndroid Build Coastguard Worker // characters, etc.). Normally, URLs will come in as UTF-16, so this isn't
597*6777b538SAndroid Build Coastguard Worker // an issue. Somebody giving us an 8-bit path is responsible for generating
598*6777b538SAndroid Build Coastguard Worker // the path that the server expects (we'll escape high-bit characters), so
599*6777b538SAndroid Build Coastguard Worker // if something is invalid, it's their problem.
600*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
601*6777b538SAndroid Build Coastguard Worker bool CanonicalizePath(const char* spec,
602*6777b538SAndroid Build Coastguard Worker const Component& path,
603*6777b538SAndroid Build Coastguard Worker CanonMode canon_mode,
604*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
605*6777b538SAndroid Build Coastguard Worker Component* out_path);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
606*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
607*6777b538SAndroid Build Coastguard Worker bool CanonicalizePath(const char16_t* spec,
608*6777b538SAndroid Build Coastguard Worker const Component& path,
609*6777b538SAndroid Build Coastguard Worker CanonMode canon_mode,
610*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
611*6777b538SAndroid Build Coastguard Worker Component* out_path);
612*6777b538SAndroid Build Coastguard Worker
613*6777b538SAndroid Build Coastguard Worker // Deprecated. Please pass CanonMode explicitly.
614*6777b538SAndroid Build Coastguard Worker //
615*6777b538SAndroid Build Coastguard Worker // These functions are also used in net/third_party code, so removing them
616*6777b538SAndroid Build Coastguard Worker // requires several steps.
617*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
618*6777b538SAndroid Build Coastguard Worker bool CanonicalizePath(const char* spec,
619*6777b538SAndroid Build Coastguard Worker const Component& path,
620*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
621*6777b538SAndroid Build Coastguard Worker Component* out_path);
// 16-bit overload of the deprecated function above; takes the same parameters
// with |spec| as char16_t.
622*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
623*6777b538SAndroid Build Coastguard Worker bool CanonicalizePath(const char16_t* spec,
624*6777b538SAndroid Build Coastguard Worker const Component& path,
625*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
626*6777b538SAndroid Build Coastguard Worker Component* out_path);
627*6777b538SAndroid Build Coastguard Worker
628*6777b538SAndroid Build Coastguard Worker // Like CanonicalizePath(), but does not assume that it is operating on the
629*6777b538SAndroid Build Coastguard Worker // entire path. It therefore does not prepend a slash, etc.
630*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
631*6777b538SAndroid Build Coastguard Worker bool CanonicalizePartialPath(const char* spec,
632*6777b538SAndroid Build Coastguard Worker const Component& path,
633*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
634*6777b538SAndroid Build Coastguard Worker Component* out_path);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
635*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
636*6777b538SAndroid Build Coastguard Worker bool CanonicalizePartialPath(const char16_t* spec,
637*6777b538SAndroid Build Coastguard Worker const Component& path,
638*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
639*6777b538SAndroid Build Coastguard Worker Component* out_path);
640*6777b538SAndroid Build Coastguard Worker
641*6777b538SAndroid Build Coastguard Worker // Canonicalizes the input as a file path. This is like CanonicalizePath except
642*6777b538SAndroid Build Coastguard Worker // that it also handles Windows drive specs. For example, the path can begin
643*6777b538SAndroid Build Coastguard Worker // with "c|\" and it will get properly canonicalized to "C:/".
644*6777b538SAndroid Build Coastguard Worker // The string will be appended to |*output| and |*out_path| will be updated.
645*6777b538SAndroid Build Coastguard Worker //
646*6777b538SAndroid Build Coastguard Worker // The 8-bit version requires UTF-8 encoding.
647*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
648*6777b538SAndroid Build Coastguard Worker bool FileCanonicalizePath(const char* spec,
649*6777b538SAndroid Build Coastguard Worker const Component& path,
650*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
651*6777b538SAndroid Build Coastguard Worker Component* out_path);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
652*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
653*6777b538SAndroid Build Coastguard Worker bool FileCanonicalizePath(const char16_t* spec,
654*6777b538SAndroid Build Coastguard Worker const Component& path,
655*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
656*6777b538SAndroid Build Coastguard Worker Component* out_path);
657*6777b538SAndroid Build Coastguard Worker
658*6777b538SAndroid Build Coastguard Worker // Query: Prepends the ? if needed.
659*6777b538SAndroid Build Coastguard Worker //
660*6777b538SAndroid Build Coastguard Worker // The 8-bit version requires the input to be UTF-8 encoded. Incorrectly
661*6777b538SAndroid Build Coastguard Worker // encoded characters (in UTF-8 or UTF-16) will be replaced with the Unicode
662*6777b538SAndroid Build Coastguard Worker // "invalid character." This function cannot fail; we always just try to do
663*6777b538SAndroid Build Coastguard Worker // our best for crazy input here since web pages can set it themselves.
664*6777b538SAndroid Build Coastguard Worker //
665*6777b538SAndroid Build Coastguard Worker // This will convert the given input into the output encoding that the given
666*6777b538SAndroid Build Coastguard Worker // character set converter object provides. The converter will only be called
667*6777b538SAndroid Build Coastguard Worker // if necessary; for ASCII input, no conversions are necessary.
668*6777b538SAndroid Build Coastguard Worker //
669*6777b538SAndroid Build Coastguard Worker // The converter can be NULL. In this case, the output encoding will be UTF-8.
670*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
671*6777b538SAndroid Build Coastguard Worker void CanonicalizeQuery(const char* spec,
672*6777b538SAndroid Build Coastguard Worker const Component& query,
673*6777b538SAndroid Build Coastguard Worker CharsetConverter* converter,
674*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
675*6777b538SAndroid Build Coastguard Worker Component* out_query);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
676*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
677*6777b538SAndroid Build Coastguard Worker void CanonicalizeQuery(const char16_t* spec,
678*6777b538SAndroid Build Coastguard Worker const Component& query,
679*6777b538SAndroid Build Coastguard Worker CharsetConverter* converter,
680*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
681*6777b538SAndroid Build Coastguard Worker Component* out_query);
682*6777b538SAndroid Build Coastguard Worker
683*6777b538SAndroid Build Coastguard Worker // Ref: Prepends the # if needed. The output will be UTF-8 (this is the only
684*6777b538SAndroid Build Coastguard Worker // canonicalizer that does not produce ASCII output). The output is
685*6777b538SAndroid Build Coastguard Worker // guaranteed to be valid UTF-8.
686*6777b538SAndroid Build Coastguard Worker //
687*6777b538SAndroid Build Coastguard Worker // This function will not fail. If the input is invalid UTF-8/UTF-16, we'll use
688*6777b538SAndroid Build Coastguard Worker // the "Unicode replacement character" for the confusing bits and copy the rest.
//
// NOTE(review): despite their names, |path| and |out_path| presumably describe
// the ref component (input and canonicalized output respectively) - confirm in
// the implementation.
689*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
690*6777b538SAndroid Build Coastguard Worker void CanonicalizeRef(const char* spec,
691*6777b538SAndroid Build Coastguard Worker const Component& path,
692*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
693*6777b538SAndroid Build Coastguard Worker Component* out_path);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
694*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
695*6777b538SAndroid Build Coastguard Worker void CanonicalizeRef(const char16_t* spec,
696*6777b538SAndroid Build Coastguard Worker const Component& path,
697*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
698*6777b538SAndroid Build Coastguard Worker Component* out_path);
699*6777b538SAndroid Build Coastguard Worker
700*6777b538SAndroid Build Coastguard Worker // Full canonicalizer ---------------------------------------------------------
701*6777b538SAndroid Build Coastguard Worker //
702*6777b538SAndroid Build Coastguard Worker // These functions replace any string contents, rather than append as above.
703*6777b538SAndroid Build Coastguard Worker // See the above piece-by-piece functions for information specific to
704*6777b538SAndroid Build Coastguard Worker // canonicalizing individual components.
705*6777b538SAndroid Build Coastguard Worker //
706*6777b538SAndroid Build Coastguard Worker // The output will be ASCII except the reference fragment, which may be UTF-8.
707*6777b538SAndroid Build Coastguard Worker //
708*6777b538SAndroid Build Coastguard Worker // The 8-bit versions require UTF-8 encoding.
709*6777b538SAndroid Build Coastguard Worker
710*6777b538SAndroid Build Coastguard Worker // Use for standard URLs with authorities and paths.
711*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
712*6777b538SAndroid Build Coastguard Worker bool CanonicalizeStandardURL(const char* spec,
713*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
714*6777b538SAndroid Build Coastguard Worker SchemeType scheme_type,
715*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
716*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
717*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
718*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
719*6777b538SAndroid Build Coastguard Worker bool CanonicalizeStandardURL(const char16_t* spec,
720*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
721*6777b538SAndroid Build Coastguard Worker SchemeType scheme_type,
722*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
723*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
724*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
725*6777b538SAndroid Build Coastguard Worker
726*6777b538SAndroid Build Coastguard Worker // Use for non-special URLs.
//
// Unlike most of the neighbouring full-URL canonicalizers, |output| and
// |new_parsed| are taken by reference rather than by pointer.
727*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
728*6777b538SAndroid Build Coastguard Worker bool CanonicalizeNonSpecialURL(const char* spec,
729*6777b538SAndroid Build Coastguard Worker int spec_len,
730*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
731*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
732*6777b538SAndroid Build Coastguard Worker CanonOutput& output,
733*6777b538SAndroid Build Coastguard Worker Parsed& new_parsed);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
734*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
735*6777b538SAndroid Build Coastguard Worker bool CanonicalizeNonSpecialURL(const char16_t* spec,
736*6777b538SAndroid Build Coastguard Worker int spec_len,
737*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
738*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
739*6777b538SAndroid Build Coastguard Worker CanonOutput& output,
740*6777b538SAndroid Build Coastguard Worker Parsed& new_parsed);
741*6777b538SAndroid Build Coastguard Worker
742*6777b538SAndroid Build Coastguard Worker // Use for file URLs.
743*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
744*6777b538SAndroid Build Coastguard Worker bool CanonicalizeFileURL(const char* spec,
745*6777b538SAndroid Build Coastguard Worker int spec_len,
746*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
747*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
748*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
749*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
750*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
751*6777b538SAndroid Build Coastguard Worker bool CanonicalizeFileURL(const char16_t* spec,
752*6777b538SAndroid Build Coastguard Worker int spec_len,
753*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
754*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
755*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
756*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
757*6777b538SAndroid Build Coastguard Worker
758*6777b538SAndroid Build Coastguard Worker // Use for filesystem URLs.
759*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
760*6777b538SAndroid Build Coastguard Worker bool CanonicalizeFileSystemURL(const char* spec,
761*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
762*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
763*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
764*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
765*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
766*6777b538SAndroid Build Coastguard Worker bool CanonicalizeFileSystemURL(const char16_t* spec,
767*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
768*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
769*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
770*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
771*6777b538SAndroid Build Coastguard Worker
772*6777b538SAndroid Build Coastguard Worker // Use for path URLs such as javascript: URLs. This does not modify the path in
773*6777b538SAndroid Build Coastguard Worker // any way, for example, by escaping it.
774*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
775*6777b538SAndroid Build Coastguard Worker bool CanonicalizePathURL(const char* spec,
776*6777b538SAndroid Build Coastguard Worker int spec_len,
777*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
778*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
779*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
// 16-bit overload; takes the same parameters with |spec| as char16_t.
780*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
781*6777b538SAndroid Build Coastguard Worker bool CanonicalizePathURL(const char16_t* spec,
782*6777b538SAndroid Build Coastguard Worker int spec_len,
783*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
784*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
785*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
786*6777b538SAndroid Build Coastguard Worker
787*6777b538SAndroid Build Coastguard Worker // Use to canonicalize just the path component of a "path" URL; e.g. the
788*6777b538SAndroid Build Coastguard Worker // path of a javascript URL.
789*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
790*6777b538SAndroid Build Coastguard Worker void CanonicalizePathURLPath(const char* source,
791*6777b538SAndroid Build Coastguard Worker const Component& component,
792*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
793*6777b538SAndroid Build Coastguard Worker Component* new_component);
794*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
795*6777b538SAndroid Build Coastguard Worker void CanonicalizePathURLPath(const char16_t* source,
796*6777b538SAndroid Build Coastguard Worker const Component& component,
797*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
798*6777b538SAndroid Build Coastguard Worker Component* new_component);
799*6777b538SAndroid Build Coastguard Worker
// Use for mailto URLs. This "canonicalizes" the URL into a path and query
// component. It does not attempt to merge "to" fields. It uses UTF-8 for
// the query encoding if there is a query. This is because a mailto URL is
// really intended for an external mail program, so the page encoding and
// anything else that would normally influence a query encoding is irrelevant.
805*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
806*6777b538SAndroid Build Coastguard Worker bool CanonicalizeMailtoURL(const char* spec,
807*6777b538SAndroid Build Coastguard Worker int spec_len,
808*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
809*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
810*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
811*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
812*6777b538SAndroid Build Coastguard Worker bool CanonicalizeMailtoURL(const char16_t* spec,
813*6777b538SAndroid Build Coastguard Worker int spec_len,
814*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
815*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
816*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
817*6777b538SAndroid Build Coastguard Worker
818*6777b538SAndroid Build Coastguard Worker // Part replacer --------------------------------------------------------------
819*6777b538SAndroid Build Coastguard Worker
820*6777b538SAndroid Build Coastguard Worker // Internal structure used for storing separate strings for each component.
821*6777b538SAndroid Build Coastguard Worker // The basic canonicalization functions use this structure internally so that
822*6777b538SAndroid Build Coastguard Worker // component replacement (different strings for different components) can be
823*6777b538SAndroid Build Coastguard Worker // treated on the same code path as regular canonicalization (the same string
824*6777b538SAndroid Build Coastguard Worker // for each component).
825*6777b538SAndroid Build Coastguard Worker //
826*6777b538SAndroid Build Coastguard Worker // A Parsed structure usually goes along with this. Those components identify
827*6777b538SAndroid Build Coastguard Worker // offsets within these strings, so that they can all be in the same string,
828*6777b538SAndroid Build Coastguard Worker // or spread arbitrarily across different ones.
829*6777b538SAndroid Build Coastguard Worker //
830*6777b538SAndroid Build Coastguard Worker // This structures does not own any data. It is the caller's responsibility to
831*6777b538SAndroid Build Coastguard Worker // ensure that the data the pointers point to stays in scope and is not
832*6777b538SAndroid Build Coastguard Worker // modified.
833*6777b538SAndroid Build Coastguard Worker template <typename CHAR>
834*6777b538SAndroid Build Coastguard Worker struct URLComponentSource {
835*6777b538SAndroid Build Coastguard Worker // Constructor normally used by callers wishing to replace components. This
836*6777b538SAndroid Build Coastguard Worker // will make them all NULL, which is no replacement. The caller would then
837*6777b538SAndroid Build Coastguard Worker // override the components they want to replace.
URLComponentSourceURLComponentSource838*6777b538SAndroid Build Coastguard Worker URLComponentSource()
839*6777b538SAndroid Build Coastguard Worker : scheme(nullptr),
840*6777b538SAndroid Build Coastguard Worker username(nullptr),
841*6777b538SAndroid Build Coastguard Worker password(nullptr),
842*6777b538SAndroid Build Coastguard Worker host(nullptr),
843*6777b538SAndroid Build Coastguard Worker port(nullptr),
844*6777b538SAndroid Build Coastguard Worker path(nullptr),
845*6777b538SAndroid Build Coastguard Worker query(nullptr),
846*6777b538SAndroid Build Coastguard Worker ref(nullptr) {}
847*6777b538SAndroid Build Coastguard Worker
848*6777b538SAndroid Build Coastguard Worker // Constructor normally used internally to initialize all the components to
849*6777b538SAndroid Build Coastguard Worker // point to the same spec.
URLComponentSourceURLComponentSource850*6777b538SAndroid Build Coastguard Worker explicit URLComponentSource(const CHAR* default_value)
851*6777b538SAndroid Build Coastguard Worker : scheme(default_value),
852*6777b538SAndroid Build Coastguard Worker username(default_value),
853*6777b538SAndroid Build Coastguard Worker password(default_value),
854*6777b538SAndroid Build Coastguard Worker host(default_value),
855*6777b538SAndroid Build Coastguard Worker port(default_value),
856*6777b538SAndroid Build Coastguard Worker path(default_value),
857*6777b538SAndroid Build Coastguard Worker query(default_value),
858*6777b538SAndroid Build Coastguard Worker ref(default_value) {}
859*6777b538SAndroid Build Coastguard Worker
860*6777b538SAndroid Build Coastguard Worker // This field is not a raw_ptr<> because it was filtered by the rewriter for:
861*6777b538SAndroid Build Coastguard Worker // #addr-of
862*6777b538SAndroid Build Coastguard Worker RAW_PTR_EXCLUSION const CHAR* scheme;
863*6777b538SAndroid Build Coastguard Worker // This field is not a raw_ptr<> because it was filtered by the rewriter for:
864*6777b538SAndroid Build Coastguard Worker // #addr-of
865*6777b538SAndroid Build Coastguard Worker RAW_PTR_EXCLUSION const CHAR* username;
866*6777b538SAndroid Build Coastguard Worker // This field is not a raw_ptr<> because it was filtered by the rewriter for:
867*6777b538SAndroid Build Coastguard Worker // #addr-of
868*6777b538SAndroid Build Coastguard Worker RAW_PTR_EXCLUSION const CHAR* password;
869*6777b538SAndroid Build Coastguard Worker // This field is not a raw_ptr<> because it was filtered by the rewriter for:
870*6777b538SAndroid Build Coastguard Worker // #addr-of
871*6777b538SAndroid Build Coastguard Worker RAW_PTR_EXCLUSION const CHAR* host;
872*6777b538SAndroid Build Coastguard Worker // This field is not a raw_ptr<> because it was filtered by the rewriter for:
873*6777b538SAndroid Build Coastguard Worker // #addr-of
874*6777b538SAndroid Build Coastguard Worker RAW_PTR_EXCLUSION const CHAR* port;
875*6777b538SAndroid Build Coastguard Worker // This field is not a raw_ptr<> because it was filtered by the rewriter for:
876*6777b538SAndroid Build Coastguard Worker // #addr-of
877*6777b538SAndroid Build Coastguard Worker RAW_PTR_EXCLUSION const CHAR* path;
878*6777b538SAndroid Build Coastguard Worker // This field is not a raw_ptr<> because it was filtered by the rewriter for:
879*6777b538SAndroid Build Coastguard Worker // #addr-of
880*6777b538SAndroid Build Coastguard Worker RAW_PTR_EXCLUSION const CHAR* query;
881*6777b538SAndroid Build Coastguard Worker // This field is not a raw_ptr<> because it was filtered by the rewriter for:
882*6777b538SAndroid Build Coastguard Worker // #addr-of
883*6777b538SAndroid Build Coastguard Worker RAW_PTR_EXCLUSION const CHAR* ref;
884*6777b538SAndroid Build Coastguard Worker };
885*6777b538SAndroid Build Coastguard Worker
886*6777b538SAndroid Build Coastguard Worker // This structure encapsulates information on modifying a URL. Each component
887*6777b538SAndroid Build Coastguard Worker // may either be left unchanged, replaced, or deleted.
888*6777b538SAndroid Build Coastguard Worker //
889*6777b538SAndroid Build Coastguard Worker // By default, each component is unchanged. For those components that should be
890*6777b538SAndroid Build Coastguard Worker // modified, call either Set* or Clear* to modify it.
891*6777b538SAndroid Build Coastguard Worker //
892*6777b538SAndroid Build Coastguard Worker // The string passed to Set* functions DOES NOT GET COPIED AND MUST BE KEPT
893*6777b538SAndroid Build Coastguard Worker // IN SCOPE BY THE CALLER for as long as this object exists!
894*6777b538SAndroid Build Coastguard Worker //
895*6777b538SAndroid Build Coastguard Worker // Prefer the 8-bit replacement version if possible since it is more efficient.
896*6777b538SAndroid Build Coastguard Worker template <typename CHAR>
897*6777b538SAndroid Build Coastguard Worker class Replacements {
898*6777b538SAndroid Build Coastguard Worker public:
Replacements()899*6777b538SAndroid Build Coastguard Worker Replacements() {}
900*6777b538SAndroid Build Coastguard Worker
901*6777b538SAndroid Build Coastguard Worker // Scheme
SetScheme(const CHAR * s,const Component & comp)902*6777b538SAndroid Build Coastguard Worker void SetScheme(const CHAR* s, const Component& comp) {
903*6777b538SAndroid Build Coastguard Worker sources_.scheme = s;
904*6777b538SAndroid Build Coastguard Worker components_.scheme = comp;
905*6777b538SAndroid Build Coastguard Worker }
906*6777b538SAndroid Build Coastguard Worker // Note: we don't have a ClearScheme since this doesn't make any sense.
IsSchemeOverridden()907*6777b538SAndroid Build Coastguard Worker bool IsSchemeOverridden() const { return sources_.scheme != NULL; }
908*6777b538SAndroid Build Coastguard Worker
909*6777b538SAndroid Build Coastguard Worker // Username
SetUsername(const CHAR * s,const Component & comp)910*6777b538SAndroid Build Coastguard Worker void SetUsername(const CHAR* s, const Component& comp) {
911*6777b538SAndroid Build Coastguard Worker sources_.username = s;
912*6777b538SAndroid Build Coastguard Worker components_.username = comp;
913*6777b538SAndroid Build Coastguard Worker }
ClearUsername()914*6777b538SAndroid Build Coastguard Worker void ClearUsername() {
915*6777b538SAndroid Build Coastguard Worker sources_.username = Placeholder();
916*6777b538SAndroid Build Coastguard Worker components_.username = Component();
917*6777b538SAndroid Build Coastguard Worker }
IsUsernameOverridden()918*6777b538SAndroid Build Coastguard Worker bool IsUsernameOverridden() const { return sources_.username != NULL; }
919*6777b538SAndroid Build Coastguard Worker
920*6777b538SAndroid Build Coastguard Worker // Password
SetPassword(const CHAR * s,const Component & comp)921*6777b538SAndroid Build Coastguard Worker void SetPassword(const CHAR* s, const Component& comp) {
922*6777b538SAndroid Build Coastguard Worker sources_.password = s;
923*6777b538SAndroid Build Coastguard Worker components_.password = comp;
924*6777b538SAndroid Build Coastguard Worker }
ClearPassword()925*6777b538SAndroid Build Coastguard Worker void ClearPassword() {
926*6777b538SAndroid Build Coastguard Worker sources_.password = Placeholder();
927*6777b538SAndroid Build Coastguard Worker components_.password = Component();
928*6777b538SAndroid Build Coastguard Worker }
IsPasswordOverridden()929*6777b538SAndroid Build Coastguard Worker bool IsPasswordOverridden() const { return sources_.password != NULL; }
930*6777b538SAndroid Build Coastguard Worker
931*6777b538SAndroid Build Coastguard Worker // Host
SetHost(const CHAR * s,const Component & comp)932*6777b538SAndroid Build Coastguard Worker void SetHost(const CHAR* s, const Component& comp) {
933*6777b538SAndroid Build Coastguard Worker sources_.host = s;
934*6777b538SAndroid Build Coastguard Worker components_.host = comp;
935*6777b538SAndroid Build Coastguard Worker }
ClearHost()936*6777b538SAndroid Build Coastguard Worker void ClearHost() {
937*6777b538SAndroid Build Coastguard Worker sources_.host = Placeholder();
938*6777b538SAndroid Build Coastguard Worker components_.host = Component();
939*6777b538SAndroid Build Coastguard Worker }
IsHostOverridden()940*6777b538SAndroid Build Coastguard Worker bool IsHostOverridden() const { return sources_.host != NULL; }
941*6777b538SAndroid Build Coastguard Worker
942*6777b538SAndroid Build Coastguard Worker // Port
SetPort(const CHAR * s,const Component & comp)943*6777b538SAndroid Build Coastguard Worker void SetPort(const CHAR* s, const Component& comp) {
944*6777b538SAndroid Build Coastguard Worker sources_.port = s;
945*6777b538SAndroid Build Coastguard Worker components_.port = comp;
946*6777b538SAndroid Build Coastguard Worker }
ClearPort()947*6777b538SAndroid Build Coastguard Worker void ClearPort() {
948*6777b538SAndroid Build Coastguard Worker sources_.port = Placeholder();
949*6777b538SAndroid Build Coastguard Worker components_.port = Component();
950*6777b538SAndroid Build Coastguard Worker }
IsPortOverridden()951*6777b538SAndroid Build Coastguard Worker bool IsPortOverridden() const { return sources_.port != NULL; }
952*6777b538SAndroid Build Coastguard Worker
953*6777b538SAndroid Build Coastguard Worker // Path
SetPath(const CHAR * s,const Component & comp)954*6777b538SAndroid Build Coastguard Worker void SetPath(const CHAR* s, const Component& comp) {
955*6777b538SAndroid Build Coastguard Worker sources_.path = s;
956*6777b538SAndroid Build Coastguard Worker components_.path = comp;
957*6777b538SAndroid Build Coastguard Worker }
ClearPath()958*6777b538SAndroid Build Coastguard Worker void ClearPath() {
959*6777b538SAndroid Build Coastguard Worker sources_.path = Placeholder();
960*6777b538SAndroid Build Coastguard Worker components_.path = Component();
961*6777b538SAndroid Build Coastguard Worker }
IsPathOverridden()962*6777b538SAndroid Build Coastguard Worker bool IsPathOverridden() const { return sources_.path != NULL; }
963*6777b538SAndroid Build Coastguard Worker
964*6777b538SAndroid Build Coastguard Worker // Query
SetQuery(const CHAR * s,const Component & comp)965*6777b538SAndroid Build Coastguard Worker void SetQuery(const CHAR* s, const Component& comp) {
966*6777b538SAndroid Build Coastguard Worker sources_.query = s;
967*6777b538SAndroid Build Coastguard Worker components_.query = comp;
968*6777b538SAndroid Build Coastguard Worker }
ClearQuery()969*6777b538SAndroid Build Coastguard Worker void ClearQuery() {
970*6777b538SAndroid Build Coastguard Worker sources_.query = Placeholder();
971*6777b538SAndroid Build Coastguard Worker components_.query = Component();
972*6777b538SAndroid Build Coastguard Worker }
IsQueryOverridden()973*6777b538SAndroid Build Coastguard Worker bool IsQueryOverridden() const { return sources_.query != NULL; }
974*6777b538SAndroid Build Coastguard Worker
975*6777b538SAndroid Build Coastguard Worker // Ref
SetRef(const CHAR * s,const Component & comp)976*6777b538SAndroid Build Coastguard Worker void SetRef(const CHAR* s, const Component& comp) {
977*6777b538SAndroid Build Coastguard Worker sources_.ref = s;
978*6777b538SAndroid Build Coastguard Worker components_.ref = comp;
979*6777b538SAndroid Build Coastguard Worker }
ClearRef()980*6777b538SAndroid Build Coastguard Worker void ClearRef() {
981*6777b538SAndroid Build Coastguard Worker sources_.ref = Placeholder();
982*6777b538SAndroid Build Coastguard Worker components_.ref = Component();
983*6777b538SAndroid Build Coastguard Worker }
IsRefOverridden()984*6777b538SAndroid Build Coastguard Worker bool IsRefOverridden() const { return sources_.ref != NULL; }
985*6777b538SAndroid Build Coastguard Worker
986*6777b538SAndroid Build Coastguard Worker // Getters for the internal data. See the variables below for how the
987*6777b538SAndroid Build Coastguard Worker // information is encoded.
sources()988*6777b538SAndroid Build Coastguard Worker const URLComponentSource<CHAR>& sources() const { return sources_; }
components()989*6777b538SAndroid Build Coastguard Worker const Parsed& components() const { return components_; }
990*6777b538SAndroid Build Coastguard Worker
991*6777b538SAndroid Build Coastguard Worker private:
992*6777b538SAndroid Build Coastguard Worker // Returns a pointer to a static empty string that is used as a placeholder
993*6777b538SAndroid Build Coastguard Worker // to indicate a component should be deleted (see below).
Placeholder()994*6777b538SAndroid Build Coastguard Worker const CHAR* Placeholder() {
995*6777b538SAndroid Build Coastguard Worker static const CHAR empty_cstr = 0;
996*6777b538SAndroid Build Coastguard Worker return &empty_cstr;
997*6777b538SAndroid Build Coastguard Worker }
998*6777b538SAndroid Build Coastguard Worker
999*6777b538SAndroid Build Coastguard Worker // We support three states:
1000*6777b538SAndroid Build Coastguard Worker //
1001*6777b538SAndroid Build Coastguard Worker // Action | Source Component
1002*6777b538SAndroid Build Coastguard Worker // -----------------------+--------------------------------------------------
1003*6777b538SAndroid Build Coastguard Worker // Don't change component | NULL (unused)
1004*6777b538SAndroid Build Coastguard Worker // Replace component | (replacement string) (replacement component)
1005*6777b538SAndroid Build Coastguard Worker // Delete component | (non-NULL) (invalid component: (0,-1))
1006*6777b538SAndroid Build Coastguard Worker //
1007*6777b538SAndroid Build Coastguard Worker // We use a pointer to the empty string for the source when the component
1008*6777b538SAndroid Build Coastguard Worker // should be deleted.
1009*6777b538SAndroid Build Coastguard Worker URLComponentSource<CHAR> sources_;
1010*6777b538SAndroid Build Coastguard Worker Parsed components_;
1011*6777b538SAndroid Build Coastguard Worker };
1012*6777b538SAndroid Build Coastguard Worker
1013*6777b538SAndroid Build Coastguard Worker // The base must be an 8-bit canonical URL.
1014*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1015*6777b538SAndroid Build Coastguard Worker bool ReplaceStandardURL(const char* base,
1016*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1017*6777b538SAndroid Build Coastguard Worker const Replacements<char>& replacements,
1018*6777b538SAndroid Build Coastguard Worker SchemeType scheme_type,
1019*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
1020*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
1021*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
1022*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1023*6777b538SAndroid Build Coastguard Worker bool ReplaceStandardURL(const char* base,
1024*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1025*6777b538SAndroid Build Coastguard Worker const Replacements<char16_t>& replacements,
1026*6777b538SAndroid Build Coastguard Worker SchemeType scheme_type,
1027*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
1028*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
1029*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
1030*6777b538SAndroid Build Coastguard Worker
1031*6777b538SAndroid Build Coastguard Worker // For non-special URLs.
1032*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1033*6777b538SAndroid Build Coastguard Worker bool ReplaceNonSpecialURL(const char* base,
1034*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1035*6777b538SAndroid Build Coastguard Worker const Replacements<char>& replacements,
1036*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
1037*6777b538SAndroid Build Coastguard Worker CanonOutput& output,
1038*6777b538SAndroid Build Coastguard Worker Parsed& new_parsed);
1039*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1040*6777b538SAndroid Build Coastguard Worker bool ReplaceNonSpecialURL(const char* base,
1041*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1042*6777b538SAndroid Build Coastguard Worker const Replacements<char16_t>& replacements,
1043*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
1044*6777b538SAndroid Build Coastguard Worker CanonOutput& output,
1045*6777b538SAndroid Build Coastguard Worker Parsed& new_parsed);
1046*6777b538SAndroid Build Coastguard Worker
1047*6777b538SAndroid Build Coastguard Worker // Filesystem URLs can only have the path, query, or ref replaced.
1048*6777b538SAndroid Build Coastguard Worker // All other components will be ignored.
1049*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1050*6777b538SAndroid Build Coastguard Worker bool ReplaceFileSystemURL(const char* base,
1051*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1052*6777b538SAndroid Build Coastguard Worker const Replacements<char>& replacements,
1053*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
1054*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
1055*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
1056*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1057*6777b538SAndroid Build Coastguard Worker bool ReplaceFileSystemURL(const char* base,
1058*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1059*6777b538SAndroid Build Coastguard Worker const Replacements<char16_t>& replacements,
1060*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
1061*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
1062*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
1063*6777b538SAndroid Build Coastguard Worker
1064*6777b538SAndroid Build Coastguard Worker // Replacing some parts of a file URL is not permitted. Everything except
1065*6777b538SAndroid Build Coastguard Worker // the host, path, query, and ref will be ignored.
1066*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1067*6777b538SAndroid Build Coastguard Worker bool ReplaceFileURL(const char* base,
1068*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1069*6777b538SAndroid Build Coastguard Worker const Replacements<char>& replacements,
1070*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
1071*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
1072*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
1073*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1074*6777b538SAndroid Build Coastguard Worker bool ReplaceFileURL(const char* base,
1075*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1076*6777b538SAndroid Build Coastguard Worker const Replacements<char16_t>& replacements,
1077*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
1078*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
1079*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
1080*6777b538SAndroid Build Coastguard Worker
1081*6777b538SAndroid Build Coastguard Worker // Path URLs can only have the scheme and path replaced. All other components
1082*6777b538SAndroid Build Coastguard Worker // will be ignored.
1083*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1084*6777b538SAndroid Build Coastguard Worker bool ReplacePathURL(const char* base,
1085*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1086*6777b538SAndroid Build Coastguard Worker const Replacements<char>& replacements,
1087*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
1088*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
1089*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1090*6777b538SAndroid Build Coastguard Worker bool ReplacePathURL(const char* base,
1091*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1092*6777b538SAndroid Build Coastguard Worker const Replacements<char16_t>& replacements,
1093*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
1094*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
1095*6777b538SAndroid Build Coastguard Worker
1096*6777b538SAndroid Build Coastguard Worker // Mailto URLs can only have the scheme, path, and query replaced.
1097*6777b538SAndroid Build Coastguard Worker // All other components will be ignored.
1098*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1099*6777b538SAndroid Build Coastguard Worker bool ReplaceMailtoURL(const char* base,
1100*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1101*6777b538SAndroid Build Coastguard Worker const Replacements<char>& replacements,
1102*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
1103*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
1104*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1105*6777b538SAndroid Build Coastguard Worker bool ReplaceMailtoURL(const char* base,
1106*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1107*6777b538SAndroid Build Coastguard Worker const Replacements<char16_t>& replacements,
1108*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
1109*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed);
1110*6777b538SAndroid Build Coastguard Worker
1111*6777b538SAndroid Build Coastguard Worker // Relative URL ---------------------------------------------------------------
1112*6777b538SAndroid Build Coastguard Worker
1113*6777b538SAndroid Build Coastguard Worker // Given an input URL or URL fragment |fragment|, determines if it is a
1114*6777b538SAndroid Build Coastguard Worker // relative or absolute URL and places the result into |*is_relative|. If it is
1115*6777b538SAndroid Build Coastguard Worker // relative, the relevant portion of the URL will be placed into
1116*6777b538SAndroid Build Coastguard Worker // |*relative_component| (there may have been trimmed whitespace, for example).
1117*6777b538SAndroid Build Coastguard Worker // This value is passed to ResolveRelativeURL. If the input is not relative,
1118*6777b538SAndroid Build Coastguard Worker // this value is UNDEFINED (it may be changed by the function).
1119*6777b538SAndroid Build Coastguard Worker //
1120*6777b538SAndroid Build Coastguard Worker // Returns true on success (we successfully determined the URL is relative or
1121*6777b538SAndroid Build Coastguard Worker // not). Failure means that the combination of URLs doesn't make any sense.
1122*6777b538SAndroid Build Coastguard Worker //
1123*6777b538SAndroid Build Coastguard Worker // The base URL should always be canonical, therefore is ASCII.
1124*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1125*6777b538SAndroid Build Coastguard Worker bool IsRelativeURL(const char* base,
1126*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1127*6777b538SAndroid Build Coastguard Worker const char* fragment,
1128*6777b538SAndroid Build Coastguard Worker int fragment_len,
1129*6777b538SAndroid Build Coastguard Worker bool is_base_hierarchical,
1130*6777b538SAndroid Build Coastguard Worker bool* is_relative,
1131*6777b538SAndroid Build Coastguard Worker Component* relative_component);
1132*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1133*6777b538SAndroid Build Coastguard Worker bool IsRelativeURL(const char* base,
1134*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1135*6777b538SAndroid Build Coastguard Worker const char16_t* fragment,
1136*6777b538SAndroid Build Coastguard Worker int fragment_len,
1137*6777b538SAndroid Build Coastguard Worker bool is_base_hierarchical,
1138*6777b538SAndroid Build Coastguard Worker bool* is_relative,
1139*6777b538SAndroid Build Coastguard Worker Component* relative_component);
1140*6777b538SAndroid Build Coastguard Worker
1141*6777b538SAndroid Build Coastguard Worker // Given a canonical parsed source URL, a URL fragment known to be relative,
1142*6777b538SAndroid Build Coastguard Worker // and the identified relevant portion of the relative URL (computed by
1143*6777b538SAndroid Build Coastguard Worker // IsRelativeURL), this produces a new parsed canonical URL in |output| and
1144*6777b538SAndroid Build Coastguard Worker // |out_parsed|.
1145*6777b538SAndroid Build Coastguard Worker //
1146*6777b538SAndroid Build Coastguard Worker // It also requires a flag indicating whether the base URL is a file: URL
1147*6777b538SAndroid Build Coastguard Worker // which triggers additional logic.
1148*6777b538SAndroid Build Coastguard Worker //
1149*6777b538SAndroid Build Coastguard Worker // The base URL should be canonical and have a host (may be empty for file
1150*6777b538SAndroid Build Coastguard Worker // URLs) and a path. If it doesn't have these, we can't resolve relative
1151*6777b538SAndroid Build Coastguard Worker // URLs off of it and will return the base as the output with an error flag.
// Because it is canonical it should also be ASCII.
1153*6777b538SAndroid Build Coastguard Worker //
1154*6777b538SAndroid Build Coastguard Worker // The query charset converter follows the same rules as CanonicalizeQuery.
1155*6777b538SAndroid Build Coastguard Worker //
1156*6777b538SAndroid Build Coastguard Worker // Returns true on success. On failure, the output will be "something
1157*6777b538SAndroid Build Coastguard Worker // reasonable" that will be consistent and valid, just probably not what
1158*6777b538SAndroid Build Coastguard Worker // was intended by the web page author or caller.
1159*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1160*6777b538SAndroid Build Coastguard Worker bool ResolveRelativeURL(const char* base_url,
1161*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1162*6777b538SAndroid Build Coastguard Worker bool base_is_file,
1163*6777b538SAndroid Build Coastguard Worker const char* relative_url,
1164*6777b538SAndroid Build Coastguard Worker const Component& relative_component,
1165*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
1166*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
1167*6777b538SAndroid Build Coastguard Worker Parsed* out_parsed);
1168*6777b538SAndroid Build Coastguard Worker COMPONENT_EXPORT(URL)
1169*6777b538SAndroid Build Coastguard Worker bool ResolveRelativeURL(const char* base_url,
1170*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
1171*6777b538SAndroid Build Coastguard Worker bool base_is_file,
1172*6777b538SAndroid Build Coastguard Worker const char16_t* relative_url,
1173*6777b538SAndroid Build Coastguard Worker const Component& relative_component,
1174*6777b538SAndroid Build Coastguard Worker CharsetConverter* query_converter,
1175*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
1176*6777b538SAndroid Build Coastguard Worker Parsed* out_parsed);
1177*6777b538SAndroid Build Coastguard Worker
1178*6777b538SAndroid Build Coastguard Worker } // namespace url
1179*6777b538SAndroid Build Coastguard Worker
1180*6777b538SAndroid Build Coastguard Worker #endif // URL_URL_CANON_H_
1181