1*6777b538SAndroid Build Coastguard Worker // Copyright 2013 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker
5*6777b538SAndroid Build Coastguard Worker // Functions for canonicalizing "path" URLs. Not to be confused with the path
6*6777b538SAndroid Build Coastguard Worker // of a URL, these are URLs that have no authority section, only a path. For
7*6777b538SAndroid Build Coastguard Worker // example, "javascript:" and "data:".
8*6777b538SAndroid Build Coastguard Worker
9*6777b538SAndroid Build Coastguard Worker #include "url/url_canon.h"
10*6777b538SAndroid Build Coastguard Worker #include "url/url_canon_internal.h"
11*6777b538SAndroid Build Coastguard Worker
12*6777b538SAndroid Build Coastguard Worker namespace url {
13*6777b538SAndroid Build Coastguard Worker
14*6777b538SAndroid Build Coastguard Worker namespace {
15*6777b538SAndroid Build Coastguard Worker
16*6777b538SAndroid Build Coastguard Worker // Canonicalize the given |component| from |source| into |output| and
17*6777b538SAndroid Build Coastguard Worker // |new_component|. If |separator| is non-zero, it is pre-pended to |output|
18*6777b538SAndroid Build Coastguard Worker // prior to the canonicalized component; i.e. for the '?' or '#' characters.
19*6777b538SAndroid Build Coastguard Worker template <typename CHAR, typename UCHAR>
DoCanonicalizePathComponent(const CHAR * source,const Component & component,char separator,CanonOutput * output,Component * new_component)20*6777b538SAndroid Build Coastguard Worker void DoCanonicalizePathComponent(const CHAR* source,
21*6777b538SAndroid Build Coastguard Worker const Component& component,
22*6777b538SAndroid Build Coastguard Worker char separator,
23*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
24*6777b538SAndroid Build Coastguard Worker Component* new_component) {
25*6777b538SAndroid Build Coastguard Worker if (component.is_valid()) {
26*6777b538SAndroid Build Coastguard Worker if (separator)
27*6777b538SAndroid Build Coastguard Worker output->push_back(separator);
28*6777b538SAndroid Build Coastguard Worker // Copy the path using path URL's more lax escaping rules (think for
29*6777b538SAndroid Build Coastguard Worker // javascript:). We convert to UTF-8 and escape characters from the
30*6777b538SAndroid Build Coastguard Worker // C0 control percent-encode set, but leave all other characters alone.
31*6777b538SAndroid Build Coastguard Worker // This helps readability of JavaScript.
32*6777b538SAndroid Build Coastguard Worker // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
33*6777b538SAndroid Build Coastguard Worker // https://url.spec.whatwg.org/#c0-control-percent-encode-set
34*6777b538SAndroid Build Coastguard Worker new_component->begin = output->length();
35*6777b538SAndroid Build Coastguard Worker size_t end = static_cast<size_t>(component.end());
36*6777b538SAndroid Build Coastguard Worker for (size_t i = static_cast<size_t>(component.begin); i < end; i++) {
37*6777b538SAndroid Build Coastguard Worker UCHAR uch = static_cast<UCHAR>(source[i]);
38*6777b538SAndroid Build Coastguard Worker if (IsInC0ControlPercentEncodeSet(uch)) {
39*6777b538SAndroid Build Coastguard Worker AppendUTF8EscapedChar(source, &i, end, output);
40*6777b538SAndroid Build Coastguard Worker } else {
41*6777b538SAndroid Build Coastguard Worker output->push_back(static_cast<char>(uch));
42*6777b538SAndroid Build Coastguard Worker }
43*6777b538SAndroid Build Coastguard Worker }
44*6777b538SAndroid Build Coastguard Worker new_component->len = output->length() - new_component->begin;
45*6777b538SAndroid Build Coastguard Worker } else {
46*6777b538SAndroid Build Coastguard Worker // Empty part.
47*6777b538SAndroid Build Coastguard Worker new_component->reset();
48*6777b538SAndroid Build Coastguard Worker }
49*6777b538SAndroid Build Coastguard Worker }
50*6777b538SAndroid Build Coastguard Worker
51*6777b538SAndroid Build Coastguard Worker template <typename CHAR, typename UCHAR>
DoCanonicalizePathURL(const URLComponentSource<CHAR> & source,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)52*6777b538SAndroid Build Coastguard Worker bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,
53*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
54*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
55*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed) {
56*6777b538SAndroid Build Coastguard Worker // Scheme: this will append the colon.
57*6777b538SAndroid Build Coastguard Worker bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
58*6777b538SAndroid Build Coastguard Worker output, &new_parsed->scheme);
59*6777b538SAndroid Build Coastguard Worker
60*6777b538SAndroid Build Coastguard Worker // We assume there's no authority for path URLs. Note that hosts should never
61*6777b538SAndroid Build Coastguard Worker // have -1 length.
62*6777b538SAndroid Build Coastguard Worker new_parsed->username.reset();
63*6777b538SAndroid Build Coastguard Worker new_parsed->password.reset();
64*6777b538SAndroid Build Coastguard Worker new_parsed->host.reset();
65*6777b538SAndroid Build Coastguard Worker new_parsed->port.reset();
66*6777b538SAndroid Build Coastguard Worker
67*6777b538SAndroid Build Coastguard Worker // Canonicalize path via the weaker path URL rules.
68*6777b538SAndroid Build Coastguard Worker //
69*6777b538SAndroid Build Coastguard Worker // Note: parsing the path part should never cause a failure, see
70*6777b538SAndroid Build Coastguard Worker // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
71*6777b538SAndroid Build Coastguard Worker DoCanonicalizePathComponent<CHAR, UCHAR>(source.path, parsed.path, '\0',
72*6777b538SAndroid Build Coastguard Worker output, &new_parsed->path);
73*6777b538SAndroid Build Coastguard Worker
74*6777b538SAndroid Build Coastguard Worker // Similar to mailto:, always use the default UTF-8 charset converter for
75*6777b538SAndroid Build Coastguard Worker // query.
76*6777b538SAndroid Build Coastguard Worker CanonicalizeQuery(source.query, parsed.query, nullptr, output,
77*6777b538SAndroid Build Coastguard Worker &new_parsed->query);
78*6777b538SAndroid Build Coastguard Worker
79*6777b538SAndroid Build Coastguard Worker CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
80*6777b538SAndroid Build Coastguard Worker
81*6777b538SAndroid Build Coastguard Worker return success;
82*6777b538SAndroid Build Coastguard Worker }
83*6777b538SAndroid Build Coastguard Worker
84*6777b538SAndroid Build Coastguard Worker } // namespace
85*6777b538SAndroid Build Coastguard Worker
CanonicalizePathURL(const char * spec,int spec_len,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)86*6777b538SAndroid Build Coastguard Worker bool CanonicalizePathURL(const char* spec,
87*6777b538SAndroid Build Coastguard Worker int spec_len,
88*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
89*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
90*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed) {
91*6777b538SAndroid Build Coastguard Worker return DoCanonicalizePathURL<char, unsigned char>(
92*6777b538SAndroid Build Coastguard Worker URLComponentSource<char>(spec), parsed, output, new_parsed);
93*6777b538SAndroid Build Coastguard Worker }
94*6777b538SAndroid Build Coastguard Worker
CanonicalizePathURL(const char16_t * spec,int spec_len,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)95*6777b538SAndroid Build Coastguard Worker bool CanonicalizePathURL(const char16_t* spec,
96*6777b538SAndroid Build Coastguard Worker int spec_len,
97*6777b538SAndroid Build Coastguard Worker const Parsed& parsed,
98*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
99*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed) {
100*6777b538SAndroid Build Coastguard Worker return DoCanonicalizePathURL<char16_t, char16_t>(
101*6777b538SAndroid Build Coastguard Worker URLComponentSource<char16_t>(spec), parsed, output, new_parsed);
102*6777b538SAndroid Build Coastguard Worker }
103*6777b538SAndroid Build Coastguard Worker
CanonicalizePathURLPath(const char * source,const Component & component,CanonOutput * output,Component * new_component)104*6777b538SAndroid Build Coastguard Worker void CanonicalizePathURLPath(const char* source,
105*6777b538SAndroid Build Coastguard Worker const Component& component,
106*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
107*6777b538SAndroid Build Coastguard Worker Component* new_component) {
108*6777b538SAndroid Build Coastguard Worker DoCanonicalizePathComponent<char, unsigned char>(source, component, '\0',
109*6777b538SAndroid Build Coastguard Worker output, new_component);
110*6777b538SAndroid Build Coastguard Worker }
111*6777b538SAndroid Build Coastguard Worker
CanonicalizePathURLPath(const char16_t * source,const Component & component,CanonOutput * output,Component * new_component)112*6777b538SAndroid Build Coastguard Worker void CanonicalizePathURLPath(const char16_t* source,
113*6777b538SAndroid Build Coastguard Worker const Component& component,
114*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
115*6777b538SAndroid Build Coastguard Worker Component* new_component) {
116*6777b538SAndroid Build Coastguard Worker DoCanonicalizePathComponent<char16_t, char16_t>(source, component, '\0',
117*6777b538SAndroid Build Coastguard Worker output, new_component);
118*6777b538SAndroid Build Coastguard Worker }
119*6777b538SAndroid Build Coastguard Worker
ReplacePathURL(const char * base,const Parsed & base_parsed,const Replacements<char> & replacements,CanonOutput * output,Parsed * new_parsed)120*6777b538SAndroid Build Coastguard Worker bool ReplacePathURL(const char* base,
121*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
122*6777b538SAndroid Build Coastguard Worker const Replacements<char>& replacements,
123*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
124*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed) {
125*6777b538SAndroid Build Coastguard Worker URLComponentSource<char> source(base);
126*6777b538SAndroid Build Coastguard Worker Parsed parsed(base_parsed);
127*6777b538SAndroid Build Coastguard Worker SetupOverrideComponents(base, replacements, &source, &parsed);
128*6777b538SAndroid Build Coastguard Worker return DoCanonicalizePathURL<char, unsigned char>(
129*6777b538SAndroid Build Coastguard Worker source, parsed, output, new_parsed);
130*6777b538SAndroid Build Coastguard Worker }
131*6777b538SAndroid Build Coastguard Worker
ReplacePathURL(const char * base,const Parsed & base_parsed,const Replacements<char16_t> & replacements,CanonOutput * output,Parsed * new_parsed)132*6777b538SAndroid Build Coastguard Worker bool ReplacePathURL(const char* base,
133*6777b538SAndroid Build Coastguard Worker const Parsed& base_parsed,
134*6777b538SAndroid Build Coastguard Worker const Replacements<char16_t>& replacements,
135*6777b538SAndroid Build Coastguard Worker CanonOutput* output,
136*6777b538SAndroid Build Coastguard Worker Parsed* new_parsed) {
137*6777b538SAndroid Build Coastguard Worker RawCanonOutput<1024> utf8;
138*6777b538SAndroid Build Coastguard Worker URLComponentSource<char> source(base);
139*6777b538SAndroid Build Coastguard Worker Parsed parsed(base_parsed);
140*6777b538SAndroid Build Coastguard Worker SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
141*6777b538SAndroid Build Coastguard Worker return DoCanonicalizePathURL<char, unsigned char>(
142*6777b538SAndroid Build Coastguard Worker source, parsed, output, new_parsed);
143*6777b538SAndroid Build Coastguard Worker }
144*6777b538SAndroid Build Coastguard Worker
145*6777b538SAndroid Build Coastguard Worker } // namespace url
146