xref: /aosp_15_r20/external/cronet/url/url_canon_pathurl.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
// Functions for canonicalizing "path" URLs. Not to be confused with the path
// of a URL, these are URLs that have no authority section, only a path. For
// example, "javascript:" and "data:".
8*6777b538SAndroid Build Coastguard Worker 
9*6777b538SAndroid Build Coastguard Worker #include "url/url_canon.h"
10*6777b538SAndroid Build Coastguard Worker #include "url/url_canon_internal.h"
11*6777b538SAndroid Build Coastguard Worker 
12*6777b538SAndroid Build Coastguard Worker namespace url {
13*6777b538SAndroid Build Coastguard Worker 
14*6777b538SAndroid Build Coastguard Worker namespace {
15*6777b538SAndroid Build Coastguard Worker 
16*6777b538SAndroid Build Coastguard Worker // Canonicalize the given |component| from |source| into |output| and
17*6777b538SAndroid Build Coastguard Worker // |new_component|. If |separator| is non-zero, it is pre-pended to |output|
18*6777b538SAndroid Build Coastguard Worker // prior to the canonicalized component; i.e. for the '?' or '#' characters.
19*6777b538SAndroid Build Coastguard Worker template <typename CHAR, typename UCHAR>
DoCanonicalizePathComponent(const CHAR * source,const Component & component,char separator,CanonOutput * output,Component * new_component)20*6777b538SAndroid Build Coastguard Worker void DoCanonicalizePathComponent(const CHAR* source,
21*6777b538SAndroid Build Coastguard Worker                                  const Component& component,
22*6777b538SAndroid Build Coastguard Worker                                  char separator,
23*6777b538SAndroid Build Coastguard Worker                                  CanonOutput* output,
24*6777b538SAndroid Build Coastguard Worker                                  Component* new_component) {
25*6777b538SAndroid Build Coastguard Worker   if (component.is_valid()) {
26*6777b538SAndroid Build Coastguard Worker     if (separator)
27*6777b538SAndroid Build Coastguard Worker       output->push_back(separator);
28*6777b538SAndroid Build Coastguard Worker     // Copy the path using path URL's more lax escaping rules (think for
29*6777b538SAndroid Build Coastguard Worker     // javascript:). We convert to UTF-8 and escape characters from the
30*6777b538SAndroid Build Coastguard Worker     // C0 control percent-encode set, but leave all other characters alone.
31*6777b538SAndroid Build Coastguard Worker     // This helps readability of JavaScript.
32*6777b538SAndroid Build Coastguard Worker     // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
33*6777b538SAndroid Build Coastguard Worker     // https://url.spec.whatwg.org/#c0-control-percent-encode-set
34*6777b538SAndroid Build Coastguard Worker     new_component->begin = output->length();
35*6777b538SAndroid Build Coastguard Worker     size_t end = static_cast<size_t>(component.end());
36*6777b538SAndroid Build Coastguard Worker     for (size_t i = static_cast<size_t>(component.begin); i < end; i++) {
37*6777b538SAndroid Build Coastguard Worker       UCHAR uch = static_cast<UCHAR>(source[i]);
38*6777b538SAndroid Build Coastguard Worker       if (IsInC0ControlPercentEncodeSet(uch)) {
39*6777b538SAndroid Build Coastguard Worker         AppendUTF8EscapedChar(source, &i, end, output);
40*6777b538SAndroid Build Coastguard Worker       } else {
41*6777b538SAndroid Build Coastguard Worker         output->push_back(static_cast<char>(uch));
42*6777b538SAndroid Build Coastguard Worker       }
43*6777b538SAndroid Build Coastguard Worker     }
44*6777b538SAndroid Build Coastguard Worker     new_component->len = output->length() - new_component->begin;
45*6777b538SAndroid Build Coastguard Worker   } else {
46*6777b538SAndroid Build Coastguard Worker     // Empty part.
47*6777b538SAndroid Build Coastguard Worker     new_component->reset();
48*6777b538SAndroid Build Coastguard Worker   }
49*6777b538SAndroid Build Coastguard Worker }
50*6777b538SAndroid Build Coastguard Worker 
51*6777b538SAndroid Build Coastguard Worker template <typename CHAR, typename UCHAR>
DoCanonicalizePathURL(const URLComponentSource<CHAR> & source,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)52*6777b538SAndroid Build Coastguard Worker bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,
53*6777b538SAndroid Build Coastguard Worker                            const Parsed& parsed,
54*6777b538SAndroid Build Coastguard Worker                            CanonOutput* output,
55*6777b538SAndroid Build Coastguard Worker                            Parsed* new_parsed) {
56*6777b538SAndroid Build Coastguard Worker   // Scheme: this will append the colon.
57*6777b538SAndroid Build Coastguard Worker   bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
58*6777b538SAndroid Build Coastguard Worker                                     output, &new_parsed->scheme);
59*6777b538SAndroid Build Coastguard Worker 
60*6777b538SAndroid Build Coastguard Worker   // We assume there's no authority for path URLs. Note that hosts should never
61*6777b538SAndroid Build Coastguard Worker   // have -1 length.
62*6777b538SAndroid Build Coastguard Worker   new_parsed->username.reset();
63*6777b538SAndroid Build Coastguard Worker   new_parsed->password.reset();
64*6777b538SAndroid Build Coastguard Worker   new_parsed->host.reset();
65*6777b538SAndroid Build Coastguard Worker   new_parsed->port.reset();
66*6777b538SAndroid Build Coastguard Worker 
67*6777b538SAndroid Build Coastguard Worker   // Canonicalize path via the weaker path URL rules.
68*6777b538SAndroid Build Coastguard Worker   //
69*6777b538SAndroid Build Coastguard Worker   // Note: parsing the path part should never cause a failure, see
70*6777b538SAndroid Build Coastguard Worker   // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
71*6777b538SAndroid Build Coastguard Worker   DoCanonicalizePathComponent<CHAR, UCHAR>(source.path, parsed.path, '\0',
72*6777b538SAndroid Build Coastguard Worker                                            output, &new_parsed->path);
73*6777b538SAndroid Build Coastguard Worker 
74*6777b538SAndroid Build Coastguard Worker   // Similar to mailto:, always use the default UTF-8 charset converter for
75*6777b538SAndroid Build Coastguard Worker   // query.
76*6777b538SAndroid Build Coastguard Worker   CanonicalizeQuery(source.query, parsed.query, nullptr, output,
77*6777b538SAndroid Build Coastguard Worker                     &new_parsed->query);
78*6777b538SAndroid Build Coastguard Worker 
79*6777b538SAndroid Build Coastguard Worker   CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
80*6777b538SAndroid Build Coastguard Worker 
81*6777b538SAndroid Build Coastguard Worker   return success;
82*6777b538SAndroid Build Coastguard Worker }
83*6777b538SAndroid Build Coastguard Worker 
84*6777b538SAndroid Build Coastguard Worker }  // namespace
85*6777b538SAndroid Build Coastguard Worker 
CanonicalizePathURL(const char * spec,int spec_len,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)86*6777b538SAndroid Build Coastguard Worker bool CanonicalizePathURL(const char* spec,
87*6777b538SAndroid Build Coastguard Worker                          int spec_len,
88*6777b538SAndroid Build Coastguard Worker                          const Parsed& parsed,
89*6777b538SAndroid Build Coastguard Worker                          CanonOutput* output,
90*6777b538SAndroid Build Coastguard Worker                          Parsed* new_parsed) {
91*6777b538SAndroid Build Coastguard Worker   return DoCanonicalizePathURL<char, unsigned char>(
92*6777b538SAndroid Build Coastguard Worker       URLComponentSource<char>(spec), parsed, output, new_parsed);
93*6777b538SAndroid Build Coastguard Worker }
94*6777b538SAndroid Build Coastguard Worker 
CanonicalizePathURL(const char16_t * spec,int spec_len,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)95*6777b538SAndroid Build Coastguard Worker bool CanonicalizePathURL(const char16_t* spec,
96*6777b538SAndroid Build Coastguard Worker                          int spec_len,
97*6777b538SAndroid Build Coastguard Worker                          const Parsed& parsed,
98*6777b538SAndroid Build Coastguard Worker                          CanonOutput* output,
99*6777b538SAndroid Build Coastguard Worker                          Parsed* new_parsed) {
100*6777b538SAndroid Build Coastguard Worker   return DoCanonicalizePathURL<char16_t, char16_t>(
101*6777b538SAndroid Build Coastguard Worker       URLComponentSource<char16_t>(spec), parsed, output, new_parsed);
102*6777b538SAndroid Build Coastguard Worker }
103*6777b538SAndroid Build Coastguard Worker 
CanonicalizePathURLPath(const char * source,const Component & component,CanonOutput * output,Component * new_component)104*6777b538SAndroid Build Coastguard Worker void CanonicalizePathURLPath(const char* source,
105*6777b538SAndroid Build Coastguard Worker                              const Component& component,
106*6777b538SAndroid Build Coastguard Worker                              CanonOutput* output,
107*6777b538SAndroid Build Coastguard Worker                              Component* new_component) {
108*6777b538SAndroid Build Coastguard Worker   DoCanonicalizePathComponent<char, unsigned char>(source, component, '\0',
109*6777b538SAndroid Build Coastguard Worker                                                    output, new_component);
110*6777b538SAndroid Build Coastguard Worker }
111*6777b538SAndroid Build Coastguard Worker 
CanonicalizePathURLPath(const char16_t * source,const Component & component,CanonOutput * output,Component * new_component)112*6777b538SAndroid Build Coastguard Worker void CanonicalizePathURLPath(const char16_t* source,
113*6777b538SAndroid Build Coastguard Worker                              const Component& component,
114*6777b538SAndroid Build Coastguard Worker                              CanonOutput* output,
115*6777b538SAndroid Build Coastguard Worker                              Component* new_component) {
116*6777b538SAndroid Build Coastguard Worker   DoCanonicalizePathComponent<char16_t, char16_t>(source, component, '\0',
117*6777b538SAndroid Build Coastguard Worker                                                   output, new_component);
118*6777b538SAndroid Build Coastguard Worker }
119*6777b538SAndroid Build Coastguard Worker 
ReplacePathURL(const char * base,const Parsed & base_parsed,const Replacements<char> & replacements,CanonOutput * output,Parsed * new_parsed)120*6777b538SAndroid Build Coastguard Worker bool ReplacePathURL(const char* base,
121*6777b538SAndroid Build Coastguard Worker                     const Parsed& base_parsed,
122*6777b538SAndroid Build Coastguard Worker                     const Replacements<char>& replacements,
123*6777b538SAndroid Build Coastguard Worker                     CanonOutput* output,
124*6777b538SAndroid Build Coastguard Worker                     Parsed* new_parsed) {
125*6777b538SAndroid Build Coastguard Worker   URLComponentSource<char> source(base);
126*6777b538SAndroid Build Coastguard Worker   Parsed parsed(base_parsed);
127*6777b538SAndroid Build Coastguard Worker   SetupOverrideComponents(base, replacements, &source, &parsed);
128*6777b538SAndroid Build Coastguard Worker   return DoCanonicalizePathURL<char, unsigned char>(
129*6777b538SAndroid Build Coastguard Worker       source, parsed, output, new_parsed);
130*6777b538SAndroid Build Coastguard Worker }
131*6777b538SAndroid Build Coastguard Worker 
ReplacePathURL(const char * base,const Parsed & base_parsed,const Replacements<char16_t> & replacements,CanonOutput * output,Parsed * new_parsed)132*6777b538SAndroid Build Coastguard Worker bool ReplacePathURL(const char* base,
133*6777b538SAndroid Build Coastguard Worker                     const Parsed& base_parsed,
134*6777b538SAndroid Build Coastguard Worker                     const Replacements<char16_t>& replacements,
135*6777b538SAndroid Build Coastguard Worker                     CanonOutput* output,
136*6777b538SAndroid Build Coastguard Worker                     Parsed* new_parsed) {
137*6777b538SAndroid Build Coastguard Worker   RawCanonOutput<1024> utf8;
138*6777b538SAndroid Build Coastguard Worker   URLComponentSource<char> source(base);
139*6777b538SAndroid Build Coastguard Worker   Parsed parsed(base_parsed);
140*6777b538SAndroid Build Coastguard Worker   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
141*6777b538SAndroid Build Coastguard Worker   return DoCanonicalizePathURL<char, unsigned char>(
142*6777b538SAndroid Build Coastguard Worker       source, parsed, output, new_parsed);
143*6777b538SAndroid Build Coastguard Worker }
144*6777b538SAndroid Build Coastguard Worker 
145*6777b538SAndroid Build Coastguard Worker }  // namespace url
146