1 // Copyright 2013 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Functions for canonicalizing "mailto:" URLs.
6
7 #include "url/url_canon.h"
8 #include "url/url_canon_internal.h"
9 #include "url/url_file.h"
10 #include "url/url_parse_internal.h"
11
12 namespace url {
13
14 namespace {
15
// Reports whether |uch| must be percent-encoded when it appears in the path
// component of a mailto URL. Encoding these characters improves compatibility
// and mitigates command-injection attacks on mailto handlers.
// See https://crbug.com/711020.
//
// Returns true for:
//   * any value below 0x21 (space and control characters),
//   * any value above 0x7e (DEL and everything outside printable ASCII),
//   * '"', '<', '>', '`', '{', '|' and '}'.
// Returns false for every other value.
template <typename UCHAR>
constexpr bool ShouldEncodeMailboxCharacter(UCHAR uch) {
  return uch < 0x21 ||                                // space & control chars.
         uch > 0x7e ||                                // high-ascii characters.
         uch == 0x22 ||                               // quote.
         uch == 0x3c || uch == 0x3e ||                // angle brackets.
         uch == 0x60 ||                               // backtick.
         uch == 0x7b || uch == 0x7c || uch == 0x7d;   // braces and pipe.
}
32
33 template <typename CHAR, typename UCHAR>
DoCanonicalizeMailtoURL(const URLComponentSource<CHAR> & source,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)34 bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
35 const Parsed& parsed,
36 CanonOutput* output,
37 Parsed* new_parsed) {
38 // mailto: only uses {scheme, path, query} -- clear the rest.
39 new_parsed->username = Component();
40 new_parsed->password = Component();
41 new_parsed->host = Component();
42 new_parsed->port = Component();
43 new_parsed->ref = Component();
44
45 // Scheme (known, so we don't bother running it through the more
46 // complicated scheme canonicalizer).
47 new_parsed->scheme.begin = output->length();
48 output->Append("mailto:");
49 new_parsed->scheme.len = 6;
50
51 bool success = true;
52
53 // Path
54 if (parsed.path.is_valid()) {
55 new_parsed->path.begin = output->length();
56
57 // Copy the path using path URL's more lax escaping rules.
58 // We convert to UTF-8 and escape non-ASCII, but leave most
59 // ASCII characters alone.
60 size_t end = static_cast<size_t>(parsed.path.end());
61 for (size_t i = static_cast<size_t>(parsed.path.begin); i < end; ++i) {
62 UCHAR uch = static_cast<UCHAR>(source.path[i]);
63 if (ShouldEncodeMailboxCharacter<UCHAR>(uch))
64 success &= AppendUTF8EscapedChar(source.path, &i, end, output);
65 else
66 output->push_back(static_cast<char>(uch));
67 }
68
69 new_parsed->path.len = output->length() - new_parsed->path.begin;
70 } else {
71 // No path at all
72 new_parsed->path.reset();
73 }
74
75 // Query -- always use the default UTF8 charset converter.
76 CanonicalizeQuery(source.query, parsed.query, NULL,
77 output, &new_parsed->query);
78
79 return success;
80 }
81
82 } // namespace
83
CanonicalizeMailtoURL(const char * spec,int spec_len,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)84 bool CanonicalizeMailtoURL(const char* spec,
85 int spec_len,
86 const Parsed& parsed,
87 CanonOutput* output,
88 Parsed* new_parsed) {
89 return DoCanonicalizeMailtoURL<char, unsigned char>(
90 URLComponentSource<char>(spec), parsed, output, new_parsed);
91 }
92
CanonicalizeMailtoURL(const char16_t * spec,int spec_len,const Parsed & parsed,CanonOutput * output,Parsed * new_parsed)93 bool CanonicalizeMailtoURL(const char16_t* spec,
94 int spec_len,
95 const Parsed& parsed,
96 CanonOutput* output,
97 Parsed* new_parsed) {
98 return DoCanonicalizeMailtoURL<char16_t, char16_t>(
99 URLComponentSource<char16_t>(spec), parsed, output, new_parsed);
100 }
101
ReplaceMailtoURL(const char * base,const Parsed & base_parsed,const Replacements<char> & replacements,CanonOutput * output,Parsed * new_parsed)102 bool ReplaceMailtoURL(const char* base,
103 const Parsed& base_parsed,
104 const Replacements<char>& replacements,
105 CanonOutput* output,
106 Parsed* new_parsed) {
107 URLComponentSource<char> source(base);
108 Parsed parsed(base_parsed);
109 SetupOverrideComponents(base, replacements, &source, &parsed);
110 return DoCanonicalizeMailtoURL<char, unsigned char>(
111 source, parsed, output, new_parsed);
112 }
113
ReplaceMailtoURL(const char * base,const Parsed & base_parsed,const Replacements<char16_t> & replacements,CanonOutput * output,Parsed * new_parsed)114 bool ReplaceMailtoURL(const char* base,
115 const Parsed& base_parsed,
116 const Replacements<char16_t>& replacements,
117 CanonOutput* output,
118 Parsed* new_parsed) {
119 RawCanonOutput<1024> utf8;
120 URLComponentSource<char> source(base);
121 Parsed parsed(base_parsed);
122 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
123 return DoCanonicalizeMailtoURL<char, unsigned char>(
124 source, parsed, output, new_parsed);
125 }
126
127 } // namespace url
128