// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker
5*6777b538SAndroid Build Coastguard Worker #include "net/http/http_content_disposition.h"
6*6777b538SAndroid Build Coastguard Worker
7*6777b538SAndroid Build Coastguard Worker #include <string_view>
8*6777b538SAndroid Build Coastguard Worker
9*6777b538SAndroid Build Coastguard Worker #include "base/base64.h"
10*6777b538SAndroid Build Coastguard Worker #include "base/check_op.h"
11*6777b538SAndroid Build Coastguard Worker #include "base/strings/escape.h"
12*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_tokenizer.h"
13*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_util.h"
14*6777b538SAndroid Build Coastguard Worker #include "base/strings/sys_string_conversions.h"
15*6777b538SAndroid Build Coastguard Worker #include "base/strings/utf_string_conversions.h"
16*6777b538SAndroid Build Coastguard Worker #include "net/base/net_string_util.h"
17*6777b538SAndroid Build Coastguard Worker #include "net/http/http_util.h"
18*6777b538SAndroid Build Coastguard Worker
19*6777b538SAndroid Build Coastguard Worker namespace net {
20*6777b538SAndroid Build Coastguard Worker
21*6777b538SAndroid Build Coastguard Worker namespace {
22*6777b538SAndroid Build Coastguard Worker
// The two transfer encodings an RFC 2047 encoded-word can declare in its
// "<E>" field.
enum RFC2047EncodingType {
  // "Q" encoding; handled by DecodeQEncoding().
  Q_ENCODING,
  // "B" encoding; handled as base64.
  B_ENCODING,
};
27*6777b538SAndroid Build Coastguard Worker
28*6777b538SAndroid Build Coastguard Worker // Decodes a "Q" encoded string as described in RFC 2047 section 4.2. Similar to
29*6777b538SAndroid Build Coastguard Worker // decoding a quoted-printable string. Returns true if the input was valid.
DecodeQEncoding(std::string_view input,std::string * output)30*6777b538SAndroid Build Coastguard Worker bool DecodeQEncoding(std::string_view input, std::string* output) {
31*6777b538SAndroid Build Coastguard Worker std::string temp;
32*6777b538SAndroid Build Coastguard Worker temp.reserve(input.size());
33*6777b538SAndroid Build Coastguard Worker for (auto it = input.begin(); it != input.end(); ++it) {
34*6777b538SAndroid Build Coastguard Worker if (*it == '_') {
35*6777b538SAndroid Build Coastguard Worker temp.push_back(' ');
36*6777b538SAndroid Build Coastguard Worker } else if (*it == '=') {
37*6777b538SAndroid Build Coastguard Worker if ((input.end() - it < 3) ||
38*6777b538SAndroid Build Coastguard Worker !base::IsHexDigit(static_cast<unsigned char>(*(it + 1))) ||
39*6777b538SAndroid Build Coastguard Worker !base::IsHexDigit(static_cast<unsigned char>(*(it + 2))))
40*6777b538SAndroid Build Coastguard Worker return false;
41*6777b538SAndroid Build Coastguard Worker unsigned char ch =
42*6777b538SAndroid Build Coastguard Worker base::HexDigitToInt(*(it + 1)) * 16 + base::HexDigitToInt(*(it + 2));
43*6777b538SAndroid Build Coastguard Worker temp.push_back(static_cast<char>(ch));
44*6777b538SAndroid Build Coastguard Worker ++it;
45*6777b538SAndroid Build Coastguard Worker ++it;
46*6777b538SAndroid Build Coastguard Worker } else if (0x20 < *it && *it < 0x7F && *it != '?') {
47*6777b538SAndroid Build Coastguard Worker // In a Q-encoded word, only printable ASCII characters
48*6777b538SAndroid Build Coastguard Worker // represent themselves. Besides, space, '=', '_' and '?' are
49*6777b538SAndroid Build Coastguard Worker // not allowed, but they're already filtered out.
50*6777b538SAndroid Build Coastguard Worker DCHECK_NE('=', *it);
51*6777b538SAndroid Build Coastguard Worker DCHECK_NE('?', *it);
52*6777b538SAndroid Build Coastguard Worker DCHECK_NE('_', *it);
53*6777b538SAndroid Build Coastguard Worker temp.push_back(*it);
54*6777b538SAndroid Build Coastguard Worker } else {
55*6777b538SAndroid Build Coastguard Worker return false;
56*6777b538SAndroid Build Coastguard Worker }
57*6777b538SAndroid Build Coastguard Worker }
58*6777b538SAndroid Build Coastguard Worker output->swap(temp);
59*6777b538SAndroid Build Coastguard Worker return true;
60*6777b538SAndroid Build Coastguard Worker }
61*6777b538SAndroid Build Coastguard Worker
62*6777b538SAndroid Build Coastguard Worker // Decodes a "Q" or "B" encoded string as per RFC 2047 section 4. The encoding
63*6777b538SAndroid Build Coastguard Worker // type is specified in |enc_type|.
DecodeBQEncoding(std::string_view part,RFC2047EncodingType enc_type,const std::string & charset,std::string * output)64*6777b538SAndroid Build Coastguard Worker bool DecodeBQEncoding(std::string_view part,
65*6777b538SAndroid Build Coastguard Worker RFC2047EncodingType enc_type,
66*6777b538SAndroid Build Coastguard Worker const std::string& charset,
67*6777b538SAndroid Build Coastguard Worker std::string* output) {
68*6777b538SAndroid Build Coastguard Worker std::string decoded;
69*6777b538SAndroid Build Coastguard Worker if (!((enc_type == B_ENCODING) ?
70*6777b538SAndroid Build Coastguard Worker base::Base64Decode(part, &decoded) : DecodeQEncoding(part, &decoded))) {
71*6777b538SAndroid Build Coastguard Worker return false;
72*6777b538SAndroid Build Coastguard Worker }
73*6777b538SAndroid Build Coastguard Worker
74*6777b538SAndroid Build Coastguard Worker if (decoded.empty()) {
75*6777b538SAndroid Build Coastguard Worker output->clear();
76*6777b538SAndroid Build Coastguard Worker return true;
77*6777b538SAndroid Build Coastguard Worker }
78*6777b538SAndroid Build Coastguard Worker
79*6777b538SAndroid Build Coastguard Worker return ConvertToUtf8(decoded, charset.c_str(), output);
80*6777b538SAndroid Build Coastguard Worker }
81*6777b538SAndroid Build Coastguard Worker
DecodeWord(std::string_view encoded_word,const std::string & referrer_charset,bool * is_rfc2047,std::string * output,int * parse_result_flags)82*6777b538SAndroid Build Coastguard Worker bool DecodeWord(std::string_view encoded_word,
83*6777b538SAndroid Build Coastguard Worker const std::string& referrer_charset,
84*6777b538SAndroid Build Coastguard Worker bool* is_rfc2047,
85*6777b538SAndroid Build Coastguard Worker std::string* output,
86*6777b538SAndroid Build Coastguard Worker int* parse_result_flags) {
87*6777b538SAndroid Build Coastguard Worker *is_rfc2047 = false;
88*6777b538SAndroid Build Coastguard Worker output->clear();
89*6777b538SAndroid Build Coastguard Worker if (encoded_word.empty())
90*6777b538SAndroid Build Coastguard Worker return true;
91*6777b538SAndroid Build Coastguard Worker
92*6777b538SAndroid Build Coastguard Worker if (!base::IsStringASCII(encoded_word)) {
93*6777b538SAndroid Build Coastguard Worker // Try UTF-8, referrer_charset and the native OS default charset in turn.
94*6777b538SAndroid Build Coastguard Worker if (base::IsStringUTF8(encoded_word)) {
95*6777b538SAndroid Build Coastguard Worker *output = std::string(encoded_word);
96*6777b538SAndroid Build Coastguard Worker } else {
97*6777b538SAndroid Build Coastguard Worker std::u16string utf16_output;
98*6777b538SAndroid Build Coastguard Worker if (!referrer_charset.empty() &&
99*6777b538SAndroid Build Coastguard Worker ConvertToUTF16(encoded_word, referrer_charset.c_str(),
100*6777b538SAndroid Build Coastguard Worker &utf16_output)) {
101*6777b538SAndroid Build Coastguard Worker *output = base::UTF16ToUTF8(utf16_output);
102*6777b538SAndroid Build Coastguard Worker } else {
103*6777b538SAndroid Build Coastguard Worker *output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word));
104*6777b538SAndroid Build Coastguard Worker }
105*6777b538SAndroid Build Coastguard Worker }
106*6777b538SAndroid Build Coastguard Worker
107*6777b538SAndroid Build Coastguard Worker *parse_result_flags |= HttpContentDisposition::HAS_NON_ASCII_STRINGS;
108*6777b538SAndroid Build Coastguard Worker return true;
109*6777b538SAndroid Build Coastguard Worker }
110*6777b538SAndroid Build Coastguard Worker
111*6777b538SAndroid Build Coastguard Worker // RFC 2047 : one of encoding methods supported by Firefox and relatively
112*6777b538SAndroid Build Coastguard Worker // widely used by web servers.
113*6777b538SAndroid Build Coastguard Worker // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.
114*6777b538SAndroid Build Coastguard Worker // We don't care about the length restriction (72 bytes) because
115*6777b538SAndroid Build Coastguard Worker // many web servers generate encoded words longer than the limit.
116*6777b538SAndroid Build Coastguard Worker std::string decoded_word;
117*6777b538SAndroid Build Coastguard Worker *is_rfc2047 = true;
118*6777b538SAndroid Build Coastguard Worker int part_index = 0;
119*6777b538SAndroid Build Coastguard Worker std::string charset;
120*6777b538SAndroid Build Coastguard Worker base::CStringTokenizer t(encoded_word.data(),
121*6777b538SAndroid Build Coastguard Worker encoded_word.data() + encoded_word.size(), "?");
122*6777b538SAndroid Build Coastguard Worker RFC2047EncodingType enc_type = Q_ENCODING;
123*6777b538SAndroid Build Coastguard Worker while (*is_rfc2047 && t.GetNext()) {
124*6777b538SAndroid Build Coastguard Worker std::string_view part = t.token_piece();
125*6777b538SAndroid Build Coastguard Worker switch (part_index) {
126*6777b538SAndroid Build Coastguard Worker case 0:
127*6777b538SAndroid Build Coastguard Worker if (part != "=") {
128*6777b538SAndroid Build Coastguard Worker *is_rfc2047 = false;
129*6777b538SAndroid Build Coastguard Worker break;
130*6777b538SAndroid Build Coastguard Worker }
131*6777b538SAndroid Build Coastguard Worker ++part_index;
132*6777b538SAndroid Build Coastguard Worker break;
133*6777b538SAndroid Build Coastguard Worker case 1:
134*6777b538SAndroid Build Coastguard Worker // Do we need charset validity check here?
135*6777b538SAndroid Build Coastguard Worker charset = std::string(part);
136*6777b538SAndroid Build Coastguard Worker ++part_index;
137*6777b538SAndroid Build Coastguard Worker break;
138*6777b538SAndroid Build Coastguard Worker case 2:
139*6777b538SAndroid Build Coastguard Worker if (part.size() > 1 ||
140*6777b538SAndroid Build Coastguard Worker part.find_first_of("bBqQ") == std::string::npos) {
141*6777b538SAndroid Build Coastguard Worker *is_rfc2047 = false;
142*6777b538SAndroid Build Coastguard Worker break;
143*6777b538SAndroid Build Coastguard Worker }
144*6777b538SAndroid Build Coastguard Worker if (part[0] == 'b' || part[0] == 'B') {
145*6777b538SAndroid Build Coastguard Worker enc_type = B_ENCODING;
146*6777b538SAndroid Build Coastguard Worker }
147*6777b538SAndroid Build Coastguard Worker ++part_index;
148*6777b538SAndroid Build Coastguard Worker break;
149*6777b538SAndroid Build Coastguard Worker case 3:
150*6777b538SAndroid Build Coastguard Worker *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &decoded_word);
151*6777b538SAndroid Build Coastguard Worker if (!*is_rfc2047) {
152*6777b538SAndroid Build Coastguard Worker // Last minute failure. Invalid B/Q encoding. Rather than
153*6777b538SAndroid Build Coastguard Worker // passing it through, return now.
154*6777b538SAndroid Build Coastguard Worker return false;
155*6777b538SAndroid Build Coastguard Worker }
156*6777b538SAndroid Build Coastguard Worker ++part_index;
157*6777b538SAndroid Build Coastguard Worker break;
158*6777b538SAndroid Build Coastguard Worker case 4:
159*6777b538SAndroid Build Coastguard Worker if (part != "=") {
160*6777b538SAndroid Build Coastguard Worker // Another last minute failure !
161*6777b538SAndroid Build Coastguard Worker // Likely to be a case of two encoded-words in a row or
162*6777b538SAndroid Build Coastguard Worker // an encoded word followed by a non-encoded word. We can be
163*6777b538SAndroid Build Coastguard Worker // generous, but it does not help much in terms of compatibility,
164*6777b538SAndroid Build Coastguard Worker // I believe. Return immediately.
165*6777b538SAndroid Build Coastguard Worker *is_rfc2047 = false;
166*6777b538SAndroid Build Coastguard Worker return false;
167*6777b538SAndroid Build Coastguard Worker }
168*6777b538SAndroid Build Coastguard Worker ++part_index;
169*6777b538SAndroid Build Coastguard Worker break;
170*6777b538SAndroid Build Coastguard Worker default:
171*6777b538SAndroid Build Coastguard Worker *is_rfc2047 = false;
172*6777b538SAndroid Build Coastguard Worker return false;
173*6777b538SAndroid Build Coastguard Worker }
174*6777b538SAndroid Build Coastguard Worker }
175*6777b538SAndroid Build Coastguard Worker
176*6777b538SAndroid Build Coastguard Worker if (*is_rfc2047) {
177*6777b538SAndroid Build Coastguard Worker if (*(encoded_word.end() - 1) == '=') {
178*6777b538SAndroid Build Coastguard Worker output->swap(decoded_word);
179*6777b538SAndroid Build Coastguard Worker *parse_result_flags |=
180*6777b538SAndroid Build Coastguard Worker HttpContentDisposition::HAS_RFC2047_ENCODED_STRINGS;
181*6777b538SAndroid Build Coastguard Worker return true;
182*6777b538SAndroid Build Coastguard Worker }
183*6777b538SAndroid Build Coastguard Worker // encoded_word ending prematurelly with '?' or extra '?'
184*6777b538SAndroid Build Coastguard Worker *is_rfc2047 = false;
185*6777b538SAndroid Build Coastguard Worker return false;
186*6777b538SAndroid Build Coastguard Worker }
187*6777b538SAndroid Build Coastguard Worker
188*6777b538SAndroid Build Coastguard Worker // We're not handling 'especial' characters quoted with '\', but
189*6777b538SAndroid Build Coastguard Worker // it should be Ok because we're not an email client but a
190*6777b538SAndroid Build Coastguard Worker // web browser.
191*6777b538SAndroid Build Coastguard Worker
192*6777b538SAndroid Build Coastguard Worker // What IE6/7 does: %-escaped UTF-8.
193*6777b538SAndroid Build Coastguard Worker decoded_word = base::UnescapeBinaryURLComponent(encoded_word,
194*6777b538SAndroid Build Coastguard Worker base::UnescapeRule::NORMAL);
195*6777b538SAndroid Build Coastguard Worker if (decoded_word != encoded_word)
196*6777b538SAndroid Build Coastguard Worker *parse_result_flags |= HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS;
197*6777b538SAndroid Build Coastguard Worker if (base::IsStringUTF8(decoded_word)) {
198*6777b538SAndroid Build Coastguard Worker output->swap(decoded_word);
199*6777b538SAndroid Build Coastguard Worker return true;
200*6777b538SAndroid Build Coastguard Worker // We can try either the OS default charset or 'origin charset' here,
201*6777b538SAndroid Build Coastguard Worker // As far as I can tell, IE does not support it. However, I've seen
202*6777b538SAndroid Build Coastguard Worker // web servers emit %-escaped string in a legacy encoding (usually
203*6777b538SAndroid Build Coastguard Worker // origin charset).
204*6777b538SAndroid Build Coastguard Worker // TODO(jungshik) : Test IE further and consider adding a fallback here.
205*6777b538SAndroid Build Coastguard Worker }
206*6777b538SAndroid Build Coastguard Worker return false;
207*6777b538SAndroid Build Coastguard Worker }
208*6777b538SAndroid Build Coastguard Worker
209*6777b538SAndroid Build Coastguard Worker // Decodes the value of a 'filename' or 'name' parameter given as |input|. The
210*6777b538SAndroid Build Coastguard Worker // value is supposed to be of the form:
211*6777b538SAndroid Build Coastguard Worker //
212*6777b538SAndroid Build Coastguard Worker // value = token | quoted-string
213*6777b538SAndroid Build Coastguard Worker //
214*6777b538SAndroid Build Coastguard Worker // However we currently also allow RFC 2047 encoding and non-ASCII
215*6777b538SAndroid Build Coastguard Worker // strings. Non-ASCII strings are interpreted based on |referrer_charset|.
DecodeFilenameValue(const std::string & input,const std::string & referrer_charset,std::string * output,int * parse_result_flags)216*6777b538SAndroid Build Coastguard Worker bool DecodeFilenameValue(const std::string& input,
217*6777b538SAndroid Build Coastguard Worker const std::string& referrer_charset,
218*6777b538SAndroid Build Coastguard Worker std::string* output,
219*6777b538SAndroid Build Coastguard Worker int* parse_result_flags) {
220*6777b538SAndroid Build Coastguard Worker int current_parse_result_flags = 0;
221*6777b538SAndroid Build Coastguard Worker std::string decoded_value;
222*6777b538SAndroid Build Coastguard Worker bool is_previous_token_rfc2047 = true;
223*6777b538SAndroid Build Coastguard Worker
224*6777b538SAndroid Build Coastguard Worker // Tokenize with whitespace characters.
225*6777b538SAndroid Build Coastguard Worker base::StringTokenizer t(input, " \t\n\r");
226*6777b538SAndroid Build Coastguard Worker t.set_options(base::StringTokenizer::RETURN_DELIMS);
227*6777b538SAndroid Build Coastguard Worker while (t.GetNext()) {
228*6777b538SAndroid Build Coastguard Worker if (t.token_is_delim()) {
229*6777b538SAndroid Build Coastguard Worker // If the previous non-delimeter token is not RFC2047-encoded,
230*6777b538SAndroid Build Coastguard Worker // put in a space in its place. Otheriwse, skip over it.
231*6777b538SAndroid Build Coastguard Worker if (!is_previous_token_rfc2047)
232*6777b538SAndroid Build Coastguard Worker decoded_value.push_back(' ');
233*6777b538SAndroid Build Coastguard Worker continue;
234*6777b538SAndroid Build Coastguard Worker }
235*6777b538SAndroid Build Coastguard Worker // We don't support a single multibyte character split into
236*6777b538SAndroid Build Coastguard Worker // adjacent encoded words. Some broken mail clients emit headers
237*6777b538SAndroid Build Coastguard Worker // with that problem, but most web servers usually encode a filename
238*6777b538SAndroid Build Coastguard Worker // in a single encoded-word. Firefox/Thunderbird do not support
239*6777b538SAndroid Build Coastguard Worker // it, either.
240*6777b538SAndroid Build Coastguard Worker std::string decoded;
241*6777b538SAndroid Build Coastguard Worker if (!DecodeWord(t.token_piece(), referrer_charset,
242*6777b538SAndroid Build Coastguard Worker &is_previous_token_rfc2047, &decoded,
243*6777b538SAndroid Build Coastguard Worker ¤t_parse_result_flags))
244*6777b538SAndroid Build Coastguard Worker return false;
245*6777b538SAndroid Build Coastguard Worker decoded_value.append(decoded);
246*6777b538SAndroid Build Coastguard Worker }
247*6777b538SAndroid Build Coastguard Worker output->swap(decoded_value);
248*6777b538SAndroid Build Coastguard Worker if (parse_result_flags && !output->empty())
249*6777b538SAndroid Build Coastguard Worker *parse_result_flags |= current_parse_result_flags;
250*6777b538SAndroid Build Coastguard Worker return true;
251*6777b538SAndroid Build Coastguard Worker }
252*6777b538SAndroid Build Coastguard Worker
// Parses the charset and value-chars out of an ext-value string.
//
//  ext-value     = charset  "'" [ language ] "'" value-chars
//
// |input| must contain exactly two single quotes, with a non-empty charset
// before the first and non-empty value-chars after the second. The language
// between the quotes is ignored. Returns false, leaving |charset| and
// |value_chars| untouched, if |input| does not have that shape.
bool ParseExtValueComponents(const std::string& input,
                             std::string* charset,
                             std::string* value_chars) {
  constexpr char kSeparator = '\'';

  // Locate the two separators; a third one makes the input invalid.
  const std::string::size_type first_quote = input.find(kSeparator);
  if (first_quote == std::string::npos)
    return false;
  const std::string::size_type second_quote =
      input.find(kSeparator, first_quote + 1);
  if (second_quote == std::string::npos)
    return false;
  if (input.find(kSeparator, second_quote + 1) != std::string::npos)
    return false;

  // Both the charset and the value are mandatory.
  const bool has_charset = first_quote > 0;
  const bool has_value = second_quote + 1 < input.size();
  if (!has_charset || !has_value)
    return false;

  *charset = input.substr(0, first_quote);
  *value_chars = input.substr(second_quote + 1);
  return true;
}
292*6777b538SAndroid Build Coastguard Worker
293*6777b538SAndroid Build Coastguard Worker // http://tools.ietf.org/html/rfc5987#section-3.2
294*6777b538SAndroid Build Coastguard Worker //
295*6777b538SAndroid Build Coastguard Worker // ext-value = charset "'" [ language ] "'" value-chars
296*6777b538SAndroid Build Coastguard Worker //
297*6777b538SAndroid Build Coastguard Worker // charset = "UTF-8" / "ISO-8859-1" / mime-charset
298*6777b538SAndroid Build Coastguard Worker //
299*6777b538SAndroid Build Coastguard Worker // mime-charset = 1*mime-charsetc
300*6777b538SAndroid Build Coastguard Worker // mime-charsetc = ALPHA / DIGIT
301*6777b538SAndroid Build Coastguard Worker // / "!" / "#" / "$" / "%" / "&"
302*6777b538SAndroid Build Coastguard Worker // / "+" / "-" / "^" / "_" / "`"
303*6777b538SAndroid Build Coastguard Worker // / "{" / "}" / "~"
304*6777b538SAndroid Build Coastguard Worker //
305*6777b538SAndroid Build Coastguard Worker // language = <Language-Tag, defined in [RFC5646], Section 2.1>
306*6777b538SAndroid Build Coastguard Worker //
307*6777b538SAndroid Build Coastguard Worker // value-chars = *( pct-encoded / attr-char )
308*6777b538SAndroid Build Coastguard Worker //
309*6777b538SAndroid Build Coastguard Worker // pct-encoded = "%" HEXDIG HEXDIG
310*6777b538SAndroid Build Coastguard Worker //
311*6777b538SAndroid Build Coastguard Worker // attr-char = ALPHA / DIGIT
312*6777b538SAndroid Build Coastguard Worker // / "!" / "#" / "$" / "&" / "+" / "-" / "."
313*6777b538SAndroid Build Coastguard Worker // / "^" / "_" / "`" / "|" / "~"
DecodeExtValue(const std::string & param_value,std::string * decoded)314*6777b538SAndroid Build Coastguard Worker bool DecodeExtValue(const std::string& param_value, std::string* decoded) {
315*6777b538SAndroid Build Coastguard Worker if (param_value.find('"') != std::string::npos)
316*6777b538SAndroid Build Coastguard Worker return false;
317*6777b538SAndroid Build Coastguard Worker
318*6777b538SAndroid Build Coastguard Worker std::string charset;
319*6777b538SAndroid Build Coastguard Worker std::string value;
320*6777b538SAndroid Build Coastguard Worker if (!ParseExtValueComponents(param_value, &charset, &value))
321*6777b538SAndroid Build Coastguard Worker return false;
322*6777b538SAndroid Build Coastguard Worker
323*6777b538SAndroid Build Coastguard Worker // RFC 5987 value should be ASCII-only.
324*6777b538SAndroid Build Coastguard Worker if (!base::IsStringASCII(value)) {
325*6777b538SAndroid Build Coastguard Worker decoded->clear();
326*6777b538SAndroid Build Coastguard Worker return true;
327*6777b538SAndroid Build Coastguard Worker }
328*6777b538SAndroid Build Coastguard Worker
329*6777b538SAndroid Build Coastguard Worker std::string unescaped =
330*6777b538SAndroid Build Coastguard Worker base::UnescapeBinaryURLComponent(value, base::UnescapeRule::NORMAL);
331*6777b538SAndroid Build Coastguard Worker
332*6777b538SAndroid Build Coastguard Worker return ConvertToUtf8AndNormalize(unescaped, charset.c_str(), decoded);
333*6777b538SAndroid Build Coastguard Worker }
334*6777b538SAndroid Build Coastguard Worker
335*6777b538SAndroid Build Coastguard Worker } // namespace
336*6777b538SAndroid Build Coastguard Worker
HttpContentDisposition(const std::string & header,const std::string & referrer_charset)337*6777b538SAndroid Build Coastguard Worker HttpContentDisposition::HttpContentDisposition(
338*6777b538SAndroid Build Coastguard Worker const std::string& header,
339*6777b538SAndroid Build Coastguard Worker const std::string& referrer_charset) {
340*6777b538SAndroid Build Coastguard Worker Parse(header, referrer_charset);
341*6777b538SAndroid Build Coastguard Worker }
342*6777b538SAndroid Build Coastguard Worker
343*6777b538SAndroid Build Coastguard Worker HttpContentDisposition::~HttpContentDisposition() = default;
344*6777b538SAndroid Build Coastguard Worker
ConsumeDispositionType(std::string::const_iterator begin,std::string::const_iterator end)345*6777b538SAndroid Build Coastguard Worker std::string::const_iterator HttpContentDisposition::ConsumeDispositionType(
346*6777b538SAndroid Build Coastguard Worker std::string::const_iterator begin, std::string::const_iterator end) {
347*6777b538SAndroid Build Coastguard Worker DCHECK(type_ == INLINE);
348*6777b538SAndroid Build Coastguard Worker auto header = base::MakeStringPiece(begin, end);
349*6777b538SAndroid Build Coastguard Worker size_t delimiter = header.find(';');
350*6777b538SAndroid Build Coastguard Worker std::string_view type = header.substr(0, delimiter);
351*6777b538SAndroid Build Coastguard Worker type = HttpUtil::TrimLWS(type);
352*6777b538SAndroid Build Coastguard Worker
353*6777b538SAndroid Build Coastguard Worker // If the disposition-type isn't a valid token the then the
354*6777b538SAndroid Build Coastguard Worker // Content-Disposition header is malformed, and we treat the first bytes as
355*6777b538SAndroid Build Coastguard Worker // a parameter rather than a disposition-type.
356*6777b538SAndroid Build Coastguard Worker if (type.empty() || !HttpUtil::IsToken(type))
357*6777b538SAndroid Build Coastguard Worker return begin;
358*6777b538SAndroid Build Coastguard Worker
359*6777b538SAndroid Build Coastguard Worker parse_result_flags_ |= HAS_DISPOSITION_TYPE;
360*6777b538SAndroid Build Coastguard Worker
361*6777b538SAndroid Build Coastguard Worker DCHECK(type.find('=') == std::string_view::npos);
362*6777b538SAndroid Build Coastguard Worker
363*6777b538SAndroid Build Coastguard Worker if (base::EqualsCaseInsensitiveASCII(type, "inline")) {
364*6777b538SAndroid Build Coastguard Worker type_ = INLINE;
365*6777b538SAndroid Build Coastguard Worker } else if (base::EqualsCaseInsensitiveASCII(type, "attachment")) {
366*6777b538SAndroid Build Coastguard Worker type_ = ATTACHMENT;
367*6777b538SAndroid Build Coastguard Worker } else {
368*6777b538SAndroid Build Coastguard Worker parse_result_flags_ |= HAS_UNKNOWN_DISPOSITION_TYPE;
369*6777b538SAndroid Build Coastguard Worker type_ = ATTACHMENT;
370*6777b538SAndroid Build Coastguard Worker }
371*6777b538SAndroid Build Coastguard Worker return begin + (type.data() + type.size() - header.data());
372*6777b538SAndroid Build Coastguard Worker }
373*6777b538SAndroid Build Coastguard Worker
374*6777b538SAndroid Build Coastguard Worker // http://tools.ietf.org/html/rfc6266
375*6777b538SAndroid Build Coastguard Worker //
376*6777b538SAndroid Build Coastguard Worker // content-disposition = "Content-Disposition" ":"
377*6777b538SAndroid Build Coastguard Worker // disposition-type *( ";" disposition-parm )
378*6777b538SAndroid Build Coastguard Worker //
379*6777b538SAndroid Build Coastguard Worker // disposition-type = "inline" | "attachment" | disp-ext-type
380*6777b538SAndroid Build Coastguard Worker // ; case-insensitive
381*6777b538SAndroid Build Coastguard Worker // disp-ext-type = token
382*6777b538SAndroid Build Coastguard Worker //
383*6777b538SAndroid Build Coastguard Worker // disposition-parm = filename-parm | disp-ext-parm
384*6777b538SAndroid Build Coastguard Worker //
385*6777b538SAndroid Build Coastguard Worker // filename-parm = "filename" "=" value
386*6777b538SAndroid Build Coastguard Worker // | "filename*" "=" ext-value
387*6777b538SAndroid Build Coastguard Worker //
388*6777b538SAndroid Build Coastguard Worker // disp-ext-parm = token "=" value
389*6777b538SAndroid Build Coastguard Worker // | ext-token "=" ext-value
390*6777b538SAndroid Build Coastguard Worker // ext-token = <the characters in token, followed by "*">
391*6777b538SAndroid Build Coastguard Worker //
Parse(const std::string & header,const std::string & referrer_charset)392*6777b538SAndroid Build Coastguard Worker void HttpContentDisposition::Parse(const std::string& header,
393*6777b538SAndroid Build Coastguard Worker const std::string& referrer_charset) {
394*6777b538SAndroid Build Coastguard Worker DCHECK(type_ == INLINE);
395*6777b538SAndroid Build Coastguard Worker DCHECK(filename_.empty());
396*6777b538SAndroid Build Coastguard Worker
397*6777b538SAndroid Build Coastguard Worker std::string::const_iterator pos = header.begin();
398*6777b538SAndroid Build Coastguard Worker std::string::const_iterator end = header.end();
399*6777b538SAndroid Build Coastguard Worker pos = ConsumeDispositionType(pos, end);
400*6777b538SAndroid Build Coastguard Worker
401*6777b538SAndroid Build Coastguard Worker std::string filename;
402*6777b538SAndroid Build Coastguard Worker std::string ext_filename;
403*6777b538SAndroid Build Coastguard Worker
404*6777b538SAndroid Build Coastguard Worker HttpUtil::NameValuePairsIterator iter(pos, end, ';');
405*6777b538SAndroid Build Coastguard Worker while (iter.GetNext()) {
406*6777b538SAndroid Build Coastguard Worker if (filename.empty() &&
407*6777b538SAndroid Build Coastguard Worker base::EqualsCaseInsensitiveASCII(iter.name_piece(), "filename")) {
408*6777b538SAndroid Build Coastguard Worker DecodeFilenameValue(iter.value(), referrer_charset, &filename,
409*6777b538SAndroid Build Coastguard Worker &parse_result_flags_);
410*6777b538SAndroid Build Coastguard Worker if (!filename.empty()) {
411*6777b538SAndroid Build Coastguard Worker parse_result_flags_ |= HAS_FILENAME;
412*6777b538SAndroid Build Coastguard Worker if (filename[0] == '\'')
413*6777b538SAndroid Build Coastguard Worker parse_result_flags_ |= HAS_SINGLE_QUOTED_FILENAME;
414*6777b538SAndroid Build Coastguard Worker }
415*6777b538SAndroid Build Coastguard Worker } else if (ext_filename.empty() && base::EqualsCaseInsensitiveASCII(
416*6777b538SAndroid Build Coastguard Worker iter.name_piece(), "filename*")) {
417*6777b538SAndroid Build Coastguard Worker DecodeExtValue(iter.raw_value(), &ext_filename);
418*6777b538SAndroid Build Coastguard Worker if (!ext_filename.empty())
419*6777b538SAndroid Build Coastguard Worker parse_result_flags_ |= HAS_EXT_FILENAME;
420*6777b538SAndroid Build Coastguard Worker }
421*6777b538SAndroid Build Coastguard Worker }
422*6777b538SAndroid Build Coastguard Worker
423*6777b538SAndroid Build Coastguard Worker if (!ext_filename.empty())
424*6777b538SAndroid Build Coastguard Worker filename_ = ext_filename;
425*6777b538SAndroid Build Coastguard Worker else
426*6777b538SAndroid Build Coastguard Worker filename_ = filename;
427*6777b538SAndroid Build Coastguard Worker
428*6777b538SAndroid Build Coastguard Worker if (!filename.empty() && filename[0] == '\'')
429*6777b538SAndroid Build Coastguard Worker parse_result_flags_ |= HAS_SINGLE_QUOTED_FILENAME;
430*6777b538SAndroid Build Coastguard Worker }
431*6777b538SAndroid Build Coastguard Worker
432*6777b538SAndroid Build Coastguard Worker } // namespace net
433