xref: /aosp_15_r20/external/cronet/net/http/http_content_disposition.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1*6777b538SAndroid Build Coastguard Worker // Copyright 2012 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
5*6777b538SAndroid Build Coastguard Worker #include "net/http/http_content_disposition.h"
6*6777b538SAndroid Build Coastguard Worker 
7*6777b538SAndroid Build Coastguard Worker #include <string_view>
8*6777b538SAndroid Build Coastguard Worker 
9*6777b538SAndroid Build Coastguard Worker #include "base/base64.h"
10*6777b538SAndroid Build Coastguard Worker #include "base/check_op.h"
11*6777b538SAndroid Build Coastguard Worker #include "base/strings/escape.h"
12*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_tokenizer.h"
13*6777b538SAndroid Build Coastguard Worker #include "base/strings/string_util.h"
14*6777b538SAndroid Build Coastguard Worker #include "base/strings/sys_string_conversions.h"
15*6777b538SAndroid Build Coastguard Worker #include "base/strings/utf_string_conversions.h"
16*6777b538SAndroid Build Coastguard Worker #include "net/base/net_string_util.h"
17*6777b538SAndroid Build Coastguard Worker #include "net/http/http_util.h"
18*6777b538SAndroid Build Coastguard Worker 
19*6777b538SAndroid Build Coastguard Worker namespace net {
20*6777b538SAndroid Build Coastguard Worker 
21*6777b538SAndroid Build Coastguard Worker namespace {
22*6777b538SAndroid Build Coastguard Worker 
// Transfer encodings that an RFC 2047 encoded-word can declare in its
// encoding field.
enum RFC2047EncodingType {
  Q_ENCODING,  // "Q": similar to quoted-printable; see DecodeQEncoding().
  B_ENCODING   // "B": base64.
};
27*6777b538SAndroid Build Coastguard Worker 
28*6777b538SAndroid Build Coastguard Worker // Decodes a "Q" encoded string as described in RFC 2047 section 4.2. Similar to
29*6777b538SAndroid Build Coastguard Worker // decoding a quoted-printable string.  Returns true if the input was valid.
DecodeQEncoding(std::string_view input,std::string * output)30*6777b538SAndroid Build Coastguard Worker bool DecodeQEncoding(std::string_view input, std::string* output) {
31*6777b538SAndroid Build Coastguard Worker   std::string temp;
32*6777b538SAndroid Build Coastguard Worker   temp.reserve(input.size());
33*6777b538SAndroid Build Coastguard Worker   for (auto it = input.begin(); it != input.end(); ++it) {
34*6777b538SAndroid Build Coastguard Worker     if (*it == '_') {
35*6777b538SAndroid Build Coastguard Worker       temp.push_back(' ');
36*6777b538SAndroid Build Coastguard Worker     } else if (*it == '=') {
37*6777b538SAndroid Build Coastguard Worker       if ((input.end() - it < 3) ||
38*6777b538SAndroid Build Coastguard Worker           !base::IsHexDigit(static_cast<unsigned char>(*(it + 1))) ||
39*6777b538SAndroid Build Coastguard Worker           !base::IsHexDigit(static_cast<unsigned char>(*(it + 2))))
40*6777b538SAndroid Build Coastguard Worker         return false;
41*6777b538SAndroid Build Coastguard Worker       unsigned char ch =
42*6777b538SAndroid Build Coastguard Worker           base::HexDigitToInt(*(it + 1)) * 16 + base::HexDigitToInt(*(it + 2));
43*6777b538SAndroid Build Coastguard Worker       temp.push_back(static_cast<char>(ch));
44*6777b538SAndroid Build Coastguard Worker       ++it;
45*6777b538SAndroid Build Coastguard Worker       ++it;
46*6777b538SAndroid Build Coastguard Worker     } else if (0x20 < *it && *it < 0x7F && *it != '?') {
47*6777b538SAndroid Build Coastguard Worker       // In a Q-encoded word, only printable ASCII characters
48*6777b538SAndroid Build Coastguard Worker       // represent themselves. Besides, space, '=', '_' and '?' are
49*6777b538SAndroid Build Coastguard Worker       // not allowed, but they're already filtered out.
50*6777b538SAndroid Build Coastguard Worker       DCHECK_NE('=', *it);
51*6777b538SAndroid Build Coastguard Worker       DCHECK_NE('?', *it);
52*6777b538SAndroid Build Coastguard Worker       DCHECK_NE('_', *it);
53*6777b538SAndroid Build Coastguard Worker       temp.push_back(*it);
54*6777b538SAndroid Build Coastguard Worker     } else {
55*6777b538SAndroid Build Coastguard Worker       return false;
56*6777b538SAndroid Build Coastguard Worker     }
57*6777b538SAndroid Build Coastguard Worker   }
58*6777b538SAndroid Build Coastguard Worker   output->swap(temp);
59*6777b538SAndroid Build Coastguard Worker   return true;
60*6777b538SAndroid Build Coastguard Worker }
61*6777b538SAndroid Build Coastguard Worker 
62*6777b538SAndroid Build Coastguard Worker // Decodes a "Q" or "B" encoded string as per RFC 2047 section 4. The encoding
63*6777b538SAndroid Build Coastguard Worker // type is specified in |enc_type|.
DecodeBQEncoding(std::string_view part,RFC2047EncodingType enc_type,const std::string & charset,std::string * output)64*6777b538SAndroid Build Coastguard Worker bool DecodeBQEncoding(std::string_view part,
65*6777b538SAndroid Build Coastguard Worker                       RFC2047EncodingType enc_type,
66*6777b538SAndroid Build Coastguard Worker                       const std::string& charset,
67*6777b538SAndroid Build Coastguard Worker                       std::string* output) {
68*6777b538SAndroid Build Coastguard Worker   std::string decoded;
69*6777b538SAndroid Build Coastguard Worker   if (!((enc_type == B_ENCODING) ?
70*6777b538SAndroid Build Coastguard Worker         base::Base64Decode(part, &decoded) : DecodeQEncoding(part, &decoded))) {
71*6777b538SAndroid Build Coastguard Worker     return false;
72*6777b538SAndroid Build Coastguard Worker   }
73*6777b538SAndroid Build Coastguard Worker 
74*6777b538SAndroid Build Coastguard Worker   if (decoded.empty()) {
75*6777b538SAndroid Build Coastguard Worker     output->clear();
76*6777b538SAndroid Build Coastguard Worker     return true;
77*6777b538SAndroid Build Coastguard Worker   }
78*6777b538SAndroid Build Coastguard Worker 
79*6777b538SAndroid Build Coastguard Worker   return ConvertToUtf8(decoded, charset.c_str(), output);
80*6777b538SAndroid Build Coastguard Worker }
81*6777b538SAndroid Build Coastguard Worker 
DecodeWord(std::string_view encoded_word,const std::string & referrer_charset,bool * is_rfc2047,std::string * output,int * parse_result_flags)82*6777b538SAndroid Build Coastguard Worker bool DecodeWord(std::string_view encoded_word,
83*6777b538SAndroid Build Coastguard Worker                 const std::string& referrer_charset,
84*6777b538SAndroid Build Coastguard Worker                 bool* is_rfc2047,
85*6777b538SAndroid Build Coastguard Worker                 std::string* output,
86*6777b538SAndroid Build Coastguard Worker                 int* parse_result_flags) {
87*6777b538SAndroid Build Coastguard Worker   *is_rfc2047 = false;
88*6777b538SAndroid Build Coastguard Worker   output->clear();
89*6777b538SAndroid Build Coastguard Worker   if (encoded_word.empty())
90*6777b538SAndroid Build Coastguard Worker     return true;
91*6777b538SAndroid Build Coastguard Worker 
92*6777b538SAndroid Build Coastguard Worker   if (!base::IsStringASCII(encoded_word)) {
93*6777b538SAndroid Build Coastguard Worker     // Try UTF-8, referrer_charset and the native OS default charset in turn.
94*6777b538SAndroid Build Coastguard Worker     if (base::IsStringUTF8(encoded_word)) {
95*6777b538SAndroid Build Coastguard Worker       *output = std::string(encoded_word);
96*6777b538SAndroid Build Coastguard Worker     } else {
97*6777b538SAndroid Build Coastguard Worker       std::u16string utf16_output;
98*6777b538SAndroid Build Coastguard Worker       if (!referrer_charset.empty() &&
99*6777b538SAndroid Build Coastguard Worker           ConvertToUTF16(encoded_word, referrer_charset.c_str(),
100*6777b538SAndroid Build Coastguard Worker                          &utf16_output)) {
101*6777b538SAndroid Build Coastguard Worker         *output = base::UTF16ToUTF8(utf16_output);
102*6777b538SAndroid Build Coastguard Worker       } else {
103*6777b538SAndroid Build Coastguard Worker         *output = base::WideToUTF8(base::SysNativeMBToWide(encoded_word));
104*6777b538SAndroid Build Coastguard Worker       }
105*6777b538SAndroid Build Coastguard Worker     }
106*6777b538SAndroid Build Coastguard Worker 
107*6777b538SAndroid Build Coastguard Worker     *parse_result_flags |= HttpContentDisposition::HAS_NON_ASCII_STRINGS;
108*6777b538SAndroid Build Coastguard Worker     return true;
109*6777b538SAndroid Build Coastguard Worker   }
110*6777b538SAndroid Build Coastguard Worker 
111*6777b538SAndroid Build Coastguard Worker   // RFC 2047 : one of encoding methods supported by Firefox and relatively
112*6777b538SAndroid Build Coastguard Worker   // widely used by web servers.
113*6777b538SAndroid Build Coastguard Worker   // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'.
114*6777b538SAndroid Build Coastguard Worker   // We don't care about the length restriction (72 bytes) because
115*6777b538SAndroid Build Coastguard Worker   // many web servers generate encoded words longer than the limit.
116*6777b538SAndroid Build Coastguard Worker   std::string decoded_word;
117*6777b538SAndroid Build Coastguard Worker   *is_rfc2047 = true;
118*6777b538SAndroid Build Coastguard Worker   int part_index = 0;
119*6777b538SAndroid Build Coastguard Worker   std::string charset;
120*6777b538SAndroid Build Coastguard Worker   base::CStringTokenizer t(encoded_word.data(),
121*6777b538SAndroid Build Coastguard Worker                            encoded_word.data() + encoded_word.size(), "?");
122*6777b538SAndroid Build Coastguard Worker   RFC2047EncodingType enc_type = Q_ENCODING;
123*6777b538SAndroid Build Coastguard Worker   while (*is_rfc2047 && t.GetNext()) {
124*6777b538SAndroid Build Coastguard Worker     std::string_view part = t.token_piece();
125*6777b538SAndroid Build Coastguard Worker     switch (part_index) {
126*6777b538SAndroid Build Coastguard Worker       case 0:
127*6777b538SAndroid Build Coastguard Worker         if (part != "=") {
128*6777b538SAndroid Build Coastguard Worker           *is_rfc2047 = false;
129*6777b538SAndroid Build Coastguard Worker           break;
130*6777b538SAndroid Build Coastguard Worker         }
131*6777b538SAndroid Build Coastguard Worker         ++part_index;
132*6777b538SAndroid Build Coastguard Worker         break;
133*6777b538SAndroid Build Coastguard Worker       case 1:
134*6777b538SAndroid Build Coastguard Worker         // Do we need charset validity check here?
135*6777b538SAndroid Build Coastguard Worker         charset = std::string(part);
136*6777b538SAndroid Build Coastguard Worker         ++part_index;
137*6777b538SAndroid Build Coastguard Worker         break;
138*6777b538SAndroid Build Coastguard Worker       case 2:
139*6777b538SAndroid Build Coastguard Worker         if (part.size() > 1 ||
140*6777b538SAndroid Build Coastguard Worker             part.find_first_of("bBqQ") == std::string::npos) {
141*6777b538SAndroid Build Coastguard Worker           *is_rfc2047 = false;
142*6777b538SAndroid Build Coastguard Worker           break;
143*6777b538SAndroid Build Coastguard Worker         }
144*6777b538SAndroid Build Coastguard Worker         if (part[0] == 'b' || part[0] == 'B') {
145*6777b538SAndroid Build Coastguard Worker           enc_type = B_ENCODING;
146*6777b538SAndroid Build Coastguard Worker         }
147*6777b538SAndroid Build Coastguard Worker         ++part_index;
148*6777b538SAndroid Build Coastguard Worker         break;
149*6777b538SAndroid Build Coastguard Worker       case 3:
150*6777b538SAndroid Build Coastguard Worker         *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &decoded_word);
151*6777b538SAndroid Build Coastguard Worker         if (!*is_rfc2047) {
152*6777b538SAndroid Build Coastguard Worker           // Last minute failure. Invalid B/Q encoding. Rather than
153*6777b538SAndroid Build Coastguard Worker           // passing it through, return now.
154*6777b538SAndroid Build Coastguard Worker           return false;
155*6777b538SAndroid Build Coastguard Worker         }
156*6777b538SAndroid Build Coastguard Worker         ++part_index;
157*6777b538SAndroid Build Coastguard Worker         break;
158*6777b538SAndroid Build Coastguard Worker       case 4:
159*6777b538SAndroid Build Coastguard Worker         if (part != "=") {
160*6777b538SAndroid Build Coastguard Worker           // Another last minute failure !
161*6777b538SAndroid Build Coastguard Worker           // Likely to be a case of two encoded-words in a row or
162*6777b538SAndroid Build Coastguard Worker           // an encoded word followed by a non-encoded word. We can be
163*6777b538SAndroid Build Coastguard Worker           // generous, but it does not help much in terms of compatibility,
164*6777b538SAndroid Build Coastguard Worker           // I believe. Return immediately.
165*6777b538SAndroid Build Coastguard Worker           *is_rfc2047 = false;
166*6777b538SAndroid Build Coastguard Worker           return false;
167*6777b538SAndroid Build Coastguard Worker         }
168*6777b538SAndroid Build Coastguard Worker         ++part_index;
169*6777b538SAndroid Build Coastguard Worker         break;
170*6777b538SAndroid Build Coastguard Worker       default:
171*6777b538SAndroid Build Coastguard Worker         *is_rfc2047 = false;
172*6777b538SAndroid Build Coastguard Worker         return false;
173*6777b538SAndroid Build Coastguard Worker     }
174*6777b538SAndroid Build Coastguard Worker   }
175*6777b538SAndroid Build Coastguard Worker 
176*6777b538SAndroid Build Coastguard Worker   if (*is_rfc2047) {
177*6777b538SAndroid Build Coastguard Worker     if (*(encoded_word.end() - 1) == '=') {
178*6777b538SAndroid Build Coastguard Worker       output->swap(decoded_word);
179*6777b538SAndroid Build Coastguard Worker       *parse_result_flags |=
180*6777b538SAndroid Build Coastguard Worker           HttpContentDisposition::HAS_RFC2047_ENCODED_STRINGS;
181*6777b538SAndroid Build Coastguard Worker       return true;
182*6777b538SAndroid Build Coastguard Worker     }
183*6777b538SAndroid Build Coastguard Worker     // encoded_word ending prematurelly with '?' or extra '?'
184*6777b538SAndroid Build Coastguard Worker     *is_rfc2047 = false;
185*6777b538SAndroid Build Coastguard Worker     return false;
186*6777b538SAndroid Build Coastguard Worker   }
187*6777b538SAndroid Build Coastguard Worker 
188*6777b538SAndroid Build Coastguard Worker   // We're not handling 'especial' characters quoted with '\', but
189*6777b538SAndroid Build Coastguard Worker   // it should be Ok because we're not an email client but a
190*6777b538SAndroid Build Coastguard Worker   // web browser.
191*6777b538SAndroid Build Coastguard Worker 
192*6777b538SAndroid Build Coastguard Worker   // What IE6/7 does: %-escaped UTF-8.
193*6777b538SAndroid Build Coastguard Worker   decoded_word = base::UnescapeBinaryURLComponent(encoded_word,
194*6777b538SAndroid Build Coastguard Worker                                                   base::UnescapeRule::NORMAL);
195*6777b538SAndroid Build Coastguard Worker   if (decoded_word != encoded_word)
196*6777b538SAndroid Build Coastguard Worker     *parse_result_flags |= HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS;
197*6777b538SAndroid Build Coastguard Worker   if (base::IsStringUTF8(decoded_word)) {
198*6777b538SAndroid Build Coastguard Worker     output->swap(decoded_word);
199*6777b538SAndroid Build Coastguard Worker     return true;
200*6777b538SAndroid Build Coastguard Worker     // We can try either the OS default charset or 'origin charset' here,
201*6777b538SAndroid Build Coastguard Worker     // As far as I can tell, IE does not support it. However, I've seen
202*6777b538SAndroid Build Coastguard Worker     // web servers emit %-escaped string in a legacy encoding (usually
203*6777b538SAndroid Build Coastguard Worker     // origin charset).
204*6777b538SAndroid Build Coastguard Worker     // TODO(jungshik) : Test IE further and consider adding a fallback here.
205*6777b538SAndroid Build Coastguard Worker   }
206*6777b538SAndroid Build Coastguard Worker   return false;
207*6777b538SAndroid Build Coastguard Worker }
208*6777b538SAndroid Build Coastguard Worker 
209*6777b538SAndroid Build Coastguard Worker // Decodes the value of a 'filename' or 'name' parameter given as |input|. The
210*6777b538SAndroid Build Coastguard Worker // value is supposed to be of the form:
211*6777b538SAndroid Build Coastguard Worker //
212*6777b538SAndroid Build Coastguard Worker //   value                   = token | quoted-string
213*6777b538SAndroid Build Coastguard Worker //
214*6777b538SAndroid Build Coastguard Worker // However we currently also allow RFC 2047 encoding and non-ASCII
215*6777b538SAndroid Build Coastguard Worker // strings. Non-ASCII strings are interpreted based on |referrer_charset|.
DecodeFilenameValue(const std::string & input,const std::string & referrer_charset,std::string * output,int * parse_result_flags)216*6777b538SAndroid Build Coastguard Worker bool DecodeFilenameValue(const std::string& input,
217*6777b538SAndroid Build Coastguard Worker                          const std::string& referrer_charset,
218*6777b538SAndroid Build Coastguard Worker                          std::string* output,
219*6777b538SAndroid Build Coastguard Worker                          int* parse_result_flags) {
220*6777b538SAndroid Build Coastguard Worker   int current_parse_result_flags = 0;
221*6777b538SAndroid Build Coastguard Worker   std::string decoded_value;
222*6777b538SAndroid Build Coastguard Worker   bool is_previous_token_rfc2047 = true;
223*6777b538SAndroid Build Coastguard Worker 
224*6777b538SAndroid Build Coastguard Worker   // Tokenize with whitespace characters.
225*6777b538SAndroid Build Coastguard Worker   base::StringTokenizer t(input, " \t\n\r");
226*6777b538SAndroid Build Coastguard Worker   t.set_options(base::StringTokenizer::RETURN_DELIMS);
227*6777b538SAndroid Build Coastguard Worker   while (t.GetNext()) {
228*6777b538SAndroid Build Coastguard Worker     if (t.token_is_delim()) {
229*6777b538SAndroid Build Coastguard Worker       // If the previous non-delimeter token is not RFC2047-encoded,
230*6777b538SAndroid Build Coastguard Worker       // put in a space in its place. Otheriwse, skip over it.
231*6777b538SAndroid Build Coastguard Worker       if (!is_previous_token_rfc2047)
232*6777b538SAndroid Build Coastguard Worker         decoded_value.push_back(' ');
233*6777b538SAndroid Build Coastguard Worker       continue;
234*6777b538SAndroid Build Coastguard Worker     }
235*6777b538SAndroid Build Coastguard Worker     // We don't support a single multibyte character split into
236*6777b538SAndroid Build Coastguard Worker     // adjacent encoded words. Some broken mail clients emit headers
237*6777b538SAndroid Build Coastguard Worker     // with that problem, but most web servers usually encode a filename
238*6777b538SAndroid Build Coastguard Worker     // in a single encoded-word. Firefox/Thunderbird do not support
239*6777b538SAndroid Build Coastguard Worker     // it, either.
240*6777b538SAndroid Build Coastguard Worker     std::string decoded;
241*6777b538SAndroid Build Coastguard Worker     if (!DecodeWord(t.token_piece(), referrer_charset,
242*6777b538SAndroid Build Coastguard Worker                     &is_previous_token_rfc2047, &decoded,
243*6777b538SAndroid Build Coastguard Worker                     &current_parse_result_flags))
244*6777b538SAndroid Build Coastguard Worker       return false;
245*6777b538SAndroid Build Coastguard Worker     decoded_value.append(decoded);
246*6777b538SAndroid Build Coastguard Worker   }
247*6777b538SAndroid Build Coastguard Worker   output->swap(decoded_value);
248*6777b538SAndroid Build Coastguard Worker   if (parse_result_flags && !output->empty())
249*6777b538SAndroid Build Coastguard Worker     *parse_result_flags |= current_parse_result_flags;
250*6777b538SAndroid Build Coastguard Worker   return true;
251*6777b538SAndroid Build Coastguard Worker }
252*6777b538SAndroid Build Coastguard Worker 
253*6777b538SAndroid Build Coastguard Worker // Parses the charset and value-chars out of an ext-value string.
254*6777b538SAndroid Build Coastguard Worker //
255*6777b538SAndroid Build Coastguard Worker //  ext-value     = charset  "'" [ language ] "'" value-chars
ParseExtValueComponents(const std::string & input,std::string * charset,std::string * value_chars)256*6777b538SAndroid Build Coastguard Worker bool ParseExtValueComponents(const std::string& input,
257*6777b538SAndroid Build Coastguard Worker                              std::string* charset,
258*6777b538SAndroid Build Coastguard Worker                              std::string* value_chars) {
259*6777b538SAndroid Build Coastguard Worker   base::StringTokenizer t(input, "'");
260*6777b538SAndroid Build Coastguard Worker   t.set_options(base::StringTokenizer::RETURN_DELIMS);
261*6777b538SAndroid Build Coastguard Worker   std::string_view temp_charset;
262*6777b538SAndroid Build Coastguard Worker   std::string_view temp_value;
263*6777b538SAndroid Build Coastguard Worker   int num_delims_seen = 0;
264*6777b538SAndroid Build Coastguard Worker   while (t.GetNext()) {
265*6777b538SAndroid Build Coastguard Worker     if (t.token_is_delim()) {
266*6777b538SAndroid Build Coastguard Worker       ++num_delims_seen;
267*6777b538SAndroid Build Coastguard Worker       continue;
268*6777b538SAndroid Build Coastguard Worker     } else {
269*6777b538SAndroid Build Coastguard Worker       switch (num_delims_seen) {
270*6777b538SAndroid Build Coastguard Worker         case 0:
271*6777b538SAndroid Build Coastguard Worker           temp_charset = t.token_piece();
272*6777b538SAndroid Build Coastguard Worker           break;
273*6777b538SAndroid Build Coastguard Worker         case 1:
274*6777b538SAndroid Build Coastguard Worker           // Language is ignored.
275*6777b538SAndroid Build Coastguard Worker           break;
276*6777b538SAndroid Build Coastguard Worker         case 2:
277*6777b538SAndroid Build Coastguard Worker           temp_value = t.token_piece();
278*6777b538SAndroid Build Coastguard Worker           break;
279*6777b538SAndroid Build Coastguard Worker         default:
280*6777b538SAndroid Build Coastguard Worker           return false;
281*6777b538SAndroid Build Coastguard Worker       }
282*6777b538SAndroid Build Coastguard Worker     }
283*6777b538SAndroid Build Coastguard Worker   }
284*6777b538SAndroid Build Coastguard Worker   if (num_delims_seen != 2)
285*6777b538SAndroid Build Coastguard Worker     return false;
286*6777b538SAndroid Build Coastguard Worker   if (temp_charset.empty() || temp_value.empty())
287*6777b538SAndroid Build Coastguard Worker     return false;
288*6777b538SAndroid Build Coastguard Worker   *charset = std::string(temp_charset);
289*6777b538SAndroid Build Coastguard Worker   *value_chars = std::string(temp_value);
290*6777b538SAndroid Build Coastguard Worker   return true;
291*6777b538SAndroid Build Coastguard Worker }
292*6777b538SAndroid Build Coastguard Worker 
293*6777b538SAndroid Build Coastguard Worker // http://tools.ietf.org/html/rfc5987#section-3.2
294*6777b538SAndroid Build Coastguard Worker //
295*6777b538SAndroid Build Coastguard Worker //  ext-value     = charset  "'" [ language ] "'" value-chars
296*6777b538SAndroid Build Coastguard Worker //
297*6777b538SAndroid Build Coastguard Worker //  charset       = "UTF-8" / "ISO-8859-1" / mime-charset
298*6777b538SAndroid Build Coastguard Worker //
299*6777b538SAndroid Build Coastguard Worker //  mime-charset  = 1*mime-charsetc
300*6777b538SAndroid Build Coastguard Worker //  mime-charsetc = ALPHA / DIGIT
301*6777b538SAndroid Build Coastguard Worker //                 / "!" / "#" / "$" / "%" / "&"
302*6777b538SAndroid Build Coastguard Worker //                 / "+" / "-" / "^" / "_" / "`"
303*6777b538SAndroid Build Coastguard Worker //                 / "{" / "}" / "~"
304*6777b538SAndroid Build Coastguard Worker //
305*6777b538SAndroid Build Coastguard Worker //  language      = <Language-Tag, defined in [RFC5646], Section 2.1>
306*6777b538SAndroid Build Coastguard Worker //
307*6777b538SAndroid Build Coastguard Worker //  value-chars   = *( pct-encoded / attr-char )
308*6777b538SAndroid Build Coastguard Worker //
309*6777b538SAndroid Build Coastguard Worker //  pct-encoded   = "%" HEXDIG HEXDIG
310*6777b538SAndroid Build Coastguard Worker //
311*6777b538SAndroid Build Coastguard Worker //  attr-char     = ALPHA / DIGIT
312*6777b538SAndroid Build Coastguard Worker //                 / "!" / "#" / "$" / "&" / "+" / "-" / "."
313*6777b538SAndroid Build Coastguard Worker //                 / "^" / "_" / "`" / "|" / "~"
DecodeExtValue(const std::string & param_value,std::string * decoded)314*6777b538SAndroid Build Coastguard Worker bool DecodeExtValue(const std::string& param_value, std::string* decoded) {
315*6777b538SAndroid Build Coastguard Worker   if (param_value.find('"') != std::string::npos)
316*6777b538SAndroid Build Coastguard Worker     return false;
317*6777b538SAndroid Build Coastguard Worker 
318*6777b538SAndroid Build Coastguard Worker   std::string charset;
319*6777b538SAndroid Build Coastguard Worker   std::string value;
320*6777b538SAndroid Build Coastguard Worker   if (!ParseExtValueComponents(param_value, &charset, &value))
321*6777b538SAndroid Build Coastguard Worker     return false;
322*6777b538SAndroid Build Coastguard Worker 
323*6777b538SAndroid Build Coastguard Worker   // RFC 5987 value should be ASCII-only.
324*6777b538SAndroid Build Coastguard Worker   if (!base::IsStringASCII(value)) {
325*6777b538SAndroid Build Coastguard Worker     decoded->clear();
326*6777b538SAndroid Build Coastguard Worker     return true;
327*6777b538SAndroid Build Coastguard Worker   }
328*6777b538SAndroid Build Coastguard Worker 
329*6777b538SAndroid Build Coastguard Worker   std::string unescaped =
330*6777b538SAndroid Build Coastguard Worker       base::UnescapeBinaryURLComponent(value, base::UnescapeRule::NORMAL);
331*6777b538SAndroid Build Coastguard Worker 
332*6777b538SAndroid Build Coastguard Worker   return ConvertToUtf8AndNormalize(unescaped, charset.c_str(), decoded);
333*6777b538SAndroid Build Coastguard Worker }
334*6777b538SAndroid Build Coastguard Worker 
335*6777b538SAndroid Build Coastguard Worker } // namespace
336*6777b538SAndroid Build Coastguard Worker 
HttpContentDisposition(const std::string & header,const std::string & referrer_charset)337*6777b538SAndroid Build Coastguard Worker HttpContentDisposition::HttpContentDisposition(
338*6777b538SAndroid Build Coastguard Worker     const std::string& header,
339*6777b538SAndroid Build Coastguard Worker     const std::string& referrer_charset) {
340*6777b538SAndroid Build Coastguard Worker   Parse(header, referrer_charset);
341*6777b538SAndroid Build Coastguard Worker }
342*6777b538SAndroid Build Coastguard Worker 
343*6777b538SAndroid Build Coastguard Worker HttpContentDisposition::~HttpContentDisposition() = default;
344*6777b538SAndroid Build Coastguard Worker 
ConsumeDispositionType(std::string::const_iterator begin,std::string::const_iterator end)345*6777b538SAndroid Build Coastguard Worker std::string::const_iterator HttpContentDisposition::ConsumeDispositionType(
346*6777b538SAndroid Build Coastguard Worker     std::string::const_iterator begin, std::string::const_iterator end) {
347*6777b538SAndroid Build Coastguard Worker   DCHECK(type_ == INLINE);
348*6777b538SAndroid Build Coastguard Worker   auto header = base::MakeStringPiece(begin, end);
349*6777b538SAndroid Build Coastguard Worker   size_t delimiter = header.find(';');
350*6777b538SAndroid Build Coastguard Worker   std::string_view type = header.substr(0, delimiter);
351*6777b538SAndroid Build Coastguard Worker   type = HttpUtil::TrimLWS(type);
352*6777b538SAndroid Build Coastguard Worker 
353*6777b538SAndroid Build Coastguard Worker   // If the disposition-type isn't a valid token the then the
354*6777b538SAndroid Build Coastguard Worker   // Content-Disposition header is malformed, and we treat the first bytes as
355*6777b538SAndroid Build Coastguard Worker   // a parameter rather than a disposition-type.
356*6777b538SAndroid Build Coastguard Worker   if (type.empty() || !HttpUtil::IsToken(type))
357*6777b538SAndroid Build Coastguard Worker     return begin;
358*6777b538SAndroid Build Coastguard Worker 
359*6777b538SAndroid Build Coastguard Worker   parse_result_flags_ |= HAS_DISPOSITION_TYPE;
360*6777b538SAndroid Build Coastguard Worker 
361*6777b538SAndroid Build Coastguard Worker   DCHECK(type.find('=') == std::string_view::npos);
362*6777b538SAndroid Build Coastguard Worker 
363*6777b538SAndroid Build Coastguard Worker   if (base::EqualsCaseInsensitiveASCII(type, "inline")) {
364*6777b538SAndroid Build Coastguard Worker     type_ = INLINE;
365*6777b538SAndroid Build Coastguard Worker   } else if (base::EqualsCaseInsensitiveASCII(type, "attachment")) {
366*6777b538SAndroid Build Coastguard Worker     type_ = ATTACHMENT;
367*6777b538SAndroid Build Coastguard Worker   } else {
368*6777b538SAndroid Build Coastguard Worker     parse_result_flags_ |= HAS_UNKNOWN_DISPOSITION_TYPE;
369*6777b538SAndroid Build Coastguard Worker     type_ = ATTACHMENT;
370*6777b538SAndroid Build Coastguard Worker   }
371*6777b538SAndroid Build Coastguard Worker   return begin + (type.data() + type.size() - header.data());
372*6777b538SAndroid Build Coastguard Worker }
373*6777b538SAndroid Build Coastguard Worker 
374*6777b538SAndroid Build Coastguard Worker // http://tools.ietf.org/html/rfc6266
375*6777b538SAndroid Build Coastguard Worker //
376*6777b538SAndroid Build Coastguard Worker //  content-disposition = "Content-Disposition" ":"
377*6777b538SAndroid Build Coastguard Worker //                         disposition-type *( ";" disposition-parm )
378*6777b538SAndroid Build Coastguard Worker //
379*6777b538SAndroid Build Coastguard Worker //  disposition-type    = "inline" | "attachment" | disp-ext-type
380*6777b538SAndroid Build Coastguard Worker //                      ; case-insensitive
381*6777b538SAndroid Build Coastguard Worker //  disp-ext-type       = token
382*6777b538SAndroid Build Coastguard Worker //
383*6777b538SAndroid Build Coastguard Worker //  disposition-parm    = filename-parm | disp-ext-parm
384*6777b538SAndroid Build Coastguard Worker //
385*6777b538SAndroid Build Coastguard Worker //  filename-parm       = "filename" "=" value
386*6777b538SAndroid Build Coastguard Worker //                      | "filename*" "=" ext-value
387*6777b538SAndroid Build Coastguard Worker //
388*6777b538SAndroid Build Coastguard Worker //  disp-ext-parm       = token "=" value
389*6777b538SAndroid Build Coastguard Worker //                      | ext-token "=" ext-value
390*6777b538SAndroid Build Coastguard Worker //  ext-token           = <the characters in token, followed by "*">
391*6777b538SAndroid Build Coastguard Worker //
Parse(const std::string & header,const std::string & referrer_charset)392*6777b538SAndroid Build Coastguard Worker void HttpContentDisposition::Parse(const std::string& header,
393*6777b538SAndroid Build Coastguard Worker                                    const std::string& referrer_charset) {
394*6777b538SAndroid Build Coastguard Worker   DCHECK(type_ == INLINE);
395*6777b538SAndroid Build Coastguard Worker   DCHECK(filename_.empty());
396*6777b538SAndroid Build Coastguard Worker 
397*6777b538SAndroid Build Coastguard Worker   std::string::const_iterator pos = header.begin();
398*6777b538SAndroid Build Coastguard Worker   std::string::const_iterator end = header.end();
399*6777b538SAndroid Build Coastguard Worker   pos = ConsumeDispositionType(pos, end);
400*6777b538SAndroid Build Coastguard Worker 
401*6777b538SAndroid Build Coastguard Worker   std::string filename;
402*6777b538SAndroid Build Coastguard Worker   std::string ext_filename;
403*6777b538SAndroid Build Coastguard Worker 
404*6777b538SAndroid Build Coastguard Worker   HttpUtil::NameValuePairsIterator iter(pos, end, ';');
405*6777b538SAndroid Build Coastguard Worker   while (iter.GetNext()) {
406*6777b538SAndroid Build Coastguard Worker     if (filename.empty() &&
407*6777b538SAndroid Build Coastguard Worker         base::EqualsCaseInsensitiveASCII(iter.name_piece(), "filename")) {
408*6777b538SAndroid Build Coastguard Worker       DecodeFilenameValue(iter.value(), referrer_charset, &filename,
409*6777b538SAndroid Build Coastguard Worker                           &parse_result_flags_);
410*6777b538SAndroid Build Coastguard Worker       if (!filename.empty()) {
411*6777b538SAndroid Build Coastguard Worker         parse_result_flags_ |= HAS_FILENAME;
412*6777b538SAndroid Build Coastguard Worker         if (filename[0] == '\'')
413*6777b538SAndroid Build Coastguard Worker           parse_result_flags_ |= HAS_SINGLE_QUOTED_FILENAME;
414*6777b538SAndroid Build Coastguard Worker       }
415*6777b538SAndroid Build Coastguard Worker     } else if (ext_filename.empty() && base::EqualsCaseInsensitiveASCII(
416*6777b538SAndroid Build Coastguard Worker                                            iter.name_piece(), "filename*")) {
417*6777b538SAndroid Build Coastguard Worker       DecodeExtValue(iter.raw_value(), &ext_filename);
418*6777b538SAndroid Build Coastguard Worker       if (!ext_filename.empty())
419*6777b538SAndroid Build Coastguard Worker         parse_result_flags_ |= HAS_EXT_FILENAME;
420*6777b538SAndroid Build Coastguard Worker     }
421*6777b538SAndroid Build Coastguard Worker   }
422*6777b538SAndroid Build Coastguard Worker 
423*6777b538SAndroid Build Coastguard Worker   if (!ext_filename.empty())
424*6777b538SAndroid Build Coastguard Worker     filename_ = ext_filename;
425*6777b538SAndroid Build Coastguard Worker   else
426*6777b538SAndroid Build Coastguard Worker     filename_ = filename;
427*6777b538SAndroid Build Coastguard Worker 
428*6777b538SAndroid Build Coastguard Worker   if (!filename.empty() && filename[0] == '\'')
429*6777b538SAndroid Build Coastguard Worker     parse_result_flags_ |= HAS_SINGLE_QUOTED_FILENAME;
430*6777b538SAndroid Build Coastguard Worker }
431*6777b538SAndroid Build Coastguard Worker 
432*6777b538SAndroid Build Coastguard Worker }  // namespace net
433