xref: /aosp_15_r20/prebuilts/build-tools/common/py3-stdlib/email/message.py (revision cda5da8d549138a6648c5ee6d7a49cf8f4a657be)
1# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: [email protected]
4
5"""Basic message object for the email package object model."""
6
7__all__ = ['Message', 'EmailMessage']
8
9import binascii
10import re
11import quopri
12from io import BytesIO, StringIO
13
14# Intrapackage imports
15from email import utils
16from email import errors
17from email._policybase import Policy, compat32
18from email import charset as _charset
19from email._encoded_words import decode_b
# Module-level alias for the Charset class from email.charset, used by the
# isinstance() checks and conversions in this module.
Charset = _charset.Charset

# Separator used when joining a header value with its formatted parameters.
SEMISPACE = '; '

# Regular expression that matches `special' characters in parameters, the
# existence of which forces quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
27
28
def _splitparam(param):
    """Split a header value at its first semicolon.

    Returns a 2-tuple ``(value, params)`` with surrounding whitespace removed
    from both items.  ``params`` is None when the header contains no
    semicolon at all.

    This is deliberately simple rather than strictly RFC 2045 (section 5.1)
    compliant; it handles most headers found in the wild.
    """
    # The argument may be a Header instance, so work on its string form.
    head, semicolon, tail = str(param).partition(';')
    rest = tail.strip() if semicolon else None
    return head.strip(), rest
38
def _formatparam(param, value=None, quote=True):
    """Format a parameter name and optional value as ``key=value`` text.

    A missing or empty value yields just the bare parameter name.  A
    three-tuple ``(charset, language, value)`` is encoded according to the
    RFC 2231 rules and emitted as ``key*=...``; RFC 2231 values are never
    quoted.  A plain string holding non-ASCII characters is encoded the same
    way using the utf-8 charset and an empty language.  Any other value is
    wrapped in double quotes when ``quote`` is true or when it contains one
    of the ``tspecials`` characters, and emitted bare otherwise.
    """
    # Nothing to attach: the parameter stands on its own.
    if value is None or len(value) == 0:
        return param

    if isinstance(value, tuple):
        # Explicit RFC 2231 triple; the charset is a string, not a Charset.
        param += '*'
        value = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param, value)

    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        # Non-ASCII text cannot be sent raw, so fall back to RFC 2231.
        param += '*'
        value = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param, value)

    needs_quotes = quote or tspecials.search(value)
    if needs_quotes:
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
72
def _parseparam(s):
    """Split a header value into its semicolon-separated fields.

    Semicolons that fall inside a double-quoted string do not act as
    separators.  For ``key=value`` fields the key is lower-cased and the
    whitespace around both the key and the value is dropped; the value itself
    is otherwise left untouched (quotes included).  Returns the fields as a
    list of strings.
    """
    # The argument may be a Header instance, so work on its string form.  The
    # leading ';' lets the first field be handled like all the others.
    remaining = ';' + str(s)
    fields = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        # Skip over semicolons that sit inside an unterminated quoted string:
        # an odd number of unescaped quotes before the cut means we are still
        # inside one.
        while cut > 0:
            open_quotes = (remaining.count('"', 0, cut)
                           - remaining.count('\\"', 0, cut))
            if open_quotes % 2 == 0:
                break
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        field = remaining[:cut]
        key, equals, val = field.partition('=')
        if equals:
            field = key.strip().lower() + '=' + val.strip()
        fields.append(field.strip())
        remaining = remaining[cut:]
    return fields
91
92
def _unquotevalue(value):
    """Remove the quoting from a parameter value.

    A plain value is passed through utils.unquote().  An RFC 2231 three-tuple
    keeps its first two items and only has its third item unquoted; unlike
    utils.collapse_rfc2231_value() no attempt is made to turn the tuple into
    a single string, because Message.get_param() and Message.get_params()
    are defined to return the tuple for RFC 2231 parameters.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    return value[0], value[1], utils.unquote(value[2])
102
103
def _decode_uu(encoded):
    """Decode uuencoded bytes and return the decoded bytes.

    Lines are skipped until a ``begin <octal mode> ...`` header is found.
    Every following line is decoded until a line consisting of ``end`` (or
    the end of the input) is reached.

    Raises ValueError if no valid ``begin`` line exists or if an empty line
    is met inside the encoded body.
    """
    lines = iter(encoded.splitlines())

    # Phase 1: locate the header line; the mode must parse as an octal number.
    header_found = False
    for raw in lines:
        if not raw.startswith(b"begin "):
            continue
        mode = raw.removeprefix(b"begin ").partition(b" ")[0]
        try:
            int(mode, base=8)
        except ValueError:
            continue
        header_found = True
        break
    if not header_found:
        raise ValueError("`begin` line not found")

    # Phase 2: decode the body, continuing on the same iterator.
    chunks = []
    for raw in lines:
        if not raw:
            raise ValueError("Truncated input")
        if raw.strip(b' \t\r\n\f') == b'end':
            break
        try:
            chunk = binascii.a2b_uu(raw)
        except binascii.Error:
            # Workaround for broken uuencoders by /Fredrik Lundh: trust the
            # length byte and decode only that many characters of the line.
            nbytes = (((raw[0] - 32) & 63) * 4 + 5) // 3
            chunk = binascii.a2b_uu(raw[:nbytes])
        chunks.append(chunk)

    return b''.join(chunks)
133
134
135class Message:
136    """Basic message object.
137
138    A message object is defined as something that has a bunch of RFC 2822
139    headers and a payload.  It may optionally have an envelope header
140    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
141    multipart or a message/rfc822), then the payload is a list of Message
142    objects, otherwise it is a string.
143
144    Message objects implement part of the `mapping' interface, which assumes
145    there is exactly one occurrence of the header per message.  Some headers
146    do in fact appear multiple times (e.g. Received) and for those headers,
147    you must use the explicit API to set or get all the headers.  Not all of
148    the mapping methods are implemented.
149    """
150    def __init__(self, policy=compat32):
151        self.policy = policy
152        self._headers = []
153        self._unixfrom = None
154        self._payload = None
155        self._charset = None
156        # Defaults for multipart messages
157        self.preamble = self.epilogue = None
158        self.defects = []
159        # Default content type
160        self._default_type = 'text/plain'
161
162    def __str__(self):
163        """Return the entire formatted message as a string.
164        """
165        return self.as_string()
166
167    def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
168        """Return the entire formatted message as a string.
169
170        Optional 'unixfrom', when true, means include the Unix From_ envelope
171        header.  For backward compatibility reasons, if maxheaderlen is
172        not specified it defaults to 0, so you must override it explicitly
173        if you want a different maxheaderlen.  'policy' is passed to the
174        Generator instance used to serialize the message; if it is not
175        specified the policy associated with the message instance is used.
176
177        If the message object contains binary data that is not encoded
178        according to RFC standards, the non-compliant data will be replaced by
179        unicode "unknown character" code points.
180        """
181        from email.generator import Generator
182        policy = self.policy if policy is None else policy
183        fp = StringIO()
184        g = Generator(fp,
185                      mangle_from_=False,
186                      maxheaderlen=maxheaderlen,
187                      policy=policy)
188        g.flatten(self, unixfrom=unixfrom)
189        return fp.getvalue()
190
191    def __bytes__(self):
192        """Return the entire formatted message as a bytes object.
193        """
194        return self.as_bytes()
195
196    def as_bytes(self, unixfrom=False, policy=None):
197        """Return the entire formatted message as a bytes object.
198
199        Optional 'unixfrom', when true, means include the Unix From_ envelope
200        header.  'policy' is passed to the BytesGenerator instance used to
201        serialize the message; if not specified the policy associated with
202        the message instance is used.
203        """
204        from email.generator import BytesGenerator
205        policy = self.policy if policy is None else policy
206        fp = BytesIO()
207        g = BytesGenerator(fp, mangle_from_=False, policy=policy)
208        g.flatten(self, unixfrom=unixfrom)
209        return fp.getvalue()
210
211    def is_multipart(self):
212        """Return True if the message consists of multiple parts."""
213        return isinstance(self._payload, list)
214
215    #
216    # Unix From_ line
217    #
218    def set_unixfrom(self, unixfrom):
219        self._unixfrom = unixfrom
220
221    def get_unixfrom(self):
222        return self._unixfrom
223
224    #
225    # Payload manipulation.
226    #
227    def attach(self, payload):
228        """Add the given payload to the current payload.
229
230        The current payload will always be a list of objects after this method
231        is called.  If you want to set the payload to a scalar object, use
232        set_payload() instead.
233        """
234        if self._payload is None:
235            self._payload = [payload]
236        else:
237            try:
238                self._payload.append(payload)
239            except AttributeError:
240                raise TypeError("Attach is not valid on a message with a"
241                                " non-multipart payload")
242
243    def get_payload(self, i=None, decode=False):
244        """Return a reference to the payload.
245
246        The payload will either be a list object or a string.  If you mutate
247        the list object, you modify the message's payload in place.  Optional
248        i returns that index into the payload.
249
250        Optional decode is a flag indicating whether the payload should be
251        decoded or not, according to the Content-Transfer-Encoding header
252        (default is False).
253
254        When True and the message is not a multipart, the payload will be
255        decoded if this header's value is `quoted-printable' or `base64'.  If
256        some other encoding is used, or the header is missing, or if the
257        payload has bogus data (i.e. bogus base64 or uuencoded data), the
258        payload is returned as-is.
259
260        If the message is a multipart and the decode flag is True, then None
261        is returned.
262        """
263        # Here is the logic table for this code, based on the email5.0.0 code:
264        #   i     decode  is_multipart  result
265        # ------  ------  ------------  ------------------------------
266        #  None   True    True          None
267        #   i     True    True          None
268        #  None   False   True          _payload (a list)
269        #   i     False   True          _payload element i (a Message)
270        #   i     False   False         error (not a list)
271        #   i     True    False         error (not a list)
272        #  None   False   False         _payload
273        #  None   True    False         _payload decoded (bytes)
274        # Note that Barry planned to factor out the 'decode' case, but that
275        # isn't so easy now that we handle the 8 bit data, which needs to be
276        # converted in both the decode and non-decode path.
277        if self.is_multipart():
278            if decode:
279                return None
280            if i is None:
281                return self._payload
282            else:
283                return self._payload[i]
284        # For backward compatibility, Use isinstance and this error message
285        # instead of the more logical is_multipart test.
286        if i is not None and not isinstance(self._payload, list):
287            raise TypeError('Expected list, got %s' % type(self._payload))
288        payload = self._payload
289        # cte might be a Header, so for now stringify it.
290        cte = str(self.get('content-transfer-encoding', '')).lower()
291        # payload may be bytes here.
292        if isinstance(payload, str):
293            if utils._has_surrogates(payload):
294                bpayload = payload.encode('ascii', 'surrogateescape')
295                if not decode:
296                    try:
297                        payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
298                    except LookupError:
299                        payload = bpayload.decode('ascii', 'replace')
300            elif decode:
301                try:
302                    bpayload = payload.encode('ascii')
303                except UnicodeError:
304                    # This won't happen for RFC compliant messages (messages
305                    # containing only ASCII code points in the unicode input).
306                    # If it does happen, turn the string into bytes in a way
307                    # guaranteed not to fail.
308                    bpayload = payload.encode('raw-unicode-escape')
309        if not decode:
310            return payload
311        if cte == 'quoted-printable':
312            return quopri.decodestring(bpayload)
313        elif cte == 'base64':
314            # XXX: this is a bit of a hack; decode_b should probably be factored
315            # out somewhere, but I haven't figured out where yet.
316            value, defects = decode_b(b''.join(bpayload.splitlines()))
317            for defect in defects:
318                self.policy.handle_defect(self, defect)
319            return value
320        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
321            try:
322                return _decode_uu(bpayload)
323            except ValueError:
324                # Some decoding problem.
325                return bpayload
326        if isinstance(payload, str):
327            return bpayload
328        return payload
329
330    def set_payload(self, payload, charset=None):
331        """Set the payload to the given value.
332
333        Optional charset sets the message's default character set.  See
334        set_charset() for details.
335        """
336        if hasattr(payload, 'encode'):
337            if charset is None:
338                self._payload = payload
339                return
340            if not isinstance(charset, Charset):
341                charset = Charset(charset)
342            payload = payload.encode(charset.output_charset)
343        if hasattr(payload, 'decode'):
344            self._payload = payload.decode('ascii', 'surrogateescape')
345        else:
346            self._payload = payload
347        if charset is not None:
348            self.set_charset(charset)
349
    def set_charset(self, charset):
        """Set the charset of the payload to a given character set.

        charset can be a Charset instance, a string naming a character set, or
        None.  If it is a string it will be converted to a Charset instance.
        If charset is None, the charset parameter will be removed from the
        Content-Type field.  Anything else will generate a TypeError.

        The message will be assumed to be of type text/* encoded with
        charset.input_charset.  It will be converted to charset.output_charset
        and encoded properly, if needed, when generating the plain text
        representation of the message.  MIME headers (MIME-Version,
        Content-Type, Content-Transfer-Encoding) will be added as needed.
        """
        # None means "forget the charset": drop the Content-Type parameter and
        # the stored Charset, and touch nothing else.
        if charset is None:
            self.del_param('charset')
            self._charset = None
            return
        # Anything that is not already a Charset is handed to the Charset
        # constructor.
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        # Add the MIME headers that are missing; an existing Content-Type only
        # has its charset parameter updated.
        if 'MIME-Version' not in self:
            self.add_header('MIME-Version', '1.0')
        if 'Content-Type' not in self:
            self.add_header('Content-Type', 'text/plain',
                            charset=charset.get_output_charset())
        else:
            self.set_param('charset', charset.get_output_charset())
        # Re-encode the payload when the charset compares unequal to its own
        # output charset.
        if charset != charset.get_output_charset():
            self._payload = charset.body_encode(self._payload)
        # An existing Content-Transfer-Encoding header is left alone.
        if 'Content-Transfer-Encoding' not in self:
            cte = charset.get_body_encoding()
            # NOTE(review): cte is presumably either a callable that encodes
            # the message in place or a plain string naming the encoding --
            # confirm against Charset.get_body_encoding().  Calling it is
            # tried first; a TypeError from the call selects the branch that
            # encodes the payload here and stores cte as the header value.
            try:
                cte(self)
            except TypeError:
                # This 'if' is for backward compatibility, it allows unicode
                # through even though that won't work correctly if the
                # message is serialized.
                payload = self._payload
                if payload:
                    # Prefer the ascii/surrogateescape bytes; fall back to the
                    # output charset when that encoding fails.
                    try:
                        payload = payload.encode('ascii', 'surrogateescape')
                    except UnicodeError:
                        payload = payload.encode(charset.output_charset)
                self._payload = charset.body_encode(payload)
                self.add_header('Content-Transfer-Encoding', cte)
396
397    def get_charset(self):
398        """Return the Charset instance associated with the message's payload.
399        """
400        return self._charset
401
402    #
403    # MAPPING INTERFACE (partial)
404    #
405    def __len__(self):
406        """Return the total number of headers, including duplicates."""
407        return len(self._headers)
408
409    def __getitem__(self, name):
410        """Get a header value.
411
412        Return None if the header is missing instead of raising an exception.
413
414        Note that if the header appeared multiple times, exactly which
415        occurrence gets returned is undefined.  Use get_all() to get all
416        the values matching a header field name.
417        """
418        return self.get(name)
419
420    def __setitem__(self, name, val):
421        """Set the value of a header.
422
423        Note: this does not overwrite an existing header with the same field
424        name.  Use __delitem__() first to delete any existing headers.
425        """
426        max_count = self.policy.header_max_count(name)
427        if max_count:
428            lname = name.lower()
429            found = 0
430            for k, v in self._headers:
431                if k.lower() == lname:
432                    found += 1
433                    if found >= max_count:
434                        raise ValueError("There may be at most {} {} headers "
435                                         "in a message".format(max_count, name))
436        self._headers.append(self.policy.header_store_parse(name, val))
437
438    def __delitem__(self, name):
439        """Delete all occurrences of a header, if present.
440
441        Does not raise an exception if the header is missing.
442        """
443        name = name.lower()
444        newheaders = []
445        for k, v in self._headers:
446            if k.lower() != name:
447                newheaders.append((k, v))
448        self._headers = newheaders
449
450    def __contains__(self, name):
451        return name.lower() in [k.lower() for k, v in self._headers]
452
453    def __iter__(self):
454        for field, value in self._headers:
455            yield field
456
457    def keys(self):
458        """Return a list of all the message's header field names.
459
460        These will be sorted in the order they appeared in the original
461        message, or were added to the message, and may contain duplicates.
462        Any fields deleted and re-inserted are always appended to the header
463        list.
464        """
465        return [k for k, v in self._headers]
466
467    def values(self):
468        """Return a list of all the message's header values.
469
470        These will be sorted in the order they appeared in the original
471        message, or were added to the message, and may contain duplicates.
472        Any fields deleted and re-inserted are always appended to the header
473        list.
474        """
475        return [self.policy.header_fetch_parse(k, v)
476                for k, v in self._headers]
477
478    def items(self):
479        """Get all the message's header fields and values.
480
481        These will be sorted in the order they appeared in the original
482        message, or were added to the message, and may contain duplicates.
483        Any fields deleted and re-inserted are always appended to the header
484        list.
485        """
486        return [(k, self.policy.header_fetch_parse(k, v))
487                for k, v in self._headers]
488
489    def get(self, name, failobj=None):
490        """Get a header value.
491
492        Like __getitem__() but return failobj instead of None when the field
493        is missing.
494        """
495        name = name.lower()
496        for k, v in self._headers:
497            if k.lower() == name:
498                return self.policy.header_fetch_parse(k, v)
499        return failobj
500
501    #
    # "Internal" methods (public API, but only intended for use by a parser
    # or generator, not normal application code).
504    #
505
506    def set_raw(self, name, value):
507        """Store name and value in the model without modification.
508
509        This is an "internal" API, intended only for use by a parser.
510        """
511        self._headers.append((name, value))
512
513    def raw_items(self):
514        """Return the (name, value) header pairs without modification.
515
516        This is an "internal" API, intended only for use by a generator.
517        """
518        return iter(self._headers.copy())
519
520    #
521    # Additional useful stuff
522    #
523
524    def get_all(self, name, failobj=None):
525        """Return a list of all the values for the named field.
526
527        These will be sorted in the order they appeared in the original
528        message, and may contain duplicates.  Any fields deleted and
529        re-inserted are always appended to the header list.
530
531        If no such fields exist, failobj is returned (defaults to None).
532        """
533        values = []
534        name = name.lower()
535        for k, v in self._headers:
536            if k.lower() == name:
537                values.append(self.policy.header_fetch_parse(k, v))
538        if not values:
539            return failobj
540        return values
541
542    def add_header(self, _name, _value, **_params):
543        """Extended header setting.
544
545        name is the header field to add.  keyword arguments can be used to set
546        additional parameters for the header field, with underscores converted
547        to dashes.  Normally the parameter will be added as key="value" unless
548        value is None, in which case only the key will be added.  If a
549        parameter value contains non-ASCII characters it can be specified as a
550        three-tuple of (charset, language, value), in which case it will be
551        encoded according to RFC2231 rules.  Otherwise it will be encoded using
552        the utf-8 charset and a language of ''.
553
554        Examples:
555
556        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
557        msg.add_header('content-disposition', 'attachment',
558                       filename=('utf-8', '', Fußballer.ppt'))
559        msg.add_header('content-disposition', 'attachment',
560                       filename='Fußballer.ppt'))
561        """
562        parts = []
563        for k, v in _params.items():
564            if v is None:
565                parts.append(k.replace('_', '-'))
566            else:
567                parts.append(_formatparam(k.replace('_', '-'), v))
568        if _value is not None:
569            parts.insert(0, _value)
570        self[_name] = SEMISPACE.join(parts)
571
572    def replace_header(self, _name, _value):
573        """Replace a header.
574
575        Replace the first matching header found in the message, retaining
576        header order and case.  If no matching header was found, a KeyError is
577        raised.
578        """
579        _name = _name.lower()
580        for i, (k, v) in zip(range(len(self._headers)), self._headers):
581            if k.lower() == _name:
582                self._headers[i] = self.policy.header_store_parse(k, _value)
583                break
584        else:
585            raise KeyError(_name)
586
587    #
588    # Use these three methods instead of the three above.
589    #
590
591    def get_content_type(self):
592        """Return the message's content type.
593
594        The returned string is coerced to lower case of the form
595        `maintype/subtype'.  If there was no Content-Type header in the
596        message, the default type as given by get_default_type() will be
597        returned.  Since according to RFC 2045, messages always have a default
598        type this will always return a value.
599
600        RFC 2045 defines a message's default type to be text/plain unless it
601        appears inside a multipart/digest container, in which case it would be
602        message/rfc822.
603        """
604        missing = object()
605        value = self.get('content-type', missing)
606        if value is missing:
607            # This should have no parameters
608            return self.get_default_type()
609        ctype = _splitparam(value)[0].lower()
610        # RFC 2045, section 5.2 says if its invalid, use text/plain
611        if ctype.count('/') != 1:
612            return 'text/plain'
613        return ctype
614
615    def get_content_maintype(self):
616        """Return the message's main content type.
617
618        This is the `maintype' part of the string returned by
619        get_content_type().
620        """
621        ctype = self.get_content_type()
622        return ctype.split('/')[0]
623
624    def get_content_subtype(self):
625        """Returns the message's sub-content type.
626
627        This is the `subtype' part of the string returned by
628        get_content_type().
629        """
630        ctype = self.get_content_type()
631        return ctype.split('/')[1]
632
633    def get_default_type(self):
634        """Return the `default' content type.
635
636        Most messages have a default content type of text/plain, except for
637        messages that are subparts of multipart/digest containers.  Such
638        subparts have a default content type of message/rfc822.
639        """
640        return self._default_type
641
642    def set_default_type(self, ctype):
643        """Set the `default' content type.
644
645        ctype should be either "text/plain" or "message/rfc822", although this
646        is not enforced.  The default content type is not stored in the
647        Content-Type header.
648        """
649        self._default_type = ctype
650
651    def _get_params_preserve(self, failobj, header):
652        # Like get_params() but preserves the quoting of values.  BAW:
653        # should this be part of the public interface?
654        missing = object()
655        value = self.get(header, missing)
656        if value is missing:
657            return failobj
658        params = []
659        for p in _parseparam(value):
660            try:
661                name, val = p.split('=', 1)
662                name = name.strip()
663                val = val.strip()
664            except ValueError:
665                # Must have been a bare attribute
666                name = p.strip()
667                val = ''
668            params.append((name, val))
669        params = utils.decode_params(params)
670        return params
671
672    def get_params(self, failobj=None, header='content-type', unquote=True):
673        """Return the message's Content-Type parameters, as a list.
674
675        The elements of the returned list are 2-tuples of key/value pairs, as
676        split on the `=' sign.  The left hand side of the `=' is the key,
677        while the right hand side is the value.  If there is no `=' sign in
678        the parameter the value is the empty string.  The value is as
679        described in the get_param() method.
680
681        Optional failobj is the object to return if there is no Content-Type
682        header.  Optional header is the header to search instead of
683        Content-Type.  If unquote is True, the value is unquoted.
684        """
685        missing = object()
686        params = self._get_params_preserve(missing, header)
687        if params is missing:
688            return failobj
689        if unquote:
690            return [(k, _unquotevalue(v)) for k, v in params]
691        else:
692            return params
693
694    def get_param(self, param, failobj=None, header='content-type',
695                  unquote=True):
696        """Return the parameter value if found in the Content-Type header.
697
698        Optional failobj is the object to return if there is no Content-Type
699        header, or the Content-Type header has no such parameter.  Optional
700        header is the header to search instead of Content-Type.
701
702        Parameter keys are always compared case insensitively.  The return
703        value can either be a string, or a 3-tuple if the parameter was RFC
704        2231 encoded.  When it's a 3-tuple, the elements of the value are of
705        the form (CHARSET, LANGUAGE, VALUE).  Note that both CHARSET and
706        LANGUAGE can be None, in which case you should consider VALUE to be
707        encoded in the us-ascii charset.  You can usually ignore LANGUAGE.
708        The parameter value (either the returned string, or the VALUE item in
709        the 3-tuple) is always unquoted, unless unquote is set to False.
710
711        If your application doesn't care whether the parameter was RFC 2231
712        encoded, it can turn the return value into a string as follows:
713
714            rawparam = msg.get_param('foo')
715            param = email.utils.collapse_rfc2231_value(rawparam)
716
717        """
718        if header not in self:
719            return failobj
720        for k, v in self._get_params_preserve(failobj, header):
721            if k.lower() == param.lower():
722                if unquote:
723                    return _unquotevalue(v)
724                else:
725                    return v
726        return failobj
727
    def set_param(self, param, value, header='Content-Type', requote=True,
                  charset=None, language='', replace=False):
        """Set a parameter in the Content-Type header.

        If the parameter already exists in the header, its value will be
        replaced with the new value.

        If header is Content-Type and has not yet been defined for this
        message, it will be set to "text/plain" and the new parameter and
        value will be appended as per RFC 2045.

        An alternate header can be specified in the header argument, and all
        parameters will be quoted as necessary unless requote is False.

        If charset is specified, the parameter will be encoded according to RFC
        2231.  Optional language specifies the RFC 2231 language, defaulting
        to the empty string.  Both charset and language should be strings.

        If replace is true, a changed header is rewritten in place with
        replace_header(); otherwise it is deleted and appended again.
        """
        # A charset turns a plain value into the (charset, language, value)
        # tuple that _formatparam() encodes per RFC 2231.
        if not isinstance(value, tuple) and charset:
            value = (charset, language, value)

        # Starting point: the current header value, or "text/plain" for a
        # Content-Type header that does not exist yet.
        if header not in self and header.lower() == 'content-type':
            ctype = 'text/plain'
        else:
            ctype = self.get(header)
        # NOTE: this test is on the truthiness of the current value, so a
        # parameter that exists with an empty value also takes the "append"
        # branch.
        if not self.get_param(param, header=header):
            if not ctype:
                ctype = _formatparam(param, value, requote)
            else:
                ctype = SEMISPACE.join(
                    [ctype, _formatparam(param, value, requote)])
        else:
            # The parameter exists: rebuild the whole header value, swapping
            # in the new value for the matching parameter.
            ctype = ''
            for old_param, old_value in self.get_params(header=header,
                                                        unquote=requote):
                append_param = ''
                if old_param.lower() == param.lower():
                    append_param = _formatparam(param, value, requote)
                else:
                    append_param = _formatparam(old_param, old_value, requote)
                if not ctype:
                    ctype = append_param
                else:
                    ctype = SEMISPACE.join([ctype, append_param])
        # Only touch the stored headers when the value actually changed.
        if ctype != self.get(header):
            if replace:
                self.replace_header(header, ctype)
            else:
                del self[header]
                self[header] = ctype
778
779    def del_param(self, param, header='content-type', requote=True):
780        """Remove the given parameter completely from the Content-Type header.
781
782        The header will be re-written in place without the parameter or its
783        value. All values will be quoted as necessary unless requote is
784        False.  Optional header specifies an alternative to the Content-Type
785        header.
786        """
787        if header not in self:
788            return
789        new_ctype = ''
790        for p, v in self.get_params(header=header, unquote=requote):
791            if p.lower() != param.lower():
792                if not new_ctype:
793                    new_ctype = _formatparam(p, v, requote)
794                else:
795                    new_ctype = SEMISPACE.join([new_ctype,
796                                                _formatparam(p, v, requote)])
797        if new_ctype != self.get(header):
798            del self[header]
799            self[header] = new_ctype
800
801    def set_type(self, type, header='Content-Type', requote=True):
802        """Set the main type and subtype for the Content-Type header.
803
804        type must be a string in the form "maintype/subtype", otherwise a
805        ValueError is raised.
806
807        This method replaces the Content-Type header, keeping all the
808        parameters in place.  If requote is False, this leaves the existing
809        header's quoting as is.  Otherwise, the parameters will be quoted (the
810        default).
811
812        An alternative header can be specified in the header argument.  When
813        the Content-Type header is set, we'll always also add a MIME-Version
814        header.
815        """
816        # BAW: should we be strict?
817        if not type.count('/') == 1:
818            raise ValueError
819        # Set the Content-Type, you get a MIME-Version
820        if header.lower() == 'content-type':
821            del self['mime-version']
822            self['MIME-Version'] = '1.0'
823        if header not in self:
824            self[header] = type
825            return
826        params = self.get_params(header=header, unquote=requote)
827        del self[header]
828        self[header] = type
829        # Skip the first param; it's the old type.
830        for p, v in params[1:]:
831            self.set_param(p, v, header, requote)
832
833    def get_filename(self, failobj=None):
834        """Return the filename associated with the payload if present.
835
836        The filename is extracted from the Content-Disposition header's
837        `filename' parameter, and it is unquoted.  If that header is missing
838        the `filename' parameter, this method falls back to looking for the
839        `name' parameter.
840        """
841        missing = object()
842        filename = self.get_param('filename', missing, 'content-disposition')
843        if filename is missing:
844            filename = self.get_param('name', missing, 'content-type')
845        if filename is missing:
846            return failobj
847        return utils.collapse_rfc2231_value(filename).strip()
848
849    def get_boundary(self, failobj=None):
850        """Return the boundary associated with the payload if present.
851
852        The boundary is extracted from the Content-Type header's `boundary'
853        parameter, and it is unquoted.
854        """
855        missing = object()
856        boundary = self.get_param('boundary', missing)
857        if boundary is missing:
858            return failobj
859        # RFC 2046 says that boundaries may begin but not end in w/s
860        return utils.collapse_rfc2231_value(boundary).rstrip()
861
862    def set_boundary(self, boundary):
863        """Set the boundary parameter in Content-Type to 'boundary'.
864
865        This is subtly different than deleting the Content-Type header and
866        adding a new one with a new boundary parameter via add_header().  The
867        main difference is that using the set_boundary() method preserves the
868        order of the Content-Type header in the original message.
869
870        HeaderParseError is raised if the message has no Content-Type header.
871        """
872        missing = object()
873        params = self._get_params_preserve(missing, 'content-type')
874        if params is missing:
875            # There was no Content-Type header, and we don't know what type
876            # to set it to, so raise an exception.
877            raise errors.HeaderParseError('No Content-Type header found')
878        newparams = []
879        foundp = False
880        for pk, pv in params:
881            if pk.lower() == 'boundary':
882                newparams.append(('boundary', '"%s"' % boundary))
883                foundp = True
884            else:
885                newparams.append((pk, pv))
886        if not foundp:
887            # The original Content-Type header had no boundary attribute.
888            # Tack one on the end.  BAW: should we raise an exception
889            # instead???
890            newparams.append(('boundary', '"%s"' % boundary))
891        # Replace the existing Content-Type header with the new value
892        newheaders = []
893        for h, v in self._headers:
894            if h.lower() == 'content-type':
895                parts = []
896                for k, v in newparams:
897                    if v == '':
898                        parts.append(k)
899                    else:
900                        parts.append('%s=%s' % (k, v))
901                val = SEMISPACE.join(parts)
902                newheaders.append(self.policy.header_store_parse(h, val))
903
904            else:
905                newheaders.append((h, v))
906        self._headers = newheaders
907
908    def get_content_charset(self, failobj=None):
909        """Return the charset parameter of the Content-Type header.
910
911        The returned string is always coerced to lower case.  If there is no
912        Content-Type header, or if that header has no charset parameter,
913        failobj is returned.
914        """
915        missing = object()
916        charset = self.get_param('charset', missing)
917        if charset is missing:
918            return failobj
919        if isinstance(charset, tuple):
920            # RFC 2231 encoded, so decode it, and it better end up as ascii.
921            pcharset = charset[0] or 'us-ascii'
922            try:
923                # LookupError will be raised if the charset isn't known to
924                # Python.  UnicodeError will be raised if the encoded text
925                # contains a character not in the charset.
926                as_bytes = charset[2].encode('raw-unicode-escape')
927                charset = str(as_bytes, pcharset)
928            except (LookupError, UnicodeError):
929                charset = charset[2]
930        # charset characters must be in us-ascii range
931        try:
932            charset.encode('us-ascii')
933        except UnicodeError:
934            return failobj
935        # RFC 2046, $4.1.2 says charsets are not case sensitive
936        return charset.lower()
937
938    def get_charsets(self, failobj=None):
939        """Return a list containing the charset(s) used in this message.
940
941        The returned list of items describes the Content-Type headers'
942        charset parameter for this message and all the subparts in its
943        payload.
944
945        Each item will either be a string (the value of the charset parameter
946        in the Content-Type header of that part) or the value of the
947        'failobj' parameter (defaults to None), if the part does not have a
948        main MIME type of "text", or the charset is not defined.
949
950        The list will contain one string for each part of the message, plus
951        one for the container message (i.e. self), so that a non-multipart
952        message will still return a list of length 1.
953        """
954        return [part.get_content_charset(failobj) for part in self.walk()]
955
956    def get_content_disposition(self):
957        """Return the message's content-disposition if it exists, or None.
958
959        The return values can be either 'inline', 'attachment' or None
960        according to the rfc2183.
961        """
962        value = self.get('content-disposition')
963        if value is None:
964            return None
965        c_d = _splitparam(value)[0].lower()
966        return c_d
967
968    # I.e. def walk(self): ...
969    from email.iterators import walk
970
971
class MIMEPart(Message):
    """Message subclass adding body/attachment lookup and content helpers.

    When no policy is supplied, email.policy.default is used.
    """

    def __init__(self, policy=None):
        """Create the part; policy defaults to email.policy.default."""
        if policy is None:
            # Imported at call time rather than at module import time.
            from email.policy import default
            policy = default
        super().__init__(policy)


    def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
        """Return the entire formatted message as a string.

        Optional 'unixfrom', when true, means include the Unix From_ envelope
        header.  maxheaderlen is retained for backward compatibility with the
        base Message class, but defaults to None, meaning that the policy value
        for max_line_length controls the header maximum length.  'policy' is
        passed to the Generator instance used to serialize the message; if it
        is not specified the policy associated with the message instance is
        used.
        """
        policy = self.policy if policy is None else policy
        if maxheaderlen is None:
            maxheaderlen = policy.max_line_length
        return super().as_string(unixfrom, maxheaderlen, policy)

    def __str__(self):
        """Return as_string() using a clone of the policy with utf8=True."""
        return self.as_string(policy=self.policy.clone(utf8=True))

    def is_attachment(self):
        """Return True if the Content-Disposition header says 'attachment'.

        Returns False when the header is absent.  The header value is read
        through its content_disposition attribute, so this assumes the
        policy produces header objects that provide it.
        """
        c_d = self.get('content-disposition')
        return False if c_d is None else c_d.content_disposition == 'attachment'

    def _find_body(self, part, preferencelist):
        """Yield (priority, part) body candidates found at or below part.

        priority is the index in preferencelist of the matching entry (a text
        subtype, or 'related' for a multipart/related container).  Parts
        marked as attachments are ignored, as are parts that declare a
        multipart content type without holding a list of subparts.
        """
        if part.is_attachment():
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        # Check the part being examined, not the root message: a nested part
        # may claim to be multipart while its payload is a plain string, and
        # descending into it would treat characters as subparts.
        if maintype != 'multipart' or not part.is_multipart():
            return
        if subtype != 'related':
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # The root of a multipart/related is the subpart whose Content-ID
        # equals the 'start' parameter, or else the first subpart.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the most preferred entry.
                    break
        return body

    # (maintype, subtype) pairs that iter_attachments() treats as body
    # candidates rather than attachments.
    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}
    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart, or to a multipart that has no subparts.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        # Certain malformed messages can have content type set to `multipart/*`
        # but still have single part body, in which case payload.copy() can
        # fail with AttributeError.
        try:
            parts = payload.copy()
        except AttributeError:
            # payload is not a list, it is most probably a string.
            return

        if maintype == 'multipart' and subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            # Slicing (rather than pop(0)) keeps an empty multipart/related
            # from raising IndexError.
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text related or
        # alternatives) if the sending agent sets content-disposition.
        seen = []   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment() and subtype not in seen):
                seen.append(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart.
        """
        if self.is_multipart():
            yield from self.get_payload()

    def get_content(self, *args, content_manager=None, **kw):
        """Return content_manager.get_content(self, *args, **kw).

        content_manager defaults to the policy's content_manager.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Call content_manager.set_content(self, *args, **kw).

        content_manager defaults to the policy's content_manager.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        """Turn this part into a multipart/<subtype> container.

        If the part is already a multipart whose subtype is subtype itself
        or one of disallowed_subtypes, ValueError is raised.  Existing
        headers whose names start with 'content-' are moved, together with
        the current payload, into a new first subpart; all other headers
        stay on this part.  If there are no such headers the payload becomes
        an empty list.  A boundary that is not None is stored as the
        Content-Type boundary parameter.
        """
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert to multipart/related; alternative and mixed cannot convert."""
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert to multipart/alternative; mixed cannot convert."""
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert to multipart/mixed."""
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        """Attach a new subpart built from *args and **kw via set_content().

        If this part is not already multipart/<_subtype>, it is first
        converted by calling make_<_subtype>().  The new subpart is an
        instance of the same class with the same policy.  When _disp is
        given and the new subpart has no Content-Disposition header, one is
        added with that value.
        """
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Add a subpart to a multipart/related, disposition 'inline'."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Add a subpart to a multipart/alternative."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Add a subpart to a multipart/mixed, disposition 'attachment'."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and reset the payload to None."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove headers starting with 'content-' and reset the payload."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
1193
1194
class EmailMessage(MIMEPart):
    """MIMEPart whose set_content() also ensures a MIME-Version header."""

    def set_content(self, *args, **kw):
        """Set the content as MIMEPart does, then add MIME-Version if absent.

        All arguments are passed through to MIMEPart.set_content().  A
        'MIME-Version: 1.0' header is added afterwards only when the message
        does not already carry a MIME-Version header.
        """
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'

1201