1# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
3# email package unit tests
4
5import re
6import time
7import base64
8import unittest
9import textwrap
10import warnings
11
12from io import StringIO, BytesIO
13from itertools import chain
14from random import choice
15from threading import Thread
16from unittest.mock import patch
17
18import email
19import email.policy
20
21from email.charset import Charset
22from email.generator import Generator, DecodedGenerator, BytesGenerator
23from email.header import Header, decode_header, make_header
24from email.headerregistry import HeaderRegistry
25from email.message import Message
26from email.mime.application import MIMEApplication
27from email.mime.audio import MIMEAudio
28from email.mime.base import MIMEBase
29from email.mime.image import MIMEImage
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email.mime.nonmultipart import MIMENonMultipart
33from email.mime.text import MIMEText
34from email.parser import Parser, HeaderParser
35from email import base64mime
36from email import encoders
37from email import errors
38from email import iterators
39from email import quoprimime
40from email import utils
41
42from test.support import threading_helper
43from test.support.os_helper import unlink
44from test.test_email import openfile, TestEmailBase
45
46# These imports are documented to work, but we are testing them using a
47# different path, so we import them here just to make sure they are importable.
48from email.parser import FeedParser, BytesFeedParser
49
# Shared string constants; NL is used to re-join lines that a test split
# on '\n'.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
53
54
55# Test various aspects of the Message class's API
56class TestMessageAPI(TestEmailBase):
57    def test_get_all(self):
58        eq = self.assertEqual
59        msg = self._msgobj('msg_20.txt')
60        eq(msg.get_all('cc'), ['[email protected]', '[email protected]', '[email protected]'])
61        eq(msg.get_all('xx', 'n/a'), 'n/a')
62
63    def test_getset_charset(self):
64        eq = self.assertEqual
65        msg = Message()
66        eq(msg.get_charset(), None)
67        charset = Charset('iso-8859-1')
68        msg.set_charset(charset)
69        eq(msg['mime-version'], '1.0')
70        eq(msg.get_content_type(), 'text/plain')
71        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
72        eq(msg.get_param('charset'), 'iso-8859-1')
73        eq(msg['content-transfer-encoding'], 'quoted-printable')
74        eq(msg.get_charset().input_charset, 'iso-8859-1')
75        # Remove the charset
76        msg.set_charset(None)
77        eq(msg.get_charset(), None)
78        eq(msg['content-type'], 'text/plain')
79        # Try adding a charset when there's already MIME headers present
80        msg = Message()
81        msg['MIME-Version'] = '2.0'
82        msg['Content-Type'] = 'text/x-weird'
83        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
84        msg.set_charset(charset)
85        eq(msg['mime-version'], '2.0')
86        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
87        eq(msg['content-transfer-encoding'], 'quinted-puntable')
88
89    def test_set_charset_from_string(self):
90        eq = self.assertEqual
91        msg = Message()
92        msg.set_charset('us-ascii')
93        eq(msg.get_charset().input_charset, 'us-ascii')
94        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
95
96    def test_set_payload_with_charset(self):
97        msg = Message()
98        charset = Charset('iso-8859-1')
99        msg.set_payload('This is a string payload', charset)
100        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
101
102    def test_set_payload_with_8bit_data_and_charset(self):
103        data = b'\xd0\x90\xd0\x91\xd0\x92'
104        charset = Charset('utf-8')
105        msg = Message()
106        msg.set_payload(data, charset)
107        self.assertEqual(msg['content-transfer-encoding'], 'base64')
108        self.assertEqual(msg.get_payload(decode=True), data)
109        self.assertEqual(msg.get_payload(), '0JDQkdCS\n')
110
111    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
112        data = b'\xd0\x90\xd0\x91\xd0\x92'
113        charset = Charset('utf-8')
114        charset.body_encoding = None # Disable base64 encoding
115        msg = Message()
116        msg.set_payload(data.decode('utf-8'), charset)
117        self.assertEqual(msg['content-transfer-encoding'], '8bit')
118        self.assertEqual(msg.get_payload(decode=True), data)
119
120    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
121        data = b'\xd0\x90\xd0\x91\xd0\x92'
122        charset = Charset('utf-8')
123        charset.body_encoding = None # Disable base64 encoding
124        msg = Message()
125        msg.set_payload(data, charset)
126        self.assertEqual(msg['content-transfer-encoding'], '8bit')
127        self.assertEqual(msg.get_payload(decode=True), data)
128
129    def test_set_payload_to_list(self):
130        msg = Message()
131        msg.set_payload([])
132        self.assertEqual(msg.get_payload(), [])
133
134    def test_attach_when_payload_is_string(self):
135        msg = Message()
136        msg['Content-Type'] = 'multipart/mixed'
137        msg.set_payload('string payload')
138        sub_msg = MIMEMessage(Message())
139        self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart",
140                               msg.attach, sub_msg)
141
142    def test_get_charsets(self):
143        eq = self.assertEqual
144
145        msg = self._msgobj('msg_08.txt')
146        charsets = msg.get_charsets()
147        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])
148
149        msg = self._msgobj('msg_09.txt')
150        charsets = msg.get_charsets('dingbat')
151        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
152                      'koi8-r'])
153
154        msg = self._msgobj('msg_12.txt')
155        charsets = msg.get_charsets()
156        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
157                      'iso-8859-3', 'us-ascii', 'koi8-r'])
158
159    def test_get_filename(self):
160        eq = self.assertEqual
161
162        msg = self._msgobj('msg_04.txt')
163        filenames = [p.get_filename() for p in msg.get_payload()]
164        eq(filenames, ['msg.txt', 'msg.txt'])
165
166        msg = self._msgobj('msg_07.txt')
167        subpart = msg.get_payload(1)
168        eq(subpart.get_filename(), 'dingusfish.gif')
169
170    def test_get_filename_with_name_parameter(self):
171        eq = self.assertEqual
172
173        msg = self._msgobj('msg_44.txt')
174        filenames = [p.get_filename() for p in msg.get_payload()]
175        eq(filenames, ['msg.txt', 'msg.txt'])
176
177    def test_get_boundary(self):
178        eq = self.assertEqual
179        msg = self._msgobj('msg_07.txt')
180        # No quotes!
181        eq(msg.get_boundary(), 'BOUNDARY')
182
183    def test_set_boundary(self):
184        eq = self.assertEqual
185        # This one has no existing boundary parameter, but the Content-Type:
186        # header appears fifth.
187        msg = self._msgobj('msg_01.txt')
188        msg.set_boundary('BOUNDARY')
189        header, value = msg.items()[4]
190        eq(header.lower(), 'content-type')
191        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
192        # This one has a Content-Type: header, with a boundary, stuck in the
193        # middle of its headers.  Make sure the order is preserved; it should
194        # be fifth.
195        msg = self._msgobj('msg_04.txt')
196        msg.set_boundary('BOUNDARY')
197        header, value = msg.items()[4]
198        eq(header.lower(), 'content-type')
199        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
200        # And this one has no Content-Type: header at all.
201        msg = self._msgobj('msg_03.txt')
202        self.assertRaises(errors.HeaderParseError,
203                          msg.set_boundary, 'BOUNDARY')
204
205    def test_make_boundary(self):
206        msg = MIMEMultipart('form-data')
207        # Note that when the boundary gets created is an implementation
208        # detail and might change.
209        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
210        # Trigger creation of boundary
211        msg.as_string()
212        self.assertEqual(msg.items()[0][1][:33],
213                        'multipart/form-data; boundary="==')
214        # XXX: there ought to be tests of the uniqueness of the boundary, too.
215
216    def test_message_rfc822_only(self):
217        # Issue 7970: message/rfc822 not in multipart parsed by
218        # HeaderParser caused an exception when flattened.
219        with openfile('msg_46.txt', encoding="utf-8") as fp:
220            msgdata = fp.read()
221        parser = HeaderParser()
222        msg = parser.parsestr(msgdata)
223        out = StringIO()
224        gen = Generator(out, True, 0)
225        gen.flatten(msg, False)
226        self.assertEqual(out.getvalue(), msgdata)
227
228    def test_byte_message_rfc822_only(self):
229        # Make sure new bytes header parser also passes this.
230        with openfile('msg_46.txt', encoding="utf-8") as fp:
231            msgdata = fp.read().encode('ascii')
232        parser = email.parser.BytesHeaderParser()
233        msg = parser.parsebytes(msgdata)
234        out = BytesIO()
235        gen = email.generator.BytesGenerator(out)
236        gen.flatten(msg)
237        self.assertEqual(out.getvalue(), msgdata)
238
239    def test_get_decoded_payload(self):
240        eq = self.assertEqual
241        msg = self._msgobj('msg_10.txt')
242        # The outer message is a multipart
243        eq(msg.get_payload(decode=True), None)
244        # Subpart 1 is 7bit encoded
245        eq(msg.get_payload(0).get_payload(decode=True),
246           b'This is a 7bit encoded message.\n')
247        # Subpart 2 is quopri
248        eq(msg.get_payload(1).get_payload(decode=True),
249           b'\xa1This is a Quoted Printable encoded message!\n')
250        # Subpart 3 is base64
251        eq(msg.get_payload(2).get_payload(decode=True),
252           b'This is a Base64 encoded message.')
253        # Subpart 4 is base64 with a trailing newline, which
254        # used to be stripped (issue 7143).
255        eq(msg.get_payload(3).get_payload(decode=True),
256           b'This is a Base64 encoded message.\n')
257        # Subpart 5 has no Content-Transfer-Encoding: header.
258        eq(msg.get_payload(4).get_payload(decode=True),
259           b'This has no Content-Transfer-Encoding: header.\n')
260
261    def test_get_decoded_uu_payload(self):
262        eq = self.assertEqual
263        msg = Message()
264        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
265        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
266            msg['content-transfer-encoding'] = cte
267            eq(msg.get_payload(decode=True), b'hello world')
268        # Now try some bogus data
269        msg.set_payload('foo')
270        eq(msg.get_payload(decode=True), b'foo')
271
272    def test_get_payload_n_raises_on_non_multipart(self):
273        msg = Message()
274        self.assertRaises(TypeError, msg.get_payload, 1)
275
276    def test_decoded_generator(self):
277        eq = self.assertEqual
278        msg = self._msgobj('msg_07.txt')
279        with openfile('msg_17.txt', encoding="utf-8") as fp:
280            text = fp.read()
281        s = StringIO()
282        g = DecodedGenerator(s)
283        g.flatten(msg)
284        eq(s.getvalue(), text)
285
286    def test__contains__(self):
287        msg = Message()
288        msg['From'] = 'Me'
289        msg['to'] = 'You'
290        # Check for case insensitivity
291        self.assertIn('from', msg)
292        self.assertIn('From', msg)
293        self.assertIn('FROM', msg)
294        self.assertIn('to', msg)
295        self.assertIn('To', msg)
296        self.assertIn('TO', msg)
297
298    def test_as_string(self):
299        msg = self._msgobj('msg_01.txt')
300        with openfile('msg_01.txt', encoding="utf-8") as fp:
301            text = fp.read()
302        self.assertEqual(text, str(msg))
303        fullrepr = msg.as_string(unixfrom=True)
304        lines = fullrepr.split('\n')
305        self.assertTrue(lines[0].startswith('From '))
306        self.assertEqual(text, NL.join(lines[1:]))
307
308    def test_as_string_policy(self):
309        msg = self._msgobj('msg_01.txt')
310        newpolicy = msg.policy.clone(linesep='\r\n')
311        fullrepr = msg.as_string(policy=newpolicy)
312        s = StringIO()
313        g = Generator(s, policy=newpolicy)
314        g.flatten(msg)
315        self.assertEqual(fullrepr, s.getvalue())
316
317    def test_nonascii_as_string_without_cte(self):
318        m = textwrap.dedent("""\
319            MIME-Version: 1.0
320            Content-type: text/plain; charset="iso-8859-1"
321
322            Test if non-ascii messages with no Content-Transfer-Encoding set
323            can be as_string'd:
324            Föö bär
325            """)
326        source = m.encode('iso-8859-1')
327        expected = textwrap.dedent("""\
328            MIME-Version: 1.0
329            Content-type: text/plain; charset="iso-8859-1"
330            Content-Transfer-Encoding: quoted-printable
331
332            Test if non-ascii messages with no Content-Transfer-Encoding set
333            can be as_string'd:
334            F=F6=F6 b=E4r
335            """)
336        msg = email.message_from_bytes(source)
337        self.assertEqual(msg.as_string(), expected)
338
339    def test_nonascii_as_string_without_content_type_and_cte(self):
340        m = textwrap.dedent("""\
341            MIME-Version: 1.0
342
343            Test if non-ascii messages with no Content-Type nor
344            Content-Transfer-Encoding set can be as_string'd:
345            Föö bär
346            """)
347        source = m.encode('iso-8859-1')
348        expected = source.decode('ascii', 'replace')
349        msg = email.message_from_bytes(source)
350        self.assertEqual(msg.as_string(), expected)
351
352    def test_as_bytes(self):
353        msg = self._msgobj('msg_01.txt')
354        with openfile('msg_01.txt', encoding="utf-8") as fp:
355            data = fp.read().encode('ascii')
356        self.assertEqual(data, bytes(msg))
357        fullrepr = msg.as_bytes(unixfrom=True)
358        lines = fullrepr.split(b'\n')
359        self.assertTrue(lines[0].startswith(b'From '))
360        self.assertEqual(data, b'\n'.join(lines[1:]))
361
362    def test_as_bytes_policy(self):
363        msg = self._msgobj('msg_01.txt')
364        newpolicy = msg.policy.clone(linesep='\r\n')
365        fullrepr = msg.as_bytes(policy=newpolicy)
366        s = BytesIO()
367        g = BytesGenerator(s,policy=newpolicy)
368        g.flatten(msg)
369        self.assertEqual(fullrepr, s.getvalue())
370
371    # test_headerregistry.TestContentTypeHeader.bad_params
372    def test_bad_param(self):
373        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
374        self.assertEqual(msg.get_param('baz'), '')
375
376    def test_missing_filename(self):
377        msg = email.message_from_string("From: foo\n")
378        self.assertEqual(msg.get_filename(), None)
379
380    def test_bogus_filename(self):
381        msg = email.message_from_string(
382        "Content-Disposition: blarg; filename\n")
383        self.assertEqual(msg.get_filename(), '')
384
385    def test_missing_boundary(self):
386        msg = email.message_from_string("From: foo\n")
387        self.assertEqual(msg.get_boundary(), None)
388
389    def test_get_params(self):
390        eq = self.assertEqual
391        msg = email.message_from_string(
392            'X-Header: foo=one; bar=two; baz=three\n')
393        eq(msg.get_params(header='x-header'),
394           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
395        msg = email.message_from_string(
396            'X-Header: foo; bar=one; baz=two\n')
397        eq(msg.get_params(header='x-header'),
398           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
399        eq(msg.get_params(), None)
400        msg = email.message_from_string(
401            'X-Header: foo; bar="one"; baz=two\n')
402        eq(msg.get_params(header='x-header'),
403           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
404
405    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
406    def test_get_param_liberal(self):
407        msg = Message()
408        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
409        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
410
411    def test_get_param(self):
412        eq = self.assertEqual
413        msg = email.message_from_string(
414            "X-Header: foo=one; bar=two; baz=three\n")
415        eq(msg.get_param('bar', header='x-header'), 'two')
416        eq(msg.get_param('quuz', header='x-header'), None)
417        eq(msg.get_param('quuz'), None)
418        msg = email.message_from_string(
419            'X-Header: foo; bar="one"; baz=two\n')
420        eq(msg.get_param('foo', header='x-header'), '')
421        eq(msg.get_param('bar', header='x-header'), 'one')
422        eq(msg.get_param('baz', header='x-header'), 'two')
423        # XXX: We are not RFC-2045 compliant!  We cannot parse:
424        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
425        # msg.get_param("weird")
426        # yet.
427
428    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
429    def test_get_param_funky_continuation_lines(self):
430        msg = self._msgobj('msg_22.txt')
431        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')
432
433    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
434    def test_get_param_with_semis_in_quotes(self):
435        msg = email.message_from_string(
436            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
437        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
438        self.assertEqual(msg.get_param('name', unquote=False),
439                         '"Jim&amp;&amp;Jill"')
440
441    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
442    def test_get_param_with_quotes(self):
443        msg = email.message_from_string(
444            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
445        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
446        msg = email.message_from_string(
447            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
448        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
449
450    def test_field_containment(self):
451        msg = email.message_from_string('Header: exists')
452        self.assertIn('header', msg)
453        self.assertIn('Header', msg)
454        self.assertIn('HEADER', msg)
455        self.assertNotIn('headerx', msg)
456
457    def test_set_param(self):
458        eq = self.assertEqual
459        msg = Message()
460        msg.set_param('charset', 'iso-2022-jp')
461        eq(msg.get_param('charset'), 'iso-2022-jp')
462        msg.set_param('importance', 'high value')
463        eq(msg.get_param('importance'), 'high value')
464        eq(msg.get_param('importance', unquote=False), '"high value"')
465        eq(msg.get_params(), [('text/plain', ''),
466                              ('charset', 'iso-2022-jp'),
467                              ('importance', 'high value')])
468        eq(msg.get_params(unquote=False), [('text/plain', ''),
469                                       ('charset', '"iso-2022-jp"'),
470                                       ('importance', '"high value"')])
471        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
472        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
473
474    def test_del_param(self):
475        eq = self.assertEqual
476        msg = self._msgobj('msg_05.txt')
477        eq(msg.get_params(),
478           [('multipart/report', ''), ('report-type', 'delivery-status'),
479            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
480        old_val = msg.get_param("report-type")
481        msg.del_param("report-type")
482        eq(msg.get_params(),
483           [('multipart/report', ''),
484            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
485        msg.set_param("report-type", old_val)
486        eq(msg.get_params(),
487           [('multipart/report', ''),
488            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
489            ('report-type', old_val)])
490
491    def test_del_param_on_other_header(self):
492        msg = Message()
493        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
494        msg.del_param('filename', 'content-disposition')
495        self.assertEqual(msg['content-disposition'], 'attachment')
496
497    def test_del_param_on_nonexistent_header(self):
498        msg = Message()
499        # Deleting param on empty msg should not raise exception.
500        msg.del_param('filename', 'content-disposition')
501
502    def test_del_nonexistent_param(self):
503        msg = Message()
504        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
505        existing_header = msg['Content-Type']
506        msg.del_param('foobar', header='Content-Type')
507        self.assertEqual(msg['Content-Type'], existing_header)
508
509    def test_set_type(self):
510        eq = self.assertEqual
511        msg = Message()
512        self.assertRaises(ValueError, msg.set_type, 'text')
513        msg.set_type('text/plain')
514        eq(msg['content-type'], 'text/plain')
515        msg.set_param('charset', 'us-ascii')
516        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
517        msg.set_type('text/html')
518        eq(msg['content-type'], 'text/html; charset="us-ascii"')
519
520    def test_set_type_on_other_header(self):
521        msg = Message()
522        msg['X-Content-Type'] = 'text/plain'
523        msg.set_type('application/octet-stream', 'X-Content-Type')
524        self.assertEqual(msg['x-content-type'], 'application/octet-stream')
525
526    def test_get_content_type_missing(self):
527        msg = Message()
528        self.assertEqual(msg.get_content_type(), 'text/plain')
529
530    def test_get_content_type_missing_with_default_type(self):
531        msg = Message()
532        msg.set_default_type('message/rfc822')
533        self.assertEqual(msg.get_content_type(), 'message/rfc822')
534
535    def test_get_content_type_from_message_implicit(self):
536        msg = self._msgobj('msg_30.txt')
537        self.assertEqual(msg.get_payload(0).get_content_type(),
538                         'message/rfc822')
539
540    def test_get_content_type_from_message_explicit(self):
541        msg = self._msgobj('msg_28.txt')
542        self.assertEqual(msg.get_payload(0).get_content_type(),
543                         'message/rfc822')
544
545    def test_get_content_type_from_message_text_plain_implicit(self):
546        msg = self._msgobj('msg_03.txt')
547        self.assertEqual(msg.get_content_type(), 'text/plain')
548
549    def test_get_content_type_from_message_text_plain_explicit(self):
550        msg = self._msgobj('msg_01.txt')
551        self.assertEqual(msg.get_content_type(), 'text/plain')
552
553    def test_get_content_maintype_missing(self):
554        msg = Message()
555        self.assertEqual(msg.get_content_maintype(), 'text')
556
557    def test_get_content_maintype_missing_with_default_type(self):
558        msg = Message()
559        msg.set_default_type('message/rfc822')
560        self.assertEqual(msg.get_content_maintype(), 'message')
561
562    def test_get_content_maintype_from_message_implicit(self):
563        msg = self._msgobj('msg_30.txt')
564        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
565
566    def test_get_content_maintype_from_message_explicit(self):
567        msg = self._msgobj('msg_28.txt')
568        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
569
570    def test_get_content_maintype_from_message_text_plain_implicit(self):
571        msg = self._msgobj('msg_03.txt')
572        self.assertEqual(msg.get_content_maintype(), 'text')
573
574    def test_get_content_maintype_from_message_text_plain_explicit(self):
575        msg = self._msgobj('msg_01.txt')
576        self.assertEqual(msg.get_content_maintype(), 'text')
577
578    def test_get_content_subtype_missing(self):
579        msg = Message()
580        self.assertEqual(msg.get_content_subtype(), 'plain')
581
582    def test_get_content_subtype_missing_with_default_type(self):
583        msg = Message()
584        msg.set_default_type('message/rfc822')
585        self.assertEqual(msg.get_content_subtype(), 'rfc822')
586
587    def test_get_content_subtype_from_message_implicit(self):
588        msg = self._msgobj('msg_30.txt')
589        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
590
591    def test_get_content_subtype_from_message_explicit(self):
592        msg = self._msgobj('msg_28.txt')
593        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
594
595    def test_get_content_subtype_from_message_text_plain_implicit(self):
596        msg = self._msgobj('msg_03.txt')
597        self.assertEqual(msg.get_content_subtype(), 'plain')
598
599    def test_get_content_subtype_from_message_text_plain_explicit(self):
600        msg = self._msgobj('msg_01.txt')
601        self.assertEqual(msg.get_content_subtype(), 'plain')
602
603    def test_get_content_maintype_error(self):
604        msg = Message()
605        msg['Content-Type'] = 'no-slash-in-this-string'
606        self.assertEqual(msg.get_content_maintype(), 'text')
607
608    def test_get_content_subtype_error(self):
609        msg = Message()
610        msg['Content-Type'] = 'no-slash-in-this-string'
611        self.assertEqual(msg.get_content_subtype(), 'plain')
612
613    def test_replace_header(self):
614        eq = self.assertEqual
615        msg = Message()
616        msg.add_header('First', 'One')
617        msg.add_header('Second', 'Two')
618        msg.add_header('Third', 'Three')
619        eq(msg.keys(), ['First', 'Second', 'Third'])
620        eq(msg.values(), ['One', 'Two', 'Three'])
621        msg.replace_header('Second', 'Twenty')
622        eq(msg.keys(), ['First', 'Second', 'Third'])
623        eq(msg.values(), ['One', 'Twenty', 'Three'])
624        msg.add_header('First', 'Eleven')
625        msg.replace_header('First', 'One Hundred')
626        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
627        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
628        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
629
630    def test_get_content_disposition(self):
631        msg = Message()
632        self.assertIsNone(msg.get_content_disposition())
633        msg.add_header('Content-Disposition', 'attachment',
634                       filename='random.avi')
635        self.assertEqual(msg.get_content_disposition(), 'attachment')
636        msg.replace_header('Content-Disposition', 'inline')
637        self.assertEqual(msg.get_content_disposition(), 'inline')
638        msg.replace_header('Content-Disposition', 'InlinE')
639        self.assertEqual(msg.get_content_disposition(), 'inline')
640
641    # test_defect_handling:test_invalid_chars_in_base64_payload
642    def test_broken_base64_payload(self):
643        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
644        msg = Message()
645        msg['content-type'] = 'audio/x-midi'
646        msg['content-transfer-encoding'] = 'base64'
647        msg.set_payload(x)
648        self.assertEqual(msg.get_payload(decode=True),
649                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
650                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
651        self.assertIsInstance(msg.defects[0],
652                              errors.InvalidBase64CharactersDefect)
653
654    def test_broken_unicode_payload(self):
655        # This test improves coverage but is not a compliance test.
656        # The behavior in this situation is currently undefined by the API.
657        x = 'this is a br\xf6ken thing to do'
658        msg = Message()
659        msg['content-type'] = 'text/plain'
660        msg['content-transfer-encoding'] = '8bit'
661        msg.set_payload(x)
662        self.assertEqual(msg.get_payload(decode=True),
663                         bytes(x, 'raw-unicode-escape'))
664
665    def test_questionable_bytes_payload(self):
666        # This test improves coverage but is not a compliance test,
667        # since it involves poking inside the black box.
668        x = 'this is a quéstionable thing to do'.encode('utf-8')
669        msg = Message()
670        msg['content-type'] = 'text/plain; charset="utf-8"'
671        msg['content-transfer-encoding'] = '8bit'
672        msg._payload = x
673        self.assertEqual(msg.get_payload(decode=True), x)
674
675    # Issue 1078919
676    def test_ascii_add_header(self):
677        msg = Message()
678        msg.add_header('Content-Disposition', 'attachment',
679                       filename='bud.gif')
680        self.assertEqual('attachment; filename="bud.gif"',
681            msg['Content-Disposition'])
682
683    def test_noascii_add_header(self):
684        msg = Message()
685        msg.add_header('Content-Disposition', 'attachment',
686            filename="Fußballer.ppt")
687        self.assertEqual(
688            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
689            msg['Content-Disposition'])
690
691    def test_nonascii_add_header_via_triple(self):
692        msg = Message()
693        msg.add_header('Content-Disposition', 'attachment',
694            filename=('iso-8859-1', '', 'Fußballer.ppt'))
695        self.assertEqual(
696            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
697            msg['Content-Disposition'])
698
699    def test_ascii_add_header_with_tspecial(self):
700        msg = Message()
701        msg.add_header('Content-Disposition', 'attachment',
702            filename="windows [filename].ppt")
703        self.assertEqual(
704            'attachment; filename="windows [filename].ppt"',
705            msg['Content-Disposition'])
706
707    def test_nonascii_add_header_with_tspecial(self):
708        msg = Message()
709        msg.add_header('Content-Disposition', 'attachment',
710            filename="Fußballer [filename].ppt")
711        self.assertEqual(
712            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
713            msg['Content-Disposition'])
714
715    def test_binary_quopri_payload(self):
716        for charset in ('latin-1', 'ascii'):
717            msg = Message()
718            msg['content-type'] = 'text/plain; charset=%s' % charset
719            msg['content-transfer-encoding'] = 'quoted-printable'
720            msg.set_payload(b'foo=e6=96=87bar')
721            self.assertEqual(
722                msg.get_payload(decode=True),
723                b'foo\xe6\x96\x87bar',
724                'get_payload returns wrong result with charset %s.' % charset)
725
726    def test_binary_base64_payload(self):
727        for charset in ('latin-1', 'ascii'):
728            msg = Message()
729            msg['content-type'] = 'text/plain; charset=%s' % charset
730            msg['content-transfer-encoding'] = 'base64'
731            msg.set_payload(b'Zm9v5paHYmFy')
732            self.assertEqual(
733                msg.get_payload(decode=True),
734                b'foo\xe6\x96\x87bar',
735                'get_payload returns wrong result with charset %s.' % charset)
736
737    def test_binary_uuencode_payload(self):
738        for charset in ('latin-1', 'ascii'):
739            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
740                msg = Message()
741                msg['content-type'] = 'text/plain; charset=%s' % charset
742                msg['content-transfer-encoding'] = encoding
743                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
744                self.assertEqual(
745                    msg.get_payload(decode=True),
746                    b'foo\xe6\x96\x87bar',
747                    str(('get_payload returns wrong result ',
748                         'with charset {0} and encoding {1}.')).\
749                        format(charset, encoding))
750
751    def test_add_header_with_name_only_param(self):
752        msg = Message()
753        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
754        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])
755
756    def test_add_header_with_no_value(self):
757        msg = Message()
758        msg.add_header('X-Status', None)
759        self.assertEqual('', msg['X-Status'])
760
761    # Issue 5871: reject an attempt to embed a header inside a header value
762    # (header injection attack).
763    def test_embedded_header_via_Header_rejected(self):
764        msg = Message()
765        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
766        self.assertRaises(errors.HeaderParseError, msg.as_string)
767
768    def test_embedded_header_via_string_rejected(self):
769        msg = Message()
770        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
771        self.assertRaises(errors.HeaderParseError, msg.as_string)
772
773    def test_unicode_header_defaults_to_utf8_encoding(self):
774        # Issue 14291
775        m = MIMEText('abc\n')
776        m['Subject'] = 'É test'
777        self.assertEqual(str(m),textwrap.dedent("""\
778            Content-Type: text/plain; charset="us-ascii"
779            MIME-Version: 1.0
780            Content-Transfer-Encoding: 7bit
781            Subject: =?utf-8?q?=C3=89_test?=
782
783            abc
784            """))
785
786    def test_unicode_body_defaults_to_utf8_encoding(self):
787        # Issue 14291
788        m = MIMEText('É testabc\n')
789        self.assertEqual(str(m),textwrap.dedent("""\
790            Content-Type: text/plain; charset="utf-8"
791            MIME-Version: 1.0
792            Content-Transfer-Encoding: base64
793
794            w4kgdGVzdGFiYwo=
795            """))
796
797
798# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    # Each test builds a message and checks the Content-Transfer-Encoding
    # header and/or the encoded body that results.

    def test_EncodersEncode_base64(self):
        with openfile('python.gif', 'rb') as image_file:
            raw_image = image_file.read()
        image_part = email.mime.image.MIMEImage(raw_image)
        encoded_body = image_part.get_payload()
        # No line of the transfer-encoded body may exceed 76 characters.
        longest = max(len(line) for line in encoded_body.split('\n'))
        self.assertLessEqual(longest, 76)

    def test_encode_empty_payload(self):
        # Setting a charset on a message that has no payload still yields
        # a Content-Transfer-Encoding header.
        empty = Message()
        empty.set_charset('us-ascii')
        self.assertEqual(empty['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        # 7bit data and the default us-ascii _charset
        ascii_part = MIMEText('hello world')
        self.assertEqual(ascii_part['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        eight_bit_part = MIMEText('hello \xf8 world')
        self.assertEqual(eight_bit_part['content-transfer-encoding'], 'base64')
        # And now with a different charset
        latin1_part = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(latin1_part['content-transfer-encoding'],
                         'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        part = MIMEText('文\n', _charset='euc-jp')
        self.assertEqual(part['content-transfer-encoding'], '7bit')
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/plain; charset="iso-2022-jp"
            Content-Transfer-Encoding: 7bit

            \x1b$BJ8\x1b(B
            """)
        self.assertEqual(part.as_string(), expected)

    def test_qp_encode_latin1(self):
        # Latin-1 text is transferred as quoted-printable.
        part = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """)
        self.assertEqual(str(part), expected)

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        part = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """)
        self.assertEqual(str(part), expected)
863
864
865# Test long header wrapping
class TestLongHeaders(TestEmailBase):
    # Exercises header folding: every test feeds a header value that is at
    # or beyond the line-length limit and compares the folded result with
    # an exact expected string.

    # The expected values are long multi-line strings, so do not let
    # unittest truncate the diff it prints on failure.
    maxDiff = None

    def test_split_long_continuation(self):
        # A parsed message whose continuation line cannot be split must be
        # regenerated unchanged.
        msg = email.message_from_string("""\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")

    def test_another_long_almost_unsplittable_header(self):
        hstr = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
        # First with the tabs kept and tab as the continuation whitespace...
        tabbed = Header(hstr, continuation_ws='\t')
        self.ndiffAssertEqual(tabbed.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
        # ...then with every tab replaced by a space.
        spaced = Header(hstr.replace('\t', ' '))
        self.ndiffAssertEqual(spaced.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")

    def test_long_nonstring(self):
        # One header assembled from three chunks in three charsets, two of
        # them supplied as bytes.
        latin1 = Charset("iso-8859-1")
        latin2 = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
                  b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                  b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                  b'bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, latin1, header_name='Subject')
        h.append(cz_head, latin2)
        h.append(utf8_head, utf8)
        msg = Message()
        msg['Subject'] = h
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
        self.ndiffAssertEqual(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")

    def test_long_header_encode(self):
        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit')
        self.ndiffAssertEqual(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
        # The value itself contains no tab, so the fold keeps the space
        # even though continuation_ws asks for a tab.
        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit',
                   continuation_ws='\t')
        self.ndiffAssertEqual(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation(self):
        # Here the value has a tab at the fold point, and it is preserved.
        h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit',
                   continuation_ws='\t')
        self.ndiffAssertEqual(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_header_encode_with_different_output_charset(self):
        # euc-jp input is expected to be emitted as iso-2022-jp.
        h = Header('文', 'euc-jp')
        self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")

    def test_long_header_encode_with_different_output_charset(self):
        h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
            b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
            b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
            b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
        res = """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
        self.assertEqual(h.encode(), res)

    def test_header_splitter(self):
        msg = MIMEText('')
        # It'd be great if we could use add_header() here, but that doesn't
        # guarantee an order of the parameters.
        msg['X-Foobar-Spoink-Defrobnit'] = (
            'wasnipoop; giraffes="very-long-necked-animals"; '
            'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"

''')

    def test_no_semis_header_splitter(self):
        # With no semicolons or commas available the value can only be
        # folded at whitespace: five 11-character ids fit on each line.
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
        msg.set_payload('Test')
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test""")

    def test_last_split_chunk_does_not_fit(self):
        h = Header('Subject: the first part of this is short, but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        self.ndiffAssertEqual(h.encode(), """\
Subject: the first part of this is short,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_splittable_leading_char_followed_by_overlong_unsplittable(self):
        h = Header(', but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        self.ndiffAssertEqual(h.encode(), """\
,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_multiple_splittable_leading_char_followed_by_overlong_unsplittable(self):
        h = Header(', , but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        self.ndiffAssertEqual(h.encode(), """\
, ,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_trailing_splittable_on_overlong_unsplittable(self):
        # A trailing ';' with nothing after it gives no place to fold.
        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
        self.ndiffAssertEqual(
            h.encode(),
            "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
            "be_on_a_line_all_by_itself;")

    def test_trailing_splittable_on_overlong_unsplittable_with_leading_splittable(self):
        h = Header('; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        self.ndiffAssertEqual(h.encode(), """\
;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_multiple_sequential_split_chars(self):
        h = Header('This is a long line that has two whitespaces  in a row.  '
            'This used to cause truncation of the header when folded')
        self.ndiffAssertEqual(h.encode(), """\
This is a long line that has two whitespaces  in a row.  This used to cause
 truncation of the header when folded""")

    def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
        # Punctuation that is not followed by whitespace is not a fold point.
        h = Header('thisverylongheaderhas;semicolons;and,commas,but'
            'they;arenotlegal;fold,points')
        self.ndiffAssertEqual(
            h.encode(),
            "thisverylongheaderhas;semicolons;and,commas,butthey;"
            "arenotlegal;fold,points")

    def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
        h = Header('this is a  test where we need to have more than one line '
            'before; our final line that is just too big to fit;; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
        self.ndiffAssertEqual(h.encode(), """\
this is a  test where we need to have more than one line before;
 our final line that is just too big to fit;;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")

    def test_overlong_last_part_followed_by_split_point(self):
        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself ')
        self.ndiffAssertEqual(
            h.encode(),
            "this_part_does_not_fit_within_maxlinelen_and_thus_"
            "should_be_on_a_line_all_by_itself ")

    def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
        h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
            'before_our_final_line_; ; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        self.ndiffAssertEqual(h.encode(), """\
this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_multiline_with_overlong_last_part_followed_by_split_point(self):
        h = Header('this is a test where we need to have more than one line '
            'before our final line; ; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        self.ndiffAssertEqual(h.encode(), """\
this is a test where we need to have more than one line before our final line;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_whitespace_runs(self):
        # Ids are separated by runs of three spaces; the whole run moves to
        # the start of the continuation line when the value is folded.
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
        msg.set_payload('Test')
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>\x20\x20

Test""")

    def test_long_run_with_semi_header_splitter(self):
        # The only ';' comes at the very end, so the long run before it is
        # folded at spaces.
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
        msg.set_payload('Test')
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain>; abc

Test""")

    def test_splitter_split_on_punctuation_only_if_fws(self):
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
            'they;arenotlegal;fold,points')
        msg.set_payload('Test')
        out = StringIO()
        Generator(out).flatten(msg)
        # XXX the space after the header should not be there.
        self.ndiffAssertEqual(out.getvalue(), """\
From: test@dom.ain
References:\x20
 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points

Test""")

    def test_no_split_long_header(self):
        hstr = 'References: ' + 'x' * 80
        h = Header(hstr)
        # These come on two lines because Headers are really field value
        # classes and don't really know about their field names.
        self.ndiffAssertEqual(h.encode(), """\
References:
 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
        h = Header('x' * 80)
        self.ndiffAssertEqual(
            h.encode(),
            'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')

    def test_splitting_multiple_long_lines(self):
        # Three over-long lines; each one is folded after its semicolons and
        # the tab that opens the second and third input lines is kept.
        hstr = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
        h = Header(hstr, continuation_ws='\t')
        self.ndiffAssertEqual(h.encode(), """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")

    def test_splitting_first_line_only_is_long(self):
        # Only the first input line exceeds the limit; the remaining lines
        # must come through exactly as given.
        hstr = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
        h = Header(hstr, maxlinelen=78, header_name='Received',
                   continuation_ws='\t')
        self.ndiffAssertEqual(h.encode(), """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
 helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")

    def test_long_8bit_header(self):
        h = Header('Britische Regierung gibt', 'iso-8859-1',
                    header_name='Subject')
        h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
        self.ndiffAssertEqual(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
        msg = Message()
        msg['Subject'] = h
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=76), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
        # maxheaderlen=0 turns folding off altogether.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=0), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")

    def test_long_8bit_header_no_charset(self):
        # The same value is set once as a plain string and once wrapped in
        # a Header; both must serialize to the same utf-8 encoded words.
        header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                         'f\xfcr Offshore-Windkraftprojekte '
                         '<a-very-long-address@example.com>')
        expected = """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

"""
        msg = Message()
        msg['Reply-To'] = header_string
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
        msg = Message()
        msg['Reply-To'] = Header(header_string,
                                 header_name='Reply-To')
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)

    def test_long_to_header(self):
        # The first two addresses are joined by a bare comma (no space), so
        # no fold is possible between them.
        to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
              '<someone@eecs.umich.edu>, '
              '"Someone Test #B" <someone@umich.edu>, '
              '"Someone Test #C" <someone@eecs.umich.edu>, '
              '"Someone Test #D" <someone@eecs.umich.edu>')
        msg = Message()
        msg['To'] = to
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

''')

    def test_long_line_after_append(self):
        s = 'This is an example of string which has almost the limit of header length.'
        h = Header(s)
        h.append('Add another line.')
        self.ndiffAssertEqual(h.encode(maxlinelen=76), """\
This is an example of string which has almost the limit of header length.
 Add another line.""")

    def test_shorter_line_with_append(self):
        # Short enough that the appended chunk stays on the same line.
        h = Header('This is a shorter line.')
        h.append('Add another sentence. (Surprise?)')
        self.ndiffAssertEqual(
            h.encode(),
            'This is a shorter line. Add another sentence. (Surprise?)')

    def test_long_field_name(self):
        fn = 'X-Very-Very-Very-Long-Header-Name'
        gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              'bef\xf6rdert. ')
        h = Header(gs, 'iso-8859-1', header_name=fn)
        # BAW: this seems broken because the first line is too long
        self.ndiffAssertEqual(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")

    def test_long_received_header(self):
        h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
             'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
             'Wed, 05 Mar 2003 18:10:18 -0700')
        msg = Message()
        msg['Received-1'] = Header(h, continuation_ws='\t')
        msg['Received-2'] = h
        # This should be splitting on spaces not semicolons.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700

""")

    def test_string_headerinst_eq(self):
        # A Header instance and the equivalent plain string must fold the
        # same way.
        h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
        msg = Message()
        msg['Received-1'] = Header(h, header_name='Received-1',
                                   continuation_ws='\t')
        msg['Received-2'] = h
        # XXX The space after the ':' should not be there.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Received-2:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")

""")

    def test_long_unbreakable_lines_with_continuation(self):
        msg = Message()
        t = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
        msg['Face-1'] = t
        msg['Face-2'] = Header(t, header_name='Face-2')
        msg['Face-3'] = ' ' + t
        # XXX This splitting is all wrong.  The first value line should be
        # snug against the field name or the space after the header not there.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-3:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

""")

    def test_another_long_multiline_header(self):
        m = ('Received: from siimage.com '
             '([172.25.1.3]) by zima.siliconimage.com with '
             'Microsoft SMTPSVC(5.0.2195.4905); '
             'Wed, 16 Oct 2002 07:41:11 -0700')
        msg = email.message_from_string(m)
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700

''')

    def test_long_lines_with_different_header(self):
        # The same value is added twice under the name 'List', first as a
        # plain string and then as a Header, giving two header lines.
        h = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             '        <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
        msg = Message()
        msg['List'] = h
        msg['List'] = Header(h, header_name='List')
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

""")

    def test_long_rfc2047_header_with_embedded_fws(self):
        # The value already contains line breaks followed by whitespace;
        # each of its lines is encoded separately.
        h = Header(textwrap.dedent("""\
            We're going to pretend this header is in a non-ascii character set
            \tto see if line wrapping with encoded words and embedded
               folding white space works"""),
                   charset='utf-8',
                   header_name='Test')
        self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
             =?utf-8?q?cter_set?=
             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
             =?utf-8?q?_folding_white_space_works?=""")+'\n')
1435
1436
1437
1438# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # Fixture: a message whose body starts with "From ", the text that
        # the generators' mangle_from_ option acts on.
        message = Message()
        message['From'] = '[email protected]'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flatten(self, mangle_from_):
        # Flatten self.msg through a text Generator created with the given
        # mangle_from_ setting and return the generated string.
        buf = StringIO()
        Generator(buf, mangle_from_=mangle_from_).flatten(self.msg)
        return buf.getvalue()

    def test_mangled_from(self):
        self.assertEqual(self._flatten(True), """\
From: [email protected]

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        self.assertEqual(self._flatten(False), """\
From: [email protected]

From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangle_from_in_preamble_and_epilog(self):
        # The text before the first boundary and the text after the closing
        # boundary each contain one line starting with "From ".
        msg = email.message_from_string(textwrap.dedent("""\
            From: [email protected]
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(msg)
        mangled = [line for line in out.getvalue().split('\n')
                   if line.startswith('>From ')]
        self.assertEqual(len(mangled), 2)

    def test_mangled_from_with_bad_bytes(self):
        # The body line is appended as raw bytes containing a non-ASCII
        # sequence; the bytes generator must still prefix it with '>'.
        headers = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: [email protected]

        """).encode('utf-8')
        parsed = email.message_from_bytes(headers + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(parsed)
        self.assertEqual(out.getvalue(), headers + b'>From R\xc3\xb6lli\n')

    def test_multipart_with_bad_bytes_in_cte(self):
        # bpo30835
        # There is nothing to assert here: the test passes as long as
        # parsing completes without raising.
        raw = textwrap.dedent("""\
            From: [email protected]
            Content-Type: multipart/mixed; boundary="1"
            Content-Transfer-Encoding: \xc8
        """).encode('utf-8')
        email.message_from_bytes(raw)
1515
1516
1517# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def _make_audio(self, ext):
        # Read the sndhdr.<ext> sample and build a MIMEAudio from its bytes,
        # passing no explicit subtype.  The raw bytes and the message are
        # kept on the instance for the individual tests.
        with openfile(f'sndhdr.{ext}', 'rb') as fp:
            raw = fp.read()
        self._audiodata = raw
        self._au = MIMEAudio(raw)

    def test_guess_minor_type(self):
        # Sample file extension -> subtype expected in the content type.
        # A None value would mean "same as the extension".
        expected_subtypes = {
            'aifc': 'x-aiff',
            'aiff': 'x-aiff',
            'wav': 'x-wav',
            'au': 'basic',
        }
        for ext, subtype in expected_subtypes.items():
            self._make_audio(ext)
            if subtype is None:
                subtype = ext
            self.assertEqual(self._au.get_content_type(), f'audio/{subtype}')

    def test_encoding(self):
        # Base64-decoding the payload must give back the sample's bytes.
        self._make_audio('au')
        encoded = self._au.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        # An explicitly passed subtype is used as given.
        self._make_audio('au')
        explicit = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(explicit.get_content_type(), 'audio/fish')

    def test_add_header(self):
        self._make_audio('au')
        audio = self._au
        audio.add_header('Content-Disposition', 'attachment',
                         filename='sndhdr.au')
        self.assertEqual(audio['content-disposition'],
                         'attachment; filename="sndhdr.au"')
        self.assertEqual(audio.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'sndhdr.au')])
        self.assertEqual(
            audio.get_param('filename', header='content-disposition'),
            'sndhdr.au')
        # A fresh list serves as a sentinel that only identity can match.
        sentinel = []
        self.assertEqual(
            audio.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            audio.get_param('foo', failobj=sentinel,
                            header='content-disposition'),
            sentinel)
        # Try some missing stuff
        self.assertIs(audio.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            audio.get_param('attachment', sentinel, header='foobar'),
            sentinel)
1566
1567
1568
1569# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def _make_image(self, ext):
        # Read the python.<ext> sample and build a MIMEImage from its bytes,
        # passing no explicit subtype.  The raw bytes and the message are
        # kept on the instance for the individual tests.
        with openfile(f'python.{ext}', 'rb') as fp:
            raw = fp.read()
        self._imgdata = raw
        self._im = MIMEImage(raw)

    def test_guess_minor_type(self):
        # Sample file extension -> subtype expected in the content type.
        # None means the subtype is the same as the extension.
        expected_subtypes = {
            'bmp': None,
            'exr': None,
            'gif': None,
            'jpg': 'jpeg',
            'pbm': None,
            'pgm': None,
            'png': None,
            'ppm': None,
            'ras': 'rast',
            'sgi': 'rgb',
            'tiff': None,
            'webp': None,
            'xbm': None,
        }
        for ext, subtype in expected_subtypes.items():
            self._make_image(ext)
            if subtype is None:
                subtype = ext
            self.assertEqual(self._im.get_content_type(), f'image/{subtype}')

    def test_encoding(self):
        # Base64-decoding the payload must give back the sample's bytes.
        self._make_image('gif')
        encoded = self._im.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        # An explicitly passed subtype is used as given.
        self._make_image('gif')
        explicit = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(explicit.get_content_type(), 'image/fish')

    def test_add_header(self):
        self._make_image('gif')
        image = self._im
        image.add_header('Content-Disposition', 'attachment',
                         filename='dingusfish.gif')
        self.assertEqual(image['content-disposition'],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(image.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(
            image.get_param('filename', header='content-disposition'),
            'dingusfish.gif')
        # A fresh list serves as a sentinel that only identity can match.
        sentinel = []
        self.assertEqual(
            image.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            image.get_param('foo', failobj=sentinel,
                            header='content-disposition'),
            sentinel)
        # Try some missing stuff
        self.assertIs(image.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            image.get_param('attachment', sentinel, header='foobar'),
            sentinel)
1626
1627
1628# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def _roundtrip(self, msg):
        # Flatten msg with a BytesGenerator and parse the generated bytes
        # back into a new message object, which is returned.
        wire = BytesIO()
        BytesGenerator(wire).flatten(msg)
        return email.message_from_bytes(wire.getvalue())

    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        replaced = '\uFFFD' * len(bytesdata)
        self.assertEqual(msg.get_payload(), replaced)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        msg2 = self._roundtrip(msg)
        # Flattening must not have altered the original message ...
        self.assertEqual(msg.get_payload(), replaced)
        # ... and the re-parsed copy must expose the same payload.
        self.assertEqual(msg2.get_payload(), replaced)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        replaced = '\uFFFD' * len(bytesdata)
        self.assertEqual(msg.get_payload(), replaced)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Check both the untouched original and the re-parsed copy.
        self.assertEqual(msg.get_payload(), replaced)
        self.assertEqual(msg2.get_payload(), replaced)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_unicode_linend_encode_noop(self):
        # Issue 19003: This is a variation on #16564.
        bytesdata = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        encoded = '=FA=FB=FC=FD=FE=FF=20'
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        msg2 = self._roundtrip(msg)
        # Check both the untouched original and the re-parsed copy.
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        encoded = '+vv8/f7/\n'
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Check both the untouched original and the re-parsed copy.
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
1718
1719
1720# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        # Shared fixture: a MIMEText built from ASCII text with no explicit
        # charset argument.
        self._msg = MIMEText('hello there')

    def test_types(self):
        text = self._msg
        self.assertEqual(text.get_content_type(), 'text/plain')
        self.assertEqual(text.get_param('charset'), 'us-ascii')
        # A fresh list serves as a sentinel that only identity can match.
        sentinel = []
        self.assertIs(text.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            text.get_param('charset', sentinel, header='foobar'), sentinel)

    def test_payload(self):
        text = self._msg
        self.assertEqual(text.get_payload(), 'hello there')
        self.assertFalse(text.is_multipart())

    def test_charset(self):
        # First with the charset given by name ...
        by_name = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(by_name.get_charset().input_charset, 'us-ascii')
        self.assertEqual(by_name['content-type'],
                         'text/plain; charset="us-ascii"')
        # Also accept a Charset instance
        utf8 = Charset('utf-8')
        utf8.body_encoding = None
        by_instance = MIMEText('hello there', _charset=utf8)
        self.assertEqual(by_instance.get_charset().input_charset, 'utf-8')
        self.assertEqual(by_instance['content-type'],
                         'text/plain; charset="utf-8"')
        self.assertEqual(by_instance.get_payload(), 'hello there')

    def test_7bit_input(self):
        text = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(text.get_charset().input_charset, 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        text = MIMEText('hello there')
        # The object returned by get_charset() is compared with a plain
        # string here.
        self.assertEqual(text.get_charset(), 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', text.as_string())

    def test_utf8_input(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        text = MIMEText(cyrillic, _charset='utf-8')
        self.assertEqual(text.get_charset().output_charset, 'utf-8')
        self.assertEqual(text['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(text.get_payload(decode=True),
                         cyrillic.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        with self.assertRaises(UnicodeEncodeError):
            MIMEText(cyrillic)
1777
1778
1779
1780# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a multipart/mixed container holding a text part and an image
        # part, with From/To/Subject/Date headers.  The container and both
        # parts are kept on the instance for the tests.
        with openfile('python.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <[email protected]>'
        container['To'] = 'Dingus Lovers <[email protected]>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last item of the time tuple is the DST flag.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone and time.altzone count seconds *west* of UTC, so a
        # positive value corresponds to a '-' zone offset in the header.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  Working from abs() keeps the minus
        # sign of eastern zones out of the digits, and divmod() keeps the
        # minutes of half-hour zones in base 60.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]

--BOUNDARY

--BOUNDARY--
''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) says
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="    XXXX"

--    XXXX
Content-Type: text/plain


--    XXXX
Content-Type: text/plain

--    XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), '    XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')

    def test_mimebase_default_policy(self):
        m = MIMEBase('multipart', 'mixed')
        self.assertIs(m.policy, email.policy.compat32)

    def test_mimebase_custom_policy(self):
        m = MIMEBase('multipart', 'mixed', policy=email.policy.default)
        self.assertIs(m.policy, email.policy.default)
2164
2165# Test some badly formatted messages
2166class TestNonConformant(TestEmailBase):
2167
2168    def test_parse_missing_minor_type(self):
2169        eq = self.assertEqual
2170        msg = self._msgobj('msg_14.txt')
2171        eq(msg.get_content_type(), 'text/plain')
2172        eq(msg.get_content_maintype(), 'text')
2173        eq(msg.get_content_subtype(), 'plain')
2174
2175    # test_defect_handling
2176    def test_same_boundary_inner_outer(self):
2177        msg = self._msgobj('msg_15.txt')
2178        # XXX We can probably eventually do better
2179        inner = msg.get_payload(0)
2180        self.assertTrue(hasattr(inner, 'defects'))
2181        self.assertEqual(len(inner.defects), 1)
2182        self.assertIsInstance(inner.defects[0],
2183                              errors.StartBoundaryNotFoundDefect)
2184
2185    # test_defect_handling
2186    def test_multipart_no_boundary(self):
2187        msg = self._msgobj('msg_25.txt')
2188        self.assertIsInstance(msg.get_payload(), str)
2189        self.assertEqual(len(msg.defects), 2)
2190        self.assertIsInstance(msg.defects[0],
2191                              errors.NoBoundaryInMultipartDefect)
2192        self.assertIsInstance(msg.defects[1],
2193                              errors.MultipartInvariantViolationDefect)
2194
    # Template for a two-part multipart/mixed message.  The '{}' placeholder
    # sits at the very end of the Content-Type header, so str.format() can
    # append further header text there (the tests using this template pass
    # either '' or a '\nContent-Transfer-Encoding: ...' line) ahead of the
    # blank line that ends the headers.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: [email protected]
        To: [email protected]
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)
2217
2218    # test_defect_handling
2219    def test_multipart_invalid_cte(self):
2220        msg = self._str_msg(
2221            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
2222        self.assertEqual(len(msg.defects), 1)
2223        self.assertIsInstance(msg.defects[0],
2224            errors.InvalidMultipartContentTransferEncodingDefect)
2225
2226    # test_defect_handling
2227    def test_multipart_no_cte_no_defect(self):
2228        msg = self._str_msg(self.multipart_msg.format(''))
2229        self.assertEqual(len(msg.defects), 0)
2230
2231    # test_defect_handling
2232    def test_multipart_valid_cte_no_defect(self):
2233        for cte in ('7bit', '8bit', 'BINary'):
2234            msg = self._str_msg(
2235                self.multipart_msg.format(
2236                    "\nContent-Transfer-Encoding: {}".format(cte)))
2237            self.assertEqual(len(msg.defects), 0)
2238
    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
2240    def test_invalid_content_type(self):
2241        eq = self.assertEqual
2242        neq = self.ndiffAssertEqual
2243        msg = Message()
2244        # RFC 2045, $5.2 says invalid yields text/plain
2245        msg['Content-Type'] = 'text'
2246        eq(msg.get_content_maintype(), 'text')
2247        eq(msg.get_content_subtype(), 'plain')
2248        eq(msg.get_content_type(), 'text/plain')
2249        # Clear the old value and try something /really/ invalid
2250        del msg['content-type']
2251        msg['Content-Type'] = 'foo'
2252        eq(msg.get_content_maintype(), 'text')
2253        eq(msg.get_content_subtype(), 'plain')
2254        eq(msg.get_content_type(), 'text/plain')
2255        # Still, make sure that the message is idempotently generated
2256        s = StringIO()
2257        g = Generator(s)
2258        g.flatten(msg)
2259        neq(s.getvalue(), 'Content-Type: foo\n\n')
2260
2261    def test_no_start_boundary(self):
2262        eq = self.ndiffAssertEqual
2263        msg = self._msgobj('msg_31.txt')
2264        eq(msg.get_payload(), """\
2265--BOUNDARY
2266Content-Type: text/plain
2267
2268message 1
2269
2270--BOUNDARY
2271Content-Type: text/plain
2272
2273message 2
2274
2275--BOUNDARY--
2276""")
2277
2278    def test_no_separating_blank_line(self):
2279        eq = self.ndiffAssertEqual
2280        msg = self._msgobj('msg_35.txt')
2281        eq(msg.as_string(), """\
2282From: [email protected]
2283To: [email protected]
2284Subject: here's something interesting
2285
2286counter to RFC 2822, there's no separating newline here
2287""")
2288
2289    # test_defect_handling
2290    def test_lying_multipart(self):
2291        msg = self._msgobj('msg_41.txt')
2292        self.assertTrue(hasattr(msg, 'defects'))
2293        self.assertEqual(len(msg.defects), 2)
2294        self.assertIsInstance(msg.defects[0],
2295                              errors.NoBoundaryInMultipartDefect)
2296        self.assertIsInstance(msg.defects[1],
2297                              errors.MultipartInvariantViolationDefect)
2298
2299    # test_defect_handling
2300    def test_missing_start_boundary(self):
2301        outer = self._msgobj('msg_42.txt')
2302        # The message structure is:
2303        #
2304        # multipart/mixed
2305        #    text/plain
2306        #    message/rfc822
2307        #        multipart/mixed [*]
2308        #
2309        # [*] This message is missing its start boundary
2310        bad = outer.get_payload(1).get_payload(0)
2311        self.assertEqual(len(bad.defects), 1)
2312        self.assertIsInstance(bad.defects[0],
2313                              errors.StartBoundaryNotFoundDefect)
2314
2315    # test_defect_handling
2316    def test_first_line_is_continuation_header(self):
2317        eq = self.assertEqual
2318        m = ' Line 1\nSubject: test\n\nbody'
2319        msg = email.message_from_string(m)
2320        eq(msg.keys(), ['Subject'])
2321        eq(msg.get_payload(), 'body')
2322        eq(len(msg.defects), 1)
2323        self.assertDefectsEqual(msg.defects,
2324                                 [errors.FirstHeaderLineIsContinuationDefect])
2325        eq(msg.defects[0].line, ' Line 1\n')
2326
2327    # test_defect_handling
2328    def test_missing_header_body_separator(self):
2329        # Our heuristic if we see a line that doesn't look like a header (no
2330        # leading whitespace but no ':') is to assume that the blank line that
2331        # separates the header from the body is missing, and to stop parsing
2332        # headers and start parsing the body.
2333        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
2334        self.assertEqual(msg.keys(), ['Subject'])
2335        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
2336        self.assertDefectsEqual(msg.defects,
2337                                [errors.MissingHeaderBodySeparatorDefect])
2338
2339
2340# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    def test_rfc2047_multiline(self):
        # Decode a folded header containing two encoded words, rebuild a
        # Header from the decoded chunks and check both its str() form and
        # its re-encoded, re-folded form.
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        # The space between the encoded word and the following plain text
        # must survive the decode_header/make_header round trip.
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <[email protected]>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <[email protected]>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <[email protected]>')

    def test_whitespace_keeper_unicode_2(self):
        # Same idea with plain text on both sides of two encoded words.
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that directly abut plain text are still decoded.
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        # With separating spaces, the spaces stay with the plain chunks.
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        # Base64 encoded words with too much, too little or no '=' padding
        # must all decode to the same bytes.
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            # subTest reports which encoded input failed and keeps the
            # remaining inputs running.
            with self.subTest(encoded=q):
                dh = decode_header(s % q)
                self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        s = '=?iso-8859-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                        [(b'andr\xe9=zz', 'iso-8859-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n    =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n    =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        # Re-encoding gives back the input with lower-cased charset/encoding
        # tokens; str() gives the plain decoded text.
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')

    def test_multiline_header(self):
        # An encoded word followed by a folded (CRLF + space) continuation.
        s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <[email protected]>'
        self.assertEqual(decode_header(s),
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<[email protected]>', None)])
        # Re-encoding yields the input with the line break removed.
        self.assertEqual(make_header(decode_header(s)).encode(),
                         ''.join(s.splitlines()))
        self.assertEqual(str(make_header(decode_header(s))),
                         '"Müller T" <[email protected]>')
2467
2468
2469# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    def setUp(self):
        # Keep the raw text of msg_11.txt available on the instance.
        with openfile('msg_11.txt', encoding="utf-8") as source:
            self._text = source.read()

    def test_type_error(self):
        # A plain string is not an acceptable MIMEMessage argument.
        with self.assertRaises(TypeError):
            MIMEMessage('a plain string')

    def test_valid_argument(self):
        subject = 'A sub-message'
        inner = Message()
        inner['Subject'] = subject
        wrapper = MIMEMessage(inner)
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        parts = wrapper.get_payload()
        self.assertIsInstance(parts, list)
        self.assertEqual(len(parts), 1)
        # The wrapped object itself is stored, not a copy of it.
        only_part = parts[0]
        self.assertIs(only_part, inner)
        self.assertEqual(only_part['subject'], subject)

    def test_bad_multipart(self):
        first = Message()
        first['Subject'] = 'subpart 1'
        second = Message()
        second['Subject'] = 'subpart 2'
        wrapper = MIMEMessage(first)
        # Attaching a further part to the wrapper must be refused.
        with self.assertRaises(errors.MultipartConversionError):
            wrapper.attach(second)

    def test_generate(self):
        # First craft the message to be encapsulated
        enclosed = Message()
        enclosed['Subject'] = 'An enclosed message'
        enclosed.set_payload('Here is the body of the message.\n')
        # Then wrap it and flatten the wrapper
        enclosing = MIMEMessage(enclosed)
        enclosing['Subject'] = 'The enclosing message'
        out = StringIO()
        Generator(out).flatten(enclosing)
        self.assertEqual(out.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        parsed = self._msgobj('msg_11.txt')
        self.assertEqual(parsed.get_content_type(), 'message/rfc822')
        # The payload is a one-element list holding the enclosed message.
        parts = parsed.get_payload()
        self.assertIsInstance(parts, list)
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed['subject'], 'An enclosed message')
        self.assertEqual(enclosed.get_payload(),
                         'Here is the body of the message.\n')

    def test_dsn(self):
        check = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        report = self._msgobj('msg_16.txt')
        check(report.get_content_type(), 'multipart/report')
        self.assertTrue(report.is_multipart())
        check(len(report.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        human = report.get_payload(0)
        check(human.get_content_type(), 'text/plain')
        check(human.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <[email protected]>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <[email protected]>
  To: SoCal Raves <[email protected]>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: [email protected]
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        status = report.get_payload(1)
        check(status.get_content_type(), 'message/delivery-status')
        check(len(status.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        per_message = status.get_payload(0)
        self.assertIsInstance(per_message, Message)
        check(per_message['original-envelope-id'], '[email protected]')
        check(per_message.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        check(per_message.get_param('nsd', header='reporting-mta'), None)
        per_recipient = status.get_payload(1)
        self.assertIsInstance(per_recipient, Message)
        check(per_recipient['action'], 'failed')
        check(per_recipient.get_params(header='original-recipient'),
              [('rfc822', ''), ('[email protected]', '')])
        check(per_recipient.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        original = report.get_payload(2)
        check(original.get_content_type(), 'message/rfc822')
        parts = original.get_payload()
        self.assertIsInstance(parts, list)
        check(len(parts), 1)
        enclosed = parts[0]
        self.assertIsInstance(enclosed, Message)
        check(enclosed.get_content_type(), 'text/plain')
        check(enclosed['message-id'],
              '<[email protected]>')

    def test_epilogue(self):
        # Build by hand the message stored in msg_21.txt and require the
        # flattened result to equal the file's text.
        with openfile('msg_21.txt', encoding="utf-8") as source:
            expected = source.read()
        container = Message()
        container['From'] = '[email protected]'
        container['To'] = '[email protected]'
        container['Subject'] = 'Test'
        container.preamble = 'MIME message'
        container.epilogue = 'End of MIME message\n'
        part_one = MIMEText('One')
        part_two = MIMEText('Two')
        container.add_header('Content-Type', 'multipart/mixed',
                             boundary='BOUNDARY')
        container.attach(part_one)
        container.attach(part_two)
        out = StringIO()
        Generator(out).flatten(container)
        self.ndiffAssertEqual(out.getvalue(), expected)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        container = Message()
        container['From'] = '[email protected]'
        container['To'] = '[email protected]'
        container['Subject'] = 'Test'
        container.preamble = 'MIME message'
        container.epilogue = ''
        part_one = MIMEText('One')
        part_two = MIMEText('Two')
        container.add_header('Content-Type', 'multipart/mixed',
                             boundary='BOUNDARY')
        container.attach(part_one)
        container.attach(part_two)
        self.ndiffAssertEqual(container.as_string(), """\
From: [email protected]
To: [email protected]
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        check = self.assertEqual
        with openfile('msg_30.txt', encoding="utf-8") as source:
            parsed = email.message_from_file(source)
        # Both top-level parts default to, and report, message/rfc822 ...
        for index in (0, 1):
            container = parsed.get_payload(index)
            check(container.get_default_type(), 'message/rfc822')
            check(container.get_content_type(), 'message/rfc822')
        # ... while the message inside each of them is text/plain.
        for index in (0, 1):
            enclosed = parsed.get_payload(index).get_payload(0)
            check(enclosed.get_default_type(), 'text/plain')
            check(enclosed.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        check = self.assertEqual
        with openfile('msg_28.txt', encoding="utf-8") as source:
            parsed = email.message_from_file(source)
        # Same expectations as test_default_type, for msg_28.txt.
        for index in (0, 1):
            container = parsed.get_payload(index)
            check(container.get_default_type(), 'message/rfc822')
            check(container.get_content_type(), 'message/rfc822')
        for index in (0, 1):
            enclosed = parsed.get_payload(index).get_payload(0)
            check(enclosed.get_default_type(), 'text/plain')
            check(enclosed.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        check = self.assertEqual
        check_text = self.ndiffAssertEqual
        # Set up container
        digest = MIMEMultipart('digest', 'BOUNDARY')
        digest.epilogue = ''
        # Set up subparts
        text_one = MIMEText('message 1\n')
        text_two = MIMEText('message 2\n')
        wrapped_one = MIMEMessage(text_one)
        wrapped_two = MIMEMessage(text_two)
        digest.attach(wrapped_one)
        digest.attach(wrapped_two)
        check(wrapped_one.get_content_type(), 'message/rfc822')
        check(wrapped_one.get_default_type(), 'message/rfc822')
        check(wrapped_two.get_content_type(), 'message/rfc822')
        check(wrapped_two.get_default_type(), 'message/rfc822')
        check_text(digest.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With their own headers removed, the wrappers must still report
        # message/rfc822 and the flattened parts must have empty headers.
        del wrapped_one['content-type']
        del wrapped_one['mime-version']
        del wrapped_two['content-type']
        del wrapped_two['mime-version']
        check(wrapped_one.get_content_type(), 'message/rfc822')
        check(wrapped_one.get_default_type(), 'message/rfc822')
        check(wrapped_two.get_content_type(), 'message/rfc822')
        check(wrapped_two.get_default_type(), 'message/rfc822')
        check_text(digest.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Parts handed to the constructor become the payload, in order.
        first = MIMEText('')
        second = MIMEText('')
        container = MIMEMultipart(_subparts=(first, second))
        self.assertEqual(len(container.get_payload()), 2)
        self.assertEqual(container.get_payload(0), first)
        self.assertEqual(container.get_payload(1), second)

    def test_default_multipart_constructor(self):
        container = MIMEMultipart()
        self.assertTrue(container.is_multipart())

    def test_multipart_default_policy(self):
        # Without an explicit policy, assigning 'To' twice keeps both values.
        container = MIMEMultipart()
        container['To'] = '[email protected]'
        container['To'] = '[email protected]'
        self.assertEqual(container.get_all('to'),
                         ['[email protected]', '[email protected]'])

    def test_multipart_custom_policy(self):
        # With email.policy.default the second 'To' assignment is rejected.
        container = MIMEMultipart(policy=email.policy.default)
        container['To'] = '[email protected]'
        with self.assertRaises(ValueError) as caught:
            container['To'] = '[email protected]'
        self.assertEqual(str(caught.exception),
                         'There may be at most 1 To headers in a message')
2778
2779
2780# Test the NonMultipart class
class TestNonMultipart(TestEmailBase):
    def test_nonmultipart_is_not_multipart(self):
        leaf = MIMENonMultipart('text', 'plain')
        self.assertFalse(leaf.is_multipart())

    def test_attach_raises_exception(self):
        # attach() on a non-multipart object must be refused.
        extra = Message()
        extra['Subject'] = 'subpart 1'
        leaf = MIMENonMultipart('text', 'plain')
        with self.assertRaises(errors.MultipartConversionError):
            leaf.attach(extra)
2791
2792
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):

    linesep = '\n'

    def _msgobj(self, filename):
        # Return both the parsed message and the raw text it was parsed from.
        with openfile(filename, encoding="utf-8") as source:
            raw = source.read()
        return email.message_from_string(raw), raw

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with maxheaderlen=0 and require the output to
        # reproduce text exactly.
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        check = self.assertEqual
        parsed, raw = self._msgobj('msg_01.txt')
        check(parsed.get_content_type(), 'text/plain')
        check(parsed.get_content_maintype(), 'text')
        check(parsed.get_content_subtype(), 'plain')
        check(parsed.get_params()[1], ('charset', 'us-ascii'))
        check(parsed.get_param('charset'), 'us-ascii')
        check(parsed.preamble, None)
        check(parsed.epilogue, None)
        self._idempotent(parsed, raw)

    def test_parse_untyped_message(self):
        check = self.assertEqual
        parsed, raw = self._msgobj('msg_03.txt')
        check(parsed.get_content_type(), 'text/plain')
        check(parsed.get_params(), None)
        check(parsed.get_param('charset'), None)
        self._idempotent(parsed, raw)

    # Each of the following tests parses one sample file and requires the
    # regenerated text to equal the file's text.

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # This one is regenerated with the Unix-From line included.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        check = self.assertEqual
        yadda = 'Yadda yadda yadda' + self.linesep
        parsed, raw = self._msgobj('msg_05.txt')
        check(parsed.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(parsed.get_params())
        check(params['report-type'], 'delivery-status')
        check(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        check(parsed.preamble,
              'This is a MIME-encapsulated message.' + self.linesep)
        check(parsed.epilogue, self.linesep)
        check(len(parsed.get_payload()), 3)
        # Make sure the subparts are what we expect
        first = parsed.get_payload(0)
        check(first.get_content_type(), 'text/plain')
        check(first.get_payload(), yadda)
        second = parsed.get_payload(1)
        check(second.get_content_type(), 'text/plain')
        check(second.get_payload(), yadda)
        third = parsed.get_payload(2)
        check(third.get_content_type(), 'message/rfc822')
        self.assertIsInstance(third, Message)
        parts = third.get_payload()
        self.assertIsInstance(parts, list)
        check(len(parts), 1)
        enclosed = parts[0]
        self.assertIsInstance(enclosed, Message)
        check(enclosed.get_payload(), yadda)

    def test_parser(self):
        check = self.assertEqual
        parsed, raw = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        check(parsed.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        parts = parsed.get_payload()
        self.assertIsInstance(parts, list)
        check(len(parts), 1)
        enclosed = parts[0]
        self.assertIsInstance(enclosed, Message)
        check(enclosed.get_content_type(), 'text/plain')
        body = enclosed.get_payload()
        self.assertIsInstance(body, str)
        check(enclosed.get_payload(), self.linesep)
2953
2954
2955
2956# Test various other bits of the package's functionality
2957class TestMiscellaneous(TestEmailBase):
2958    def test_message_from_string(self):
2959        with openfile('msg_01.txt', encoding="utf-8") as fp:
2960            text = fp.read()
2961        msg = email.message_from_string(text)
2962        s = StringIO()
2963        # Don't wrap/continue long headers since we're trying to test
2964        # idempotency.
2965        g = Generator(s, maxheaderlen=0)
2966        g.flatten(msg)
2967        self.assertEqual(text, s.getvalue())
2968
2969    def test_message_from_file(self):
2970        with openfile('msg_01.txt', encoding="utf-8") as fp:
2971            text = fp.read()
2972            fp.seek(0)
2973            msg = email.message_from_file(fp)
2974            s = StringIO()
2975            # Don't wrap/continue long headers since we're trying to test
2976            # idempotency.
2977            g = Generator(s, maxheaderlen=0)
2978            g.flatten(msg)
2979            self.assertEqual(text, s.getvalue())
2980
2981    def test_message_from_string_with_class(self):
2982        with openfile('msg_01.txt', encoding="utf-8") as fp:
2983            text = fp.read()
2984
2985        # Create a subclass
2986        class MyMessage(Message):
2987            pass
2988
2989        msg = email.message_from_string(text, MyMessage)
2990        self.assertIsInstance(msg, MyMessage)
2991        # Try something more complicated
2992        with openfile('msg_02.txt', encoding="utf-8") as fp:
2993            text = fp.read()
2994        msg = email.message_from_string(text, MyMessage)
2995        for subpart in msg.walk():
2996            self.assertIsInstance(subpart, MyMessage)
2997
2998    def test_message_from_file_with_class(self):
2999        # Create a subclass
3000        class MyMessage(Message):
3001            pass
3002
3003        with openfile('msg_01.txt', encoding="utf-8") as fp:
3004            msg = email.message_from_file(fp, MyMessage)
3005        self.assertIsInstance(msg, MyMessage)
3006        # Try something more complicated
3007        with openfile('msg_02.txt', encoding="utf-8") as fp:
3008            msg = email.message_from_file(fp, MyMessage)
3009        for subpart in msg.walk():
3010            self.assertIsInstance(subpart, MyMessage)
3011
3012    def test_custom_message_does_not_require_arguments(self):
3013        class MyMessage(Message):
3014            def __init__(self):
3015                super().__init__()
3016        msg = self._str_msg("Subject: test\n\ntest", MyMessage)
3017        self.assertIsInstance(msg, MyMessage)
3018
3019    def test__all__(self):
3020        module = __import__('email')
3021        self.assertEqual(sorted(module.__all__), [
3022            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
3023            'generator', 'header', 'iterators', 'message',
3024            'message_from_binary_file', 'message_from_bytes',
3025            'message_from_file', 'message_from_string', 'mime', 'parser',
3026            'quoprimime', 'utils',
3027            ])
3028
3029    def test_formatdate(self):
3030        now = time.time()
3031        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
3032                         time.gmtime(now)[:6])
3033
3034    def test_formatdate_localtime(self):
3035        now = time.time()
3036        self.assertEqual(
3037            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
3038            time.localtime(now)[:6])
3039
3040    def test_formatdate_usegmt(self):
3041        now = time.time()
3042        self.assertEqual(
3043            utils.formatdate(now, localtime=False),
3044            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
3045        self.assertEqual(
3046            utils.formatdate(now, localtime=False, usegmt=True),
3047            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
3048
3049    # parsedate and parsedate_tz will become deprecated interfaces someday
3050    def test_parsedate_returns_None_for_invalid_strings(self):
3051        # See also test_parsedate_to_datetime_with_invalid_raises_valueerror
3052        # in test_utils.
3053        invalid_dates = [
3054            '',
3055            ' ',
3056            '0',
3057            'A Complete Waste of Time',
3058            'Wed, 3 Apr 2002 12.34.56.78+0800',
3059            '17 June , 2022',
3060            'Friday, -Nov-82 16:14:55 EST',
3061            'Friday, Nov--82 16:14:55 EST',
3062            'Friday, 19-Nov- 16:14:55 EST',
3063        ]
3064        for dtstr in invalid_dates:
3065            with self.subTest(dtstr=dtstr):
3066                self.assertIsNone(utils.parsedate(dtstr))
3067                self.assertIsNone(utils.parsedate_tz(dtstr))
3068        # Not a part of the spec but, but this has historically worked:
3069        self.assertIsNone(utils.parsedate(None))
3070        self.assertIsNone(utils.parsedate_tz(None))
3071
3072    def test_parsedate_compact(self):
3073        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26 +0800'),
3074                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
3075        # The FWS after the comma is optional
3076        self.assertEqual(utils.parsedate_tz('Wed,3 Apr 2002 14:58:26 +0800'),
3077                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
3078        # The comma is optional
3079        self.assertEqual(utils.parsedate_tz('Wed 3 Apr 2002 14:58:26 +0800'),
3080                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
3081
3082    def test_parsedate_no_dayofweek(self):
3083        eq = self.assertEqual
3084        eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
3085           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
3086        eq(utils.parsedate_tz('February 5, 2003 13:47:26 -0800'),
3087           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
3088
3089    def test_parsedate_no_space_before_positive_offset(self):
3090        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
3091           (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
3092
3093    def test_parsedate_no_space_before_negative_offset(self):
3094        # Issue 1155362: we already handled '+' for this case.
3095        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
3096           (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
3097
3098    def test_parsedate_accepts_time_with_dots(self):
3099        eq = self.assertEqual
3100        eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
3101           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
3102        eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
3103           (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
3104
3105    def test_parsedate_rfc_850(self):
3106        self.assertEqual(utils.parsedate_tz('Friday, 19-Nov-82 16:14:55 EST'),
3107           (1982, 11, 19, 16, 14, 55, 0, 1, -1, -18000))
3108
3109    def test_parsedate_no_seconds(self):
3110        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58 +0800'),
3111                         (2002, 4, 3, 14, 58, 0, 0, 1, -1, 28800))
3112
3113    def test_parsedate_dot_time_delimiter(self):
3114        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14.58.26 +0800'),
3115                         (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
3116        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14.58 +0800'),
3117                         (2002, 4, 3, 14, 58, 0, 0, 1, -1, 28800))
3118
3119    def test_parsedate_acceptable_to_time_functions(self):
3120        eq = self.assertEqual
3121        timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
3122        t = int(time.mktime(timetup))
3123        eq(time.localtime(t)[:6], timetup[:6])
3124        eq(int(time.strftime('%Y', timetup)), 2003)
3125        timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
3126        t = int(time.mktime(timetup[:9]))
3127        eq(time.localtime(t)[:6], timetup[:6])
3128        eq(int(time.strftime('%Y', timetup[:9])), 2003)
3129
3130    def test_mktime_tz(self):
3131        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
3132                                          -1, -1, -1, 0)), 0)
3133        self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
3134                                          -1, -1, -1, 1234)), -1234)
3135
3136    def test_parsedate_y2k(self):
3137        """Test for parsing a date with a two-digit year.
3138
3139        Parsing a date with a two-digit year should return the correct
3140        four-digit year. RFC822 allows two-digit years, but RFC2822 (which
3141        obsoletes RFC822) requires four-digit years.
3142
3143        """
3144        self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
3145                         utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
3146        self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
3147                         utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
3148
3149    def test_parseaddr_empty(self):
3150        self.assertEqual(utils.parseaddr('<>'), ('', ''))
3151        self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
3152
3153    def test_parseaddr_multiple_domains(self):
3154        self.assertEqual(
3155            utils.parseaddr('a@b@c'),
3156            ('', '')
3157        )
3158        self.assertEqual(
3159            utils.parseaddr('[email protected]@c'),
3160            ('', '')
3161        )
3162        self.assertEqual(
3163            utils.parseaddr('[email protected]@c'),
3164            ('', '')
3165        )
3166
3167    def test_noquote_dump(self):
3168        self.assertEqual(
3169            utils.formataddr(('A Silly Person', '[email protected]')),
3170            'A Silly Person <[email protected]>')
3171
3172    def test_escape_dump(self):
3173        self.assertEqual(
3174            utils.formataddr(('A (Very) Silly Person', '[email protected]')),
3175            r'"A (Very) Silly Person" <[email protected]>')
3176        self.assertEqual(
3177            utils.parseaddr(r'"A \(Very\) Silly Person" <[email protected]>'),
3178            ('A (Very) Silly Person', '[email protected]'))
3179        a = r'A \(Special\) Person'
3180        b = '[email protected]'
3181        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
3182
3183    def test_escape_backslashes(self):
3184        self.assertEqual(
3185            utils.formataddr((r'Arthur \Backslash\ Foobar', '[email protected]')),
3186            r'"Arthur \\Backslash\\ Foobar" <[email protected]>')
3187        a = r'Arthur \Backslash\ Foobar'
3188        b = '[email protected]'
3189        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
3190
3191    def test_quotes_unicode_names(self):
3192        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3193        name = "H\u00e4ns W\u00fcrst"
3194        addr = '[email protected]'
3195        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <[email protected]>"
3196        latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <[email protected]>"
3197        self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
3198        self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
3199            latin1_quopri)
3200
3201    def test_accepts_any_charset_like_object(self):
3202        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3203        name = "H\u00e4ns W\u00fcrst"
3204        addr = '[email protected]'
3205        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <[email protected]>"
3206        foobar = "FOOBAR"
3207        class CharsetMock:
3208            def header_encode(self, string):
3209                return foobar
3210        mock = CharsetMock()
3211        mock_expected = "%s <%s>" % (foobar, addr)
3212        self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
3213        self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
3214            utf8_base64)
3215
3216    def test_invalid_charset_like_object_raises_error(self):
3217        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3218        name = "H\u00e4ns W\u00fcrst"
3219        addr = '[email protected]'
3220        # An object without a header_encode method:
3221        bad_charset = object()
3222        self.assertRaises(AttributeError, utils.formataddr, (name, addr),
3223            bad_charset)
3224
3225    def test_unicode_address_raises_error(self):
3226        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
3227        addr = 'pers\[email protected]'
3228        self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
3229        self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
3230
3231    def test_name_with_dot(self):
3232        x = 'John X. Doe <[email protected]>'
3233        y = '"John X. Doe" <[email protected]>'
3234        a, b = ('John X. Doe', '[email protected]')
3235        self.assertEqual(utils.parseaddr(x), (a, b))
3236        self.assertEqual(utils.parseaddr(y), (a, b))
3237        # formataddr() quotes the name if there's a dot in it
3238        self.assertEqual(utils.formataddr((a, b)), y)
3239
3240    def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
3241        # issue 10005.  Note that in the third test the second pair of
3242        # backslashes is not actually a quoted pair because it is not inside a
3243        # comment or quoted string: the address being parsed has a quoted
3244        # string containing a quoted backslash, followed by 'example' and two
3245        # backslashes, followed by another quoted string containing a space and
3246        # the word 'example'.  parseaddr copies those two backslashes
3247        # literally.  Per rfc5322 this is not technically correct since a \ may
3248        # not appear in an address outside of a quoted string.  It is probably
3249        # a sensible Postel interpretation, though.
3250        eq = self.assertEqual
3251        eq(utils.parseaddr('""example" example"@example.com'),
3252          ('', '""example" example"@example.com'))
3253        eq(utils.parseaddr('"\\"example\\" example"@example.com'),
3254          ('', '"\\"example\\" example"@example.com'))
3255        eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
3256          ('', '"\\\\"example\\\\" example"@example.com'))
3257
3258    def test_parseaddr_preserves_spaces_in_local_part(self):
3259        # issue 9286.  A normal RFC5322 local part should not contain any
3260        # folding white space, but legacy local parts can (they are a sequence
3261        # of atoms, not dotatoms).  On the other hand we strip whitespace from
3262        # before the @ and around dots, on the assumption that the whitespace
3263        # around the punctuation is a mistake in what would otherwise be
3264        # an RFC5322 local part.  Leading whitespace is, usual, stripped as well.
3265        self.assertEqual(('', "merwok [email protected]"),
3266            utils.parseaddr("merwok [email protected]"))
3267        self.assertEqual(('', "merwok  [email protected]"),
3268            utils.parseaddr("merwok  [email protected]"))
3269        self.assertEqual(('', "merwok  [email protected]"),
3270            utils.parseaddr(" merwok  wok  @xample.com"))
3271        self.assertEqual(('', 'merwok"wok"  [email protected]'),
3272            utils.parseaddr('merwok"wok"  [email protected]'))
3273        self.assertEqual(('', '[email protected]'),
3274            utils.parseaddr('merwok. wok .  [email protected]'))
3275
3276    def test_formataddr_does_not_quote_parens_in_quoted_string(self):
3277        addr = ("'[email protected]' ([email protected])",
3278                '[email protected]')
3279        addrstr = ('"\'[email protected]\' '
3280                            '([email protected])" <[email protected]>')
3281        self.assertEqual(utils.parseaddr(addrstr), addr)
3282        self.assertEqual(utils.formataddr(addr), addrstr)
3283
3284
3285    def test_multiline_from_comment(self):
3286        x = """\
3287Foo
3288\tBar <[email protected]>"""
3289        self.assertEqual(utils.parseaddr(x), ('Foo Bar', '[email protected]'))
3290
3291    def test_quote_dump(self):
3292        self.assertEqual(
3293            utils.formataddr(('A Silly; Person', '[email protected]')),
3294            r'"A Silly; Person" <[email protected]>')
3295
3296    def test_charset_richcomparisons(self):
3297        eq = self.assertEqual
3298        ne = self.assertNotEqual
3299        cset1 = Charset()
3300        cset2 = Charset()
3301        eq(cset1, 'us-ascii')
3302        eq(cset1, 'US-ASCII')
3303        eq(cset1, 'Us-AsCiI')
3304        eq('us-ascii', cset1)
3305        eq('US-ASCII', cset1)
3306        eq('Us-AsCiI', cset1)
3307        ne(cset1, 'usascii')
3308        ne(cset1, 'USASCII')
3309        ne(cset1, 'UsAsCiI')
3310        ne('usascii', cset1)
3311        ne('USASCII', cset1)
3312        ne('UsAsCiI', cset1)
3313        eq(cset1, cset2)
3314        eq(cset2, cset1)
3315
3316    def test_getaddresses(self):
3317        eq = self.assertEqual
3318        eq(utils.getaddresses(['[email protected] (Al Person)',
3319                               'Bud Person <[email protected]>']),
3320           [('Al Person', '[email protected]'),
3321            ('Bud Person', '[email protected]')])
3322
3323    def test_getaddresses_nasty(self):
3324        eq = self.assertEqual
3325        eq(utils.getaddresses(['foo: ;']), [('', '')])
3326        eq(utils.getaddresses(
3327           ['[]*-- =~$']),
3328           [('', ''), ('', ''), ('', '*--')])
3329        eq(utils.getaddresses(
3330           ['foo: ;', '"Jason R. Mastaler" <[email protected]>']),
3331           [('', ''), ('Jason R. Mastaler', '[email protected]')])
3332
3333    def test_getaddresses_embedded_comment(self):
3334        """Test proper handling of a nested comment"""
3335        eq = self.assertEqual
3336        addrs = utils.getaddresses(['User ((nested comment)) <[email protected]>'])
3337        eq(addrs[0][1], '[email protected]')
3338
3339    def test_getaddresses_header_obj(self):
3340        """Test the handling of a Header object."""
3341        addrs = utils.getaddresses([Header('Al Person <[email protected]>')])
3342        self.assertEqual(addrs[0][1], '[email protected]')
3343
3344    @threading_helper.requires_working_threading()
3345    def test_make_msgid_collisions(self):
3346        # Test make_msgid uniqueness, even with multiple threads
3347        class MsgidsThread(Thread):
3348            def run(self):
3349                # generate msgids for 3 seconds
3350                self.msgids = []
3351                append = self.msgids.append
3352                make_msgid = utils.make_msgid
3353                clock = time.monotonic
3354                tfin = clock() + 3.0
3355                while clock() < tfin:
3356                    append(make_msgid(domain='testdomain-string'))
3357
3358        threads = [MsgidsThread() for i in range(5)]
3359        with threading_helper.start_threads(threads):
3360            pass
3361        all_ids = sum([t.msgids for t in threads], [])
3362        self.assertEqual(len(set(all_ids)), len(all_ids))
3363
3364    def test_utils_quote_unquote(self):
3365        eq = self.assertEqual
3366        msg = Message()
3367        msg.add_header('content-disposition', 'attachment',
3368                       filename='foo\\wacky"name')
3369        eq(msg.get_filename(), 'foo\\wacky"name')
3370
3371    def test_get_body_encoding_with_bogus_charset(self):
3372        charset = Charset('not a charset')
3373        self.assertEqual(charset.get_body_encoding(), 'base64')
3374
3375    def test_get_body_encoding_with_uppercase_charset(self):
3376        eq = self.assertEqual
3377        msg = Message()
3378        msg['Content-Type'] = 'text/plain; charset=UTF-8'
3379        eq(msg['content-type'], 'text/plain; charset=UTF-8')
3380        charsets = msg.get_charsets()
3381        eq(len(charsets), 1)
3382        eq(charsets[0], 'utf-8')
3383        charset = Charset(charsets[0])
3384        eq(charset.get_body_encoding(), 'base64')
3385        msg.set_payload(b'hello world', charset=charset)
3386        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3387        eq(msg.get_payload(decode=True), b'hello world')
3388        eq(msg['content-transfer-encoding'], 'base64')
3389        # Try another one
3390        msg = Message()
3391        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3392        charsets = msg.get_charsets()
3393        eq(len(charsets), 1)
3394        eq(charsets[0], 'us-ascii')
3395        charset = Charset(charsets[0])
3396        eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3397        msg.set_payload('hello world', charset=charset)
3398        eq(msg.get_payload(), 'hello world')
3399        eq(msg['content-transfer-encoding'], '7bit')
3400
3401    def test_charsets_case_insensitive(self):
3402        lc = Charset('us-ascii')
3403        uc = Charset('US-ASCII')
3404        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3405
3406    def test_partial_falls_inside_message_delivery_status(self):
3407        eq = self.ndiffAssertEqual
3408        # The Parser interface provides chunks of data to FeedParser in 8192
3409        # byte gulps.  SF bug #1076485 found one of those chunks inside
3410        # message/delivery-status header block, which triggered an
3411        # unreadline() of NeedMoreData.
3412        msg = self._msgobj('msg_43.txt')
3413        sfp = StringIO()
3414        iterators._structure(msg, sfp)
3415        eq(sfp.getvalue(), """\
3416multipart/report
3417    text/plain
3418    message/delivery-status
3419        text/plain
3420        text/plain
3421        text/plain
3422        text/plain
3423        text/plain
3424        text/plain
3425        text/plain
3426        text/plain
3427        text/plain
3428        text/plain
3429        text/plain
3430        text/plain
3431        text/plain
3432        text/plain
3433        text/plain
3434        text/plain
3435        text/plain
3436        text/plain
3437        text/plain
3438        text/plain
3439        text/plain
3440        text/plain
3441        text/plain
3442        text/plain
3443        text/plain
3444        text/plain
3445    text/rfc822-headers
3446""")
3447
3448    def test_make_msgid_domain(self):
3449        self.assertEqual(
3450            email.utils.make_msgid(domain='testdomain-string')[-19:],
3451            '@testdomain-string>')
3452
3453    def test_make_msgid_idstring(self):
3454        self.assertEqual(
3455            email.utils.make_msgid(idstring='test-idstring',
3456                domain='testdomain-string')[-33:],
3457            '.test-idstring@testdomain-string>')
3458
3459    def test_make_msgid_default_domain(self):
3460        with patch('socket.getfqdn') as mock_getfqdn:
3461            mock_getfqdn.return_value = domain = 'pythontest.example.com'
3462            self.assertTrue(
3463                email.utils.make_msgid().endswith(
3464                    '@' + domain + '>'))
3465
3466    def test_Generator_linend(self):
3467        # Issue 14645.
3468        with openfile('msg_26.txt', encoding="utf-8", newline='\n') as f:
3469            msgtxt = f.read()
3470        msgtxt_nl = msgtxt.replace('\r\n', '\n')
3471        msg = email.message_from_string(msgtxt)
3472        s = StringIO()
3473        g = email.generator.Generator(s)
3474        g.flatten(msg)
3475        self.assertEqual(s.getvalue(), msgtxt_nl)
3476
3477    def test_BytesGenerator_linend(self):
3478        # Issue 14645.
3479        with openfile('msg_26.txt', encoding="utf-8", newline='\n') as f:
3480            msgtxt = f.read()
3481        msgtxt_nl = msgtxt.replace('\r\n', '\n')
3482        msg = email.message_from_string(msgtxt_nl)
3483        s = BytesIO()
3484        g = email.generator.BytesGenerator(s)
3485        g.flatten(msg, linesep='\r\n')
3486        self.assertEqual(s.getvalue().decode('ascii'), msgtxt)
3487
3488    def test_BytesGenerator_linend_with_non_ascii(self):
3489        # Issue 14645.
3490        with openfile('msg_26.txt', 'rb') as f:
3491            msgtxt = f.read()
3492        msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
3493        msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
3494        msg = email.message_from_bytes(msgtxt_nl)
3495        s = BytesIO()
3496        g = email.generator.BytesGenerator(s)
3497        g.flatten(msg, linesep='\r\n')
3498        self.assertEqual(s.getvalue(), msgtxt)
3499
3500    def test_mime_classes_policy_argument(self):
3501        with openfile('sndhdr.au', 'rb') as fp:
3502            audiodata = fp.read()
3503        with openfile('python.gif', 'rb') as fp:
3504            bindata = fp.read()
3505        classes = [
3506            (MIMEApplication, ('',)),
3507            (MIMEAudio, (audiodata,)),
3508            (MIMEImage, (bindata,)),
3509            (MIMEMessage, (Message(),)),
3510            (MIMENonMultipart, ('multipart', 'mixed')),
3511            (MIMEText, ('',)),
3512        ]
3513        for cls, constructor in classes:
3514            with self.subTest(cls=cls.__name__, policy='compat32'):
3515                m = cls(*constructor)
3516                self.assertIs(m.policy, email.policy.compat32)
3517            with self.subTest(cls=cls.__name__, policy='default'):
3518                m = cls(*constructor, policy=email.policy.default)
3519                self.assertIs(m.policy, email.policy.default)
3520
3521
3522# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Covers the helpers in email.iterators and the line buffering done by
    # email.feedparser.BufferedSubFile.

    def test_body_line_iterator(self):
        # First a simple non-multipart message
        simple = self._msgobj('msg_01.txt')
        body_lines = list(iterators.body_line_iterator(simple))
        self.assertEqual(len(body_lines), 6)
        self.ndiffAssertEqual(EMPTYSTRING.join(body_lines),
                              simple.get_payload())
        # Now a more complicated multipart
        multipart = self._msgobj('msg_02.txt')
        body_lines = list(iterators.body_line_iterator(multipart))
        self.assertEqual(len(body_lines), 43)
        with openfile('msg_19.txt', encoding="utf-8") as fp:
            self.ndiffAssertEqual(EMPTYSTRING.join(body_lines), fp.read())

    def test_typed_subpart_iterator(self):
        # Selecting by main type only: two subparts are expected.
        msg = self._msgobj('msg_04.txt')
        payloads = [
            part.get_payload()
            for part in iterators.typed_subpart_iterator(msg, 'text')
        ]
        self.assertEqual(len(payloads), 2)
        self.assertEqual(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        # Selecting by main type and subtype: one subpart is expected.
        msg = self._msgobj('msg_03.txt')
        payloads = [
            part.get_payload()
            for part in iterators.typed_subpart_iterator(msg, 'text', 'plain')
        ]
        self.assertEqual(len(payloads), 1)
        self.assertEqual(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        from email.feedparser import BufferedSubFile, NeedMoreData
        # Each entry pairs a pushed chunk with the number of lines that
        # must be readable right after that push.
        pushes = [
            ("a\r \n",  2),
            ("b",       0),
            ("c\n",     1),
            ("",        0),
            ("d\r\n",   1),
            ("e\r",     0),
            ("\nf",     1),
            ("\r\n",    1),
          ]
        buffered = BufferedSubFile()
        emitted = []
        expected_total = 0
        for chunk, expected_count in pushes:
            buffered.push(chunk)
            expected_total += expected_count
            # Drain every line that is available at this point.
            ready = list(iter(buffered.readline, NeedMoreData))
            emitted.extend(ready)
            self.assertEqual(expected_count, len(ready))
        self.assertEqual(len(emitted), expected_total)
        # Nothing may be lost or invented along the way.
        pushed_text = ''.join([chunk for chunk, _ in pushes])
        self.assertEqual(pushed_text, ''.join(emitted))

    def test_push_random(self):
        # Random text pushed in fixed-size chunks must come back as the
        # same lines that str.splitlines(True) produces.
        from email.feedparser import BufferedSubFile, NeedMoreData

        length = 10000
        chunksize = 5
        alphabet = 'abcd \t\r\n'

        text = ''.join(choice(alphabet) for _ in range(length)) + '\n'
        expected_lines = text.splitlines(True)

        buffered = BufferedSubFile()
        collected = []
        for start in range(0, len(text), chunksize):
            buffered.push(text[start:start+chunksize])
            collected.extend(iter(buffered.readline, NeedMoreData))
        self.assertEqual(collected, expected_lines)
3624
3625
class TestFeedParsers(TestEmailBase):
    # Feeds text to FeedParser in chunks and checks the resulting message.

    def parse(self, chunks):
        # Feed each item of *chunks*, in order, to a fresh FeedParser and
        # return the message produced by close().
        incremental = FeedParser()
        for piece in chunks:
            incremental.feed(piece)
        return incremental.close()

    def test_empty_header_name_handled(self):
        # Issue 19996
        # A line with an empty field name must not disturb its neighbours.
        # Note that a str is passed here, so parse() feeds it one character
        # at a time.
        parsed = self.parse("First: val\n: bad\nSecond: val")
        self.assertEqual(parsed['First'], 'val')
        self.assertEqual(parsed['Second'], 'val')

    def test_newlines(self):
        # '\n', '\r' and '\r\n' all end a header line, with or without a
        # final line ending ...
        parsed = self.parse(['a:\nb:\rc:\r\nd:\n'])
        self.assertEqual(parsed.keys(), ['a', 'b', 'c', 'd'])
        parsed = self.parse(['a:\nb:\rc:\r\nd:'])
        self.assertEqual(parsed.keys(), ['a', 'b', 'c', 'd'])
        # ... and also when a chunk boundary falls next to the line ending.
        parsed = self.parse(['a:\rb', 'c:\n'])
        self.assertEqual(parsed.keys(), ['a', 'bc'])
        parsed = self.parse(['a:\r', 'b:\n'])
        self.assertEqual(parsed.keys(), ['a', 'b'])
        parsed = self.parse(['a:\r', '\nb:\n'])
        self.assertEqual(parsed.keys(), ['a', 'b'])

        # Only CR and LF should break header fields
        parsed = self.parse(['a:\x85b:\u2028c:\n'])
        self.assertEqual(parsed.items(), [('a', '\x85b:\u2028c:')])
        parsed = self.parse(['a:\r', 'b:\x85', 'c:\n'])
        self.assertEqual(parsed.items(), [('a', ''), ('b', '\x85c:')])

    def test_long_lines(self):
        # Expected peak memory use on 32-bit platform: 6*N*M bytes.
        M, N = 1000, 20000
        filler = 'x'*M
        # A body made of N chunks that contain no line ending.
        parsed = self.parse(['a:b\n\n'] + [filler] * N)
        self.assertEqual(parsed.items(), [('a', 'b')])
        self.assertEqual(parsed.get_payload(), 'x'*M*N)
        # The same, with CR-only line endings after the header.
        parsed = self.parse(['a:b\r\r'] + [filler] * N)
        self.assertEqual(parsed.items(), [('a', 'b')])
        self.assertEqual(parsed.get_payload(), 'x'*M*N)
        # Each chunk ends in '\x85'; the payload must keep them all.
        parsed = self.parse(['a:b\r\r'] + ['x'*M+'\x85'] * N)
        self.assertEqual(parsed.items(), [('a', 'b')])
        self.assertEqual(parsed.get_payload(), ('x'*M+'\x85')*N)
        # A single header value spread over N chunks.
        parsed = self.parse(['a:\r', 'b: '] + [filler] * N)
        self.assertEqual(parsed.items(), [('a', ''), ('b', 'x'*M*N)])
3672
3673
3674class TestParsers(TestEmailBase):
3675
3676    def test_header_parser(self):
3677        eq = self.assertEqual
3678        # Parse only the headers of a complex multipart MIME document
3679        with openfile('msg_02.txt', encoding="utf-8") as fp:
3680            msg = HeaderParser().parse(fp)
3681        eq(msg['from'], '[email protected]')
3682        eq(msg['to'], '[email protected]')
3683        eq(msg.get_content_type(), 'multipart/mixed')
3684        self.assertFalse(msg.is_multipart())
3685        self.assertIsInstance(msg.get_payload(), str)
3686
3687    def test_bytes_header_parser(self):
3688        eq = self.assertEqual
3689        # Parse only the headers of a complex multipart MIME document
3690        with openfile('msg_02.txt', 'rb') as fp:
3691            msg = email.parser.BytesHeaderParser().parse(fp)
3692        eq(msg['from'], '[email protected]')
3693        eq(msg['to'], '[email protected]')
3694        eq(msg.get_content_type(), 'multipart/mixed')
3695        self.assertFalse(msg.is_multipart())
3696        self.assertIsInstance(msg.get_payload(), str)
3697        self.assertIsInstance(msg.get_payload(decode=True), bytes)
3698
3699    def test_bytes_parser_does_not_close_file(self):
3700        with openfile('msg_02.txt', 'rb') as fp:
3701            email.parser.BytesParser().parse(fp)
3702            self.assertFalse(fp.closed)
3703
3704    def test_bytes_parser_on_exception_does_not_close_file(self):
3705        with openfile('msg_15.txt', 'rb') as fp:
3706            bytesParser = email.parser.BytesParser
3707            self.assertRaises(email.errors.StartBoundaryNotFoundDefect,
3708                              bytesParser(policy=email.policy.strict).parse,
3709                              fp)
3710            self.assertFalse(fp.closed)
3711
3712    def test_parser_does_not_close_file(self):
3713        with openfile('msg_02.txt', encoding="utf-8") as fp:
3714            email.parser.Parser().parse(fp)
3715            self.assertFalse(fp.closed)
3716
3717    def test_parser_on_exception_does_not_close_file(self):
3718        with openfile('msg_15.txt', encoding="utf-8") as fp:
3719            parser = email.parser.Parser
3720            self.assertRaises(email.errors.StartBoundaryNotFoundDefect,
3721                              parser(policy=email.policy.strict).parse, fp)
3722            self.assertFalse(fp.closed)
3723
3724    def test_whitespace_continuation(self):
3725        eq = self.assertEqual
3726        # This message contains a line after the Subject: header that has only
3727        # whitespace, but it is not empty!
3728        msg = email.message_from_string("""\
3729From: [email protected]
3730To: [email protected]
3731Subject: the next line has a space on it
3732\x20
3733Date: Mon, 8 Apr 2002 15:09:19 -0400
3734Message-ID: spam
3735
3736Here's the message body
3737""")
3738        eq(msg['subject'], 'the next line has a space on it\n ')
3739        eq(msg['message-id'], 'spam')
3740        eq(msg.get_payload(), "Here's the message body\n")
3741
3742    def test_whitespace_continuation_last_header(self):
3743        eq = self.assertEqual
3744        # Like the previous test, but the subject line is the last
3745        # header.
3746        msg = email.message_from_string("""\
3747From: [email protected]
3748To: [email protected]
3749Date: Mon, 8 Apr 2002 15:09:19 -0400
3750Message-ID: spam
3751Subject: the next line has a space on it
3752\x20
3753
3754Here's the message body
3755""")
3756        eq(msg['subject'], 'the next line has a space on it\n ')
3757        eq(msg['message-id'], 'spam')
3758        eq(msg.get_payload(), "Here's the message body\n")
3759
3760    def test_crlf_separation(self):
3761        eq = self.assertEqual
3762        with openfile('msg_26.txt', encoding="utf-8", newline='\n') as fp:
3763            msg = Parser().parse(fp)
3764        eq(len(msg.get_payload()), 2)
3765        part1 = msg.get_payload(0)
3766        eq(part1.get_content_type(), 'text/plain')
3767        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
3768        part2 = msg.get_payload(1)
3769        eq(part2.get_content_type(), 'application/riscos')
3770
3771    def test_crlf_flatten(self):
3772        # Using newline='\n' preserves the crlfs in this input file.
3773        with openfile('msg_26.txt', encoding="utf-8", newline='\n') as fp:
3774            text = fp.read()
3775        msg = email.message_from_string(text)
3776        s = StringIO()
3777        g = Generator(s)
3778        g.flatten(msg, linesep='\r\n')
3779        self.assertEqual(s.getvalue(), text)
3780
    # Lift the length limit on diffs shown by failing assertions in this
    # class (presumably the unittest.TestCase attribute -- confirm that
    # TestEmailBase derives from unittest.TestCase).
    maxDiff = None
3782
3783    def test_multipart_digest_with_extra_mime_headers(self):
3784        eq = self.assertEqual
3785        neq = self.ndiffAssertEqual
3786        with openfile('msg_28.txt', encoding="utf-8") as fp:
3787            msg = email.message_from_file(fp)
3788        # Structure is:
3789        # multipart/digest
3790        #   message/rfc822
3791        #     text/plain
3792        #   message/rfc822
3793        #     text/plain
3794        eq(msg.is_multipart(), 1)
3795        eq(len(msg.get_payload()), 2)
3796        part1 = msg.get_payload(0)
3797        eq(part1.get_content_type(), 'message/rfc822')
3798        eq(part1.is_multipart(), 1)
3799        eq(len(part1.get_payload()), 1)
3800        part1a = part1.get_payload(0)
3801        eq(part1a.is_multipart(), 0)
3802        eq(part1a.get_content_type(), 'text/plain')
3803        neq(part1a.get_payload(), 'message 1\n')
3804        # next message/rfc822
3805        part2 = msg.get_payload(1)
3806        eq(part2.get_content_type(), 'message/rfc822')
3807        eq(part2.is_multipart(), 1)
3808        eq(len(part2.get_payload()), 1)
3809        part2a = part2.get_payload(0)
3810        eq(part2a.is_multipart(), 0)
3811        eq(part2a.get_content_type(), 'text/plain')
3812        neq(part2a.get_payload(), 'message 2\n')
3813
3814    def test_three_lines(self):
3815        # A bug report by Andrew McNamara
3816        lines = ['From: Andrew Person <[email protected]',
3817                 'Subject: Test',
3818                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
3819        msg = email.message_from_string(NL.join(lines))
3820        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
3821
3822    def test_strip_line_feed_and_carriage_return_in_headers(self):
3823        eq = self.assertEqual
3824        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
3825        value1 = 'text'
3826        value2 = 'more text'
3827        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
3828            value1, value2)
3829        msg = email.message_from_string(m)
3830        eq(msg.get('Header'), value1)
3831        eq(msg.get('Next-Header'), value2)
3832
3833    def test_rfc2822_header_syntax(self):
3834        eq = self.assertEqual
3835        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3836        msg = email.message_from_string(m)
3837        eq(len(msg), 3)
3838        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
3839        eq(msg.get_payload(), 'body')
3840
3841    def test_rfc2822_space_not_allowed_in_header(self):
3842        eq = self.assertEqual
3843        m = '>From [email protected] 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3844        msg = email.message_from_string(m)
3845        eq(len(msg.keys()), 0)
3846
3847    def test_rfc2822_one_character_header(self):
3848        eq = self.assertEqual
3849        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
3850        msg = email.message_from_string(m)
3851        headers = msg.keys()
3852        headers.sort()
3853        eq(headers, ['A', 'B', 'CC'])
3854        eq(msg.get_payload(), 'body')
3855
3856    def test_CRLFLF_at_end_of_part(self):
3857        # issue 5610: feedparser should not eat two chars from body part ending
3858        # with "\r\n\n".
3859        m = (
3860            "From: [email protected]\n"
3861            "To: baz\n"
3862            "Mime-Version: 1.0\n"
3863            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
3864            "\n"
3865            "--BOUNDARY\n"
3866            "Content-Type: text/plain\n"
3867            "\n"
3868            "body ending with CRLF newline\r\n"
3869            "\n"
3870            "--BOUNDARY--\n"
3871          )
3872        msg = email.message_from_string(m)
3873        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
3874
3875
class Test8BitBytesHandling(TestEmailBase):
    """Parsing, querying and flattening messages that carry raw 8bit bytes.

    In Python3 all input is string, but that doesn't work if the actual input
    uses an 8bit transfer encoding.  To hack around that, in email 5.1 byte
    streams are decoded using the surrogateescape error handler, and
    reconverted to binary at appropriate places if surrogates are detected.
    This doesn't allow headers with 8bit bytes to be transformed (they get
    munged), but it does allow them to be parsed and preserved, and it allows
    body parts that use an 8bit CTE to be decoded.
    """

    maxDiff = None

    bodytest_msg = textwrap.dedent("""\
        From: [email protected]
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def _parse_body(self, charset, cte, bodyline):
        """Fill in bodytest_msg, encode it as utf-8 and parse the bytes."""
        text = self.bodytest_msg.format(charset=charset,
                                        cte=cte,
                                        bodyline=bodyline)
        return email.message_from_bytes(text.encode('utf-8'))

    def test_known_8bit_CTE(self):
        """An 8bit body in a known charset is available as str and bytes."""
        parsed = self._parse_body('utf-8', '8bit', 'pöstal')
        self.assertEqual(parsed.get_payload(), "pöstal\n")
        self.assertEqual(parsed.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        """With an unknown charset the str payload uses replacement chars."""
        parsed = self._parse_body('notavalidcharset', '8bit', 'pöstal')
        self.assertEqual(parsed.get_payload(), "p\uFFFD\uFFFDstal\n")
        self.assertEqual(parsed.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        """8bit bytes inside a quoted-printable body are passed through."""
        # This is non-RFC compliant data...without 'decode' the library code
        # decodes the body using the charset from the headers, and because the
        # source byte really is utf-8 this works.  This is likely to fail
        # against real dirty data (ie: produce mojibake), but the data is
        # invalid anyway so it is as good a guess as any.  But this means that
        # this test just confirms the current behavior; that behavior is not
        # necessarily the best possible behavior.  With 'decode' it is
        # returning the raw bytes, so that test should be of correct behavior,
        # or at least produce the same result that email4 did.
        parsed = self._parse_body('utf-8', 'quoted-printable', 'p=C3=B6stál')
        self.assertEqual(parsed.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(parsed.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        """Bytes undecodable in the declared charset become U+FFFD."""
        # This is similar to the previous test, but proves that if the 8bit
        # byte is undecodeable in the specified charset, it gets replaced
        # by the unicode 'unknown' character.  Again, this may or may not
        # be the ideal behavior.  Note that if decode=False none of the
        # decoders will get involved, so this is the only test we need
        # for this behavior.
        parsed = self._parse_body('ascii', 'quoted-printable', 'p=C3=B6stál')
        self.assertEqual(parsed.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
        self.assertEqual(parsed.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_8bit_in_base64_body(self):
        """8bit bytes in a base64 body are skipped and recorded as a defect."""
        # If we get 8bit bytes in a base64 body, we can just ignore them
        # as being outside the base64 alphabet and decode anyway.  But
        # we register a defect.
        parsed = self._parse_body('utf-8', 'base64', 'cMO2c3RhbAá=')
        self.assertEqual(parsed.get_payload(decode=True),
                         'pöstal'.encode('utf-8'))
        first_defect = parsed.defects[0]
        self.assertIsInstance(first_defect,
                              errors.InvalidBase64CharactersDefect)

    def test_8bit_in_uuencode_body(self):
        """A uuencode block containing an 8bit byte comes back undecoded."""
        # Sticking an 8bit byte in a uuencode block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        parsed = self._parse_body('utf-8', 'uuencode', '<,.V<W1A; á ')
        self.assertEqual(parsed.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))

    # Pairs of (raw header line, (name, value as printed by str(msg))).
    headertest_headers = (
        ('From: [email protected]', ('From', '[email protected]')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    headertest_msg = ('\n'.join([raw for (raw, _) in headertest_headers]) +
        '\nYes, they are flying.\n').encode('utf-8')

    def test_get_8bit_header(self):
        """get() and item access both render 8bit header bytes as U+FFFD."""
        parsed = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(parsed.get('to')), 'b\uFFFD\uFFFDz')
        self.assertEqual(str(parsed['to']), 'b\uFFFD\uFFFDz')

    def test_print_8bit_headers(self):
        """str(msg) prints 8bit headers as unknown-8bit encoded words."""
        parsed = email.message_from_bytes(self.headertest_msg)
        printed_values = [printed for (_, (_, printed)) in
                          self.headertest_headers]
        template = textwrap.dedent("""\
                            From: {}
                            To: {}
                            Subject: {}
                            From: {}

                            Yes, they are flying.
                            """)
        self.assertEqual(str(parsed), template.format(*printed_values))

    def test_values_with_8bit_headers(self):
        """values() renders every 8bit byte as U+FFFD."""
        parsed = email.message_from_bytes(self.headertest_msg)
        rendered = [str(value) for value in parsed.values()]
        self.assertListEqual(rendered,
                              ['[email protected]',
                               'b\uFFFD\uFFFDz',
                               'Maintenant je vous pr\uFFFD\uFFFDsente mon '
                                   'coll\uFFFD\uFFFDgue, le pouf '
                                   'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                   '\tJean de Baddie',
                               "g\uFFFD\uFFFDst"])

    def test_items_with_8bit_headers(self):
        """items() renders every 8bit byte as U+FFFD."""
        parsed = email.message_from_bytes(self.headertest_msg)
        rendered = [(str(name), str(value))
                    for (name, value) in parsed.items()]
        self.assertListEqual(rendered,
                              [('From', '[email protected]'),
                               ('To', 'b\uFFFD\uFFFDz'),
                               ('Subject', 'Maintenant je vous '
                                  'pr\uFFFD\uFFFDsente '
                                  'mon coll\uFFFD\uFFFDgue, le pouf '
                                  'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                  '\tJean de Baddie'),
                               ('From', 'g\uFFFD\uFFFDst')])

    def test_get_all_with_8bit_headers(self):
        """get_all() renders every 8bit byte as U+FFFD."""
        parsed = email.message_from_bytes(self.headertest_msg)
        rendered = [str(value) for value in parsed.get_all('from')]
        self.assertListEqual(rendered,
                              ['[email protected]',
                               'g\uFFFD\uFFFDst'])

    def test_get_content_type_with_8bit(self):
        """An 8bit byte inside the content type is reported as U+FFFD."""
        parsed = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/pl\xA7in; charset=utf-8
            """).encode('latin-1'))
        self.assertEqual(parsed.get_content_type(), "text/pl\uFFFDin")
        self.assertEqual(parsed.get_content_maintype(), "text")
        self.assertEqual(parsed.get_content_subtype(), "pl\uFFFDin")

    # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
    def test_get_params_with_8bit(self):
        """8bit bytes in parameter names and values show up as U+FFFD."""
        raw = 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1')
        parsed = email.message_from_bytes(raw)
        self.assertEqual(parsed.get_params(header='x-header'),
           [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
        self.assertEqual(parsed.get_param('Foo', header='x-header'),
                         '\uFFFdne')
        # XXX: someday you might be able to get 'b\xa7r', for now you can't.
        self.assertEqual(parsed.get_param('b\xa7r', header='x-header'), None)

    # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
    def test_get_rfc2231_params_with_8bit(self):
        """An 8bit byte in an RFC 2231 value is returned as U+FFFD."""
        parsed = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        self.assertEqual(parsed.get_param('title'),
            ('us-ascii', 'en', 'This is not f\uFFFDn'))

    def test_set_rfc2231_params_with_8bit(self):
        """set_param() can replace an RFC 2231 value that holds 8bit data."""
        parsed = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        parsed.set_param('title', 'test')
        self.assertEqual(parsed.get_param('title'), 'test')

    def test_del_rfc2231_params_with_8bit(self):
        """del_param() can remove an RFC 2231 value that holds 8bit data."""
        parsed = email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
             ).encode('latin-1'))
        parsed.del_param('title')
        self.assertEqual(parsed.get_param('title'), None)
        self.assertEqual(parsed.get_content_maintype(), 'text')

    def test_get_payload_with_8bit_cte_header(self):
        """A CTE header mangled by an 8bit byte leaves the payload as-is."""
        parsed = email.message_from_bytes(textwrap.dedent("""\
            Content-Transfer-Encoding: b\xa7se64
            Content-Type: text/plain; charset=latin-1

            payload
            """).encode('latin-1'))
        self.assertEqual(parsed.get_payload(), 'payload\n')
        self.assertEqual(parsed.get_payload(decode=True), b'payload\n')

    non_latin_bin_msg = textwrap.dedent("""\
        From: [email protected]
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        """BytesGenerator reproduces an 8bit message byte for byte."""
        parsed = email.message_from_bytes(self.non_latin_bin_msg)
        sink = BytesIO()
        BytesGenerator(sink).flatten(parsed)
        self.assertEqual(sink.getvalue(), self.non_latin_bin_msg)

    def test_bytes_generator_handles_None_body(self):
        """Flattening an empty Message writes just the header separator."""
        #Issue 11019
        empty = email.message.Message()
        sink = BytesIO()
        BytesGenerator(sink).flatten(empty)
        self.assertEqual(sink.getvalue(), b"\n")

    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
        From: [email protected]
        To: =?unknown-8bit?q?b=C3=A1z?=
        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
         =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
         =?unknown-8bit?q?_Jean_de_Baddie?=
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    # Same text, but with the first two Subject lines joined into one.
    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
    non_latin_bin_msg_as7bit[2:4] = [
        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
         'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)

    def test_generator_handles_8bit(self):
        """The str Generator re-encodes 8bit headers and body to 7bit."""
        parsed = email.message_from_bytes(self.non_latin_bin_msg)
        sink = StringIO()
        Generator(sink).flatten(parsed)
        self.assertEqual(sink.getvalue(),
                         self.non_latin_bin_msg_as7bit_wrapped)

    def test_str_generator_should_not_mutate_msg_when_handling_8bit(self):
        """Bytes output is unchanged by an intervening str flatten."""
        parsed = email.message_from_bytes(self.non_latin_bin_msg)
        before = BytesIO()
        BytesGenerator(before).flatten(parsed)
        Generator(StringIO()).flatten(parsed) # Should not mutate msg!
        after = BytesIO()
        BytesGenerator(after).flatten(parsed)
        self.assertEqual(after.getvalue(), before.getvalue())

    def test_bytes_generator_with_unix_from(self):
        """With unixfrom=True a 'From' line precedes the unchanged message."""
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        parsed = email.message_from_bytes(self.non_latin_bin_msg)
        sink = BytesIO()
        BytesGenerator(sink).flatten(parsed, unixfrom=True)
        envelope, *rest = sink.getvalue().split(b'\n')
        self.assertEqual(envelope.split()[0], b'From')
        self.assertEqual(b'\n'.join(rest), self.non_latin_bin_msg)

    def test_message_from_binary_file(self):
        """BytesParser reads an 8bit message from a file opened in binary."""
        path = 'test.msg'
        self.addCleanup(unlink, path)
        with open(path, 'wb') as testfile:
            testfile.write(self.non_latin_bin_msg)
        with open(path, 'rb') as testfile:
            parsed = email.parser.BytesParser().parse(testfile)
        self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)

    latin_bin_msg = textwrap.dedent("""\
        From: [email protected]
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: [email protected]
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        """str() of a latin-1 8bit message yields quoted-printable."""
        parsed = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        """DecodedGenerator writes the body decoded to unicode text."""
        parsed = email.message_from_bytes(self.latin_bin_msg)
        sink = StringIO()
        DecodedGenerator(sink).flatten(parsed)
        #DecodedHeader output contains an extra blank line compared
        #to the input message.  RDM: not sure if this is a bug or not,
        #but it is not specific to the 8bit->7bit conversion.
        expected = self.latin_bin_msg.decode('latin-1')+'\n'
        self.assertEqual(sink.getvalue(), expected)

    def test_bytes_feedparser(self):
        """BytesFeedParser accepts the message in ten-byte chunks."""
        feeder = email.feedparser.BytesFeedParser()
        data = self.latin_bin_msg
        for start in range(0, len(data), 10):
            feeder.feed(data[start:start+10])
        parsed = feeder.close()
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        """Bytes flattening with a CRLF linesep reproduces a CRLF file."""
        with openfile('msg_26.txt', 'rb') as fp:
            source = fp.read()
        parsed = email.message_from_bytes(source)
        sink = BytesIO()
        BytesGenerator(sink).flatten(parsed, linesep='\r\n')
        self.assertEqual(sink.getvalue(), source)

    def test_8bit_multipart(self):
        """An 8bit multipart message round-trips through BytesGenerator."""
        # Issue 11605
        source = textwrap.dedent("""\
            Date: Fri, 18 Mar 2011 17:15:43 +0100
            To: [email protected]
            From: foodwatch-Newsletter <[email protected]>
            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
            Message-ID: <[email protected]>
            MIME-Version: 1.0
            Content-Type: multipart/alternative;
                    boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/plain; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            Guten Tag, ,

            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
            Nachrichten aus Japan.


            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/html; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
                "http://www.w3.org/TR/html4/loose.dtd">
            <html lang="de">
            <head>
                    <title>foodwatch - Newsletter</title>
            </head>
            <body>
              <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
                 die Nachrichten aus Japan.</p>
            </body>
            </html>
            --b1_76a486bee62b0d200f33dc2ca08220ad--

            """).encode('utf-8')
        parsed = email.message_from_bytes(source)
        sink = BytesIO()
        BytesGenerator(sink).flatten(parsed)
        self.assertEqual(sink.getvalue(), source)

    def test_bytes_generator_b_encoding_linesep(self):
        """A b-encoded header ends with exactly one linesep in bytes output."""
        # Issue 14062: b encoding was tacking on an extra \n.
        message = Message()
        # This has enough non-ascii that it should always end up b encoded.
        message['Subject'] = Header('žluťoučký kůň')
        sink = BytesIO()
        BytesGenerator(sink).flatten(message, linesep='\r\n')
        self.assertEqual(
            sink.getvalue(),
            b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    def test_generator_b_encoding_linesep(self):
        """A b-encoded header ends with exactly one linesep in str output."""
        # Since this broke in ByteGenerator, test Generator for completeness.
        message = Message()
        # This has enough non-ascii that it should always end up b encoded.
        message['Subject'] = Header('žluťoučký kůň')
        sink = StringIO()
        Generator(sink).flatten(message, linesep='\r\n')
        self.assertEqual(
            sink.getvalue(),
            'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
4288
4289
class BaseTestBytesGeneratorIdempotent:
    """Mixin that routes idempotency checks through the bytes API.

    Concrete subclasses supply ``linesep``, ``blinesep`` and
    ``normalize_linesep_regex``.
    """

    maxDiff = None

    def _msgobj(self, filename):
        """Return (message, data) for *filename*, line endings normalized."""
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        # Rewrite the file's line endings to this class's separator first.
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Assert that flattening *msg* with self.linesep yields *data*."""
        sink = BytesIO()
        generator = BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
4306
4307
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                    TestIdempotent):
    """Run the TestIdempotent cases through the bytes API with LF endings."""

    # Every CRLF in the input data is rewritten to a bare LF.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
4313
4314
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    """Run the TestIdempotent cases through the bytes API with CRLF endings."""

    # Every LF not already preceded by CR is rewritten to CRLF.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
4320
4321
4322class TestBase64(unittest.TestCase):
4323    def test_len(self):
4324        eq = self.assertEqual
4325        eq(base64mime.header_length('hello'),
4326           len(base64mime.body_encode(b'hello', eol='')))
4327        for size in range(15):
4328            if   size == 0 : bsize = 0
4329            elif size <= 3 : bsize = 4
4330            elif size <= 6 : bsize = 8
4331            elif size <= 9 : bsize = 12
4332            elif size <= 12: bsize = 16
4333            else           : bsize = 20
4334            eq(base64mime.header_length('x' * size), bsize)
4335
4336    def test_decode(self):
4337        eq = self.assertEqual
4338        eq(base64mime.decode(''), b'')
4339        eq(base64mime.decode('aGVsbG8='), b'hello')
4340
4341    def test_encode(self):
4342        eq = self.assertEqual
4343        eq(base64mime.body_encode(b''), '')
4344        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
4345        # Test the binary flag
4346        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
4347        # Test the maxlinelen arg
4348        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
4349eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
4350eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
4351eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
4352eHh4eCB4eHh4IA==
4353""")
4354        # Test the eol argument
4355        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4356           """\
4357eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
4358eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
4359eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
4360eHh4eCB4eHh4IA==\r
4361""")
4362
4363    def test_header_encode(self):
4364        eq = self.assertEqual
4365        he = base64mime.header_encode
4366        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
4367        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
4368        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
4369        # Test the charset option
4370        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
4371        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
4372
4373
4374class TestQuopri(unittest.TestCase):
4375    def setUp(self):
4376        # Set of characters (as byte integers) that don't need to be encoded
4377        # in headers.
4378        self.hlit = list(chain(
4379            range(ord('a'), ord('z') + 1),
4380            range(ord('A'), ord('Z') + 1),
4381            range(ord('0'), ord('9') + 1),
4382            (c for c in b'!*+-/')))
4383        # Set of characters (as byte integers) that do need to be encoded in
4384        # headers.
4385        self.hnon = [c for c in range(256) if c not in self.hlit]
4386        assert len(self.hlit) + len(self.hnon) == 256
4387        # Set of characters (as byte integers) that don't need to be encoded
4388        # in bodies.
4389        self.blit = list(range(ord(' '), ord('~') + 1))
4390        self.blit.append(ord('\t'))
4391        self.blit.remove(ord('='))
4392        # Set of characters (as byte integers) that do need to be encoded in
4393        # bodies.
4394        self.bnon = [c for c in range(256) if c not in self.blit]
4395        assert len(self.blit) + len(self.bnon) == 256
4396
4397    def test_quopri_header_check(self):
4398        for c in self.hlit:
4399            self.assertFalse(quoprimime.header_check(c),
4400                        'Should not be header quopri encoded: %s' % chr(c))
4401        for c in self.hnon:
4402            self.assertTrue(quoprimime.header_check(c),
4403                            'Should be header quopri encoded: %s' % chr(c))
4404
4405    def test_quopri_body_check(self):
4406        for c in self.blit:
4407            self.assertFalse(quoprimime.body_check(c),
4408                        'Should not be body quopri encoded: %s' % chr(c))
4409        for c in self.bnon:
4410            self.assertTrue(quoprimime.body_check(c),
4411                            'Should be body quopri encoded: %s' % chr(c))
4412
4413    def test_header_quopri_len(self):
4414        eq = self.assertEqual
4415        eq(quoprimime.header_length(b'hello'), 5)
4416        # RFC 2047 chrome is not included in header_length().
4417        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
4418           quoprimime.header_length(b'hello') +
4419           # =?xxx?q?...?= means 10 extra characters
4420           10)
4421        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
4422        # RFC 2047 chrome is not included in header_length().
4423        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
4424           quoprimime.header_length(b'h@e@l@l@o@') +
4425           # =?xxx?q?...?= means 10 extra characters
4426           10)
4427        for c in self.hlit:
4428            eq(quoprimime.header_length(bytes([c])), 1,
4429               'expected length 1 for %r' % chr(c))
4430        for c in self.hnon:
4431            # Space is special; it's encoded to _
4432            if c == ord(' '):
4433                continue
4434            eq(quoprimime.header_length(bytes([c])), 3,
4435               'expected length 3 for %r' % chr(c))
4436        eq(quoprimime.header_length(b' '), 1)
4437
4438    def test_body_quopri_len(self):
4439        eq = self.assertEqual
4440        for c in self.blit:
4441            eq(quoprimime.body_length(bytes([c])), 1)
4442        for c in self.bnon:
4443            eq(quoprimime.body_length(bytes([c])), 3)
4444
4445    def test_quote_unquote_idempotent(self):
4446        for x in range(256):
4447            c = chr(x)
4448            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
4449
4450    def _test_header_encode(self, header, expected_encoded_header, charset=None):
4451        if charset is None:
4452            encoded_header = quoprimime.header_encode(header)
4453        else:
4454            encoded_header = quoprimime.header_encode(header, charset)
4455        self.assertEqual(encoded_header, expected_encoded_header)
4456
4457    def test_header_encode_null(self):
4458        self._test_header_encode(b'', '')
4459
4460    def test_header_encode_one_word(self):
4461        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
4462
4463    def test_header_encode_two_lines(self):
4464        self._test_header_encode(b'hello\nworld',
4465                                '=?iso-8859-1?q?hello=0Aworld?=')
4466
4467    def test_header_encode_non_ascii(self):
4468        self._test_header_encode(b'hello\xc7there',
4469                                '=?iso-8859-1?q?hello=C7there?=')
4470
4471    def test_header_encode_alt_charset(self):
4472        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
4473                charset='iso-8859-2')
4474
4475    def _test_header_decode(self, encoded_header, expected_decoded_header):
4476        decoded_header = quoprimime.header_decode(encoded_header)
4477        self.assertEqual(decoded_header, expected_decoded_header)
4478
4479    def test_header_decode_null(self):
4480        self._test_header_decode('', '')
4481
4482    def test_header_decode_one_word(self):
4483        self._test_header_decode('hello', 'hello')
4484
4485    def test_header_decode_two_lines(self):
4486        self._test_header_decode('hello=0Aworld', 'hello\nworld')
4487
4488    def test_header_decode_non_ascii(self):
4489        self._test_header_decode('hello=C7there', 'hello\xc7there')
4490
4491    def test_header_decode_re_bug_18380(self):
4492        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
4493        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)
4494
4495    def _test_decode(self, encoded, expected_decoded, eol=None):
4496        if eol is None:
4497            decoded = quoprimime.decode(encoded)
4498        else:
4499            decoded = quoprimime.decode(encoded, eol=eol)
4500        self.assertEqual(decoded, expected_decoded)
4501
4502    def test_decode_null_word(self):
4503        self._test_decode('', '')
4504
4505    def test_decode_null_line_null_word(self):
4506        self._test_decode('\r\n', '\n')
4507
4508    def test_decode_one_word(self):
4509        self._test_decode('hello', 'hello')
4510
4511    def test_decode_one_word_eol(self):
4512        self._test_decode('hello', 'hello', eol='X')
4513
4514    def test_decode_one_line(self):
4515        self._test_decode('hello\r\n', 'hello\n')
4516
4517    def test_decode_one_line_lf(self):
4518        self._test_decode('hello\n', 'hello\n')
4519
4520    def test_decode_one_line_cr(self):
4521        self._test_decode('hello\r', 'hello\n')
4522
4523    def test_decode_one_line_nl(self):
4524        self._test_decode('hello\n', 'helloX', eol='X')
4525
4526    def test_decode_one_line_crnl(self):
4527        self._test_decode('hello\r\n', 'helloX', eol='X')
4528
4529    def test_decode_one_line_one_word(self):
4530        self._test_decode('hello\r\nworld', 'hello\nworld')
4531
4532    def test_decode_one_line_one_word_eol(self):
4533        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
4534
4535    def test_decode_two_lines(self):
4536        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
4537
4538    def test_decode_two_lines_eol(self):
4539        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
4540
4541    def test_decode_one_long_line(self):
4542        self._test_decode('Spam' * 250, 'Spam' * 250)
4543
4544    def test_decode_one_space(self):
4545        self._test_decode(' ', '')
4546
4547    def test_decode_multiple_spaces(self):
4548        self._test_decode(' ' * 5, '')
4549
4550    def test_decode_one_line_trailing_spaces(self):
4551        self._test_decode('hello    \r\n', 'hello\n')
4552
4553    def test_decode_two_lines_trailing_spaces(self):
4554        self._test_decode('hello    \r\nworld   \r\n', 'hello\nworld\n')
4555
4556    def test_decode_quoted_word(self):
4557        self._test_decode('=22quoted=20words=22', '"quoted words"')
4558
4559    def test_decode_uppercase_quoting(self):
4560        self._test_decode('ab=CD=EF', 'ab\xcd\xef')
4561
4562    def test_decode_lowercase_quoting(self):
4563        self._test_decode('ab=cd=ef', 'ab\xcd\xef')
4564
4565    def test_decode_soft_line_break(self):
4566        self._test_decode('soft line=\r\nbreak', 'soft linebreak')
4567
4568    def test_decode_false_quoting(self):
4569        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
4570
4571    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
4572        kwargs = {}
4573        if maxlinelen is None:
4574            # Use body_encode's default.
4575            maxlinelen = 76
4576        else:
4577            kwargs['maxlinelen'] = maxlinelen
4578        if eol is None:
4579            # Use body_encode's default.
4580            eol = '\n'
4581        else:
4582            kwargs['eol'] = eol
4583        encoded_body = quoprimime.body_encode(body, **kwargs)
4584        self.assertEqual(encoded_body, expected_encoded_body)
4585        if eol == '\n' or eol == '\r\n':
4586            # We know how to split the result back into lines, so maxlinelen
4587            # can be checked.
4588            for line in encoded_body.splitlines():
4589                self.assertLessEqual(len(line), maxlinelen)
4590
4591    def test_encode_null(self):
4592        self._test_encode('', '')
4593
4594    def test_encode_null_lines(self):
4595        self._test_encode('\n\n', '\n\n')
4596
4597    def test_encode_one_line(self):
4598        self._test_encode('hello\n', 'hello\n')
4599
4600    def test_encode_one_line_crlf(self):
4601        self._test_encode('hello\r\n', 'hello\n')
4602
4603    def test_encode_one_line_eol(self):
4604        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
4605
4606    def test_encode_one_line_eol_after_non_ascii(self):
4607        # issue 20206; see changeset 0cf700464177 for why the encode/decode.
4608        self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'),
4609                          'hello=CF=85\r\n', eol='\r\n')
4610
4611    def test_encode_one_space(self):
4612        self._test_encode(' ', '=20')
4613
4614    def test_encode_one_line_one_space(self):
4615        self._test_encode(' \n', '=20\n')
4616
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
4621
4622    def test_encode_two_lines_one_space(self):
4623        self._test_encode(' \n \n', '=20\n=20\n')
4624
4625    def test_encode_one_word_trailing_spaces(self):
4626        self._test_encode('hello   ', 'hello  =20')
4627
4628    def test_encode_one_line_trailing_spaces(self):
4629        self._test_encode('hello   \n', 'hello  =20\n')
4630
4631    def test_encode_one_word_trailing_tab(self):
4632        self._test_encode('hello  \t', 'hello  =09')
4633
4634    def test_encode_one_line_trailing_tab(self):
4635        self._test_encode('hello  \t\n', 'hello  =09\n')
4636
4637    def test_encode_trailing_space_before_maxlinelen(self):
4638        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
4639
4640    def test_encode_trailing_space_at_maxlinelen(self):
4641        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
4642
4643    def test_encode_trailing_space_beyond_maxlinelen(self):
4644        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
4645
4646    def test_encode_whitespace_lines(self):
4647        self._test_encode(' \n' * 5, '=20\n' * 5)
4648
4649    def test_encode_quoted_equals(self):
4650        self._test_encode('a = b', 'a =3D b')
4651
4652    def test_encode_one_long_string(self):
4653        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
4654
4655    def test_encode_one_long_line(self):
4656        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4657
4658    def test_encode_one_very_long_line(self):
4659        self._test_encode('x' * 200 + '\n',
4660                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
4661
4662    def test_encode_shortest_maxlinelen(self):
4663        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
4664
4665    def test_encode_maxlinelen_too_small(self):
4666        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
4667
4668    def test_encode(self):
4669        eq = self.assertEqual
4670        eq(quoprimime.body_encode(''), '')
4671        eq(quoprimime.body_encode('hello'), 'hello')
4672        # Test the binary flag
4673        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
4674        # Test the maxlinelen arg
4675        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
4676xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
4677 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
4678x xxxx xxxx xxxx xxxx=20""")
4679        # Test the eol argument
4680        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4681           """\
4682xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
4683 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
4684x xxxx xxxx xxxx xxxx=20""")
4685        eq(quoprimime.body_encode("""\
4686one line
4687
4688two line"""), """\
4689one line
4690
4691two line""")
4692
4693
4694
4695# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a throwaway 'fake' charset; remove it
        # (if present) so the module-level registry is left clean.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_charset.header_encode(eight_bit)
        # The same text is encodable once the charset is utf-8.
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        # Try a charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        self.assertEqual('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        utf8 = Charset('utf-8')
        self.assertEqual('aGVsbG8gd29ybGQ=\n',
                         utf8.body_encode(b'hello world'))
        # Try a charset with None body encoding
        ascii_charset = Charset('us-ascii')
        self.assertEqual('hello world',
                         ascii_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake = Charset('fake')
        self.assertEqual('hello world', fake.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # A valid name round-trips through str(); a non-ASCII name is
        # rejected with CharsetError.
        self.assertEqual(str(Charset('us-ascii')), 'us-ascii')
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4750
4751
4752
4753# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        # The appended chunk brings its own leading space; the encoded value
        # is expected to show two spaces at the join.
        header = Header('Hello World!')
        self.ndiffAssertEqual(header.encode(), 'Hello World!')
        header.append(' Goodbye World!')
        self.ndiffAssertEqual(header.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        # Without a leading space on the appended chunk a single space is
        # expected at the join.
        header = Header('Hello World!')
        self.ndiffAssertEqual(header.encode(), 'Hello World!')
        header.append('Goodbye World!')
        self.ndiffAssertEqual(header.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        # Plain text comes back as one chunk with no charset.
        plain = 'no decoding needed'
        self.assertEqual(decode_header(plain), [(plain, None)])

    def test_long(self):
        # Every folded line must respect the requested maximum length.
        limit = 76
        header = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                        maxlinelen=limit)
        folded = header.encode(splitchars=' ')
        for line in folded.split('\n '):
            self.assertLessEqual(len(line), limit)

    def test_multilingual(self):
        # One header built from three chunks in three different charsets.
        german_cs = Charset("iso-8859-1")
        czech_cs = Charset("iso-8859-2")
        utf8_cs = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        header = Header(g_head, german_cs)
        header.append(cz_head, czech_cs)
        header.append(utf8_head, utf8_cs)
        encoded = header.encode(maxlinelen=76)
        self.ndiffAssertEqual(encoded, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back the three original chunks.
        decoded = decode_header(encoded)
        self.ndiffAssertEqual(len(decoded), 3)
        self.ndiffAssertEqual(decoded[0], (g_head, 'iso-8859-1'))
        self.ndiffAssertEqual(decoded[1], (cz_head, 'iso-8859-2'))
        self.ndiffAssertEqual(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        # str() of the header is the whole value as one unicode string.
        as_text = str(header)
        self.ndiffAssertEqual(
            as_text,
            (b'Die Mieter treten hier ein werden mit einem Foerderband '
             b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
             b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
             b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
             b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
             b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
             b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
             b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
             b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
             b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
             b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
             b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
             b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
             b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
             b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
             b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
             ).decode('utf-8'))
        # Test make_header()
        rebuilt = make_header(decode_header(encoded))
        self.ndiffAssertEqual(rebuilt, header)

    def test_empty_header_encode(self):
        # A header with no chunks encodes to the empty string.
        self.assertEqual(Header().encode(), '')

    def test_header_ctor_default_args(self):
        # Header() takes no required arguments and compares equal to ''.
        header = Header()
        self.ndiffAssertEqual(header, '')
        header.append('foo', Charset('iso-8859-1'))
        self.ndiffAssertEqual(header, 'foo')

    def test_explicit_maxlinelen(self):
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # Default settings.
        header = Header(hstr)
        self.ndiffAssertEqual(header.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        self.ndiffAssertEqual(str(header), hstr)
        # With a header_name the first line is expected to break earlier.
        header = Header(hstr, header_name='Subject')
        self.ndiffAssertEqual(header.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        self.ndiffAssertEqual(str(header), hstr)
        # A large explicit maxlinelen: no folding at all.
        header = Header(hstr, maxlinelen=1024, header_name='Subject')
        self.ndiffAssertEqual(header.encode(), hstr)
        self.ndiffAssertEqual(str(header), hstr)

    def test_quopri_splittable(self):
        x = 'xxxx ' * 20
        # Very short lines: each encoded-word holds at most three characters.
        header = Header(charset='iso-8859-1', maxlinelen=20)
        header.append(x)
        encoded = header.encode()
        self.ndiffAssertEqual(encoded, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        self.ndiffAssertEqual(x, str(make_header(decode_header(encoded))))
        # Same text with twice the room per line.
        header = Header(charset='iso-8859-1', maxlinelen=40)
        header.append('xxxx ' * 20)
        encoded = header.encode()
        self.ndiffAssertEqual(encoded, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        self.ndiffAssertEqual(x, str(make_header(decode_header(encoded))))

    def test_base64_splittable(self):
        x = 'xxxx ' * 20
        # Very short lines with a charset whose headers are base64 encoded.
        header = Header(charset='koi8-r', maxlinelen=20)
        header.append(x)
        encoded = header.encode()
        self.ndiffAssertEqual(encoded, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        self.ndiffAssertEqual(x, str(make_header(decode_header(encoded))))
        # Same text with twice the room per line.
        header = Header(charset='koi8-r', maxlinelen=40)
        header.append(x)
        encoded = header.encode()
        self.ndiffAssertEqual(encoded, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        self.ndiffAssertEqual(x, str(make_header(decode_header(encoded))))

    def test_us_ascii_header(self):
        # Plain ASCII survives a decode_header()/make_header() round trip.
        text = 'hello'
        chunks = decode_header(text)
        self.assertEqual(chunks, [('hello', None)])
        rebuilt = make_header(chunks)
        self.assertEqual(text, rebuilt.encode())

    def test_string_charset(self):
        # append() accepts the charset as a plain string.
        header = Header()
        header.append('hello', 'iso-8859-1')
        self.assertEqual(header, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        # For utf-8 the expected encoding differs by content: 'q' for the
        # mostly-ASCII value, 'b' for the all-CJK value.
        mostly_ascii = Header('p\xf6stal', 'utf-8')
        self.assertEqual(mostly_ascii.encode(), '=?utf-8?q?p=C3=B6stal?=')
        all_cjk = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        self.assertEqual(all_cjk.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Undecodable bytes are rejected by default...
        with self.assertRaises(UnicodeError):
            Header(raw)
        header = Header()
        with self.assertRaises(UnicodeError):
            header.append(raw)
        # ...but accepted when errors='replace' is requested.
        replaced = raw.decode('utf-8', 'replace')
        self.assertEqual(str(Header(raw, errors='replace')), replaced)
        header.append(raw, errors='replace')
        self.assertEqual(str(header), replaced)

    def test_escaped_8bit_header(self):
        # The value is text carrying the bad byte as a surrogate escape.
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        escaped = raw.decode('ascii', 'surrogateescape')
        header = Header(escaped, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(
            str(header),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(header),
                         [(raw, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        # The value is handed over directly as bytes.
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        header = Header(raw, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(
            str(header),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(header),
                         [(raw, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        # An unknown-8bit header must survive decode_header()/make_header().
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        header = Header(raw, charset=email.charset.UNKNOWN8BIT)
        rebuilt = email.header.make_header(email.header.decode_header(header))
        self.assertEqual(
            str(rebuilt),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(rebuilt),
                         [(raw, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        # Mutating the list returned by decode_header() must not leak back
        # into the Header object.
        header = Header('test')
        returned = email.header.decode_header(header)
        returned.append(('ascii', 'test2'))
        self.assertEqual(str(header), 'test')

    def test_encoded_adjacent_nonencoded(self):
        # An encoded chunk followed directly by an unencoded one.
        header = Header()
        header.append('hello', 'iso-8859-1')
        header.append('world')
        encoded = header.encode()
        self.assertEqual(encoded, '=?iso-8859-1?q?hello?= world')
        rebuilt = make_header(decode_header(encoded))
        self.assertEqual(rebuilt.encode(), encoded)

    def test_whitespace_keeper(self):
        # Two adjacent koi8-r encoded-words are expected to merge into one
        # chunk, while the whitespace around plain text is kept.
        raw = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(raw)
        self.assertEqual(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
        rebuilt = make_header(parts)
        self.assertEqual(
            rebuilt.encode(),
            'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        # A malformed base64 payload must raise HeaderParseError.
        broken = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        with self.assertRaises(errors.HeaderParseError):
            decode_header(broken)

    def test_shift_jis_charset(self):
        # A shift_jis header is expected to be emitted as iso-2022-jp.
        header = Header('文', charset='shift_jis')
        self.assertEqual(header.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        parsed = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(parsed), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        # Leading whitespace in a header value must survive flattening.
        message = Message()
        message['SomeHeader'] = '   value with leading ws'
        self.assertEqual(str(message),
                         "SomeHeader:    value with leading ws\n\n")

    def test_whitespace_header(self):
        # A header consisting of a single space encodes to that space.
        blank = Header(' ')
        self.assertEqual(blank.encode(), ' ')
5108
5109
5110
5111# Test RFC 2231 header parameters (en/de)coding
5112class TestRFC2231(TestEmailBase):
5113
5114    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
5115    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
5116    def test_get_param(self):
5117        eq = self.assertEqual
5118        msg = self._msgobj('msg_29.txt')
5119        eq(msg.get_param('title'),
5120           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
5121        eq(msg.get_param('title', unquote=False),
5122           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
5123
5124    def test_set_param(self):
5125        eq = self.ndiffAssertEqual
5126        msg = Message()
5127        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5128                      charset='us-ascii')
5129        eq(msg.get_param('title'),
5130           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
5131        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5132                      charset='us-ascii', language='en')
5133        eq(msg.get_param('title'),
5134           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
5135        msg = self._msgobj('msg_01.txt')
5136        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5137                      charset='us-ascii', language='en')
5138        eq(msg.as_string(maxheaderlen=78), """\
5139Return-Path: <[email protected]>
5140Delivered-To: [email protected]
5141Received: by mail.zzz.org (Postfix, from userid 889)
5142\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
5143MIME-Version: 1.0
5144Content-Transfer-Encoding: 7bit
5145Message-ID: <[email protected]>
5146From: [email protected] (John X. Doe)
5147To: [email protected]
5148Subject: This is a test message
5149Date: Fri, 4 May 2001 14:05:44 -0400
5150Content-Type: text/plain; charset=us-ascii;
5151 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
5152
5153
5154Hi,
5155
5156Do you like this message?
5157
5158-Me
5159""")
5160
5161    def test_set_param_requote(self):
5162        msg = Message()
5163        msg.set_param('title', 'foo')
5164        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
5165        msg.set_param('title', 'bar', requote=False)
5166        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
5167        # tspecial is still quoted.
5168        msg.set_param('title', "(bar)bell", requote=False)
5169        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
5170
5171    def test_del_param(self):
5172        eq = self.ndiffAssertEqual
5173        msg = self._msgobj('msg_01.txt')
5174        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
5175        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
5176            charset='us-ascii', language='en')
5177        msg.del_param('foo', header='Content-Type')
5178        eq(msg.as_string(maxheaderlen=78), """\
5179Return-Path: <[email protected]>
5180Delivered-To: [email protected]
5181Received: by mail.zzz.org (Postfix, from userid 889)
5182\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
5183MIME-Version: 1.0
5184Content-Transfer-Encoding: 7bit
5185Message-ID: <[email protected]>
5186From: [email protected] (John X. Doe)
5187To: [email protected]
5188Subject: This is a test message
5189Date: Fri, 4 May 2001 14:05:44 -0400
5190Content-Type: text/plain; charset="us-ascii";
5191 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
5192
5193
5194Hi,
5195
5196Do you like this message?
5197
5198-Me
5199""")
5200
5201    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
5202    # I changed the charset name, though, because the one in the file isn't
5203    # a legal charset name.  Should add a test for an illegal charset.
5204    def test_rfc2231_get_content_charset(self):
5205        eq = self.assertEqual
5206        msg = self._msgobj('msg_32.txt')
5207        eq(msg.get_content_charset(), 'us-ascii')
5208
5209    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
5210    def test_rfc2231_parse_rfc_quoting(self):
5211        m = textwrap.dedent('''\
5212            Content-Disposition: inline;
5213            \tfilename*0*=''This%20is%20even%20more%20;
5214            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
5215            \tfilename*2="is it not.pdf"
5216
5217            ''')
5218        msg = email.message_from_string(m)
5219        self.assertEqual(msg.get_filename(),
5220                         'This is even more ***fun*** is it not.pdf')
5221        self.assertEqual(m, msg.as_string())
5222
5223    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
5224    def test_rfc2231_parse_extra_quoting(self):
5225        m = textwrap.dedent('''\
5226            Content-Disposition: inline;
5227            \tfilename*0*="''This%20is%20even%20more%20";
5228            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5229            \tfilename*2="is it not.pdf"
5230
5231            ''')
5232        msg = email.message_from_string(m)
5233        self.assertEqual(msg.get_filename(),
5234                         'This is even more ***fun*** is it not.pdf')
5235        self.assertEqual(m, msg.as_string())
5236
5237    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
5238    # but new test uses *0* because otherwise lang/charset is not valid.
5239    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
5240    def test_rfc2231_no_language_or_charset(self):
5241        m = '''\
5242Content-Transfer-Encoding: 8bit
5243Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
5244Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
5245
5246'''
5247        msg = email.message_from_string(m)
5248        param = msg.get_param('NAME')
5249        self.assertNotIsInstance(param, tuple)
5250        self.assertEqual(
5251            param,
5252            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
5253
5254    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
5255    def test_rfc2231_no_language_or_charset_in_filename(self):
5256        m = '''\
5257Content-Disposition: inline;
5258\tfilename*0*="''This%20is%20even%20more%20";
5259\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5260\tfilename*2="is it not.pdf"
5261
5262'''
5263        msg = email.message_from_string(m)
5264        self.assertEqual(msg.get_filename(),
5265                         'This is even more ***fun*** is it not.pdf')
5266
    # NOTE: the body of this test is identical to
    # test_rfc2231_no_language_or_charset_in_filename above.
5268    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
5269        m = '''\
5270Content-Disposition: inline;
5271\tfilename*0*="''This%20is%20even%20more%20";
5272\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5273\tfilename*2="is it not.pdf"
5274
5275'''
5276        msg = email.message_from_string(m)
5277        self.assertEqual(msg.get_filename(),
5278                         'This is even more ***fun*** is it not.pdf')
5279
5280    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
5281    # but the test below is wrong (the first part should be decoded).
5282    def test_rfc2231_partly_encoded(self):
5283        m = '''\
5284Content-Disposition: inline;
5285\tfilename*0="''This%20is%20even%20more%20";
5286\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5287\tfilename*2="is it not.pdf"
5288
5289'''
5290        msg = email.message_from_string(m)
5291        self.assertEqual(
5292            msg.get_filename(),
5293            'This%20is%20even%20more%20***fun*** is it not.pdf')
5294
5295    def test_rfc2231_partly_nonencoded(self):
5296        m = '''\
5297Content-Disposition: inline;
5298\tfilename*0="This%20is%20even%20more%20";
5299\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
5300\tfilename*2="is it not.pdf"
5301
5302'''
5303        msg = email.message_from_string(m)
5304        self.assertEqual(
5305            msg.get_filename(),
5306            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
5307
5308    def test_rfc2231_no_language_or_charset_in_boundary(self):
5309        m = '''\
5310Content-Type: multipart/alternative;
5311\tboundary*0*="''This%20is%20even%20more%20";
5312\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
5313\tboundary*2="is it not.pdf"
5314
5315'''
5316        msg = email.message_from_string(m)
5317        self.assertEqual(msg.get_boundary(),
5318                         'This is even more ***fun*** is it not.pdf')
5319
5320    def test_rfc2231_no_language_or_charset_in_charset(self):
5321        # This is a nonsensical charset value, but tests the code anyway
5322        m = '''\
5323Content-Type: text/plain;
5324\tcharset*0*="This%20is%20even%20more%20";
5325\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
5326\tcharset*2="is it not.pdf"
5327
5328'''
5329        msg = email.message_from_string(m)
5330        self.assertEqual(msg.get_content_charset(),
5331                         'this is even more ***fun*** is it not.pdf')
5332
5333    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
5334    def test_rfc2231_bad_encoding_in_filename(self):
5335        m = '''\
5336Content-Disposition: inline;
5337\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
5338\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5339\tfilename*2="is it not.pdf"
5340
5341'''
5342        msg = email.message_from_string(m)
5343        self.assertEqual(msg.get_filename(),
5344                         'This is even more ***fun*** is it not.pdf')
5345
5346    def test_rfc2231_bad_encoding_in_charset(self):
5347        m = """\
5348Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
5349
5350"""
5351        msg = email.message_from_string(m)
5352        # This should return None because non-ascii characters in the charset
5353        # are not allowed.
5354        self.assertEqual(msg.get_content_charset(), None)
5355
5356    def test_rfc2231_bad_character_in_charset(self):
5357        m = """\
5358Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
5359
5360"""
5361        msg = email.message_from_string(m)
5362        # This should return None because non-ascii characters in the charset
5363        # are not allowed.
5364        self.assertEqual(msg.get_content_charset(), None)
5365
5366    def test_rfc2231_bad_character_in_filename(self):
5367        m = '''\
5368Content-Disposition: inline;
5369\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
5370\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
5371\tfilename*2*="is it not.pdf%E2"
5372
5373'''
5374        msg = email.message_from_string(m)
5375        self.assertEqual(msg.get_filename(),
5376                         'This is even more ***fun*** is it not.pdf\ufffd')
5377
5378    def test_rfc2231_unknown_encoding(self):
5379        m = """\
5380Content-Transfer-Encoding: 8bit
5381Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
5382
5383"""
5384        msg = email.message_from_string(m)
5385        self.assertEqual(msg.get_filename(), 'myfile.txt')
5386
5387    def test_rfc2231_bad_character_in_encoding(self):
5388        m = """\
5389Content-Transfer-Encoding: 8bit
5390Content-Disposition: inline; filename*=utf-8\udce2\udc80\udc9d''myfile.txt
5391
5392"""
5393        msg = email.message_from_string(m)
5394        self.assertEqual(msg.get_filename(), 'myfile.txt')
5395
5396    def test_rfc2231_single_tick_in_filename_extended(self):
5397        eq = self.assertEqual
5398        m = """\
5399Content-Type: application/x-foo;
5400\tname*0*=\"Frank's\"; name*1*=\" Document\"
5401
5402"""
5403        msg = email.message_from_string(m)
5404        charset, language, s = msg.get_param('name')
5405        eq(charset, None)
5406        eq(language, None)
5407        eq(s, "Frank's Document")
5408
5409    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
5410    def test_rfc2231_single_tick_in_filename(self):
5411        m = """\
5412Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
5413
5414"""
5415        msg = email.message_from_string(m)
5416        param = msg.get_param('name')
5417        self.assertNotIsInstance(param, tuple)
5418        self.assertEqual(param, "Frank's Document")
5419
5420    def test_rfc2231_missing_tick(self):
5421        m = '''\
5422Content-Disposition: inline;
5423\tfilename*0*="'This%20is%20broken";
5424'''
5425        msg = email.message_from_string(m)
5426        self.assertEqual(
5427            msg.get_filename(),
5428            "'This is broken")
5429
5430    def test_rfc2231_missing_tick_with_encoded_non_ascii(self):
5431        m = '''\
5432Content-Disposition: inline;
5433\tfilename*0*="'This%20is%E2broken";
5434'''
5435        msg = email.message_from_string(m)
5436        self.assertEqual(
5437            msg.get_filename(),
5438            "'This is\ufffdbroken")
5439
5440    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
5441    def test_rfc2231_tick_attack_extended(self):
5442        eq = self.assertEqual
5443        m = """\
5444Content-Type: application/x-foo;
5445\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
5446
5447"""
5448        msg = email.message_from_string(m)
5449        charset, language, s = msg.get_param('name')
5450        eq(charset, 'us-ascii')
5451        eq(language, 'en-us')
5452        eq(s, "Frank's Document")
5453
5454    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
5455    def test_rfc2231_tick_attack(self):
5456        m = """\
5457Content-Type: application/x-foo;
5458\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
5459
5460"""
5461        msg = email.message_from_string(m)
5462        param = msg.get_param('name')
5463        self.assertNotIsInstance(param, tuple)
5464        self.assertEqual(param, "us-ascii'en-us'Frank's Document")
5465
5466    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
5467    def test_rfc2231_no_extended_values(self):
5468        eq = self.assertEqual
5469        m = """\
5470Content-Type: application/x-foo; name=\"Frank's Document\"
5471
5472"""
5473        msg = email.message_from_string(m)
5474        eq(msg.get_param('name'), "Frank's Document")
5475
5476    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
5477    def test_rfc2231_encoded_then_unencoded_segments(self):
5478        eq = self.assertEqual
5479        m = """\
5480Content-Type: application/x-foo;
5481\tname*0*=\"us-ascii'en-us'My\";
5482\tname*1=\" Document\";
5483\tname*2*=\" For You\"
5484
5485"""
5486        msg = email.message_from_string(m)
5487        charset, language, s = msg.get_param('name')
5488        eq(charset, 'us-ascii')
5489        eq(language, 'en-us')
5490        eq(s, 'My Document For You')
5491
5492    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
5493    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
5494    def test_rfc2231_unencoded_then_encoded_segments(self):
5495        eq = self.assertEqual
5496        m = """\
5497Content-Type: application/x-foo;
5498\tname*0=\"us-ascii'en-us'My\";
5499\tname*1*=\" Document\";
5500\tname*2*=\" For You\"
5501
5502"""
5503        msg = email.message_from_string(m)
5504        charset, language, s = msg.get_param('name')
5505        eq(charset, 'us-ascii')
5506        eq(language, 'en-us')
5507        eq(s, 'My Document For You')
5508
5509    def test_should_not_hang_on_invalid_ew_messages(self):
5510        messages = ["""From: [email protected]
5511To: [email protected]
5512Bad-Header:
5513 =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?=
5514 =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?=
5515 =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?=
5516
5517Hello!
5518""", """From: ����� �������� <xxx@xxx>
5519To: "xxx" <xxx@xxx>
5520Subject:   ��� ���������� ����� ����� � ��������� �� ����
5521MIME-Version: 1.0
5522Content-Type: text/plain; charset="windows-1251";
5523Content-Transfer-Encoding: 8bit
5524
5525�� ����� � ���� ������ ��� ��������
5526"""]
5527        for m in messages:
5528            with self.subTest(m=m):
5529                msg = email.message_from_string(m)
5530
5531
5532# Tests to ensure that signed parts of an email are completely preserved, as
5533# required by RFC1847 section 2.1.  Note that these are incomplete, because the
5534# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    # Matches a "--boundary" line (group 1 is the boundary text) and captures
    # everything after it (group 2) up to the next line that consists of the
    # same "--boundary" marker.  Compiled once for the whole class.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return a (raw text, parsed Message) pair for the named data file.
        with openfile(filename, encoding="utf-8") as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        # Return the body of the first boundary-delimited part of *text*.
        # Fail with a readable message (instead of an AttributeError on None)
        # when no such part is present.
        match = self._first_part_re.search(text)
        self.assertIsNotNone(
            match, 'no boundary-delimited MIME part found in message text')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the
        # regenerated one to equal the original one.
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Same as above, but flattening with an explicit maxheaderlen.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Same comparison, going through Generator.flatten() directly.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
5567
class TestHeaderRegistry(TestEmailBase):
    # See issue gh-93010.
    def test_HeaderRegistry(self):
        # Building a Content-Disposition header whose parameter is written
        # as 0*00="foo" is expected to record an InvalidHeaderDefect as the
        # first defect on the resulting header object.
        registry = HeaderRegistry()
        header = registry('Content-Disposition', 'attachment; 0*00="foo"')
        first_defect = header.defects[0]
        self.assertIsInstance(first_defect, errors.InvalidHeaderDefect)
5574
# Run the whole module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
5577