1import datetime
2import textwrap
3import unittest
4from email import errors
5from email import policy
6from email.message import Message
7from test.test_email import TestEmailBase, parameterize
8from email import headerregistry
9from email.headerregistry import Address, Group
10from test.support import ALWAYS_EQ
11
12
13DITTO = object()
14
15
class TestHeaderRegistry(TestEmailBase):
    # Covers HeaderRegistry's choice of header class: the default mapping,
    # the base_class / default_class / use_default_map constructor options,
    # and map_to_type.

    def _assert_header_types(self, header, *expected_types):
        # Assert, in the order given, that header is an instance of each type.
        for expected in expected_types:
            self.assertIsInstance(header, expected)

    def test_arbitrary_name_unstructured(self):
        registry = headerregistry.HeaderRegistry()
        self._assert_header_types(
            registry('foobar', 'test'),
            headerregistry.BaseHeader,
            headerregistry.UnstructuredHeader)

    def test_name_case_ignored(self):
        registry = headerregistry.HeaderRegistry()
        # Whitebox precondition: the capitalized spelling must not itself be
        # a registry key, otherwise the lookup below would not exercise any
        # case handling.
        self.assertNotIn('Subject', registry.registry)
        self._assert_header_types(
            registry('Subject', 'test'),
            headerregistry.BaseHeader,
            headerregistry.UniqueUnstructuredHeader)

    class FooBase:
        # Stand-in base class: accepts and discards any constructor arguments.
        def __init__(self, *args, **kw):
            pass

    def test_override_default_base_class(self):
        registry = headerregistry.HeaderRegistry(base_class=self.FooBase)
        self._assert_header_types(
            registry('foobar', 'test'),
            self.FooBase,
            headerregistry.UnstructuredHeader)

    class FooDefault:
        # Stand-in default class: reuses the unstructured header's parser.
        parse = headerregistry.UnstructuredHeader.parse

    def test_override_default_class(self):
        registry = headerregistry.HeaderRegistry(
            default_class=self.FooDefault)
        self._assert_header_types(
            registry('foobar', 'test'),
            headerregistry.BaseHeader,
            self.FooDefault)

    def test_override_default_class_only_overrides_default(self):
        # 'subject' has its own mapping, so default_class must not apply.
        registry = headerregistry.HeaderRegistry(
            default_class=self.FooDefault)
        self._assert_header_types(
            registry('subject', 'test'),
            headerregistry.BaseHeader,
            headerregistry.UniqueUnstructuredHeader)

    def test_dont_use_default_map(self):
        registry = headerregistry.HeaderRegistry(use_default_map=False)
        self._assert_header_types(
            registry('subject', 'test'),
            headerregistry.BaseHeader,
            headerregistry.UnstructuredHeader)

    def test_map_to_type(self):
        registry = headerregistry.HeaderRegistry()
        # Build one header before and one after registering the mapping so
        # both behaviours are observed on the same registry.
        before = registry('foobar', 'test')
        registry.map_to_type('foobar',
                             headerregistry.UniqueUnstructuredHeader)
        after = registry('foobar', 'test')
        self._assert_header_types(
            before,
            headerregistry.BaseHeader,
            headerregistry.UnstructuredHeader)
        self._assert_header_types(
            after,
            headerregistry.BaseHeader,
            headerregistry.UniqueUnstructuredHeader)
72
73
class TestHeaderBase(TestEmailBase):
    # Common base for the header tests: supplies a header factory and a
    # convenience wrapper around it.

    # A registry with default settings, created once when the class body
    # runs and reached through self.factory.
    factory = headerregistry.HeaderRegistry()

    def make_header(self, name, value):
        # Return whatever the factory builds for the given name and value.
        header = self.factory(name, value)
        return header
80
81
class TestBaseHeaderFeatures(TestHeaderBase):
    # Checks of behaviour that does not depend on the header type, carried
    # out on a simple Subject header.

    def _sample_subject(self):
        # Fresh Subject header used by most of the tests in this class.
        return self.make_header('subject', 'this is a test')

    def test_str(self):
        subject = self._sample_subject()
        self.assertIsInstance(subject, str)
        self.assertEqual(subject, 'this is a test')
        self.assertEqual(str(subject), 'this is a test')

    def test_substr(self):
        subject = self._sample_subject()
        self.assertEqual(subject[5:7], 'is')

    def test_has_name(self):
        subject = self._sample_subject()
        self.assertEqual(subject.name, 'subject')

    def _test_attr_ro(self, attr):
        # Assigning to the named attribute must raise AttributeError.
        subject = self._sample_subject()
        with self.assertRaises(AttributeError):
            setattr(subject, attr, 'foo')

    def test_name_read_only(self):
        self._test_attr_ro('name')

    def test_defects_read_only(self):
        self._test_attr_ro('defects')

    def test_defects_is_tuple(self):
        clean = self._sample_subject()
        self.assertEqual(len(clean.defects), 0)
        self.assertIsInstance(clean.defects, tuple)
        # Make sure it is still true when there are defects.
        defective = self.make_header('date', '')
        self.assertEqual(len(defective.defects), 1)
        self.assertIsInstance(defective.defects, tuple)

    # XXX: FIXME
    #def test_CR_in_value(self):
    #    # XXX: this also re-raises the issue of embedded headers,
    #    # need test and solution for that.
    #    value = '\r'.join(['this is', ' a test'])
    #    h = self.make_header('subject', value)
    #    self.assertEqual(h, value)
    #    self.assertDefectsEqual(h.defects, [errors.ObsoleteHeaderDefect])
126
127
@parameterize
class TestUnstructuredHeader(TestHeaderBase):

    def string_as_value(self, source, decoded, *args):
        # Check one Subject value taken from string_params.
        #
        # source is the raw header value and decoded the expected string
        # value of the header.  Two optional positionals may follow: the
        # expected defects (default: none) and the expected folded value
        # without the 'Subject:' prefix and trailing newline (default:
        # source).
        expected_defects = args[0] if args else []
        # The expected fold has no space after the colon when source is
        # empty.
        prefix = 'Subject:' + (' ' if source else '')
        folded_value = args[1] if len(args) > 1 else source
        expected_fold = prefix + folded_value + '\n'
        header = self.make_header('Subject', source)
        self.assertEqual(header, decoded)
        self.assertDefectsEqual(header.defects, expected_defects)
        self.assertEqual(header.fold(policy=policy.default), expected_fold)

    # Each entry: (source, decoded[, defects[, folded value]]).
    string_params = {

        'rfc2047_simple_quopri': (
            '=?utf-8?q?this_is_a_test?=',
            'this is a test',
            [],
            'this is a test',
        ),

        'rfc2047_gb2312_base64': (
            '=?gb2312?b?1eLKx9bQzsSy4srUo6E=?=',
            '\u8fd9\u662f\u4e2d\u6587\u6d4b\u8bd5\uff01',
            [],
            '=?utf-8?b?6L+Z5piv5Lit5paH5rWL6K+V77yB?=',
        ),

        'rfc2047_simple_nonascii_quopri': (
            '=?utf-8?q?=C3=89ric?=',
            'Éric',
        ),

        'rfc2047_quopri_with_regular_text': (
            'The =?utf-8?q?=C3=89ric=2C?= Himself',
            'The Éric, Himself',
        ),

    }
167
168
@parameterize
class TestDateHeader(TestHeaderBase):

    # The same moment expressed as header text and as an aware datetime
    # with a -07:00 offset.
    datestring = 'Sun, 23 Sep 2001 20:10:55 -0700'
    utcoffset = datetime.timedelta(hours=-7)
    tz = datetime.timezone(utcoffset)
    dt = datetime.datetime(2001, 9, 23, 20, 10, 55, tzinfo=tz)

    def _parsed_date(self):
        # Date header built from the textual form.
        return self.make_header('date', self.datestring)

    def _assert_single_defect(self, header, defect_type):
        # The header must carry exactly one defect, of the given type.
        self.assertEqual(len(header.defects), 1)
        self.assertIsInstance(header.defects[0], defect_type)

    def _check_rejected_date(self, text):
        # An unusable date keeps its text, has no datetime, and is flagged
        # with a single InvalidDateDefect.
        header = self.make_header('date', text)
        self.assertEqual(header, text)
        self.assertIsNone(header.datetime)
        self._assert_single_defect(header, errors.InvalidDateDefect)

    def test_parse_date(self):
        header = self._parsed_date()
        self.assertEqual(header, self.datestring)
        self.assertEqual(header.datetime, self.dt)
        self.assertEqual(header.datetime.utcoffset(), self.utcoffset)
        self.assertEqual(header.defects, ())

    def test_set_from_datetime(self):
        header = self.make_header('date', self.dt)
        self.assertEqual(header, self.datestring)
        self.assertEqual(header.datetime, self.dt)
        self.assertEqual(header.defects, ())

    def test_date_header_properties(self):
        header = self._parsed_date()
        self.assertIsInstance(header, headerregistry.UniqueDateHeader)
        self.assertEqual(header.max_count, 1)
        self.assertEqual(header.defects, ())

    def test_resent_date_header_properties(self):
        header = self.make_header('resent-date', self.datestring)
        self.assertIsInstance(header, headerregistry.DateHeader)
        self.assertEqual(header.max_count, None)
        self.assertEqual(header.defects, ())

    def test_no_value_is_defect(self):
        header = self.make_header('date', '')
        self._assert_single_defect(header, errors.HeaderMissingRequiredValue)

    def test_invalid_date_format(self):
        self._check_rejected_date('Not a date header')

    def test_invalid_date_value(self):
        # Well-formed layout, but the hour field is 27.
        self._check_rejected_date('Tue, 06 Jun 2017 27:39:33 +0600')

    def test_datetime_read_only(self):
        header = self._parsed_date()
        with self.assertRaises(AttributeError):
            header.datetime = 'foo'

    def test_set_date_header_from_datetime(self):
        # Same conversion as test_set_from_datetime, but going through
        # item assignment on a Message that uses the default policy.
        msg = Message(policy=policy.default)
        msg['Date'] = self.dt
        self.assertEqual(msg['Date'], self.datestring)
        self.assertEqual(msg['Date'].datetime, self.dt)
233
234
235@parameterize
236class TestContentTypeHeader(TestHeaderBase):
237
238    def content_type_as_value(self,
239                              source,
240                              content_type,
241                              maintype,
242                              subtype,
243                              *args):
244        l = len(args)
245        parmdict = args[0] if l>0 else {}
246        defects =  args[1] if l>1 else []
247        decoded =  args[2] if l>2 and args[2] is not DITTO else source
248        header = 'Content-Type:' + ' ' if source else ''
249        folded = args[3] if l>3 else header + decoded + '\n'
250        h = self.make_header('Content-Type', source)
251        self.assertEqual(h.content_type, content_type)
252        self.assertEqual(h.maintype, maintype)
253        self.assertEqual(h.subtype, subtype)
254        self.assertEqual(h.params, parmdict)
255        with self.assertRaises(TypeError):
256            h.params['abc'] = 'xyz'   # make sure params is read-only.
257        self.assertDefectsEqual(h.defects, defects)
258        self.assertEqual(h, decoded)
259        self.assertEqual(h.fold(policy=policy.default), folded)
260
261    content_type_params = {
262
263        # Examples from RFC 2045.
264
265        'RFC_2045_1': (
266            'text/plain; charset=us-ascii (Plain text)',
267            'text/plain',
268            'text',
269            'plain',
270            {'charset': 'us-ascii'},
271            [],
272            'text/plain; charset="us-ascii"'),
273
274        'RFC_2045_2': (
275            'text/plain; charset=us-ascii',
276            'text/plain',
277            'text',
278            'plain',
279            {'charset': 'us-ascii'},
280            [],
281            'text/plain; charset="us-ascii"'),
282
283        'RFC_2045_3': (
284            'text/plain; charset="us-ascii"',
285            'text/plain',
286            'text',
287            'plain',
288            {'charset': 'us-ascii'}),
289
290        # RFC 2045 5.2 says syntactically invalid values are to be treated as
291        # text/plain.
292
293        'no_subtype_in_content_type': (
294            'text/',
295            'text/plain',
296            'text',
297            'plain',
298            {},
299            [errors.InvalidHeaderDefect]),
300
301        'no_slash_in_content_type': (
302            'foo',
303            'text/plain',
304            'text',
305            'plain',
306            {},
307            [errors.InvalidHeaderDefect]),
308
309        'junk_text_in_content_type': (
310            '<crazy "stuff">',
311            'text/plain',
312            'text',
313            'plain',
314            {},
315            [errors.InvalidHeaderDefect]),
316
317        'too_many_slashes_in_content_type': (
318            'image/jpeg/foo',
319            'text/plain',
320            'text',
321            'plain',
322            {},
323            [errors.InvalidHeaderDefect]),
324
325        # But unknown names are OK.  We could make non-IANA names a defect, but
326        # by not doing so we make ourselves future proof.  The fact that they
327        # are unknown will be detectable by the fact that they don't appear in
328        # the mime_registry...and the application is free to extend that list
329        # to handle them even if the core library doesn't.
330
331        'unknown_content_type': (
332            'bad/names',
333            'bad/names',
334            'bad',
335            'names'),
336
337        # The content type is case insensitive, and CFWS is ignored.
338
339        'mixed_case_content_type': (
340            'ImAge/JPeg',
341            'image/jpeg',
342            'image',
343            'jpeg'),
344
345        'spaces_in_content_type': (
346            '  text  /  plain  ',
347            'text/plain',
348            'text',
349            'plain'),
350
351        'cfws_in_content_type': (
352            '(foo) text (bar)/(baz)plain(stuff)',
353            'text/plain',
354            'text',
355            'plain'),
356
357        # test some parameters (more tests could be added for parameters
358        # associated with other content types, but since parameter parsing is
359        # generic they would be redundant for the current implementation).
360
361        'charset_param': (
362            'text/plain; charset="utf-8"',
363            'text/plain',
364            'text',
365            'plain',
366            {'charset': 'utf-8'}),
367
368        'capitalized_charset': (
369            'text/plain; charset="US-ASCII"',
370            'text/plain',
371            'text',
372            'plain',
373            {'charset': 'US-ASCII'}),
374
375        'unknown_charset': (
376            'text/plain; charset="fOo"',
377            'text/plain',
378            'text',
379            'plain',
380            {'charset': 'fOo'}),
381
382        'capitalized_charset_param_name_and_comment': (
383            'text/plain; (interjection) Charset="utf-8"',
384            'text/plain',
385            'text',
386            'plain',
387            {'charset': 'utf-8'},
388            [],
389            # Should the parameter name be lowercased here?
390            'text/plain; Charset="utf-8"'),
391
392        # Since this is pretty much the ur-mimeheader, we'll put all the tests
393        # that exercise the parameter parsing and formatting here.  Note that
394        # when we refold we may canonicalize, so things like whitespace,
395        # quoting, and rfc2231 encoding may change from what was in the input
396        # header.
397
398        'unquoted_param_value': (
399            'text/plain; title=foo',
400            'text/plain',
401            'text',
402            'plain',
403            {'title': 'foo'},
404            [],
405            'text/plain; title="foo"',
406            ),
407
408        'param_value_with_tspecials': (
409            'text/plain; title="(bar)foo blue"',
410            'text/plain',
411            'text',
412            'plain',
413            {'title': '(bar)foo blue'}),
414
415        'param_with_extra_quoted_whitespace': (
416            'text/plain; title="  a     loong  way \t home   "',
417            'text/plain',
418            'text',
419            'plain',
420            {'title': '  a     loong  way \t home   '}),
421
422        'bad_params': (
423            'blarg; baz; boo',
424            'text/plain',
425            'text',
426            'plain',
427            {'baz': '', 'boo': ''},
428            [errors.InvalidHeaderDefect]*3),
429
430        'spaces_around_param_equals': (
431            'Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"',
432            'multipart/mixed',
433            'multipart',
434            'mixed',
435            {'boundary': 'CPIMSSMTPC06p5f3tG'},
436            [],
437            'Multipart/mixed; boundary="CPIMSSMTPC06p5f3tG"',
438            ),
439
440        'spaces_around_semis': (
441            ('image/jpeg; name="wibble.JPG" ; x-mac-type="4A504547" ; '
442                'x-mac-creator="474B4F4E"'),
443            'image/jpeg',
444            'image',
445            'jpeg',
446            {'name': 'wibble.JPG',
447             'x-mac-type': '4A504547',
448             'x-mac-creator': '474B4F4E'},
449            [],
450            ('image/jpeg; name="wibble.JPG"; x-mac-type="4A504547"; '
451                'x-mac-creator="474B4F4E"'),
452            ('Content-Type: image/jpeg; name="wibble.JPG";'
453                ' x-mac-type="4A504547";\n'
454             ' x-mac-creator="474B4F4E"\n'),
455            ),
456
457        'lots_of_mime_params': (
458            ('image/jpeg; name="wibble.JPG"; x-mac-type="4A504547"; '
459                'x-mac-creator="474B4F4E"; x-extrastuff="make it longer"'),
460            'image/jpeg',
461            'image',
462            'jpeg',
463            {'name': 'wibble.JPG',
464             'x-mac-type': '4A504547',
465             'x-mac-creator': '474B4F4E',
466             'x-extrastuff': 'make it longer'},
467            [],
468            ('image/jpeg; name="wibble.JPG"; x-mac-type="4A504547"; '
469                'x-mac-creator="474B4F4E"; x-extrastuff="make it longer"'),
            # In this case the whole of the MimeParameters does *not* fit
            # on one line, so we break at a lower syntactic level.
472            ('Content-Type: image/jpeg; name="wibble.JPG";'
473                ' x-mac-type="4A504547";\n'
474             ' x-mac-creator="474B4F4E"; x-extrastuff="make it longer"\n'),
475            ),
476
477        'semis_inside_quotes': (
478            'image/jpeg; name="Jim&amp;&amp;Jill"',
479            'image/jpeg',
480            'image',
481            'jpeg',
482            {'name': 'Jim&amp;&amp;Jill'}),
483
484        'single_quotes_inside_quotes': (
485            'image/jpeg; name="Jim \'Bob\' Jill"',
486            'image/jpeg',
487            'image',
488            'jpeg',
489            {'name': "Jim 'Bob' Jill"}),
490
491        'double_quotes_inside_quotes': (
492            r'image/jpeg; name="Jim \"Bob\" Jill"',
493            'image/jpeg',
494            'image',
495            'jpeg',
496            {'name': 'Jim "Bob" Jill'},
497            [],
498            r'image/jpeg; name="Jim \"Bob\" Jill"'),
499
500        'non_ascii_in_params': (
501            ('foo\xa7/bar; b\xa7r=two; '
502                'baz=thr\xa7e'.encode('latin-1').decode('us-ascii',
503                                                        'surrogateescape')),
504            'foo\uFFFD/bar',
505            'foo\uFFFD',
506            'bar',
507            {'b\uFFFDr': 'two', 'baz': 'thr\uFFFDe'},
508            [errors.UndecodableBytesDefect]*3,
509            'foo�/bar; b�r="two"; baz="thr�e"',
510            # XXX Two bugs here: the mime type is not allowed to be an encoded
511            # word, and we shouldn't be emitting surrogates in the parameter
512            # names.  But I don't know what the behavior should be here, so I'm
513            # punting for now.  In practice this is unlikely to be encountered
514            # since headers with binary in them only come from a binary source
515            # and are almost certain to be re-emitted without refolding.
516            'Content-Type: =?unknown-8bit?q?foo=A7?=/bar; b\udca7r="two";\n'
517            " baz*=unknown-8bit''thr%A7e\n",
518            ),
519
520        # RFC 2231 parameter tests.
521
522        'rfc2231_segmented_normal_values': (
523            'image/jpeg; name*0="abc"; name*1=".html"',
524            'image/jpeg',
525            'image',
526            'jpeg',
527            {'name': "abc.html"},
528            [],
529            'image/jpeg; name="abc.html"'),
530
531        'quotes_inside_rfc2231_value': (
532            r'image/jpeg; bar*0="baz\"foobar"; bar*1="\"baz"',
533            'image/jpeg',
534            'image',
535            'jpeg',
536            {'bar': 'baz"foobar"baz'},
537            [],
538            r'image/jpeg; bar="baz\"foobar\"baz"'),
539
540        'non_ascii_rfc2231_value': (
541            ('text/plain; charset=us-ascii; '
542             "title*=us-ascii'en'This%20is%20"
543             'not%20f\xa7n').encode('latin-1').decode('us-ascii',
544                                                     'surrogateescape'),
545            'text/plain',
546            'text',
547            'plain',
548            {'charset': 'us-ascii', 'title': 'This is not f\uFFFDn'},
549             [errors.UndecodableBytesDefect],
550             'text/plain; charset="us-ascii"; title="This is not f�n"',
551            'Content-Type: text/plain; charset="us-ascii";\n'
552            " title*=unknown-8bit''This%20is%20not%20f%A7n\n",
553            ),
554
555        'rfc2231_encoded_charset': (
556            'text/plain; charset*=ansi-x3.4-1968\'\'us-ascii',
557            'text/plain',
558            'text',
559            'plain',
560            {'charset': 'us-ascii'},
561            [],
562            'text/plain; charset="us-ascii"'),
563
564        # This follows the RFC: no double quotes around encoded values.
565        'rfc2231_encoded_no_double_quotes': (
566            ("text/plain;"
567                "\tname*0*=''This%20is%20;"
568                "\tname*1*=%2A%2A%2Afun%2A%2A%2A%20;"
569                '\tname*2="is it not.pdf"'),
570            'text/plain',
571            'text',
572            'plain',
573            {'name': 'This is ***fun*** is it not.pdf'},
574            [],
575            'text/plain; name="This is ***fun*** is it not.pdf"',
576            ),
577
578        # Make sure we also handle it if there are spurious double quotes.
579        'rfc2231_encoded_with_double_quotes': (
580            ("text/plain;"
581                '\tname*0*="us-ascii\'\'This%20is%20even%20more%20";'
582                '\tname*1*="%2A%2A%2Afun%2A%2A%2A%20";'
583                '\tname*2="is it not.pdf"'),
584            'text/plain',
585            'text',
586            'plain',
587            {'name': 'This is even more ***fun*** is it not.pdf'},
588            [errors.InvalidHeaderDefect]*2,
589            'text/plain; name="This is even more ***fun*** is it not.pdf"',
590            ),
591
592        'rfc2231_single_quote_inside_double_quotes': (
593            ('text/plain; charset=us-ascii;'
594               '\ttitle*0*="us-ascii\'en\'This%20is%20really%20";'
595               '\ttitle*1*="%2A%2A%2Afun%2A%2A%2A%20";'
596               '\ttitle*2="isn\'t it!"'),
597            'text/plain',
598            'text',
599            'plain',
600            {'charset': 'us-ascii', 'title': "This is really ***fun*** isn't it!"},
601            [errors.InvalidHeaderDefect]*2,
602            ('text/plain; charset="us-ascii"; '
603               'title="This is really ***fun*** isn\'t it!"'),
604            ('Content-Type: text/plain; charset="us-ascii";\n'
605                ' title="This is really ***fun*** isn\'t it!"\n'),
606            ),
607
608        'rfc2231_single_quote_in_value_with_charset_and_lang': (
609            ('application/x-foo;'
610                "\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\""),
611            'application/x-foo',
612            'application',
613            'x-foo',
614            {'name': "Frank's Document"},
615            [errors.InvalidHeaderDefect]*2,
616            'application/x-foo; name="Frank\'s Document"',
617            ),
618
619        'rfc2231_single_quote_in_non_encoded_value': (
620            ('application/x-foo;'
621                "\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\""),
622            'application/x-foo',
623            'application',
624            'x-foo',
625            {'name': "us-ascii'en-us'Frank's Document"},
626            [],
627            'application/x-foo; name="us-ascii\'en-us\'Frank\'s Document"',
628             ),
629
630        'rfc2231_no_language_or_charset': (
631            'text/plain; NAME*0*=english_is_the_default.html',
632            'text/plain',
633            'text',
634            'plain',
635            {'name': 'english_is_the_default.html'},
636            [errors.InvalidHeaderDefect],
637            'text/plain; NAME="english_is_the_default.html"'),
638
639        'rfc2231_encoded_no_charset': (
640            ("text/plain;"
641                '\tname*0*="\'\'This%20is%20even%20more%20";'
642                '\tname*1*="%2A%2A%2Afun%2A%2A%2A%20";'
643                '\tname*2="is it.pdf"'),
644            'text/plain',
645            'text',
646            'plain',
647            {'name': 'This is even more ***fun*** is it.pdf'},
648            [errors.InvalidHeaderDefect]*2,
649            'text/plain; name="This is even more ***fun*** is it.pdf"',
650            ),
651
652        'rfc2231_partly_encoded': (
653            ("text/plain;"
654                '\tname*0*="\'\'This%20is%20even%20more%20";'
655                '\tname*1*="%2A%2A%2Afun%2A%2A%2A%20";'
656                '\tname*2="is it.pdf"'),
657            'text/plain',
658            'text',
659            'plain',
660            {'name': 'This is even more ***fun*** is it.pdf'},
661            [errors.InvalidHeaderDefect]*2,
662            'text/plain; name="This is even more ***fun*** is it.pdf"',
663            ),
664
665        'rfc2231_partly_encoded_2': (
666            ("text/plain;"
667                '\tname*0*="\'\'This%20is%20even%20more%20";'
668                '\tname*1="%2A%2A%2Afun%2A%2A%2A%20";'
669                '\tname*2="is it.pdf"'),
670            'text/plain',
671            'text',
672            'plain',
673            {'name': 'This is even more %2A%2A%2Afun%2A%2A%2A%20is it.pdf'},
674            [errors.InvalidHeaderDefect],
675            ('text/plain;'
676             ' name="This is even more %2A%2A%2Afun%2A%2A%2A%20is it.pdf"'),
677            ('Content-Type: text/plain;\n'
678             ' name="This is even more %2A%2A%2Afun%2A%2A%2A%20is'
679                ' it.pdf"\n'),
680            ),
681
682        'rfc2231_unknown_charset_treated_as_ascii': (
683            "text/plain; name*0*=bogus'xx'ascii_is_the_default",
684            'text/plain',
685            'text',
686            'plain',
687            {'name': 'ascii_is_the_default'},
688            [],
689            'text/plain; name="ascii_is_the_default"'),
690
691        'rfc2231_bad_character_in_charset_parameter_value': (
692            "text/plain; charset*=ascii''utf-8%F1%F2%F3",
693            'text/plain',
694            'text',
695            'plain',
696            {'charset': 'utf-8\uFFFD\uFFFD\uFFFD'},
697            [errors.UndecodableBytesDefect],
698            'text/plain; charset="utf-8\uFFFD\uFFFD\uFFFD"',
699            "Content-Type: text/plain;"
700            " charset*=unknown-8bit''utf-8%F1%F2%F3\n",
701            ),
702
703        'rfc2231_utf8_in_supposedly_ascii_charset_parameter_value': (
704            "text/plain; charset*=ascii''utf-8%E2%80%9D",
705            'text/plain',
706            'text',
707            'plain',
708            {'charset': 'utf-8”'},
709            [errors.UndecodableBytesDefect],
710            'text/plain; charset="utf-8”"',
711            # XXX Should folding change the charset to utf8?  Currently it just
712            # reproduces the original, which is arguably fine.
713            "Content-Type: text/plain;"
714            " charset*=unknown-8bit''utf-8%E2%80%9D\n",
715            ),
716
717        'rfc2231_nonascii_in_charset_of_charset_parameter_value': (
718            "text/plain; charset*=utf-8”''utf-8%E2%80%9D",
719            'text/plain',
720            'text',
721            'plain',
722            {'charset': 'utf-8”'},
723            [],
724            'text/plain; charset="utf-8”"',
725            "Content-Type: text/plain;"
726            " charset*=utf-8''utf-8%E2%80%9D\n",
727            ),
728
729        'rfc2231_encoded_then_unencoded_segments': (
730            ('application/x-foo;'
731                '\tname*0*="us-ascii\'en-us\'My";'
732                '\tname*1=" Document";'
733                '\tname*2=" For You"'),
734            'application/x-foo',
735            'application',
736            'x-foo',
737            {'name': 'My Document For You'},
738            [errors.InvalidHeaderDefect],
739            'application/x-foo; name="My Document For You"',
740            ),
741
742        # My reading of the RFC is that this is an invalid header.  The RFC
743        # says that if charset and language information is given, the first
744        # segment *must* be encoded.
745        'rfc2231_unencoded_then_encoded_segments': (
746            ('application/x-foo;'
747                '\tname*0=us-ascii\'en-us\'My;'
748                '\tname*1*=" Document";'
749                '\tname*2*=" For You"'),
750            'application/x-foo',
751            'application',
752            'x-foo',
753            {'name': 'My Document For You'},
754            [errors.InvalidHeaderDefect]*3,
755            'application/x-foo; name="My Document For You"',
756            ),
757
758        # XXX: I would say this one should default to ascii/en for the
759        # "encoded" segment, since the first segment is not encoded and is
760        # in double quotes, making the value a valid non-encoded string.  The
761        # old parser decodes this just like the previous case, which may be the
762        # better Postel rule, but could equally result in borking headers that
763        # intentionally have quoted quotes in them.  We could get this 98%
764        # right if we treat it as a quoted string *unless* it matches the
765        # charset'lang'value pattern exactly *and* there is at least one
766        # encoded segment.  Implementing that algorithm will require some
767        # refactoring, so I haven't done it (yet).
768        'rfc2231_quoted_unencoded_then_encoded_segments': (
769            ('application/x-foo;'
770                '\tname*0="us-ascii\'en-us\'My";'
771                '\tname*1*=" Document";'
772                '\tname*2*=" For You"'),
773            'application/x-foo',
774            'application',
775            'x-foo',
776            {'name': "us-ascii'en-us'My Document For You"},
777            [errors.InvalidHeaderDefect]*2,
778            'application/x-foo; name="us-ascii\'en-us\'My Document For You"',
779            ),
780
781        # Make sure our folding algorithm produces multiple sections correctly.
782        # We could mix encoded and non-encoded segments, but we don't, we just
783        # make them all encoded.  It might be worth fixing that, since the
784        # sections can get used for wrapping ascii text.
785        'rfc2231_folded_segments_correctly_formatted': (
786            ('application/x-foo;'
787                '\tname="' + "with spaces"*8 + '"'),
788            'application/x-foo',
789            'application',
790            'x-foo',
791            {'name': "with spaces"*8},
792            [],
793            'application/x-foo; name="' + "with spaces"*8 + '"',
794            "Content-Type: application/x-foo;\n"
795            " name*0*=us-ascii''with%20spaceswith%20spaceswith%20spaceswith"
796                "%20spaceswith;\n"
797            " name*1*=%20spaceswith%20spaceswith%20spaceswith%20spaces\n"
798            ),
799
800    }
801
802
@parameterize
class TestContentTransferEncoding(TestHeaderBase):

    def cte_as_value(self,
                     source,
                     cte,
                     *args):
        # Check one Content-Transfer-Encoding value taken from cte_params.
        #
        # source is the raw header value and cte the expected value of the
        # header's cte attribute.  Up to three optional positionals may
        # follow, in this order:
        #   args[0]  expected defects (default: none)
        #   args[1]  expected string value (default, or DITTO: same as source)
        #   args[2]  expected folded header (default: the header name, a
        #            space if source is non-empty, source and a newline)
        nargs = len(args)
        defects = args[0] if nargs > 0 else []
        decoded = args[1] if nargs > 1 and args[1] is not DITTO else source
        # Only the separating space depends on source.  The conditional
        # expression binds more loosely than '+', so without the parentheses
        # an empty source would drop the header name too and the default
        # folded value would be just '\n'.
        header = 'Content-Transfer-Encoding:' + (' ' if source else '')
        folded = args[2] if nargs > 2 else header + source + '\n'
        h = self.make_header('Content-Transfer-Encoding', source)
        self.assertEqual(h.cte, cte)
        self.assertDefectsEqual(h.defects, defects)
        self.assertEqual(h, decoded)
        self.assertEqual(h.fold(policy=policy.default), folded)

    cte_params = {

        'RFC_2183_1': (
            'base64',
            'base64',),

        'no_value': (
            '',
            '7bit',
            [errors.HeaderMissingRequiredValue],
            '',
            'Content-Transfer-Encoding:\n',
            ),

        'junk_after_cte': (
            '7bit and a bunch more',
            '7bit',
            [errors.InvalidHeaderDefect]),

    }
841
842
@parameterize
class TestContentDisposition(TestHeaderBase):
    """Checks of Content-Disposition headers built with make_header.

    NOTE(review): the entries of content_disp_params are presumably expanded
    by @parameterize into one call of content_disp_as_value each -- confirm
    against test.test_email.
    """

    def content_disp_as_value(self,
                              source,
                              content_disposition,
                              *args):
        """Check a single Content-Disposition header value.

        source is the raw header value and content_disposition the expected
        value of the header's ``content_disposition`` attribute.  Up to four
        optional values may follow:

        * the expected ``params`` mapping (default: empty dict);
        * the expected list of defect classes (default: no defects);
        * the expected string value of the header (default, or when DITTO
          is passed: the same as source);
        * the expected result of folding with policy.default (default: the
          header name, a blank if source is non-empty, source, newline).
        """
        nargs = len(args)
        parmdict = args[0] if nargs > 0 else {}
        defects = args[1] if nargs > 1 else []
        decoded = args[2] if nargs > 2 and args[2] is not DITTO else source
        # Only the separating blank depends on source.  The parentheses are
        # required: a conditional expression binds more loosely than '+', so
        # without them an empty source would drop the header name as well.
        header = 'Content-Disposition:' + (' ' if source else '')
        folded = args[3] if nargs > 3 else header + source + '\n'
        h = self.make_header('Content-Disposition', source)
        self.assertEqual(h.content_disposition, content_disposition)
        self.assertEqual(h.params, parmdict)
        self.assertDefectsEqual(h.defects, defects)
        self.assertEqual(h, decoded)
        self.assertEqual(h.fold(policy=policy.default), folded)

    # Each value is the positional argument tuple for content_disp_as_value:
    # (source, content_disposition[, params[, defects[, decoded[, folded]]]]).
    content_disp_params = {

        # Examples from RFC 2183.

        'RFC_2183_1': (
            'inline',
            'inline',),

        'RFC_2183_2': (
            ('attachment; filename=genome.jpeg;'
             '  modification-date="Wed, 12 Feb 1997 16:29:51 -0500";'),
            'attachment',
            {'filename': 'genome.jpeg',
             'modification-date': 'Wed, 12 Feb 1997 16:29:51 -0500'},
            [],
            ('attachment; filename="genome.jpeg"; '
                 'modification-date="Wed, 12 Feb 1997 16:29:51 -0500"'),
            ('Content-Disposition: attachment; filename="genome.jpeg";\n'
             ' modification-date="Wed, 12 Feb 1997 16:29:51 -0500"\n'),
            ),

        'no_value': (
            '',
            None,
            {},
            [errors.HeaderMissingRequiredValue],
            '',
            'Content-Disposition:\n'),

        'invalid_value': (
            'ab./k',
            'ab.',
            {},
            [errors.InvalidHeaderDefect]),

        'invalid_value_with_params': (
            'ab./k; filename="foo"',
            'ab.',
            {'filename': 'foo'},
            [errors.InvalidHeaderDefect]),

        'invalid_parameter_value_with_fws_between_ew': (
            'attachment; filename="=?UTF-8?Q?Schulbesuchsbest=C3=A4ttigung=2E?='
            '               =?UTF-8?Q?pdf?="',
            'attachment',
            {'filename': 'Schulbesuchsbestättigung.pdf'},
            [errors.InvalidHeaderDefect]*3,
            ('attachment; filename="Schulbesuchsbestättigung.pdf"'),
            ('Content-Disposition: attachment;\n'
             ' filename*=utf-8\'\'Schulbesuchsbest%C3%A4ttigung.pdf\n'),
            ),

        'parameter_value_with_fws_between_tokens': (
            'attachment; filename="File =?utf-8?q?Name?= With Spaces.pdf"',
            'attachment',
            {'filename': 'File Name With Spaces.pdf'},
            [errors.InvalidHeaderDefect],
            'attachment; filename="File Name With Spaces.pdf"',
            ('Content-Disposition: attachment; filename="File Name With Spaces.pdf"\n'),
            )
    }
924
925
@parameterize
class TestMIMEVersionHeader(TestHeaderBase):
    """Checks of MIME-Version headers built with make_header.

    NOTE(review): the entries of version_string_params are presumably
    expanded by @parameterize into one call of
    version_string_as_MIME_Version each -- confirm against test.test_email.
    """

    def version_string_as_MIME_Version(self, source, decoded, version,
                                       major, minor, defects):
        """Check a single MIME-Version header value.

        source is the raw header value; decoded is the expected string value
        of the header; version, major and minor are the expected values of
        the attributes of the same names; defects is the expected list of
        defect classes.  The folded form is expected to be the header name,
        a blank if source is non-empty, source, and a newline.
        """
        mime_version = self.make_header('MIME-Version', source)
        self.assertEqual(mime_version, decoded)
        self.assertEqual(mime_version.version, version)
        self.assertEqual(mime_version.major, major)
        self.assertEqual(mime_version.minor, minor)
        self.assertDefectsEqual(mime_version.defects, defects)
        # An empty value folds to the bare header name with no blank.
        separator = ' ' if source else ''
        expected_fold = 'MIME-Version:' + separator + source + '\n'
        self.assertEqual(mime_version.fold(policy=policy.default),
                         expected_fold)

    # Each value is (source, decoded, version, major, minor, defects).
    version_string_params = {

        # Examples from the RFC.

        'RFC_2045_1': ('1.0', '1.0', '1.0', 1, 0, []),

        'RFC_2045_2': ('1.0 (produced by MetaSend Vx.x)',
                       '1.0 (produced by MetaSend Vx.x)',
                       '1.0', 1, 0, []),

        'RFC_2045_3': ('(produced by MetaSend Vx.x) 1.0',
                       '(produced by MetaSend Vx.x) 1.0',
                       '1.0', 1, 0, []),

        'RFC_2045_4': ('1.(produced by MetaSend Vx.x)0',
                       '1.(produced by MetaSend Vx.x)0',
                       '1.0', 1, 0, []),

        # Other valid values.

        '1_1': ('1.1', '1.1', '1.1', 1, 1, []),

        '2_1': ('2.1', '2.1', '2.1', 2, 1, []),

        'whitespace': ('1 .0', '1 .0', '1.0', 1, 0, []),

        'leading_trailing_whitespace_ignored': (
            '  1.0  ', '  1.0  ', '1.0', 1, 0, []),

        # Recoverable invalid values.  We can recover here only because we
        # already have a valid value by the time we encounter the garbage.
        # Anywhere else, and we don't know where the garbage ends.

        'non_comment_garbage_after': (
            '1.0 <abc>', '1.0 <abc>', '1.0', 1, 0,
            [errors.InvalidHeaderDefect]),

        # Unrecoverable invalid values.  We *could* apply more heuristics to
        # get something out of the first two, but doing so is not worth the
        # effort.

        'non_comment_garbage_before': (
            '<abc> 1.0', '<abc> 1.0', None, None, None,
            [errors.InvalidHeaderDefect]),

        'non_comment_garbage_inside': (
            '1.<abc>0', '1.<abc>0', None, None, None,
            [errors.InvalidHeaderDefect]),

        'two_periods': (
            '1..0', '1..0', None, None, None,
            [errors.InvalidHeaderDefect]),

        '2_x': (
            '2.x', '2.x',
            None,  # This could be 2, but it seems safer to make it None.
            None, None,
            [errors.InvalidHeaderDefect]),

        'foo': (
            'foo', 'foo', None, None, None,
            [errors.InvalidHeaderDefect]),

        'missing': (
            '', '', None, None, None,
            [errors.HeaderMissingRequiredValue]),

        }
1082
1083
@parameterize
class TestAddressHeader(TestHeaderBase):
    """Checks of address headers ('sender' and 'to') built with make_header.

    NOTE(review): the entries of example_params are presumably expanded by
    @parameterize into calls of example_as_address and example_as_group --
    confirm against test.test_email.
    """

    # Each value is (source, defects, decoded, display_name, addr_spec,
    # username, domain, comment), i.e. the positional arguments of
    # example_as_address and example_as_group.  test_complex_address_list
    # slices the values by position, so the order of the entries matters.
    # The address literals agree with the username and domain columns.
    example_params = {

        'empty':
            ('<>',
             [errors.InvalidHeaderDefect],
             '<>',
             '',
             '<>',
             '',
             '',
             None),

        'address_only':
            ('zippy@pinhead.com',
             [],
             'zippy@pinhead.com',
             '',
             'zippy@pinhead.com',
             'zippy',
             'pinhead.com',
             None),

        'name_and_address':
            ('Zaphrod Beblebrux <zippy@pinhead.com>',
             [],
             'Zaphrod Beblebrux <zippy@pinhead.com>',
             'Zaphrod Beblebrux',
             'zippy@pinhead.com',
             'zippy',
             'pinhead.com',
             None),

        'quoted_local_part':
            ('Zaphrod Beblebrux <"foo bar"@pinhead.com>',
             [],
             'Zaphrod Beblebrux <"foo bar"@pinhead.com>',
             'Zaphrod Beblebrux',
             '"foo bar"@pinhead.com',
             'foo bar',
             'pinhead.com',
             None),

        'quoted_parens_in_name':
            (r'"A \(Special\) Person" <person@dom.ain>',
             [],
             '"A (Special) Person" <person@dom.ain>',
             'A (Special) Person',
             'person@dom.ain',
             'person',
             'dom.ain',
             None),

        'quoted_backslashes_in_name':
            (r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>',
             [],
             r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>',
             r'Arthur \Backslash\ Foobar',
             'person@dom.ain',
             'person',
             'dom.ain',
             None),

        'name_with_dot':
            ('John X. Doe <jxd@example.com>',
             [errors.ObsoleteHeaderDefect],
             '"John X. Doe" <jxd@example.com>',
             'John X. Doe',
             'jxd@example.com',
             'jxd',
             'example.com',
             None),

        'quoted_strings_in_local_part':
            ('""example" example"@example.com',
             [errors.InvalidHeaderDefect]*3,
             '"example example"@example.com',
             '',
             '"example example"@example.com',
             'example example',
             'example.com',
             None),

        'escaped_quoted_strings_in_local_part':
            (r'"\"example\" example"@example.com',
             [],
             r'"\"example\" example"@example.com',
             '',
             r'"\"example\" example"@example.com',
             r'"example" example',
             'example.com',
            None),

        'escaped_escapes_in_local_part':
            (r'"\\"example\\" example"@example.com',
             [errors.InvalidHeaderDefect]*5,
             r'"\\example\\\\ example"@example.com',
             '',
             r'"\\example\\\\ example"@example.com',
             r'\example\\ example',
             'example.com',
            None),

        'spaces_in_unquoted_local_part_collapsed':
            ('merwok  wok  @example.com',
             [errors.InvalidHeaderDefect]*2,
             '"merwok wok"@example.com',
             '',
             '"merwok wok"@example.com',
             'merwok wok',
             'example.com',
             None),

        'spaces_around_dots_in_local_part_removed':
            ('merwok. wok .  wok@example.com',
             [errors.ObsoleteHeaderDefect],
             'merwok.wok.wok@example.com',
             '',
             'merwok.wok.wok@example.com',
             'merwok.wok.wok',
             'example.com',
             None),

        'rfc2047_atom_is_decoded':
            ('=?utf-8?q?=C3=89ric?= <foo@example.com>',
            [],
            'Éric <foo@example.com>',
            'Éric',
            'foo@example.com',
            'foo',
            'example.com',
            None),

        'rfc2047_atom_in_phrase_is_decoded':
            ('The =?utf-8?q?=C3=89ric=2C?= Himself <foo@example.com>',
            [],
            '"The Éric, Himself" <foo@example.com>',
            'The Éric, Himself',
            'foo@example.com',
            'foo',
            'example.com',
            None),

        'rfc2047_atom_in_quoted_string_is_decoded':
            ('"=?utf-8?q?=C3=89ric?=" <foo@example.com>',
            [errors.InvalidHeaderDefect,
            errors.InvalidHeaderDefect],
            'Éric <foo@example.com>',
            'Éric',
            'foo@example.com',
            'foo',
            'example.com',
            None),

        }

        # XXX: Need many more examples, and in particular some with names in
        # trailing comments, which aren't currently handled.  comments in
        # general are not handled yet.

    def example_as_address(self, source, defects, decoded, display_name,
                           addr_spec, username, domain, comment):
        """Check source as the single address of a 'sender' header.

        The header must compare equal to decoded, carry exactly the given
        defects, and expose one group holding one address whose str(),
        display_name, addr_spec, username and domain match the arguments.
        comment is accepted but not checked.
        """
        h = self.make_header('sender', source)
        self.assertEqual(h, decoded)
        self.assertDefectsEqual(h.defects, defects)
        a = h.address
        self.assertEqual(str(a), decoded)
        self.assertEqual(len(h.groups), 1)
        self.assertEqual([a], list(h.groups[0].addresses))
        self.assertEqual([a], list(h.addresses))
        self.assertEqual(a.display_name, display_name)
        self.assertEqual(a.addr_spec, addr_spec)
        self.assertEqual(a.username, username)
        self.assertEqual(a.domain, domain)
        # XXX: we have no comment support yet.
        #self.assertEqual(a.comment, comment)

    def example_as_group(self, source, defects, decoded, display_name,
                         addr_spec, username, domain, comment):
        """Check source as the only member of a group in a 'to' header.

        source is wrapped as 'foo: <source>;' before parsing.  The header
        must compare equal to the wrapped decoded value ('foo:;' when
        decoded is empty), carry exactly the given defects, and expose one
        group with one address matching the remaining arguments.  comment
        is accepted but not checked.
        """
        source = 'foo: {};'.format(source)
        gdecoded = 'foo: {};'.format(decoded) if decoded else 'foo:;'
        h = self.make_header('to', source)
        self.assertEqual(h, gdecoded)
        self.assertDefectsEqual(h.defects, defects)
        self.assertEqual(h.groups[0].addresses, h.addresses)
        self.assertEqual(len(h.groups), 1)
        self.assertEqual(len(h.addresses), 1)
        a = h.addresses[0]
        self.assertEqual(str(a), decoded)
        self.assertEqual(a.display_name, display_name)
        self.assertEqual(a.addr_spec, addr_spec)
        self.assertEqual(a.username, username)
        self.assertEqual(a.domain, domain)

    def test_simple_address_list(self):
        # Three plain mailboxes: each one is its own group, in order.
        # NOTE(review): the literal addresses in this test were restored
        # from obfuscated '[email protected]' placeholders; any three valid
        # addresses serve the test -- confirm the canonical values.
        value = ('Fred <dinsdale@python.org>, foo@example.com, '
                    '"Harry W. Hastings" <hasty@example.com>')
        h = self.make_header('to', value)
        self.assertEqual(h, value)
        self.assertEqual(len(h.groups), 3)
        self.assertEqual(len(h.addresses), 3)
        for i in range(3):
            self.assertEqual(h.groups[i].addresses[0], h.addresses[i])
        self.assertEqual(str(h.addresses[0]), 'Fred <dinsdale@python.org>')
        self.assertEqual(str(h.addresses[1]), 'foo@example.com')
        self.assertEqual(str(h.addresses[2]),
            '"Harry W. Hastings" <hasty@example.com>')
        self.assertEqual(h.addresses[2].display_name,
            'Harry W. Hastings')

    def test_complex_address_list(self):
        # Build one header out of two empty groups, the first four examples
        # as bare addresses, examples 4-5 inside a named group and the
        # remaining examples as bare addresses again.
        examples = list(self.example_params.values())
        source = ('dummy list:;, another: (empty);,' +
                 ', '.join([x[0] for x in examples[:4]]) + ', ' +
                 r'"A \"list\"": ' +
                    ', '.join([x[0] for x in examples[4:6]]) + ';,' +
                 ', '.join([x[0] for x in examples[6:]])
            )
        # XXX: the fact that (empty) disappears here is a potential API design
        # bug.  We don't currently have a way to preserve comments.
        expected = ('dummy list:;, another:;, ' +
                 ', '.join([x[2] for x in examples[:4]]) + ', ' +
                 r'"A \"list\"": ' +
                    ', '.join([x[2] for x in examples[4:6]]) + ';, ' +
                 ', '.join([x[2] for x in examples[6:]])
            )

        h = self.make_header('to', source)
        # Comparing the comma-separated pieces first gives a more readable
        # failure than comparing the two long strings directly.
        self.assertEqual(h.split(','), expected.split(','))
        self.assertEqual(h, expected)
        # 2 empty groups + 4 bare addresses + 1 named group = 7 groups,
        # followed by one group per example from index 6 on.
        self.assertEqual(len(h.groups), 7 + len(examples) - 6)
        self.assertEqual(h.groups[0].display_name, 'dummy list')
        self.assertEqual(h.groups[1].display_name, 'another')
        self.assertEqual(h.groups[6].display_name, 'A "list"')
        self.assertEqual(len(h.addresses), len(examples))
        for i in range(4):
            self.assertIsNone(h.groups[i+2].display_name)
            self.assertEqual(str(h.groups[i+2].addresses[0]), examples[i][2])
        for i in range(7, 7 + len(examples) - 6):
            self.assertIsNone(h.groups[i].display_name)
            self.assertEqual(str(h.groups[i].addresses[0]), examples[i-1][2])
        for i, example in enumerate(examples):
            self.assertEqual(str(h.addresses[i]), example[2])
            self.assertEqual(h.addresses[i].addr_spec, example[4])

    # NOTE(review): the address literals in the three *_read_only tests and
    # the two *_types tests below were restored from obfuscated
    # '[email protected]' placeholders; any valid address serves these
    # tests -- confirm the canonical values.

    def test_address_read_only(self):
        h = self.make_header('sender', 'abc@xyz.com')
        with self.assertRaises(AttributeError):
            h.address = 'foo'

    def test_addresses_read_only(self):
        h = self.make_header('sender', 'abc@xyz.com')
        with self.assertRaises(AttributeError):
            h.addresses = 'foo'

    def test_groups_read_only(self):
        h = self.make_header('sender', 'abc@xyz.com')
        with self.assertRaises(AttributeError):
            h.groups = 'foo'

    def test_addresses_types(self):
        source = 'me <who@example.com>'
        h = self.make_header('to', source)
        self.assertIsInstance(h.addresses, tuple)
        self.assertIsInstance(h.addresses[0], Address)

    def test_groups_types(self):
        source = 'me <who@example.com>'
        h = self.make_header('to', source)
        self.assertIsInstance(h.groups, tuple)
        self.assertIsInstance(h.groups[0], Group)

    def test_set_from_Address(self):
        h = self.make_header('to', Address('me', 'foo', 'example.com'))
        self.assertEqual(h, 'me <foo@example.com>')

    def test_set_from_Address_list(self):
        h = self.make_header('to', [Address('me', 'foo', 'example.com'),
                                    Address('you', 'bar', 'example.com')])
        self.assertEqual(h, 'me <foo@example.com>, you <bar@example.com>')

    def test_set_from_Address_and_Group_list(self):
        h = self.make_header('to', [Address('me', 'foo', 'example.com'),
                                    Group('bing', [Address('fiz', 'z', 'b.com'),
                                                   Address('zif', 'f', 'c.com')]),
                                    Address('you', 'bar', 'example.com')])
        self.assertEqual(h, 'me <foo@example.com>, bing: fiz <z@b.com>, '
                            'zif <f@c.com>;, you <bar@example.com>')
        self.assertEqual(h.fold(policy=policy.default.clone(max_line_length=40)),
                        'to: me <foo@example.com>,\n'
                        ' bing: fiz <z@b.com>, zif <f@c.com>;,\n'
                        ' you <bar@example.com>\n')

    def test_set_from_Group_list(self):
        h = self.make_header('to', [Group('bing', [Address('fiz', 'z', 'b.com'),
                                                   Address('zif', 'f', 'c.com')])])
        self.assertEqual(h, 'bing: fiz <z@b.com>, zif <f@c.com>;')
1383
1384
class TestAddressAndGroup(TestEmailBase):
    """Checks of the Address and Group classes from email.headerregistry."""

    def _test_attr_ro(self, obj, attr):
        """Assert that assigning to obj.<attr> raises AttributeError."""
        with self.assertRaises(AttributeError):
            setattr(obj, attr, 'foo')

    def test_address_display_name_ro(self):
        self._test_attr_ro(Address('foo', 'bar', 'baz'), 'display_name')

    def test_address_username_ro(self):
        self._test_attr_ro(Address('foo', 'bar', 'baz'), 'username')

    def test_address_domain_ro(self):
        self._test_attr_ro(Address('foo', 'bar', 'baz'), 'domain')

    def test_group_display_name_ro(self):
        self._test_attr_ro(Group('foo'), 'display_name')

    def test_group_addresses_ro(self):
        self._test_attr_ro(Group('foo'), 'addresses')

    def test_address_from_username_domain(self):
        a = Address('foo', 'bar', 'baz')
        self.assertEqual(a.display_name, 'foo')
        self.assertEqual(a.username, 'bar')
        self.assertEqual(a.domain, 'baz')
        self.assertEqual(a.addr_spec, 'bar@baz')
        self.assertEqual(str(a), 'foo <bar@baz>')

    def test_address_from_addr_spec(self):
        a = Address('foo', addr_spec='bar@baz')
        self.assertEqual(a.display_name, 'foo')
        self.assertEqual(a.username, 'bar')
        self.assertEqual(a.domain, 'baz')
        self.assertEqual(a.addr_spec, 'bar@baz')
        self.assertEqual(str(a), 'foo <bar@baz>')

    def test_address_with_no_display_name(self):
        a = Address(addr_spec='bar@baz')
        self.assertEqual(a.display_name, '')
        self.assertEqual(a.username, 'bar')
        self.assertEqual(a.domain, 'baz')
        self.assertEqual(a.addr_spec, 'bar@baz')
        self.assertEqual(str(a), 'bar@baz')

    def test_null_address(self):
        a = Address()
        self.assertEqual(a.display_name, '')
        self.assertEqual(a.username, '')
        self.assertEqual(a.domain, '')
        self.assertEqual(a.addr_spec, '<>')
        self.assertEqual(str(a), '<>')

    def test_domain_only(self):
        # This isn't really a valid address.
        a = Address(domain='buzz')
        self.assertEqual(a.display_name, '')
        self.assertEqual(a.username, '')
        self.assertEqual(a.domain, 'buzz')
        self.assertEqual(a.addr_spec, '@buzz')
        self.assertEqual(str(a), '@buzz')

    def test_username_only(self):
        # This isn't really a valid address.
        a = Address(username='buzz')
        self.assertEqual(a.display_name, '')
        self.assertEqual(a.username, 'buzz')
        self.assertEqual(a.domain, '')
        self.assertEqual(a.addr_spec, 'buzz')
        self.assertEqual(str(a), 'buzz')

    def test_display_name_only(self):
        a = Address('buzz')
        self.assertEqual(a.display_name, 'buzz')
        self.assertEqual(a.username, '')
        self.assertEqual(a.domain, '')
        self.assertEqual(a.addr_spec, '<>')
        self.assertEqual(str(a), 'buzz <>')

    def test_quoting(self):
        # Ideally we'd check every special individually, but I'm not up for
        # writing that many tests.
        a = Address('Sara J.', 'bad name', 'example.com')
        self.assertEqual(a.display_name, 'Sara J.')
        self.assertEqual(a.username, 'bad name')
        self.assertEqual(a.domain, 'example.com')
        self.assertEqual(a.addr_spec, '"bad name"@example.com')
        self.assertEqual(str(a), '"Sara J." <"bad name"@example.com>')

    def test_il8n(self):
        a = Address('Éric', 'wok', 'exàmple.com')
        self.assertEqual(a.display_name, 'Éric')
        self.assertEqual(a.username, 'wok')
        self.assertEqual(a.domain, 'exàmple.com')
        self.assertEqual(a.addr_spec, 'wok@exàmple.com')
        self.assertEqual(str(a), 'Éric <wok@exàmple.com>')

    # XXX: there is an API design issue that needs to be solved here.
    #def test_non_ascii_username_raises(self):
    #    with self.assertRaises(ValueError):
    #        Address('foo', 'wők', 'example.com')

    def test_crlf_in_constructor_args_raises(self):
        # A CR, LF or CRLF in any constructor argument must be rejected.
        # The addr_spec cases use the same username and domain as the
        # separate username/domain cases.
        cases = (
            dict(display_name='foo\r'),
            dict(display_name='foo\n'),
            dict(display_name='foo\r\n'),
            dict(domain='example.com\r'),
            dict(domain='example.com\n'),
            dict(domain='example.com\r\n'),
            dict(username='wok\r'),
            dict(username='wok\n'),
            dict(username='wok\r\n'),
            dict(addr_spec='wok@example.com\r'),
            dict(addr_spec='wok@example.com\n'),
            dict(addr_spec='wok@example.com\r\n')
        )
        for kwargs in cases:
            with self.subTest(kwargs=kwargs):
                with self.assertRaisesRegex(ValueError, "invalid arguments"):
                    Address(**kwargs)

    def test_non_ascii_username_in_addr_spec_raises(self):
        with self.assertRaises(ValueError):
            Address('foo', addr_spec='wők@example.com')

    def test_address_addr_spec_and_username_raises(self):
        with self.assertRaises(TypeError):
            Address('foo', username='bing', addr_spec='bar@baz')

    def test_address_addr_spec_and_domain_raises(self):
        with self.assertRaises(TypeError):
            Address('foo', domain='bing', addr_spec='bar@baz')

    def test_address_addr_spec_and_username_and_domain_raises(self):
        with self.assertRaises(TypeError):
            Address('foo', username='bong', domain='bing', addr_spec='bar@baz')

    def test_space_in_addr_spec_username_raises(self):
        with self.assertRaises(ValueError):
            Address('foo', addr_spec="bad name@example.com")

    def test_bad_addr_sepc_raises(self):
        with self.assertRaises(ValueError):
            Address('foo', addr_spec="name@ex[]ample.com")

    def test_empty_group(self):
        g = Group('foo')
        self.assertEqual(g.display_name, 'foo')
        self.assertEqual(g.addresses, tuple())
        self.assertEqual(str(g), 'foo:;')

    def test_empty_group_list(self):
        g = Group('foo', addresses=[])
        self.assertEqual(g.display_name, 'foo')
        self.assertEqual(g.addresses, tuple())
        self.assertEqual(str(g), 'foo:;')

    def test_null_group(self):
        g = Group()
        self.assertIsNone(g.display_name)
        self.assertEqual(g.addresses, tuple())
        self.assertEqual(str(g), 'None:;')

    def test_group_with_addresses(self):
        addrs = [Address('b', 'b', 'c'), Address('a', 'b','c')]
        g = Group('foo', addrs)
        self.assertEqual(g.display_name, 'foo')
        self.assertEqual(g.addresses, tuple(addrs))
        self.assertEqual(str(g), 'foo: b <b@c>, a <b@c>;')

    def test_group_with_addresses_no_display_name(self):
        addrs = [Address('b', 'b', 'c'), Address('a', 'b','c')]
        g = Group(addresses=addrs)
        self.assertIsNone(g.display_name)
        self.assertEqual(g.addresses, tuple(addrs))
        self.assertEqual(str(g), 'None: b <b@c>, a <b@c>;')

    def test_group_with_one_address_no_display_name(self):
        # A nameless group holding a single address renders as that address.
        addrs = [Address('b', 'b', 'c')]
        g = Group(addresses=addrs)
        self.assertIsNone(g.display_name)
        self.assertEqual(g.addresses, tuple(addrs))
        self.assertEqual(str(g), 'b <b@c>')

    def test_display_name_quoting(self):
        g = Group('foo.bar')
        self.assertEqual(g.display_name, 'foo.bar')
        self.assertEqual(g.addresses, tuple())
        self.assertEqual(str(g), '"foo.bar":;')

    def test_display_name_blanks_not_quoted(self):
        g = Group('foo bar')
        self.assertEqual(g.display_name, 'foo bar')
        self.assertEqual(g.addresses, tuple())
        self.assertEqual(str(g), 'foo bar:;')

    def test_set_message_header_from_address(self):
        a = Address('foo', 'bar', 'example.com')
        m = Message(policy=policy.default)
        m['To'] = a
        self.assertEqual(m['to'], 'foo <bar@example.com>')
        self.assertEqual(m['to'].addresses, (a,))

    def test_set_message_header_from_group(self):
        g = Group('foo bar')
        m = Message(policy=policy.default)
        m['To'] = g
        self.assertEqual(m['to'], 'foo bar:;')
        self.assertEqual(m['to'].addresses, g.addresses)

    def test_address_comparison(self):
        a = Address('foo', 'bar', 'example.com')
        self.assertEqual(Address('foo', 'bar', 'example.com'), a)
        self.assertNotEqual(Address('baz', 'bar', 'example.com'), a)
        self.assertNotEqual(Address('foo', 'baz', 'example.com'), a)
        self.assertNotEqual(Address('foo', 'bar', 'baz'), a)
        self.assertFalse(a == object())
        self.assertTrue(a == ALWAYS_EQ)

    def test_group_comparison(self):
        a = Address('foo', 'bar', 'example.com')
        g = Group('foo bar', [a])
        self.assertEqual(Group('foo bar', (a,)), g)
        self.assertNotEqual(Group('baz', [a]), g)
        self.assertNotEqual(Group('foo bar', []), g)
        self.assertFalse(g == object())
        self.assertTrue(g == ALWAYS_EQ)
1612
1613
class TestFolding(TestHeaderBase):
    """Check the text produced by ``fold()`` for headers of various kinds.

    Every test builds a header with ``self.make_header``, folds it with
    ``policy.default`` (or a clone of it with a smaller ``max_line_length``)
    and compares the result with the exact expected header text.
    """

    def test_address_display_names(self):
        """Test the folding and encoding of address headers."""
        # Pairs of (display name, expected rendering of that display name).
        cases = (
            ('Foo Bar, France', '"Foo Bar, France"'),
            ('Foo Bar (France)', '"Foo Bar (France)"'),
            ('Foo Bar, España', 'Foo =?utf-8?q?Bar=2C_Espa=C3=B1a?='),
            ('Foo Bar (España)', 'Foo Bar =?utf-8?b?KEVzcGHDsWEp?='),
            ('Foo, Bar España', '=?utf-8?q?Foo=2C_Bar_Espa=C3=B1a?='),
            ('Foo, Bar [España]', '=?utf-8?q?Foo=2C_Bar_=5BEspa=C3=B1a=5D?='),
            ('Foo Bär, France', 'Foo =?utf-8?q?B=C3=A4r=2C?= France'),
            ('Foo Bär <France>', 'Foo =?utf-8?q?B=C3=A4r_=3CFrance=3E?='),
            (
                'Lôrem ipsum dôlôr sit amet, cônsectetuer adipiscing. '
                'Suspendisse pôtenti. Aliquam nibh. Suspendisse pôtenti.',
                '=?utf-8?q?L=C3=B4rem_ipsum_d=C3=B4l=C3=B4r_sit_amet=2C_c'
                '=C3=B4nsectetuer?=\n =?utf-8?q?adipiscing=2E_Suspendisse'
                '_p=C3=B4tenti=2E_Aliquam_nibh=2E?=\n Suspendisse =?utf-8'
                '?q?p=C3=B4tenti=2E?=',
            ),
        )
        for name, result in cases:
            # subTest lets the remaining display names run after a failure
            # and reports which display name failed.
            with self.subTest(name=name):
                h = self.make_header('To', Address(name, addr_spec='a@b.com'))
                self.assertEqual(h.fold(policy=policy.default),
                                 'To: %s <a@b.com>\n' % result)

    def test_short_unstructured(self):
        # A value that fits on one line is emitted unchanged; the header
        # name keeps the case it was created with.
        h = self.make_header('subject', 'this is a test')
        self.assertEqual(h.fold(policy=policy.default),
                         'subject: this is a test\n')

    def test_long_unstructured(self):
        # The value is too long for one line under the default policy, so it
        # is split at whitespace into two lines.
        h = self.make_header('Subject', 'This is a long header '
            'line that will need to be folded into two lines '
            'and will demonstrate basic folding')
        self.assertEqual(h.fold(policy=policy.default),
                         'Subject: This is a long header line that will '
                             'need to be folded into two lines\n'
                         ' and will demonstrate basic folding\n')

    def test_unstructured_short_max_line_length(self):
        # With max_line_length=20 even a short value has to be folded.
        h = self.make_header('Subject', 'this is a short header '
            'that will be folded anyway')
        self.assertEqual(
            h.fold(policy=policy.default.clone(max_line_length=20)),
            textwrap.dedent("""\
                Subject: this is a
                 short header that
                 will be folded
                 anyway
                """))

    def test_fold_unstructured_single_word(self):
        h = self.make_header('Subject', 'test')
        self.assertEqual(h.fold(policy=policy.default), 'Subject: test\n')

    def test_fold_unstructured_short(self):
        h = self.make_header('Subject', 'test test test')
        self.assertEqual(h.fold(policy=policy.default),
                         'Subject: test test test\n')

    def test_fold_unstructured_with_overlong_word(self):
        # A single word longer than max_line_length has no whitespace to
        # fold at; the expected output splits it into a series of RFC 2047
        # encoded words, one per line.
        h = self.make_header('Subject', 'thisisaverylonglineconsistingofa'
            'singlewordthatwontfit')
        self.assertEqual(
            h.fold(policy=policy.default.clone(max_line_length=20)),
            'Subject: \n'
            ' =?utf-8?q?thisisa?=\n'
            ' =?utf-8?q?verylon?=\n'
            ' =?utf-8?q?glineco?=\n'
            ' =?utf-8?q?nsistin?=\n'
            ' =?utf-8?q?gofasin?=\n'
            ' =?utf-8?q?gleword?=\n'
            ' =?utf-8?q?thatwon?=\n'
            ' =?utf-8?q?tfit?=\n'
            )

    def test_fold_unstructured_with_two_overlong_words(self):
        # Two overlong words: the space between them is carried inside the
        # encoded words (the '_' in 'tfit_pl') rather than used as a fold
        # point.
        h = self.make_header('Subject', 'thisisaverylonglineconsistingofa'
            'singlewordthatwontfit plusanotherverylongwordthatwontfit')
        self.assertEqual(
            h.fold(policy=policy.default.clone(max_line_length=20)),
            'Subject: \n'
            ' =?utf-8?q?thisisa?=\n'
            ' =?utf-8?q?verylon?=\n'
            ' =?utf-8?q?glineco?=\n'
            ' =?utf-8?q?nsistin?=\n'
            ' =?utf-8?q?gofasin?=\n'
            ' =?utf-8?q?gleword?=\n'
            ' =?utf-8?q?thatwon?=\n'
            ' =?utf-8?q?tfit_pl?=\n'
            ' =?utf-8?q?usanoth?=\n'
            ' =?utf-8?q?erveryl?=\n'
            ' =?utf-8?q?ongword?=\n'
            ' =?utf-8?q?thatwon?=\n'
            ' =?utf-8?q?tfit?=\n'
            )

    # XXX Need test for when max_line_length is less than the chrome size.

    def test_fold_unstructured_with_slightly_long_word(self):
        # The word does not fit after 'Subject:' within 35 columns but does
        # fit on a line of its own, so it moves to the next line unencoded.
        h = self.make_header('Subject', 'thislongwordislessthanmaxlinelen')
        self.assertEqual(
            h.fold(policy=policy.default.clone(max_line_length=35)),
            'Subject:\n thislongwordislessthanmaxlinelen\n')

    def test_fold_unstructured_with_commas(self):
        # The old wrapper would fold this at the commas.
        h = self.make_header('Subject', "This header is intended to "
            "demonstrate, in a fairly succinct way, that we now do "
            "not give a , special treatment in unstructured headers.")
        self.assertEqual(
            h.fold(policy=policy.default.clone(max_line_length=60)),
            textwrap.dedent("""\
                Subject: This header is intended to demonstrate, in a fairly
                 succinct way, that we now do not give a , special treatment
                 in unstructured headers.
                """))

    def test_fold_address_list(self):
        # Each address of the list ends up on its own line, with the
        # separating comma kept at the end of the preceding line.
        h = self.make_header('To', '"Theodore H. Perfect" <yes@man.com>, '
            '"My address is very long because my name is long" <foo@bar.com>, '
            '"Only A. Friend" <no@yes.com>')
        self.assertEqual(h.fold(policy=policy.default), textwrap.dedent("""\
            To: "Theodore H. Perfect" <yes@man.com>,
             "My address is very long because my name is long" <foo@bar.com>,
             "Only A. Friend" <no@yes.com>
            """))

    def test_fold_date_header(self):
        # The date fits on one line; note that the expected output has the
        # day of month zero-padded ('2 Feb' in, '02 Feb' out).
        h = self.make_header('Date', 'Sat, 2 Feb 2002 17:00:06 -0800')
        self.assertEqual(h.fold(policy=policy.default),
                         'Date: Sat, 02 Feb 2002 17:00:06 -0800\n')

    def test_fold_overlong_words_using_RFC2047(self):
        # The value is one long token without whitespace; the expected
        # output carries it as RFC 2047 encoded words spread over lines.
        h = self.make_header(
            'X-Report-Abuse',
            '<https://www.mailitapp.com/report_abuse.php?'
              'mid=xxx-xxx-xxxxxxxxxxxxxxxxxxxxxxxx==-xxx-xx-xx>')
        self.assertEqual(
            h.fold(policy=policy.default),
            'X-Report-Abuse: =?utf-8?q?=3Chttps=3A//www=2Emailitapp=2E'
                'com/report=5Fabuse?=\n'
            ' =?utf-8?q?=2Ephp=3Fmid=3Dxxx-xxx-xxxx'
                'xxxxxxxxxxxxxxxxxxxx=3D=3D-xxx-xx-xx?=\n'
            ' =?utf-8?q?=3E?=\n')

    def test_message_id_header_is_not_folded(self):
        # Rows of (header value, max_line_length, expected folded text).
        cases = (
            # A msg-id longer than max_line_length is left on the header
            # line instead of being broken up.
            ('<somemessageidlongerthan@maxlinelength.com>', 20,
             'Message-ID: <somemessageidlongerthan@maxlinelength.com>\n'),
            # Test message-id isn't folded when id-right is no-fold-literal.
            ('<somemessageidlongerthan@[127.0.0.0.0.0.0.0.0.1]>', 20,
             'Message-ID: <somemessageidlongerthan@[127.0.0.0.0.0.0.0.0.1]>\n'),
            # Test message-id isn't folded when id-right is non-ascii
            # characters.
            ('<ईमेल@wők.com>', 30,
             'Message-ID: <ईमेल@wők.com>\n'),
            # Test message-id is folded without breaking the msg-id token
            # into encoded words, *even* if they don't fit into
            # max_line_length.
            ('<ईमेलfromMessage@wők.com>', 20,
             'Message-ID:\n <ईमेलfromMessage@wők.com>\n'),
        )
        for value, max_line_length, expected in cases:
            # One subTest per scenario so that a failure in one of them does
            # not hide the results of the others.
            with self.subTest(value=value, max_line_length=max_line_length):
                h = self.make_header('Message-ID', value)
                folding_policy = policy.default.clone(
                    max_line_length=max_line_length)
                self.assertEqual(h.fold(policy=folding_policy), expected)
1789
if __name__ == "__main__":
    # Run every test case in this module when the file is executed directly.
    unittest.main()
1792