1# Copyright (C) 2001-2010 Python Software Foundation 2# Contact: [email protected] 3# email package unit tests 4 5import re 6import time 7import base64 8import unittest 9import textwrap 10import warnings 11 12from io import StringIO, BytesIO 13from itertools import chain 14from random import choice 15from threading import Thread 16from unittest.mock import patch 17 18import email 19import email.policy 20 21from email.charset import Charset 22from email.generator import Generator, DecodedGenerator, BytesGenerator 23from email.header import Header, decode_header, make_header 24from email.headerregistry import HeaderRegistry 25from email.message import Message 26from email.mime.application import MIMEApplication 27from email.mime.audio import MIMEAudio 28from email.mime.base import MIMEBase 29from email.mime.image import MIMEImage 30from email.mime.message import MIMEMessage 31from email.mime.multipart import MIMEMultipart 32from email.mime.nonmultipart import MIMENonMultipart 33from email.mime.text import MIMEText 34from email.parser import Parser, HeaderParser 35from email import base64mime 36from email import encoders 37from email import errors 38from email import iterators 39from email import quoprimime 40from email import utils 41 42from test.support import threading_helper 43from test.support.os_helper import unlink 44from test.test_email import openfile, TestEmailBase 45 46# These imports are documented to work, but we are testing them using a 47# different path, so we import them here just to make sure they are importable. 
from email.parser import FeedParser, BytesFeedParser

NL = '\n'
EMPTYSTRING = ''
SPACE = ' '


# Test various aspects of the Message class's API
#
# Conventions used throughout this class:
#   * `self._msgobj(name)` and `openfile(name, ...)` come from the test
#     support code (TestEmailBase / test.test_email), which is not visible
#     here; presumably they parse / open the named fixture from the test
#     data directory -- confirm against that module.
#   * Explanations are written as `#` comments rather than docstrings; a
#     docstring on a test method replaces the test name in unittest's
#     verbose output.
class TestMessageAPI(TestEmailBase):
    # get_all() returns every value of a repeated header, or the supplied
    # default when the header is absent.
    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        # NOTE(review): the '[email protected]' literals look like an e-mail
        # obfuscation artefact rather than real fixture addresses -- verify
        # against the Cc: headers of msg_20.txt.
        eq(msg.get_all('cc'), ['[email protected]', '[email protected]', '[email protected]'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    # set_charset() adds the MIME headers on a bare message, removes the
    # charset parameter again when given None, and leaves pre-existing MIME
    # headers alone apart from appending the charset parameter.
    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    # set_charset() also accepts a charset name instead of a Charset object.
    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    # Bytes payload + utf-8 charset: expected to be stored base64-encoded
    # and to round-trip through get_payload(decode=True).
    def test_set_payload_with_8bit_data_and_charset(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], 'base64')
        self.assertEqual(msg.get_payload(decode=True), data)
        self.assertEqual(msg.get_payload(), '0JDQkdCS\n')

    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data.decode('utf-8'), charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_to_list(self):
        msg = Message()
        msg.set_payload([])
        self.assertEqual(msg.get_payload(), [])

    # attach() must raise TypeError when the existing payload is a string,
    # even though the Content-Type header claims multipart.
    def test_attach_when_payload_is_string(self):
        msg = Message()
        msg['Content-Type'] = 'multipart/mixed'
        msg.set_payload('string payload')
        sub_msg = MIMEMessage(Message())
        self.assertRaisesRegex(TypeError, "[Aa]ttach.*non-multipart",
                               msg.attach, sub_msg)

    # get_charsets() returns one entry per part; the optional argument is
    # the value reported for parts that declare no charset.
    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt', encoding="utf-8") as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', encoding="utf-8") as fp:
            msgdata = fp.read().encode('ascii')
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    # Every spelling of the uuencode transfer encoding must decode the
    # payload; undecodable data is returned as-is.
    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning to a header key *appends* a header, and lookups
            # return the first match.  Without this delete only the first
            # spelling would ever be consulted.  Deleting a header that is
            # not present is a no-op, so the first iteration is fine.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world',
               'wrong decoded payload for encoding %s' % cte)
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    # Flattening msg_07.txt through DecodedGenerator must reproduce the
    # text stored in msg_17.txt.
    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt', encoding="utf-8") as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    # str(msg) reproduces the fixture text; with unixfrom=True the same
    # text is preceded by a single 'From ' envelope line.
    def test_as_string(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt', encoding="utf-8") as fp:
            text = fp.read()
        self.assertEqual(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        self.assertEqual(text, NL.join(lines[1:]))

    # as_string(policy=...) must match what a Generator built with the same
    # policy writes.
    def test_as_string_policy(self):
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_string(policy=newpolicy)
        s = StringIO()
        g = Generator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    def test_nonascii_as_string_without_cte(self):
        m = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-type: text/plain; charset="iso-8859-1"

            Test if non-ascii messages with no Content-Transfer-Encoding set
            can be as_string'd:
            Föö bär
            """)
        source = m.encode('iso-8859-1')
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-type: text/plain; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            Test if non-ascii messages with no Content-Transfer-Encoding set
            can be as_string'd:
            F=F6=F6 b=E4r
            """)
        msg = email.message_from_bytes(source)
        self.assertEqual(msg.as_string(), expected)

    def test_nonascii_as_string_without_content_type_and_cte(self):
        m = textwrap.dedent("""\
            MIME-Version: 1.0

            Test if non-ascii messages with no Content-Type nor
            Content-Transfer-Encoding set can be as_string'd:
            Föö bär
            """)
        source = m.encode('iso-8859-1')
        expected = source.decode('ascii', 'replace')
        msg = email.message_from_bytes(source)
        self.assertEqual(msg.as_string(), expected)

    # Bytes counterpart of test_as_string.
    def test_as_bytes(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt', encoding="utf-8") as fp:
            data = fp.read().encode('ascii')
        self.assertEqual(data, bytes(msg))
        fullrepr = msg.as_bytes(unixfrom=True)
        lines = fullrepr.split(b'\n')
        self.assertTrue(lines[0].startswith(b'From '))
        self.assertEqual(data, b'\n'.join(lines[1:]))

    # Bytes counterpart of test_as_string_policy.
    def test_as_bytes_policy(self):
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_bytes(policy=newpolicy)
        s = BytesIO()
        g = BytesGenerator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    # get_params() on an arbitrary header: valueless parameters map to '',
    # quoted values are unquoted, and a missing Content-Type yields None.
    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    # set_param() creates the Content-Type header on demand, quotes values
    # (visible with unquote=False), and can target another header.
    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    # del_param() removes one parameter; re-adding it with set_param()
    # places it after the remaining parameters.
    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting param on empty msg should not raise exception.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], existing_header)

    # set_type() rejects a value without a '/', and keeps existing
    # parameters when the type is changed.
    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    # The following three groups check get_content_type(),
    # get_content_maintype() and get_content_subtype() against the same six
    # situations: header missing, header missing with a default type set,
    # and four parsed fixtures.

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    # A Content-Type value without a '/' falls back to text/plain.
    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    # replace_header() rewrites the first matching header in place (order
    # preserved, later duplicates untouched) and raises KeyError when no
    # header matches.
    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    # get_content_disposition() returns None without the header, otherwise
    # the lower-cased disposition with parameters stripped.
    def test_get_content_disposition(self):
        msg = Message()
        self.assertIsNone(msg.get_content_disposition())
        msg.add_header('Content-Disposition', 'attachment',
                       filename='random.avi')
        self.assertEqual(msg.get_content_disposition(), 'attachment')
        msg.replace_header('Content-Disposition', 'inline')
        self.assertEqual(msg.get_content_disposition(), 'inline')
        msg.replace_header('Content-Disposition', 'InlinE')
        self.assertEqual(msg.get_content_disposition(), 'inline')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    # A non-ASCII parameter value is emitted in RFC 2231 form
    # (name*=charset''percent-encoded).
    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    # A (charset, language, value) triple selects the charset used for the
    # RFC 2231 encoding.
    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    # The next three tests store a transfer-encoded *bytes* payload and
    # check that decoding yields the same raw bytes whatever charset the
    # Content-Type header declares.

    def test_binary_quopri_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'quoted-printable'
            msg.set_payload(b'foo=e6=96=87bar')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_base64_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'base64'
            msg.set_payload(b'Zm9v5paHYmFy')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_uuencode_payload(self):
        for charset in ('latin-1', 'ascii'):
            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
                msg = Message()
                msg['content-type'] = 'text/plain; charset=%s' % charset
                msg['content-transfer-encoding'] = encoding
                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
                # Adjacent literals are joined into one format string, so
                # the failure message reads as a sentence (it used to be
                # the repr of a two-element tuple).
                self.assertEqual(
                    msg.get_payload(decode=True),
                    b'foo\xe6\x96\x87bar',
                    ('get_payload returns wrong result '
                     'with charset {0} and encoding {1}.'
                     ).format(charset, encoding))

    # A keyword whose value is None becomes a bare parameter name, with the
    # underscore in the keyword turned into a dash.
    def test_add_header_with_name_only_param(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
763 def test_embedded_header_via_Header_rejected(self): 764 msg = Message() 765 msg['Dummy'] = Header('dummy\nX-Injected-Header: test') 766 self.assertRaises(errors.HeaderParseError, msg.as_string) 767 768 def test_embedded_header_via_string_rejected(self): 769 msg = Message() 770 msg['Dummy'] = 'dummy\nX-Injected-Header: test' 771 self.assertRaises(errors.HeaderParseError, msg.as_string) 772 773 def test_unicode_header_defaults_to_utf8_encoding(self): 774 # Issue 14291 775 m = MIMEText('abc\n') 776 m['Subject'] = 'É test' 777 self.assertEqual(str(m),textwrap.dedent("""\ 778 Content-Type: text/plain; charset="us-ascii" 779 MIME-Version: 1.0 780 Content-Transfer-Encoding: 7bit 781 Subject: =?utf-8?q?=C3=89_test?= 782 783 abc 784 """)) 785 786 def test_unicode_body_defaults_to_utf8_encoding(self): 787 # Issue 14291 788 m = MIMEText('É testabc\n') 789 self.assertEqual(str(m),textwrap.dedent("""\ 790 Content-Type: text/plain; charset="utf-8" 791 MIME-Version: 1.0 792 Content-Transfer-Encoding: base64 793 794 w4kgdGVzdGFiYwo= 795 """)) 796 797 798# Test the email.encoders module 799class TestEncoders(unittest.TestCase): 800 801 def test_EncodersEncode_base64(self): 802 with openfile('python.gif', 'rb') as fp: 803 bindata = fp.read() 804 mimed = email.mime.image.MIMEImage(bindata) 805 base64ed = mimed.get_payload() 806 # the transfer-encoded body lines should all be <=76 characters 807 lines = base64ed.split('\n') 808 self.assertLessEqual(max([ len(x) for x in lines ]), 76) 809 810 def test_encode_empty_payload(self): 811 eq = self.assertEqual 812 msg = Message() 813 msg.set_charset('us-ascii') 814 eq(msg['content-transfer-encoding'], '7bit') 815 816 def test_default_cte(self): 817 eq = self.assertEqual 818 # 7bit data and the default us-ascii _charset 819 msg = MIMEText('hello world') 820 eq(msg['content-transfer-encoding'], '7bit') 821 # Similar, but with 8bit data 822 msg = MIMEText('hello \xf8 world') 823 eq(msg['content-transfer-encoding'], 'base64') 824 # And now 
with a different charset 825 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1') 826 eq(msg['content-transfer-encoding'], 'quoted-printable') 827 828 def test_encode7or8bit(self): 829 # Make sure a charset whose input character set is 8bit but 830 # whose output character set is 7bit gets a transfer-encoding 831 # of 7bit. 832 eq = self.assertEqual 833 msg = MIMEText('文\n', _charset='euc-jp') 834 eq(msg['content-transfer-encoding'], '7bit') 835 eq(msg.as_string(), textwrap.dedent("""\ 836 MIME-Version: 1.0 837 Content-Type: text/plain; charset="iso-2022-jp" 838 Content-Transfer-Encoding: 7bit 839 840 \x1b$BJ8\x1b(B 841 """)) 842 843 def test_qp_encode_latin1(self): 844 msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1') 845 self.assertEqual(str(msg), textwrap.dedent("""\ 846 MIME-Version: 1.0 847 Content-Type: text/text; charset="iso-8859-1" 848 Content-Transfer-Encoding: quoted-printable 849 850 =E1=F6 851 """)) 852 853 def test_qp_encode_non_latin1(self): 854 # Issue 16948 855 msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2') 856 self.assertEqual(str(msg), textwrap.dedent("""\ 857 MIME-Version: 1.0 858 Content-Type: text/text; charset="iso-8859-2" 859 Content-Transfer-Encoding: quoted-printable 860 861 =BF 862 """)) 863 864 865# Test long header wrapping 866class TestLongHeaders(TestEmailBase): 867 868 maxDiff = None 869 870 def test_split_long_continuation(self): 871 eq = self.ndiffAssertEqual 872 msg = email.message_from_string("""\ 873Subject: bug demonstration 874\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 875\tmore text 876 877test 878""") 879 sfp = StringIO() 880 g = Generator(sfp) 881 g.flatten(msg) 882 eq(sfp.getvalue(), """\ 883Subject: bug demonstration 884\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 885\tmore text 886 887test 888""") 889 890 def 
test_another_long_almost_unsplittable_header(self): 891 eq = self.ndiffAssertEqual 892 hstr = """\ 893bug demonstration 894\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 895\tmore text""" 896 h = Header(hstr, continuation_ws='\t') 897 eq(h.encode(), """\ 898bug demonstration 899\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 900\tmore text""") 901 h = Header(hstr.replace('\t', ' ')) 902 eq(h.encode(), """\ 903bug demonstration 904 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 905 more text""") 906 907 def test_long_nonstring(self): 908 eq = self.ndiffAssertEqual 909 g = Charset("iso-8859-1") 910 cz = Charset("iso-8859-2") 911 utf8 = Charset("utf-8") 912 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband ' 913 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 914 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 915 b'bef\xf6rdert. ') 916 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' 917 b'd\xf9vtipu.. ') 918 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' 919 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' 920 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' 921 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' 922 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' 923 'Nunstuck git und Slotermeyer? Ja! 
Beiherhund das Oder ' 924 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' 925 '\u3044\u307e\u3059\u3002') 926 h = Header(g_head, g, header_name='Subject') 927 h.append(cz_head, cz) 928 h.append(utf8_head, utf8) 929 msg = Message() 930 msg['Subject'] = h 931 sfp = StringIO() 932 g = Generator(sfp) 933 g.flatten(msg) 934 eq(sfp.getvalue(), """\ 935Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?= 936 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?= 937 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?= 938 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?= 939 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= 940 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?= 941 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?= 942 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?= 943 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?= 944 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?= 945 =?utf-8?b?44CC?= 946 947""") 948 eq(h.encode(maxlinelen=76), """\ 949=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?= 950 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?= 951 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?= 952 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?= 953 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= 954 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?= 955 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?= 956 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?= 957 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?= 958 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?= 
959 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""") 960 961 def test_long_header_encode(self): 962 eq = self.ndiffAssertEqual 963 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 964 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 965 header_name='X-Foobar-Spoink-Defrobnit') 966 eq(h.encode(), '''\ 967wasnipoop; giraffes="very-long-necked-animals"; 968 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 969 970 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self): 971 eq = self.ndiffAssertEqual 972 h = Header('wasnipoop; giraffes="very-long-necked-animals"; ' 973 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 974 header_name='X-Foobar-Spoink-Defrobnit', 975 continuation_ws='\t') 976 eq(h.encode(), '''\ 977wasnipoop; giraffes="very-long-necked-animals"; 978 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 979 980 def test_long_header_encode_with_tab_continuation(self): 981 eq = self.ndiffAssertEqual 982 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t' 983 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"', 984 header_name='X-Foobar-Spoink-Defrobnit', 985 continuation_ws='\t') 986 eq(h.encode(), '''\ 987wasnipoop; giraffes="very-long-necked-animals"; 988\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''') 989 990 def test_header_encode_with_different_output_charset(self): 991 h = Header('文', 'euc-jp') 992 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=") 993 994 def test_long_header_encode_with_different_output_charset(self): 995 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4' 996 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4' 997 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4' 998 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp') 999 res = """\ 1000=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?= 1001 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=""" 
1002 self.assertEqual(h.encode(), res) 1003 1004 def test_header_splitter(self): 1005 eq = self.ndiffAssertEqual 1006 msg = MIMEText('') 1007 # It'd be great if we could use add_header() here, but that doesn't 1008 # guarantee an order of the parameters. 1009 msg['X-Foobar-Spoink-Defrobnit'] = ( 1010 'wasnipoop; giraffes="very-long-necked-animals"; ' 1011 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"') 1012 sfp = StringIO() 1013 g = Generator(sfp) 1014 g.flatten(msg) 1015 eq(sfp.getvalue(), '''\ 1016Content-Type: text/plain; charset="us-ascii" 1017MIME-Version: 1.0 1018Content-Transfer-Encoding: 7bit 1019X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals"; 1020 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey" 1021 1022''') 1023 1024 def test_no_semis_header_splitter(self): 1025 eq = self.ndiffAssertEqual 1026 msg = Message() 1027 msg['From'] = '[email protected]' 1028 msg['References'] = SPACE.join('<%[email protected]>' % i for i in range(10)) 1029 msg.set_payload('Test') 1030 sfp = StringIO() 1031 g = Generator(sfp) 1032 g.flatten(msg) 1033 eq(sfp.getvalue(), """\ 1034From: [email protected] 1035References: <[email protected]> <[email protected]> <[email protected]> <[email protected]> <[email protected]> 1036 <[email protected]> <[email protected]> <[email protected]> <[email protected]> <[email protected]> 1037 1038Test""") 1039 1040 def test_last_split_chunk_does_not_fit(self): 1041 eq = self.ndiffAssertEqual 1042 h = Header('Subject: the first part of this is short, but_the_second' 1043 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1044 '_all_by_itself') 1045 eq(h.encode(), """\ 1046Subject: the first part of this is short, 1047 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1048 1049 def test_splittable_leading_char_followed_by_overlong_unsplittable(self): 1050 eq = self.ndiffAssertEqual 1051 h = Header(', but_the_second' 1052 
'_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1053 '_all_by_itself') 1054 eq(h.encode(), """\ 1055, 1056 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1057 1058 def test_multiple_splittable_leading_char_followed_by_overlong_unsplittable(self): 1059 eq = self.ndiffAssertEqual 1060 h = Header(', , but_the_second' 1061 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' 1062 '_all_by_itself') 1063 eq(h.encode(), """\ 1064, , 1065 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") 1066 1067 def test_trailing_splittable_on_overlong_unsplittable(self): 1068 eq = self.ndiffAssertEqual 1069 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1070 'be_on_a_line_all_by_itself;') 1071 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_" 1072 "be_on_a_line_all_by_itself;") 1073 1074 def test_trailing_splittable_on_overlong_unsplittable_with_leading_splittable(self): 1075 eq = self.ndiffAssertEqual 1076 h = Header('; ' 1077 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1078 'be_on_a_line_all_by_itself; ') 1079 eq(h.encode(), """\ 1080; 1081 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1082 1083 def test_long_header_with_multiple_sequential_split_chars(self): 1084 eq = self.ndiffAssertEqual 1085 h = Header('This is a long line that has two whitespaces in a row. ' 1086 'This used to cause truncation of the header when folded') 1087 eq(h.encode(), """\ 1088This is a long line that has two whitespaces in a row. 
This used to cause 1089 truncation of the header when folded""") 1090 1091 def test_splitter_split_on_punctuation_only_if_fws_with_header(self): 1092 eq = self.ndiffAssertEqual 1093 h = Header('thisverylongheaderhas;semicolons;and,commas,but' 1094 'they;arenotlegal;fold,points') 1095 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;" 1096 "arenotlegal;fold,points") 1097 1098 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self): 1099 eq = self.ndiffAssertEqual 1100 h = Header('this is a test where we need to have more than one line ' 1101 'before; our final line that is just too big to fit;; ' 1102 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1103 'be_on_a_line_all_by_itself;') 1104 eq(h.encode(), """\ 1105this is a test where we need to have more than one line before; 1106 our final line that is just too big to fit;; 1107 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""") 1108 1109 def test_overlong_last_part_followed_by_split_point(self): 1110 eq = self.ndiffAssertEqual 1111 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1112 'be_on_a_line_all_by_itself ') 1113 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_" 1114 "should_be_on_a_line_all_by_itself ") 1115 1116 def test_multiline_with_overlong_parts_separated_by_two_split_points(self): 1117 eq = self.ndiffAssertEqual 1118 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_' 1119 'before_our_final_line_; ; ' 1120 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1121 'be_on_a_line_all_by_itself; ') 1122 eq(h.encode(), """\ 1123this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_; 1124 ; 1125 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1126 1127 def test_multiline_with_overlong_last_part_followed_by_split_point(self): 1128 eq = self.ndiffAssertEqual 1129 h = Header('this is a 
test where we need to have more than one line ' 1130 'before our final line; ; ' 1131 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' 1132 'be_on_a_line_all_by_itself; ') 1133 eq(h.encode(), """\ 1134this is a test where we need to have more than one line before our final line; 1135 ; 1136 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """) 1137 1138 def test_long_header_with_whitespace_runs(self): 1139 eq = self.ndiffAssertEqual 1140 msg = Message() 1141 msg['From'] = '[email protected]' 1142 msg['References'] = SPACE.join(['<[email protected]> '] * 10) 1143 msg.set_payload('Test') 1144 sfp = StringIO() 1145 g = Generator(sfp) 1146 g.flatten(msg) 1147 eq(sfp.getvalue(), """\ 1148From: [email protected] 1149References: <[email protected]> <[email protected]> <[email protected]> <[email protected]> 1150 <[email protected]> <[email protected]> <[email protected]> <[email protected]> 1151 <[email protected]> <[email protected]>\x20\x20 1152 1153Test""") 1154 1155 def test_long_run_with_semi_header_splitter(self): 1156 eq = self.ndiffAssertEqual 1157 msg = Message() 1158 msg['From'] = '[email protected]' 1159 msg['References'] = SPACE.join(['<[email protected]>'] * 10) + '; abc' 1160 msg.set_payload('Test') 1161 sfp = StringIO() 1162 g = Generator(sfp) 1163 g.flatten(msg) 1164 eq(sfp.getvalue(), """\ 1165From: [email protected] 1166References: <[email protected]> <[email protected]> <[email protected]> <[email protected]> 1167 <[email protected]> <[email protected]> <[email protected]> <[email protected]> <[email protected]> 1168 <[email protected]>; abc 1169 1170Test""") 1171 1172 def test_splitter_split_on_punctuation_only_if_fws(self): 1173 eq = self.ndiffAssertEqual 1174 msg = Message() 1175 msg['From'] = '[email protected]' 1176 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but' 1177 'they;arenotlegal;fold,points') 1178 msg.set_payload('Test') 1179 sfp = StringIO() 1180 g = Generator(sfp) 1181 
g.flatten(msg) 1182 # XXX the space after the header should not be there. 1183 eq(sfp.getvalue(), """\ 1184From: [email protected] 1185References:\x20 1186 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points 1187 1188Test""") 1189 1190 def test_no_split_long_header(self): 1191 eq = self.ndiffAssertEqual 1192 hstr = 'References: ' + 'x' * 80 1193 h = Header(hstr) 1194 # These come on two lines because Headers are really field value 1195 # classes and don't really know about their field names. 1196 eq(h.encode(), """\ 1197References: 1198 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""") 1199 h = Header('x' * 80) 1200 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') 1201 1202 def test_splitting_multiple_long_lines(self): 1203 eq = self.ndiffAssertEqual 1204 hstr = """\ 1205from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <[email protected]>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1206\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <[email protected]>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1207\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <[email protected]>; Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1208""" 1209 h = Header(hstr, continuation_ws='\t') 1210 eq(h.encode(), """\ 1211from babylon.socal-raves.org (localhost [127.0.0.1]); 1212 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1213 for <[email protected]>; 1214 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1215\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 1216 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1217 for <[email protected]>; 1218 Sat, 2 Feb 2002 17:00:06 -0800 (PST) 1219\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); 1220 by 
babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; 1221 for <[email protected]>; 1222 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""") 1223 1224 def test_splitting_first_line_only_is_long(self): 1225 eq = self.ndiffAssertEqual 1226 hstr = """\ 1227from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca) 1228\tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1229\tid 17k4h5-00034i-00 1230\tfor [email protected]; Wed, 28 Aug 2002 11:25:20 -0400""" 1231 h = Header(hstr, maxlinelen=78, header_name='Received', 1232 continuation_ws='\t') 1233 eq(h.encode(), """\ 1234from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] 1235 helo=cthulhu.gerg.ca) 1236\tby kronos.mems-exchange.org with esmtp (Exim 4.05) 1237\tid 17k4h5-00034i-00 1238\tfor [email protected]; Wed, 28 Aug 2002 11:25:20 -0400""") 1239 1240 def test_long_8bit_header(self): 1241 eq = self.ndiffAssertEqual 1242 msg = Message() 1243 h = Header('Britische Regierung gibt', 'iso-8859-1', 1244 header_name='Subject') 1245 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte') 1246 eq(h.encode(maxlinelen=76), """\ 1247=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1248 =?iso-8859-1?q?hore-Windkraftprojekte?=""") 1249 msg['Subject'] = h 1250 eq(msg.as_string(maxheaderlen=76), """\ 1251Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?= 1252 =?iso-8859-1?q?hore-Windkraftprojekte?= 1253 1254""") 1255 eq(msg.as_string(maxheaderlen=0), """\ 1256Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?= 1257 1258""") 1259 1260 def test_long_8bit_header_no_charset(self): 1261 eq = self.ndiffAssertEqual 1262 msg = Message() 1263 header_string = ('Britische Regierung gibt gr\xfcnes Licht ' 1264 'f\xfcr Offshore-Windkraftprojekte ' 1265 '<[email protected]>') 1266 msg['Reply-To'] = header_string 1267 eq(msg.as_string(maxheaderlen=78), """\ 1268Reply-To: 
=?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1269 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1270 1271""") 1272 msg = Message() 1273 msg['Reply-To'] = Header(header_string, 1274 header_name='Reply-To') 1275 eq(msg.as_string(maxheaderlen=78), """\ 1276Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= 1277 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= 1278 1279""") 1280 1281 def test_long_to_header(self): 1282 eq = self.ndiffAssertEqual 1283 to = ('"Someone Test #A" <[email protected]>,' 1284 '<[email protected]>, ' 1285 '"Someone Test #B" <[email protected]>, ' 1286 '"Someone Test #C" <[email protected]>, ' 1287 '"Someone Test #D" <[email protected]>') 1288 msg = Message() 1289 msg['To'] = to 1290 eq(msg.as_string(maxheaderlen=78), '''\ 1291To: "Someone Test #A" <[email protected]>,<[email protected]>, 1292 "Someone Test #B" <[email protected]>, 1293 "Someone Test #C" <[email protected]>, 1294 "Someone Test #D" <[email protected]> 1295 1296''') 1297 1298 def test_long_line_after_append(self): 1299 eq = self.ndiffAssertEqual 1300 s = 'This is an example of string which has almost the limit of header length.' 1301 h = Header(s) 1302 h.append('Add another line.') 1303 eq(h.encode(maxlinelen=76), """\ 1304This is an example of string which has almost the limit of header length. 1305 Add another line.""") 1306 1307 def test_shorter_line_with_append(self): 1308 eq = self.ndiffAssertEqual 1309 s = 'This is a shorter line.' 1310 h = Header(s) 1311 h.append('Add another sentence. (Surprise?)') 1312 eq(h.encode(), 1313 'This is a shorter line. Add another sentence. 
(Surprise?)') 1314 1315 def test_long_field_name(self): 1316 eq = self.ndiffAssertEqual 1317 fn = 'X-Very-Very-Very-Long-Header-Name' 1318 gs = ('Die Mieter treten hier ein werden mit einem Foerderband ' 1319 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen ' 1320 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen ' 1321 'bef\xf6rdert. ') 1322 h = Header(gs, 'iso-8859-1', header_name=fn) 1323 # BAW: this seems broken because the first line is too long 1324 eq(h.encode(maxlinelen=76), """\ 1325=?iso-8859-1?q?Die_Mieter_treten_hier_e?= 1326 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?= 1327 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?= 1328 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""") 1329 1330 def test_long_received_header(self): 1331 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) ' 1332 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; ' 1333 'Wed, 05 Mar 2003 18:10:18 -0700') 1334 msg = Message() 1335 msg['Received-1'] = Header(h, continuation_ws='\t') 1336 msg['Received-2'] = h 1337 # This should be splitting on spaces not semicolons. 1338 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1339Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1340 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1341 Wed, 05 Mar 2003 18:10:18 -0700 1342Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by 1343 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; 1344 Wed, 05 Mar 2003 18:10:18 -0700 1345 1346""") 1347 1348 def test_string_headerinst_eq(self): 1349 h = ('<[email protected].' 1350 'tu-muenchen.de> (David Bremner\'s message of ' 1351 '"Thu, 6 Mar 2003 13:58:21 +0100")') 1352 msg = Message() 1353 msg['Received-1'] = Header(h, header_name='Received-1', 1354 continuation_ws='\t') 1355 msg['Received-2'] = h 1356 # XXX The space after the ':' should not be there. 
1357 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\ 1358Received-1:\x20 1359 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David 1360 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") 1361Received-2:\x20 1362 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David 1363 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\") 1364 1365""") 1366 1367 def test_long_unbreakable_lines_with_continuation(self): 1368 eq = self.ndiffAssertEqual 1369 msg = Message() 1370 t = """\ 1371iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1372 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp""" 1373 msg['Face-1'] = t 1374 msg['Face-2'] = Header(t, header_name='Face-2') 1375 msg['Face-3'] = ' ' + t 1376 # XXX This splitting is all wrong. It the first value line should be 1377 # snug against the field name or the space after the header not there. 1378 eq(msg.as_string(maxheaderlen=78), """\ 1379Face-1:\x20 1380 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1381 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1382Face-2:\x20 1383 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1384 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1385Face-3:\x20 1386 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 1387 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp 1388 1389""") 1390 1391 def test_another_long_multiline_header(self): 1392 eq = self.ndiffAssertEqual 1393 m = ('Received: from siimage.com ' 1394 '([172.25.1.3]) by zima.siliconimage.com with ' 1395 'Microsoft SMTPSVC(5.0.2195.4905); ' 1396 'Wed, 16 Oct 2002 07:41:11 -0700') 1397 msg = email.message_from_string(m) 1398 eq(msg.as_string(maxheaderlen=78), '''\ 1399Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with 1400 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 
2002 07:41:11 -0700 1401 1402''') 1403 1404 def test_long_lines_with_different_header(self): 1405 eq = self.ndiffAssertEqual 1406 h = ('List-Unsubscribe: ' 1407 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,' 1408 ' <mailto:[email protected]' 1409 '?subject=unsubscribe>') 1410 msg = Message() 1411 msg['List'] = h 1412 msg['List'] = Header(h, header_name='List') 1413 eq(msg.as_string(maxheaderlen=78), """\ 1414List: List-Unsubscribe: 1415 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>, 1416 <mailto:[email protected]?subject=unsubscribe> 1417List: List-Unsubscribe: 1418 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>, 1419 <mailto:[email protected]?subject=unsubscribe> 1420 1421""") 1422 1423 def test_long_rfc2047_header_with_embedded_fws(self): 1424 h = Header(textwrap.dedent("""\ 1425 We're going to pretend this header is in a non-ascii character set 1426 \tto see if line wrapping with encoded words and embedded 1427 folding white space works"""), 1428 charset='utf-8', 1429 header_name='Test') 1430 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\ 1431 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?= 1432 =?utf-8?q?cter_set?= 1433 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?= 1434 =?utf-8?q?_folding_white_space_works?=""")+'\n') 1435 1436 1437 1438# Test mangling of "From " lines in the body of a message 1439class TestFromMangling(unittest.TestCase): 1440 def setUp(self): 1441 self.msg = Message() 1442 self.msg['From'] = '[email protected]' 1443 self.msg.set_payload("""\ 1444From the desk of A.A.A.: 1445Blah blah blah 1446""") 1447 1448 def test_mangled_from(self): 1449 s = StringIO() 1450 g = Generator(s, mangle_from_=True) 1451 g.flatten(self.msg) 1452 self.assertEqual(s.getvalue(), """\ 1453From: [email protected] 1454 1455>From the desk of A.A.A.: 1456Blah blah blah 1457""") 1458 1459 def test_dont_mangle_from(self): 1460 s = StringIO() 1461 g = Generator(s, 
mangle_from_=False) 1462 g.flatten(self.msg) 1463 self.assertEqual(s.getvalue(), """\ 1464From: [email protected] 1465 1466From the desk of A.A.A.: 1467Blah blah blah 1468""") 1469 1470 def test_mangle_from_in_preamble_and_epilog(self): 1471 s = StringIO() 1472 g = Generator(s, mangle_from_=True) 1473 msg = email.message_from_string(textwrap.dedent("""\ 1474 From: [email protected] 1475 Mime-Version: 1.0 1476 Content-Type: multipart/mixed; boundary=XXX 1477 1478 From somewhere unknown 1479 1480 --XXX 1481 Content-Type: text/plain 1482 1483 foo 1484 1485 --XXX-- 1486 1487 From somewhere unknowable 1488 """)) 1489 g.flatten(msg) 1490 self.assertEqual(len([1 for x in s.getvalue().split('\n') 1491 if x.startswith('>From ')]), 2) 1492 1493 def test_mangled_from_with_bad_bytes(self): 1494 source = textwrap.dedent("""\ 1495 Content-Type: text/plain; charset="utf-8" 1496 MIME-Version: 1.0 1497 Content-Transfer-Encoding: 8bit 1498 From: [email protected] 1499 1500 """).encode('utf-8') 1501 msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n') 1502 b = BytesIO() 1503 g = BytesGenerator(b, mangle_from_=True) 1504 g.flatten(msg) 1505 self.assertEqual(b.getvalue(), source + b'>From R\xc3\xb6lli\n') 1506 1507 def test_multipart_with_bad_bytes_in_cte(self): 1508 # bpo30835 1509 source = textwrap.dedent("""\ 1510 From: [email protected] 1511 Content-Type: multipart/mixed; boundary="1" 1512 Content-Transfer-Encoding: \xc8 1513 """).encode('utf-8') 1514 msg = email.message_from_bytes(source) 1515 1516 1517# Test the basic MIMEAudio class 1518class TestMIMEAudio(unittest.TestCase): 1519 def _make_audio(self, ext): 1520 with openfile(f'sndhdr.{ext}', 'rb') as fp: 1521 self._audiodata = fp.read() 1522 self._au = MIMEAudio(self._audiodata) 1523 1524 def test_guess_minor_type(self): 1525 for ext, subtype in { 1526 'aifc': 'x-aiff', 1527 'aiff': 'x-aiff', 1528 'wav': 'x-wav', 1529 'au': 'basic', 1530 }.items(): 1531 self._make_audio(ext) 1532 subtype = ext if subtype is None 
else subtype 1533 self.assertEqual(self._au.get_content_type(), f'audio/{subtype}') 1534 1535 def test_encoding(self): 1536 self._make_audio('au') 1537 payload = self._au.get_payload() 1538 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')), 1539 self._audiodata) 1540 1541 def test_checkSetMinor(self): 1542 self._make_audio('au') 1543 au = MIMEAudio(self._audiodata, 'fish') 1544 self.assertEqual(au.get_content_type(), 'audio/fish') 1545 1546 def test_add_header(self): 1547 self._make_audio('au') 1548 eq = self.assertEqual 1549 self._au.add_header('Content-Disposition', 'attachment', 1550 filename='sndhdr.au') 1551 eq(self._au['content-disposition'], 1552 'attachment; filename="sndhdr.au"') 1553 eq(self._au.get_params(header='content-disposition'), 1554 [('attachment', ''), ('filename', 'sndhdr.au')]) 1555 eq(self._au.get_param('filename', header='content-disposition'), 1556 'sndhdr.au') 1557 missing = [] 1558 eq(self._au.get_param('attachment', header='content-disposition'), '') 1559 self.assertIs(self._au.get_param( 1560 'foo', failobj=missing, 1561 header='content-disposition'), missing) 1562 # Try some missing stuff 1563 self.assertIs(self._au.get_param('foobar', missing), missing) 1564 self.assertIs(self._au.get_param('attachment', missing, 1565 header='foobar'), missing) 1566 1567 1568 1569# Test the basic MIMEImage class 1570class TestMIMEImage(unittest.TestCase): 1571 def _make_image(self, ext): 1572 with openfile(f'python.{ext}', 'rb') as fp: 1573 self._imgdata = fp.read() 1574 self._im = MIMEImage(self._imgdata) 1575 1576 def test_guess_minor_type(self): 1577 for ext, subtype in { 1578 'bmp': None, 1579 'exr': None, 1580 'gif': None, 1581 'jpg': 'jpeg', 1582 'pbm': None, 1583 'pgm': None, 1584 'png': None, 1585 'ppm': None, 1586 'ras': 'rast', 1587 'sgi': 'rgb', 1588 'tiff': None, 1589 'webp': None, 1590 'xbm': None, 1591 }.items(): 1592 self._make_image(ext) 1593 subtype = ext if subtype is None else subtype 1594 
self.assertEqual(self._im.get_content_type(), f'image/{subtype}') 1595 1596 def test_encoding(self): 1597 self._make_image('gif') 1598 payload = self._im.get_payload() 1599 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')), 1600 self._imgdata) 1601 1602 def test_checkSetMinor(self): 1603 self._make_image('gif') 1604 im = MIMEImage(self._imgdata, 'fish') 1605 self.assertEqual(im.get_content_type(), 'image/fish') 1606 1607 def test_add_header(self): 1608 self._make_image('gif') 1609 eq = self.assertEqual 1610 self._im.add_header('Content-Disposition', 'attachment', 1611 filename='dingusfish.gif') 1612 eq(self._im['content-disposition'], 1613 'attachment; filename="dingusfish.gif"') 1614 eq(self._im.get_params(header='content-disposition'), 1615 [('attachment', ''), ('filename', 'dingusfish.gif')]) 1616 eq(self._im.get_param('filename', header='content-disposition'), 1617 'dingusfish.gif') 1618 missing = [] 1619 eq(self._im.get_param('attachment', header='content-disposition'), '') 1620 self.assertIs(self._im.get_param('foo', failobj=missing, 1621 header='content-disposition'), missing) 1622 # Try some missing stuff 1623 self.assertIs(self._im.get_param('foobar', missing), missing) 1624 self.assertIs(self._im.get_param('attachment', missing, 1625 header='foobar'), missing) 1626 1627 1628# Test the basic MIMEApplication class 1629class TestMIMEApplication(unittest.TestCase): 1630 def test_headers(self): 1631 eq = self.assertEqual 1632 msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff') 1633 eq(msg.get_content_type(), 'application/octet-stream') 1634 eq(msg['content-transfer-encoding'], 'base64') 1635 1636 def test_body(self): 1637 eq = self.assertEqual 1638 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' 1639 msg = MIMEApplication(bytesdata) 1640 # whitespace in the cte encoded block is RFC-irrelevant. 
1641 eq(msg.get_payload().strip(), '+vv8/f7/') 1642 eq(msg.get_payload(decode=True), bytesdata) 1643 1644 def test_binary_body_with_encode_7or8bit(self): 1645 # Issue 17171. 1646 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' 1647 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit) 1648 # Treated as a string, this will be invalid code points. 1649 self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata)) 1650 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1651 self.assertEqual(msg['Content-Transfer-Encoding'], '8bit') 1652 s = BytesIO() 1653 g = BytesGenerator(s) 1654 g.flatten(msg) 1655 wireform = s.getvalue() 1656 msg2 = email.message_from_bytes(wireform) 1657 self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata)) 1658 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1659 self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit') 1660 1661 def test_binary_body_with_encode_noop(self): 1662 # Issue 16564: This does not produce an RFC valid message, since to be 1663 # valid it should have a CTE of binary. But the below works in 1664 # Python2, and is documented as working this way. 1665 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' 1666 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop) 1667 # Treated as a string, this will be invalid code points. 1668 self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata)) 1669 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1670 s = BytesIO() 1671 g = BytesGenerator(s) 1672 g.flatten(msg) 1673 wireform = s.getvalue() 1674 msg2 = email.message_from_bytes(wireform) 1675 self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata)) 1676 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1677 1678 def test_binary_body_with_unicode_linend_encode_noop(self): 1679 # Issue 19003: This is a variation on #16564. 
1680 bytesdata = b'\x0b\xfa\xfb\xfc\xfd\xfe\xff' 1681 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop) 1682 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1683 s = BytesIO() 1684 g = BytesGenerator(s) 1685 g.flatten(msg) 1686 wireform = s.getvalue() 1687 msg2 = email.message_from_bytes(wireform) 1688 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1689 1690 def test_binary_body_with_encode_quopri(self): 1691 # Issue 14360. 1692 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff ' 1693 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri) 1694 self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20') 1695 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1696 self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable') 1697 s = BytesIO() 1698 g = BytesGenerator(s) 1699 g.flatten(msg) 1700 wireform = s.getvalue() 1701 msg2 = email.message_from_bytes(wireform) 1702 self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20') 1703 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1704 self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable') 1705 1706 def test_binary_body_with_encode_base64(self): 1707 bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff' 1708 msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64) 1709 self.assertEqual(msg.get_payload(), '+vv8/f7/\n') 1710 self.assertEqual(msg.get_payload(decode=True), bytesdata) 1711 s = BytesIO() 1712 g = BytesGenerator(s) 1713 g.flatten(msg) 1714 wireform = s.getvalue() 1715 msg2 = email.message_from_bytes(wireform) 1716 self.assertEqual(msg.get_payload(), '+vv8/f7/\n') 1717 self.assertEqual(msg2.get_payload(decode=True), bytesdata) 1718 1719 1720# Test the basic MIMEText class 1721class TestMIMEText(unittest.TestCase): 1722 def setUp(self): 1723 self._msg = MIMEText('hello there') 1724 1725 def test_types(self): 1726 eq = self.assertEqual 1727 eq(self._msg.get_content_type(), 'text/plain') 1728 eq(self._msg.get_param('charset'), 
'us-ascii') 1729 missing = [] 1730 self.assertIs(self._msg.get_param('foobar', missing), missing) 1731 self.assertIs(self._msg.get_param('charset', missing, header='foobar'), 1732 missing) 1733 1734 def test_payload(self): 1735 self.assertEqual(self._msg.get_payload(), 'hello there') 1736 self.assertFalse(self._msg.is_multipart()) 1737 1738 def test_charset(self): 1739 eq = self.assertEqual 1740 msg = MIMEText('hello there', _charset='us-ascii') 1741 eq(msg.get_charset().input_charset, 'us-ascii') 1742 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1743 # Also accept a Charset instance 1744 charset = Charset('utf-8') 1745 charset.body_encoding = None 1746 msg = MIMEText('hello there', _charset=charset) 1747 eq(msg.get_charset().input_charset, 'utf-8') 1748 eq(msg['content-type'], 'text/plain; charset="utf-8"') 1749 eq(msg.get_payload(), 'hello there') 1750 1751 def test_7bit_input(self): 1752 eq = self.assertEqual 1753 msg = MIMEText('hello there', _charset='us-ascii') 1754 eq(msg.get_charset().input_charset, 'us-ascii') 1755 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1756 1757 def test_7bit_input_no_charset(self): 1758 eq = self.assertEqual 1759 msg = MIMEText('hello there') 1760 eq(msg.get_charset(), 'us-ascii') 1761 eq(msg['content-type'], 'text/plain; charset="us-ascii"') 1762 self.assertIn('hello there', msg.as_string()) 1763 1764 def test_utf8_input(self): 1765 teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' 1766 eq = self.assertEqual 1767 msg = MIMEText(teststr, _charset='utf-8') 1768 eq(msg.get_charset().output_charset, 'utf-8') 1769 eq(msg['content-type'], 'text/plain; charset="utf-8"') 1770 eq(msg.get_payload(decode=True), teststr.encode('utf-8')) 1771 1772 @unittest.skip("can't fix because of backward compat in email5, " 1773 "will fix in email6") 1774 def test_utf8_input_no_charset(self): 1775 teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' 1776 self.assertRaises(UnicodeEncodeError, MIMEText, teststr) 
# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a multipart/mixed container holding a text part and an image
        # part, and keep references to all three for the tests.
        with openfile('python.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <[email protected]>'
        container['To'] = 'Dingus Lovers <[email protected]>'
        container['Subject'] = 'Here is your dingus fish'

        # Stamp the container with the local-time rendering of a fixed
        # instant.  formatdate() emits a correctly signed, zero-padded
        # +HHMM/-HHMM offset.  The earlier hand-rolled formatting
        # ("%04d" % (tzsecs / 36)) produced malformed values such as
        # " +-200" for zones east of UTC and wrong digits for offsets that
        # are not a whole number of hours.
        now = 987809702.54848599
        container['Date'] = utils.formatdate(now, localtime=True)
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # The container from setUp holds exactly the text and image parts,
        # in that order, and neither part is itself multipart.
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing this text and regenerating it must give the same text back.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        # Nothing is attached and neither preamble nor epilogue is assigned.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]

--BOUNDARY

--BOUNDARY--
''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        # Nothing is attached; preamble and epilogue are both set to ''.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        # A single attached text part, boundary set before attaching.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # preamble = '' yields an extra blank line before the first boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        # preamble = None: the first boundary directly follows the headers'
        # blank separator line.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        # epilogue = None: output ends right after the closing boundary line.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        # epilogue = '': expected output is the same as with epilogue = None.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        # epilogue = '\n' adds one blank line after the closing boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = '[email protected]'
        outer['From'] = '[email protected]'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: [email protected]
From: [email protected]

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        # Each message/external-body subpart of the second part must wrap
        # exactly one text/plain message.
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) says
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting depth of each expected line must match
        # what iterators._structure() prints for msg_38.txt -- confirm.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting depth of each expected line must match
        # what iterators._structure() prints for msg_39.txt -- confirm.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type: the body is expected
        # to regenerate unchanged.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        # Leading whitespace inside the quoted boundary is part of the
        # boundary and must still split the body into two parts.
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last text, with no newline after.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')

    def test_mimebase_default_policy(self):
        m = MIMEBase('multipart', 'mixed')
        self.assertIs(m.policy, email.policy.compat32)

    def test_mimebase_custom_policy(self):
        m = MIMEBase('multipart', 'mixed', policy=email.policy.default)
        self.assertIs(m.policy, email.policy.default)

# Test some badly formatted messages
class TestNonConformant(TestEmailBase):

    def test_parse_missing_minor_type(self):
        # The fixture's content type is expected to read back as text/plain.
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        # The payload stays a plain string and two defects are recorded, in
        # this order.
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Message template shared by the multipart CTE tests.  The "{}" after
    # the boundary parameter is filled through str.format() with either ''
    # or an extra header line.  The boundary= line keeps leading whitespace
    # after dedent, so it reads as a continuation of the Content-Type header.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: [email protected]
        To: [email protected]
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        # A base64 CTE on the multipart container must record one defect.
        msg = self._str_msg(
            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

# NOTE(review): the methods below continue class TestNonConformant, whose
# header and multipart_msg template are defined above this span.
    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        # With no Content-Transfer-Encoding header, no defect is recorded.
        msg = self._str_msg(self.multipart_msg.format(''))
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        # 7bit, 8bit and binary (in any letter case) are accepted on a
        # multipart container without recording a defect.
        for cte in ('7bit', '8bit', 'BINary'):
            msg = self._str_msg(
                self.multipart_msg.format(
                    "\nContent-Transfer-Encoding: {}".format(cte)))
            self.assertEqual(len(msg.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The whole body, boundary lines included, is expected back as a
        # single string payload.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        # The regenerated message has a blank line between the headers and
        # the body text.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: [email protected]
To: [email protected]
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        # Two defects are expected, in this order.
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        # A leading continuation line is dropped from the headers and
        # reported as a defect that carries the offending line.
        eq = self.assertEqual
        m = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(m)
        eq(msg.keys(), ['Subject'])
        eq(msg.get_payload(), 'body')
        eq(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                 [errors.FirstHeaderLineIsContinuationDefect])
        eq(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])


# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    def test_rfc2047_multiline(self):
        # Decode a folded header with two encoded words, then check the
        # str() form and the re-encoded form wrapped at 76 columns.
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        # The space between the encoded word and the plain text is kept.
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <[email protected]>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <[email protected]>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <[email protected]>')

    def test_whitespace_keeper_unicode_2(self):
        # Same check with base64 encoded words between plain-text runs.
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued to plain text are still decoded separately.
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        # Whitespace around the encoded words stays with the plain chunks.
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        # Base64 words with wrong or missing '=' padding still decode.
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        s = '=?iso-8859-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                        [(b'andr\xe9=zz', 'iso-8859-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047
        # NOTE(review): this input reads the same as the 3rd testcase's; the
        # RFC example presumably differs only in the amount of whitespace
        # between the encoded words -- confirm the literal.
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')

    def test_multiline_header(self):
        # An encoded word followed by a folded plain-text address.
        s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <[email protected]>'
        self.assertEqual(decode_header(s),
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<[email protected]>', None)])
        self.assertEqual(make_header(decode_header(s)).encode(),
                         ''.join(s.splitlines()))
        self.assertEqual(str(make_header(decode_header(s))),
                         '"Müller T" <[email protected]>')


# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    def setUp(self):
        # Keep the raw text of the msg_11.txt fixture on the instance.
        with openfile('msg_11.txt', encoding="utf-8") as fp:
            self._text = fp.read()

    def test_type_error(self):
        # MIMEMessage rejects a plain string argument with TypeError.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        # Wrapping a Message gives message/rfc822 whose payload is a
        # one-element list holding that same object.
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # Attaching a second message to a MIMEMessage must raise
        # MultipartConversionError.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        # The parsed fixture is message/rfc822 with a single nested Message.
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the indentation of the field lines in the expected
        # text must match msg_16.txt exactly -- confirm against the fixture.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <[email protected]>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <[email protected]>
  To: SoCal Raves <[email protected]>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: [email protected]
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '[email protected]')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('[email protected]', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<[email protected]>')

    def test_epilogue(self):
        # A hand-built multipart with preamble and epilogue must flatten to
        # exactly the text stored in msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt', encoding="utf-8") as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = '[email protected]'
        msg['To'] = '[email protected]'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline, yet the first boundary is
        # expected on its own line right after it.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = '[email protected]'
        msg['To'] = '[email protected]'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: [email protected]
To: [email protected]
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        # Both top-level parts report message/rfc822 as default and actual
        # type; the message nested in each reports text/plain for both.
        eq = self.assertEqual
        with openfile('msg_30.txt', encoding="utf-8") as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

def test_default_type_with_explicit_container_type(self): 2662 eq = self.assertEqual 2663 with openfile('msg_28.txt', encoding="utf-8") as fp: 2664 msg = email.message_from_file(fp) 2665 container1 = msg.get_payload(0) 2666 eq(container1.get_default_type(), 'message/rfc822') 2667 eq(container1.get_content_type(), 'message/rfc822') 2668 container2 = msg.get_payload(1) 2669 eq(container2.get_default_type(), 'message/rfc822') 2670 eq(container2.get_content_type(), 'message/rfc822') 2671 container1a = container1.get_payload(0) 2672 eq(container1a.get_default_type(), 'text/plain') 2673 eq(container1a.get_content_type(), 'text/plain') 2674 container2a = container2.get_payload(0) 2675 eq(container2a.get_default_type(), 'text/plain') 2676 eq(container2a.get_content_type(), 'text/plain') 2677 2678 def test_default_type_non_parsed(self): 2679 eq = self.assertEqual 2680 neq = self.ndiffAssertEqual 2681 # Set up container 2682 container = MIMEMultipart('digest', 'BOUNDARY') 2683 container.epilogue = '' 2684 # Set up subparts 2685 subpart1a = MIMEText('message 1\n') 2686 subpart2a = MIMEText('message 2\n') 2687 subpart1 = MIMEMessage(subpart1a) 2688 subpart2 = MIMEMessage(subpart2a) 2689 container.attach(subpart1) 2690 container.attach(subpart2) 2691 eq(subpart1.get_content_type(), 'message/rfc822') 2692 eq(subpart1.get_default_type(), 'message/rfc822') 2693 eq(subpart2.get_content_type(), 'message/rfc822') 2694 eq(subpart2.get_default_type(), 'message/rfc822') 2695 neq(container.as_string(0), '''\ 2696Content-Type: multipart/digest; boundary="BOUNDARY" 2697MIME-Version: 1.0 2698 2699--BOUNDARY 2700Content-Type: message/rfc822 2701MIME-Version: 1.0 2702 2703Content-Type: text/plain; charset="us-ascii" 2704MIME-Version: 1.0 2705Content-Transfer-Encoding: 7bit 2706 2707message 1 2708 2709--BOUNDARY 2710Content-Type: message/rfc822 2711MIME-Version: 1.0 2712 2713Content-Type: text/plain; charset="us-ascii" 2714MIME-Version: 1.0 2715Content-Transfer-Encoding: 7bit 2716 2717message 
2 2718 2719--BOUNDARY-- 2720''') 2721 del subpart1['content-type'] 2722 del subpart1['mime-version'] 2723 del subpart2['content-type'] 2724 del subpart2['mime-version'] 2725 eq(subpart1.get_content_type(), 'message/rfc822') 2726 eq(subpart1.get_default_type(), 'message/rfc822') 2727 eq(subpart2.get_content_type(), 'message/rfc822') 2728 eq(subpart2.get_default_type(), 'message/rfc822') 2729 neq(container.as_string(0), '''\ 2730Content-Type: multipart/digest; boundary="BOUNDARY" 2731MIME-Version: 1.0 2732 2733--BOUNDARY 2734 2735Content-Type: text/plain; charset="us-ascii" 2736MIME-Version: 1.0 2737Content-Transfer-Encoding: 7bit 2738 2739message 1 2740 2741--BOUNDARY 2742 2743Content-Type: text/plain; charset="us-ascii" 2744MIME-Version: 1.0 2745Content-Transfer-Encoding: 7bit 2746 2747message 2 2748 2749--BOUNDARY-- 2750''') 2751 2752 def test_mime_attachments_in_constructor(self): 2753 eq = self.assertEqual 2754 text1 = MIMEText('') 2755 text2 = MIMEText('') 2756 msg = MIMEMultipart(_subparts=(text1, text2)) 2757 eq(len(msg.get_payload()), 2) 2758 eq(msg.get_payload(0), text1) 2759 eq(msg.get_payload(1), text2) 2760 2761 def test_default_multipart_constructor(self): 2762 msg = MIMEMultipart() 2763 self.assertTrue(msg.is_multipart()) 2764 2765 def test_multipart_default_policy(self): 2766 msg = MIMEMultipart() 2767 msg['To'] = '[email protected]' 2768 msg['To'] = '[email protected]' 2769 self.assertEqual(msg.get_all('to'), ['[email protected]', '[email protected]']) 2770 2771 def test_multipart_custom_policy(self): 2772 msg = MIMEMultipart(policy=email.policy.default) 2773 msg['To'] = '[email protected]' 2774 with self.assertRaises(ValueError) as cm: 2775 msg['To'] = '[email protected]' 2776 self.assertEqual(str(cm.exception), 2777 'There may be at most 1 To headers in a message') 2778 2779 2780# Test the NonMultipart class 2781class TestNonMultipart(TestEmailBase): 2782 def test_nonmultipart_is_not_multipart(self): 2783 msg = MIMENonMultipart('text', 'plain') 
2784 self.assertFalse(msg.is_multipart()) 2785 2786 def test_attach_raises_exception(self): 2787 msg = Message() 2788 msg['Subject'] = 'subpart 1' 2789 r = MIMENonMultipart('text', 'plain') 2790 self.assertRaises(errors.MultipartConversionError, r.attach, msg) 2791 2792 2793# A general test of parser->model->generator idempotency. IOW, read a message 2794# in, parse it into a message object tree, then without touching the tree, 2795# regenerate the plain text. The original text and the transformed text 2796# should be identical. Note: that we ignore the Unix-From since that may 2797# contain a changed date. 2798class TestIdempotent(TestEmailBase): 2799 2800 linesep = '\n' 2801 2802 def _msgobj(self, filename): 2803 with openfile(filename, encoding="utf-8") as fp: 2804 data = fp.read() 2805 msg = email.message_from_string(data) 2806 return msg, data 2807 2808 def _idempotent(self, msg, text, unixfrom=False): 2809 eq = self.ndiffAssertEqual 2810 s = StringIO() 2811 g = Generator(s, maxheaderlen=0) 2812 g.flatten(msg, unixfrom=unixfrom) 2813 eq(text, s.getvalue()) 2814 2815 def test_parse_text_message(self): 2816 eq = self.assertEqual 2817 msg, text = self._msgobj('msg_01.txt') 2818 eq(msg.get_content_type(), 'text/plain') 2819 eq(msg.get_content_maintype(), 'text') 2820 eq(msg.get_content_subtype(), 'plain') 2821 eq(msg.get_params()[1], ('charset', 'us-ascii')) 2822 eq(msg.get_param('charset'), 'us-ascii') 2823 eq(msg.preamble, None) 2824 eq(msg.epilogue, None) 2825 self._idempotent(msg, text) 2826 2827 def test_parse_untyped_message(self): 2828 eq = self.assertEqual 2829 msg, text = self._msgobj('msg_03.txt') 2830 eq(msg.get_content_type(), 'text/plain') 2831 eq(msg.get_params(), None) 2832 eq(msg.get_param('charset'), None) 2833 self._idempotent(msg, text) 2834 2835 def test_simple_multipart(self): 2836 msg, text = self._msgobj('msg_04.txt') 2837 self._idempotent(msg, text) 2838 2839 def test_MIME_digest(self): 2840 msg, text = self._msgobj('msg_02.txt') 2841 
self._idempotent(msg, text) 2842 2843 def test_long_header(self): 2844 msg, text = self._msgobj('msg_27.txt') 2845 self._idempotent(msg, text) 2846 2847 def test_MIME_digest_with_part_headers(self): 2848 msg, text = self._msgobj('msg_28.txt') 2849 self._idempotent(msg, text) 2850 2851 def test_mixed_with_image(self): 2852 msg, text = self._msgobj('msg_06.txt') 2853 self._idempotent(msg, text) 2854 2855 def test_multipart_report(self): 2856 msg, text = self._msgobj('msg_05.txt') 2857 self._idempotent(msg, text) 2858 2859 def test_dsn(self): 2860 msg, text = self._msgobj('msg_16.txt') 2861 self._idempotent(msg, text) 2862 2863 def test_preamble_epilogue(self): 2864 msg, text = self._msgobj('msg_21.txt') 2865 self._idempotent(msg, text) 2866 2867 def test_multipart_one_part(self): 2868 msg, text = self._msgobj('msg_23.txt') 2869 self._idempotent(msg, text) 2870 2871 def test_multipart_no_parts(self): 2872 msg, text = self._msgobj('msg_24.txt') 2873 self._idempotent(msg, text) 2874 2875 def test_no_start_boundary(self): 2876 msg, text = self._msgobj('msg_31.txt') 2877 self._idempotent(msg, text) 2878 2879 def test_rfc2231_charset(self): 2880 msg, text = self._msgobj('msg_32.txt') 2881 self._idempotent(msg, text) 2882 2883 def test_more_rfc2231_parameters(self): 2884 msg, text = self._msgobj('msg_33.txt') 2885 self._idempotent(msg, text) 2886 2887 def test_text_plain_in_a_multipart_digest(self): 2888 msg, text = self._msgobj('msg_34.txt') 2889 self._idempotent(msg, text) 2890 2891 def test_nested_multipart_mixeds(self): 2892 msg, text = self._msgobj('msg_12a.txt') 2893 self._idempotent(msg, text) 2894 2895 def test_message_external_body_idempotent(self): 2896 msg, text = self._msgobj('msg_36.txt') 2897 self._idempotent(msg, text) 2898 2899 def test_message_delivery_status(self): 2900 msg, text = self._msgobj('msg_43.txt') 2901 self._idempotent(msg, text, unixfrom=True) 2902 2903 def test_message_signed_idempotent(self): 2904 msg, text = self._msgobj('msg_45.txt') 2905 
self._idempotent(msg, text) 2906 2907 def test_content_type(self): 2908 eq = self.assertEqual 2909 # Get a message object and reset the seek pointer for other tests 2910 msg, text = self._msgobj('msg_05.txt') 2911 eq(msg.get_content_type(), 'multipart/report') 2912 # Test the Content-Type: parameters 2913 params = {} 2914 for pk, pv in msg.get_params(): 2915 params[pk] = pv 2916 eq(params['report-type'], 'delivery-status') 2917 eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com') 2918 eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep) 2919 eq(msg.epilogue, self.linesep) 2920 eq(len(msg.get_payload()), 3) 2921 # Make sure the subparts are what we expect 2922 msg1 = msg.get_payload(0) 2923 eq(msg1.get_content_type(), 'text/plain') 2924 eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep) 2925 msg2 = msg.get_payload(1) 2926 eq(msg2.get_content_type(), 'text/plain') 2927 eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep) 2928 msg3 = msg.get_payload(2) 2929 eq(msg3.get_content_type(), 'message/rfc822') 2930 self.assertIsInstance(msg3, Message) 2931 payload = msg3.get_payload() 2932 self.assertIsInstance(payload, list) 2933 eq(len(payload), 1) 2934 msg4 = payload[0] 2935 self.assertIsInstance(msg4, Message) 2936 eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep) 2937 2938 def test_parser(self): 2939 eq = self.assertEqual 2940 msg, text = self._msgobj('msg_06.txt') 2941 # Check some of the outer headers 2942 eq(msg.get_content_type(), 'message/rfc822') 2943 # Make sure the payload is a list of exactly one sub-Message, and that 2944 # that submessage has a type of text/plain 2945 payload = msg.get_payload() 2946 self.assertIsInstance(payload, list) 2947 eq(len(payload), 1) 2948 msg1 = payload[0] 2949 self.assertIsInstance(msg1, Message) 2950 eq(msg1.get_content_type(), 'text/plain') 2951 self.assertIsInstance(msg1.get_payload(), str) 2952 eq(msg1.get_payload(), self.linesep) 2953 2954 2955 2956# Test various other bits 
of the package's functionality 2957class TestMiscellaneous(TestEmailBase): 2958 def test_message_from_string(self): 2959 with openfile('msg_01.txt', encoding="utf-8") as fp: 2960 text = fp.read() 2961 msg = email.message_from_string(text) 2962 s = StringIO() 2963 # Don't wrap/continue long headers since we're trying to test 2964 # idempotency. 2965 g = Generator(s, maxheaderlen=0) 2966 g.flatten(msg) 2967 self.assertEqual(text, s.getvalue()) 2968 2969 def test_message_from_file(self): 2970 with openfile('msg_01.txt', encoding="utf-8") as fp: 2971 text = fp.read() 2972 fp.seek(0) 2973 msg = email.message_from_file(fp) 2974 s = StringIO() 2975 # Don't wrap/continue long headers since we're trying to test 2976 # idempotency. 2977 g = Generator(s, maxheaderlen=0) 2978 g.flatten(msg) 2979 self.assertEqual(text, s.getvalue()) 2980 2981 def test_message_from_string_with_class(self): 2982 with openfile('msg_01.txt', encoding="utf-8") as fp: 2983 text = fp.read() 2984 2985 # Create a subclass 2986 class MyMessage(Message): 2987 pass 2988 2989 msg = email.message_from_string(text, MyMessage) 2990 self.assertIsInstance(msg, MyMessage) 2991 # Try something more complicated 2992 with openfile('msg_02.txt', encoding="utf-8") as fp: 2993 text = fp.read() 2994 msg = email.message_from_string(text, MyMessage) 2995 for subpart in msg.walk(): 2996 self.assertIsInstance(subpart, MyMessage) 2997 2998 def test_message_from_file_with_class(self): 2999 # Create a subclass 3000 class MyMessage(Message): 3001 pass 3002 3003 with openfile('msg_01.txt', encoding="utf-8") as fp: 3004 msg = email.message_from_file(fp, MyMessage) 3005 self.assertIsInstance(msg, MyMessage) 3006 # Try something more complicated 3007 with openfile('msg_02.txt', encoding="utf-8") as fp: 3008 msg = email.message_from_file(fp, MyMessage) 3009 for subpart in msg.walk(): 3010 self.assertIsInstance(subpart, MyMessage) 3011 3012 def test_custom_message_does_not_require_arguments(self): 3013 class MyMessage(Message): 3014 
def __init__(self): 3015 super().__init__() 3016 msg = self._str_msg("Subject: test\n\ntest", MyMessage) 3017 self.assertIsInstance(msg, MyMessage) 3018 3019 def test__all__(self): 3020 module = __import__('email') 3021 self.assertEqual(sorted(module.__all__), [ 3022 'base64mime', 'charset', 'encoders', 'errors', 'feedparser', 3023 'generator', 'header', 'iterators', 'message', 3024 'message_from_binary_file', 'message_from_bytes', 3025 'message_from_file', 'message_from_string', 'mime', 'parser', 3026 'quoprimime', 'utils', 3027 ]) 3028 3029 def test_formatdate(self): 3030 now = time.time() 3031 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6], 3032 time.gmtime(now)[:6]) 3033 3034 def test_formatdate_localtime(self): 3035 now = time.time() 3036 self.assertEqual( 3037 utils.parsedate(utils.formatdate(now, localtime=True))[:6], 3038 time.localtime(now)[:6]) 3039 3040 def test_formatdate_usegmt(self): 3041 now = time.time() 3042 self.assertEqual( 3043 utils.formatdate(now, localtime=False), 3044 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now))) 3045 self.assertEqual( 3046 utils.formatdate(now, localtime=False, usegmt=True), 3047 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now))) 3048 3049 # parsedate and parsedate_tz will become deprecated interfaces someday 3050 def test_parsedate_returns_None_for_invalid_strings(self): 3051 # See also test_parsedate_to_datetime_with_invalid_raises_valueerror 3052 # in test_utils. 
3053 invalid_dates = [ 3054 '', 3055 ' ', 3056 '0', 3057 'A Complete Waste of Time', 3058 'Wed, 3 Apr 2002 12.34.56.78+0800', 3059 '17 June , 2022', 3060 'Friday, -Nov-82 16:14:55 EST', 3061 'Friday, Nov--82 16:14:55 EST', 3062 'Friday, 19-Nov- 16:14:55 EST', 3063 ] 3064 for dtstr in invalid_dates: 3065 with self.subTest(dtstr=dtstr): 3066 self.assertIsNone(utils.parsedate(dtstr)) 3067 self.assertIsNone(utils.parsedate_tz(dtstr)) 3068 # Not a part of the spec but, but this has historically worked: 3069 self.assertIsNone(utils.parsedate(None)) 3070 self.assertIsNone(utils.parsedate_tz(None)) 3071 3072 def test_parsedate_compact(self): 3073 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26 +0800'), 3074 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)) 3075 # The FWS after the comma is optional 3076 self.assertEqual(utils.parsedate_tz('Wed,3 Apr 2002 14:58:26 +0800'), 3077 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)) 3078 # The comma is optional 3079 self.assertEqual(utils.parsedate_tz('Wed 3 Apr 2002 14:58:26 +0800'), 3080 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)) 3081 3082 def test_parsedate_no_dayofweek(self): 3083 eq = self.assertEqual 3084 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'), 3085 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) 3086 eq(utils.parsedate_tz('February 5, 2003 13:47:26 -0800'), 3087 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) 3088 3089 def test_parsedate_no_space_before_positive_offset(self): 3090 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'), 3091 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)) 3092 3093 def test_parsedate_no_space_before_negative_offset(self): 3094 # Issue 1155362: we already handled '+' for this case. 
3095 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'), 3096 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800)) 3097 3098 def test_parsedate_accepts_time_with_dots(self): 3099 eq = self.assertEqual 3100 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'), 3101 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)) 3102 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'), 3103 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800)) 3104 3105 def test_parsedate_rfc_850(self): 3106 self.assertEqual(utils.parsedate_tz('Friday, 19-Nov-82 16:14:55 EST'), 3107 (1982, 11, 19, 16, 14, 55, 0, 1, -1, -18000)) 3108 3109 def test_parsedate_no_seconds(self): 3110 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58 +0800'), 3111 (2002, 4, 3, 14, 58, 0, 0, 1, -1, 28800)) 3112 3113 def test_parsedate_dot_time_delimiter(self): 3114 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14.58.26 +0800'), 3115 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)) 3116 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14.58 +0800'), 3117 (2002, 4, 3, 14, 58, 0, 0, 1, -1, 28800)) 3118 3119 def test_parsedate_acceptable_to_time_functions(self): 3120 eq = self.assertEqual 3121 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800') 3122 t = int(time.mktime(timetup)) 3123 eq(time.localtime(t)[:6], timetup[:6]) 3124 eq(int(time.strftime('%Y', timetup)), 2003) 3125 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800') 3126 t = int(time.mktime(timetup[:9])) 3127 eq(time.localtime(t)[:6], timetup[:6]) 3128 eq(int(time.strftime('%Y', timetup[:9])), 2003) 3129 3130 def test_mktime_tz(self): 3131 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, 3132 -1, -1, -1, 0)), 0) 3133 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0, 3134 -1, -1, -1, 1234)), -1234) 3135 3136 def test_parsedate_y2k(self): 3137 """Test for parsing a date with a two-digit year. 3138 3139 Parsing a date with a two-digit year should return the correct 3140 four-digit year. 
RFC822 allows two-digit years, but RFC2822 (which 3141 obsoletes RFC822) requires four-digit years. 3142 3143 """ 3144 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'), 3145 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')) 3146 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'), 3147 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800')) 3148 3149 def test_parseaddr_empty(self): 3150 self.assertEqual(utils.parseaddr('<>'), ('', '')) 3151 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '') 3152 3153 def test_parseaddr_multiple_domains(self): 3154 self.assertEqual( 3155 utils.parseaddr('a@b@c'), 3156 ('', '') 3157 ) 3158 self.assertEqual( 3159 utils.parseaddr('[email protected]@c'), 3160 ('', '') 3161 ) 3162 self.assertEqual( 3163 utils.parseaddr('[email protected]@c'), 3164 ('', '') 3165 ) 3166 3167 def test_noquote_dump(self): 3168 self.assertEqual( 3169 utils.formataddr(('A Silly Person', '[email protected]')), 3170 'A Silly Person <[email protected]>') 3171 3172 def test_escape_dump(self): 3173 self.assertEqual( 3174 utils.formataddr(('A (Very) Silly Person', '[email protected]')), 3175 r'"A (Very) Silly Person" <[email protected]>') 3176 self.assertEqual( 3177 utils.parseaddr(r'"A \(Very\) Silly Person" <[email protected]>'), 3178 ('A (Very) Silly Person', '[email protected]')) 3179 a = r'A \(Special\) Person' 3180 b = '[email protected]' 3181 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) 3182 3183 def test_escape_backslashes(self): 3184 self.assertEqual( 3185 utils.formataddr((r'Arthur \Backslash\ Foobar', '[email protected]')), 3186 r'"Arthur \\Backslash\\ Foobar" <[email protected]>') 3187 a = r'Arthur \Backslash\ Foobar' 3188 b = '[email protected]' 3189 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) 3190 3191 def test_quotes_unicode_names(self): 3192 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 
3193 name = "H\u00e4ns W\u00fcrst" 3194 addr = '[email protected]' 3195 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <[email protected]>" 3196 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <[email protected]>" 3197 self.assertEqual(utils.formataddr((name, addr)), utf8_base64) 3198 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'), 3199 latin1_quopri) 3200 3201 def test_accepts_any_charset_like_object(self): 3202 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3203 name = "H\u00e4ns W\u00fcrst" 3204 addr = '[email protected]' 3205 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <[email protected]>" 3206 foobar = "FOOBAR" 3207 class CharsetMock: 3208 def header_encode(self, string): 3209 return foobar 3210 mock = CharsetMock() 3211 mock_expected = "%s <%s>" % (foobar, addr) 3212 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected) 3213 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')), 3214 utf8_base64) 3215 3216 def test_invalid_charset_like_object_raises_error(self): 3217 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3218 name = "H\u00e4ns W\u00fcrst" 3219 addr = '[email protected]' 3220 # An object without a header_encode method: 3221 bad_charset = object() 3222 self.assertRaises(AttributeError, utils.formataddr, (name, addr), 3223 bad_charset) 3224 3225 def test_unicode_address_raises_error(self): 3226 # issue 1690608. email.utils.formataddr() should be rfc2047 aware. 3227 addr = 'pers\[email protected]' 3228 self.assertRaises(UnicodeError, utils.formataddr, (None, addr)) 3229 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr)) 3230 3231 def test_name_with_dot(self): 3232 x = 'John X. Doe <[email protected]>' 3233 y = '"John X. Doe" <[email protected]>' 3234 a, b = ('John X. 
Doe', '[email protected]') 3235 self.assertEqual(utils.parseaddr(x), (a, b)) 3236 self.assertEqual(utils.parseaddr(y), (a, b)) 3237 # formataddr() quotes the name if there's a dot in it 3238 self.assertEqual(utils.formataddr((a, b)), y) 3239 3240 def test_parseaddr_preserves_quoted_pairs_in_addresses(self): 3241 # issue 10005. Note that in the third test the second pair of 3242 # backslashes is not actually a quoted pair because it is not inside a 3243 # comment or quoted string: the address being parsed has a quoted 3244 # string containing a quoted backslash, followed by 'example' and two 3245 # backslashes, followed by another quoted string containing a space and 3246 # the word 'example'. parseaddr copies those two backslashes 3247 # literally. Per rfc5322 this is not technically correct since a \ may 3248 # not appear in an address outside of a quoted string. It is probably 3249 # a sensible Postel interpretation, though. 3250 eq = self.assertEqual 3251 eq(utils.parseaddr('""example" example"@example.com'), 3252 ('', '""example" example"@example.com')) 3253 eq(utils.parseaddr('"\\"example\\" example"@example.com'), 3254 ('', '"\\"example\\" example"@example.com')) 3255 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'), 3256 ('', '"\\\\"example\\\\" example"@example.com')) 3257 3258 def test_parseaddr_preserves_spaces_in_local_part(self): 3259 # issue 9286. A normal RFC5322 local part should not contain any 3260 # folding white space, but legacy local parts can (they are a sequence 3261 # of atoms, not dotatoms). On the other hand we strip whitespace from 3262 # before the @ and around dots, on the assumption that the whitespace 3263 # around the punctuation is a mistake in what would otherwise be 3264 # an RFC5322 local part. Leading whitespace is, usual, stripped as well. 
3265 self.assertEqual(('', "merwok [email protected]"), 3266 utils.parseaddr("merwok [email protected]")) 3267 self.assertEqual(('', "merwok [email protected]"), 3268 utils.parseaddr("merwok [email protected]")) 3269 self.assertEqual(('', "merwok [email protected]"), 3270 utils.parseaddr(" merwok wok @xample.com")) 3271 self.assertEqual(('', 'merwok"wok" [email protected]'), 3272 utils.parseaddr('merwok"wok" [email protected]')) 3273 self.assertEqual(('', '[email protected]'), 3274 utils.parseaddr('merwok. wok . [email protected]')) 3275 3276 def test_formataddr_does_not_quote_parens_in_quoted_string(self): 3277 addr = ("'[email protected]' ([email protected])", 3278 '[email protected]') 3279 addrstr = ('"\'[email protected]\' ' 3280 '([email protected])" <[email protected]>') 3281 self.assertEqual(utils.parseaddr(addrstr), addr) 3282 self.assertEqual(utils.formataddr(addr), addrstr) 3283 3284 3285 def test_multiline_from_comment(self): 3286 x = """\ 3287Foo 3288\tBar <[email protected]>""" 3289 self.assertEqual(utils.parseaddr(x), ('Foo Bar', '[email protected]')) 3290 3291 def test_quote_dump(self): 3292 self.assertEqual( 3293 utils.formataddr(('A Silly; Person', '[email protected]')), 3294 r'"A Silly; Person" <[email protected]>') 3295 3296 def test_charset_richcomparisons(self): 3297 eq = self.assertEqual 3298 ne = self.assertNotEqual 3299 cset1 = Charset() 3300 cset2 = Charset() 3301 eq(cset1, 'us-ascii') 3302 eq(cset1, 'US-ASCII') 3303 eq(cset1, 'Us-AsCiI') 3304 eq('us-ascii', cset1) 3305 eq('US-ASCII', cset1) 3306 eq('Us-AsCiI', cset1) 3307 ne(cset1, 'usascii') 3308 ne(cset1, 'USASCII') 3309 ne(cset1, 'UsAsCiI') 3310 ne('usascii', cset1) 3311 ne('USASCII', cset1) 3312 ne('UsAsCiI', cset1) 3313 eq(cset1, cset2) 3314 eq(cset2, cset1) 3315 3316 def test_getaddresses(self): 3317 eq = self.assertEqual 3318 eq(utils.getaddresses(['[email protected] (Al Person)', 3319 'Bud Person <[email protected]>']), 3320 [('Al Person', '[email protected]'), 3321 ('Bud Person', 
'[email protected]')]) 3322 3323 def test_getaddresses_nasty(self): 3324 eq = self.assertEqual 3325 eq(utils.getaddresses(['foo: ;']), [('', '')]) 3326 eq(utils.getaddresses( 3327 ['[]*-- =~$']), 3328 [('', ''), ('', ''), ('', '*--')]) 3329 eq(utils.getaddresses( 3330 ['foo: ;', '"Jason R. Mastaler" <[email protected]>']), 3331 [('', ''), ('Jason R. Mastaler', '[email protected]')]) 3332 3333 def test_getaddresses_embedded_comment(self): 3334 """Test proper handling of a nested comment""" 3335 eq = self.assertEqual 3336 addrs = utils.getaddresses(['User ((nested comment)) <[email protected]>']) 3337 eq(addrs[0][1], '[email protected]') 3338 3339 def test_getaddresses_header_obj(self): 3340 """Test the handling of a Header object.""" 3341 addrs = utils.getaddresses([Header('Al Person <[email protected]>')]) 3342 self.assertEqual(addrs[0][1], '[email protected]') 3343 3344 @threading_helper.requires_working_threading() 3345 def test_make_msgid_collisions(self): 3346 # Test make_msgid uniqueness, even with multiple threads 3347 class MsgidsThread(Thread): 3348 def run(self): 3349 # generate msgids for 3 seconds 3350 self.msgids = [] 3351 append = self.msgids.append 3352 make_msgid = utils.make_msgid 3353 clock = time.monotonic 3354 tfin = clock() + 3.0 3355 while clock() < tfin: 3356 append(make_msgid(domain='testdomain-string')) 3357 3358 threads = [MsgidsThread() for i in range(5)] 3359 with threading_helper.start_threads(threads): 3360 pass 3361 all_ids = sum([t.msgids for t in threads], []) 3362 self.assertEqual(len(set(all_ids)), len(all_ids)) 3363 3364 def test_utils_quote_unquote(self): 3365 eq = self.assertEqual 3366 msg = Message() 3367 msg.add_header('content-disposition', 'attachment', 3368 filename='foo\\wacky"name') 3369 eq(msg.get_filename(), 'foo\\wacky"name') 3370 3371 def test_get_body_encoding_with_bogus_charset(self): 3372 charset = Charset('not a charset') 3373 self.assertEqual(charset.get_body_encoding(), 'base64') 3374 3375 def 
test_get_body_encoding_with_uppercase_charset(self): 3376 eq = self.assertEqual 3377 msg = Message() 3378 msg['Content-Type'] = 'text/plain; charset=UTF-8' 3379 eq(msg['content-type'], 'text/plain; charset=UTF-8') 3380 charsets = msg.get_charsets() 3381 eq(len(charsets), 1) 3382 eq(charsets[0], 'utf-8') 3383 charset = Charset(charsets[0]) 3384 eq(charset.get_body_encoding(), 'base64') 3385 msg.set_payload(b'hello world', charset=charset) 3386 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n') 3387 eq(msg.get_payload(decode=True), b'hello world') 3388 eq(msg['content-transfer-encoding'], 'base64') 3389 # Try another one 3390 msg = Message() 3391 msg['Content-Type'] = 'text/plain; charset="US-ASCII"' 3392 charsets = msg.get_charsets() 3393 eq(len(charsets), 1) 3394 eq(charsets[0], 'us-ascii') 3395 charset = Charset(charsets[0]) 3396 eq(charset.get_body_encoding(), encoders.encode_7or8bit) 3397 msg.set_payload('hello world', charset=charset) 3398 eq(msg.get_payload(), 'hello world') 3399 eq(msg['content-transfer-encoding'], '7bit') 3400 3401 def test_charsets_case_insensitive(self): 3402 lc = Charset('us-ascii') 3403 uc = Charset('US-ASCII') 3404 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding()) 3405 3406 def test_partial_falls_inside_message_delivery_status(self): 3407 eq = self.ndiffAssertEqual 3408 # The Parser interface provides chunks of data to FeedParser in 8192 3409 # byte gulps. SF bug #1076485 found one of those chunks inside 3410 # message/delivery-status header block, which triggered an 3411 # unreadline() of NeedMoreData. 
3412 msg = self._msgobj('msg_43.txt') 3413 sfp = StringIO() 3414 iterators._structure(msg, sfp) 3415 eq(sfp.getvalue(), """\ 3416multipart/report 3417 text/plain 3418 message/delivery-status 3419 text/plain 3420 text/plain 3421 text/plain 3422 text/plain 3423 text/plain 3424 text/plain 3425 text/plain 3426 text/plain 3427 text/plain 3428 text/plain 3429 text/plain 3430 text/plain 3431 text/plain 3432 text/plain 3433 text/plain 3434 text/plain 3435 text/plain 3436 text/plain 3437 text/plain 3438 text/plain 3439 text/plain 3440 text/plain 3441 text/plain 3442 text/plain 3443 text/plain 3444 text/plain 3445 text/rfc822-headers 3446""") 3447 3448 def test_make_msgid_domain(self): 3449 self.assertEqual( 3450 email.utils.make_msgid(domain='testdomain-string')[-19:], 3451 '@testdomain-string>') 3452 3453 def test_make_msgid_idstring(self): 3454 self.assertEqual( 3455 email.utils.make_msgid(idstring='test-idstring', 3456 domain='testdomain-string')[-33:], 3457 '.test-idstring@testdomain-string>') 3458 3459 def test_make_msgid_default_domain(self): 3460 with patch('socket.getfqdn') as mock_getfqdn: 3461 mock_getfqdn.return_value = domain = 'pythontest.example.com' 3462 self.assertTrue( 3463 email.utils.make_msgid().endswith( 3464 '@' + domain + '>')) 3465 3466 def test_Generator_linend(self): 3467 # Issue 14645. 3468 with openfile('msg_26.txt', encoding="utf-8", newline='\n') as f: 3469 msgtxt = f.read() 3470 msgtxt_nl = msgtxt.replace('\r\n', '\n') 3471 msg = email.message_from_string(msgtxt) 3472 s = StringIO() 3473 g = email.generator.Generator(s) 3474 g.flatten(msg) 3475 self.assertEqual(s.getvalue(), msgtxt_nl) 3476 3477 def test_BytesGenerator_linend(self): 3478 # Issue 14645. 
3479 with openfile('msg_26.txt', encoding="utf-8", newline='\n') as f: 3480 msgtxt = f.read() 3481 msgtxt_nl = msgtxt.replace('\r\n', '\n') 3482 msg = email.message_from_string(msgtxt_nl) 3483 s = BytesIO() 3484 g = email.generator.BytesGenerator(s) 3485 g.flatten(msg, linesep='\r\n') 3486 self.assertEqual(s.getvalue().decode('ascii'), msgtxt) 3487 3488 def test_BytesGenerator_linend_with_non_ascii(self): 3489 # Issue 14645. 3490 with openfile('msg_26.txt', 'rb') as f: 3491 msgtxt = f.read() 3492 msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6') 3493 msgtxt_nl = msgtxt.replace(b'\r\n', b'\n') 3494 msg = email.message_from_bytes(msgtxt_nl) 3495 s = BytesIO() 3496 g = email.generator.BytesGenerator(s) 3497 g.flatten(msg, linesep='\r\n') 3498 self.assertEqual(s.getvalue(), msgtxt) 3499 3500 def test_mime_classes_policy_argument(self): 3501 with openfile('sndhdr.au', 'rb') as fp: 3502 audiodata = fp.read() 3503 with openfile('python.gif', 'rb') as fp: 3504 bindata = fp.read() 3505 classes = [ 3506 (MIMEApplication, ('',)), 3507 (MIMEAudio, (audiodata,)), 3508 (MIMEImage, (bindata,)), 3509 (MIMEMessage, (Message(),)), 3510 (MIMENonMultipart, ('multipart', 'mixed')), 3511 (MIMEText, ('',)), 3512 ] 3513 for cls, constructor in classes: 3514 with self.subTest(cls=cls.__name__, policy='compat32'): 3515 m = cls(*constructor) 3516 self.assertIs(m.policy, email.policy.compat32) 3517 with self.subTest(cls=cls.__name__, policy='default'): 3518 m = cls(*constructor, policy=email.policy.default) 3519 self.assertIs(m.policy, email.policy.default) 3520 3521 3522# Test the iterator/generators 3523class TestIterators(TestEmailBase): 3524 def test_body_line_iterator(self): 3525 eq = self.assertEqual 3526 neq = self.ndiffAssertEqual 3527 # First a simple non-multipart message 3528 msg = self._msgobj('msg_01.txt') 3529 it = iterators.body_line_iterator(msg) 3530 lines = list(it) 3531 eq(len(lines), 6) 3532 neq(EMPTYSTRING.join(lines), msg.get_payload()) 3533 # Now a more complicated 
multipart 3534 msg = self._msgobj('msg_02.txt') 3535 it = iterators.body_line_iterator(msg) 3536 lines = list(it) 3537 eq(len(lines), 43) 3538 with openfile('msg_19.txt', encoding="utf-8") as fp: 3539 neq(EMPTYSTRING.join(lines), fp.read()) 3540 3541 def test_typed_subpart_iterator(self): 3542 eq = self.assertEqual 3543 msg = self._msgobj('msg_04.txt') 3544 it = iterators.typed_subpart_iterator(msg, 'text') 3545 lines = [] 3546 subparts = 0 3547 for subpart in it: 3548 subparts += 1 3549 lines.append(subpart.get_payload()) 3550 eq(subparts, 2) 3551 eq(EMPTYSTRING.join(lines), """\ 3552a simple kind of mirror 3553to reflect upon our own 3554a simple kind of mirror 3555to reflect upon our own 3556""") 3557 3558 def test_typed_subpart_iterator_default_type(self): 3559 eq = self.assertEqual 3560 msg = self._msgobj('msg_03.txt') 3561 it = iterators.typed_subpart_iterator(msg, 'text', 'plain') 3562 lines = [] 3563 subparts = 0 3564 for subpart in it: 3565 subparts += 1 3566 lines.append(subpart.get_payload()) 3567 eq(subparts, 1) 3568 eq(EMPTYSTRING.join(lines), """\ 3569 3570Hi, 3571 3572Do you like this message? 3573 3574-Me 3575""") 3576 3577 def test_pushCR_LF(self): 3578 '''FeedParser BufferedSubFile.push() assumed it received complete 3579 line endings. A CR ending one push() followed by a LF starting 3580 the next push() added an empty line. 
3581 ''' 3582 imt = [ 3583 ("a\r \n", 2), 3584 ("b", 0), 3585 ("c\n", 1), 3586 ("", 0), 3587 ("d\r\n", 1), 3588 ("e\r", 0), 3589 ("\nf", 1), 3590 ("\r\n", 1), 3591 ] 3592 from email.feedparser import BufferedSubFile, NeedMoreData 3593 bsf = BufferedSubFile() 3594 om = [] 3595 nt = 0 3596 for il, n in imt: 3597 bsf.push(il) 3598 nt += n 3599 n1 = 0 3600 for ol in iter(bsf.readline, NeedMoreData): 3601 om.append(ol) 3602 n1 += 1 3603 self.assertEqual(n, n1) 3604 self.assertEqual(len(om), nt) 3605 self.assertEqual(''.join([il for il, n in imt]), ''.join(om)) 3606 3607 def test_push_random(self): 3608 from email.feedparser import BufferedSubFile, NeedMoreData 3609 3610 n = 10000 3611 chunksize = 5 3612 chars = 'abcd \t\r\n' 3613 3614 s = ''.join(choice(chars) for i in range(n)) + '\n' 3615 target = s.splitlines(True) 3616 3617 bsf = BufferedSubFile() 3618 lines = [] 3619 for i in range(0, len(s), chunksize): 3620 chunk = s[i:i+chunksize] 3621 bsf.push(chunk) 3622 lines.extend(iter(bsf.readline, NeedMoreData)) 3623 self.assertEqual(lines, target) 3624 3625 3626class TestFeedParsers(TestEmailBase): 3627 3628 def parse(self, chunks): 3629 feedparser = FeedParser() 3630 for chunk in chunks: 3631 feedparser.feed(chunk) 3632 return feedparser.close() 3633 3634 def test_empty_header_name_handled(self): 3635 # Issue 19996 3636 msg = self.parse("First: val\n: bad\nSecond: val") 3637 self.assertEqual(msg['First'], 'val') 3638 self.assertEqual(msg['Second'], 'val') 3639 3640 def test_newlines(self): 3641 m = self.parse(['a:\nb:\rc:\r\nd:\n']) 3642 self.assertEqual(m.keys(), ['a', 'b', 'c', 'd']) 3643 m = self.parse(['a:\nb:\rc:\r\nd:']) 3644 self.assertEqual(m.keys(), ['a', 'b', 'c', 'd']) 3645 m = self.parse(['a:\rb', 'c:\n']) 3646 self.assertEqual(m.keys(), ['a', 'bc']) 3647 m = self.parse(['a:\r', 'b:\n']) 3648 self.assertEqual(m.keys(), ['a', 'b']) 3649 m = self.parse(['a:\r', '\nb:\n']) 3650 self.assertEqual(m.keys(), ['a', 'b']) 3651 3652 # Only CR and LF should break 
header fields 3653 m = self.parse(['a:\x85b:\u2028c:\n']) 3654 self.assertEqual(m.items(), [('a', '\x85b:\u2028c:')]) 3655 m = self.parse(['a:\r', 'b:\x85', 'c:\n']) 3656 self.assertEqual(m.items(), [('a', ''), ('b', '\x85c:')]) 3657 3658 def test_long_lines(self): 3659 # Expected peak memory use on 32-bit platform: 6*N*M bytes. 3660 M, N = 1000, 20000 3661 m = self.parse(['a:b\n\n'] + ['x'*M] * N) 3662 self.assertEqual(m.items(), [('a', 'b')]) 3663 self.assertEqual(m.get_payload(), 'x'*M*N) 3664 m = self.parse(['a:b\r\r'] + ['x'*M] * N) 3665 self.assertEqual(m.items(), [('a', 'b')]) 3666 self.assertEqual(m.get_payload(), 'x'*M*N) 3667 m = self.parse(['a:b\r\r'] + ['x'*M+'\x85'] * N) 3668 self.assertEqual(m.items(), [('a', 'b')]) 3669 self.assertEqual(m.get_payload(), ('x'*M+'\x85')*N) 3670 m = self.parse(['a:\r', 'b: '] + ['x'*M] * N) 3671 self.assertEqual(m.items(), [('a', ''), ('b', 'x'*M*N)]) 3672 3673 3674class TestParsers(TestEmailBase): 3675 3676 def test_header_parser(self): 3677 eq = self.assertEqual 3678 # Parse only the headers of a complex multipart MIME document 3679 with openfile('msg_02.txt', encoding="utf-8") as fp: 3680 msg = HeaderParser().parse(fp) 3681 eq(msg['from'], '[email protected]') 3682 eq(msg['to'], '[email protected]') 3683 eq(msg.get_content_type(), 'multipart/mixed') 3684 self.assertFalse(msg.is_multipart()) 3685 self.assertIsInstance(msg.get_payload(), str) 3686 3687 def test_bytes_header_parser(self): 3688 eq = self.assertEqual 3689 # Parse only the headers of a complex multipart MIME document 3690 with openfile('msg_02.txt', 'rb') as fp: 3691 msg = email.parser.BytesHeaderParser().parse(fp) 3692 eq(msg['from'], '[email protected]') 3693 eq(msg['to'], '[email protected]') 3694 eq(msg.get_content_type(), 'multipart/mixed') 3695 self.assertFalse(msg.is_multipart()) 3696 self.assertIsInstance(msg.get_payload(), str) 3697 self.assertIsInstance(msg.get_payload(decode=True), bytes) 3698 3699 def 
test_bytes_parser_does_not_close_file(self): 3700 with openfile('msg_02.txt', 'rb') as fp: 3701 email.parser.BytesParser().parse(fp) 3702 self.assertFalse(fp.closed) 3703 3704 def test_bytes_parser_on_exception_does_not_close_file(self): 3705 with openfile('msg_15.txt', 'rb') as fp: 3706 bytesParser = email.parser.BytesParser 3707 self.assertRaises(email.errors.StartBoundaryNotFoundDefect, 3708 bytesParser(policy=email.policy.strict).parse, 3709 fp) 3710 self.assertFalse(fp.closed) 3711 3712 def test_parser_does_not_close_file(self): 3713 with openfile('msg_02.txt', encoding="utf-8") as fp: 3714 email.parser.Parser().parse(fp) 3715 self.assertFalse(fp.closed) 3716 3717 def test_parser_on_exception_does_not_close_file(self): 3718 with openfile('msg_15.txt', encoding="utf-8") as fp: 3719 parser = email.parser.Parser 3720 self.assertRaises(email.errors.StartBoundaryNotFoundDefect, 3721 parser(policy=email.policy.strict).parse, fp) 3722 self.assertFalse(fp.closed) 3723 3724 def test_whitespace_continuation(self): 3725 eq = self.assertEqual 3726 # This message contains a line after the Subject: header that has only 3727 # whitespace, but it is not empty! 3728 msg = email.message_from_string("""\ 3729From: [email protected] 3730To: [email protected] 3731Subject: the next line has a space on it 3732\x20 3733Date: Mon, 8 Apr 2002 15:09:19 -0400 3734Message-ID: spam 3735 3736Here's the message body 3737""") 3738 eq(msg['subject'], 'the next line has a space on it\n ') 3739 eq(msg['message-id'], 'spam') 3740 eq(msg.get_payload(), "Here's the message body\n") 3741 3742 def test_whitespace_continuation_last_header(self): 3743 eq = self.assertEqual 3744 # Like the previous test, but the subject line is the last 3745 # header. 
3746 msg = email.message_from_string("""\ 3747From: [email protected] 3748To: [email protected] 3749Date: Mon, 8 Apr 2002 15:09:19 -0400 3750Message-ID: spam 3751Subject: the next line has a space on it 3752\x20 3753 3754Here's the message body 3755""") 3756 eq(msg['subject'], 'the next line has a space on it\n ') 3757 eq(msg['message-id'], 'spam') 3758 eq(msg.get_payload(), "Here's the message body\n") 3759 3760 def test_crlf_separation(self): 3761 eq = self.assertEqual 3762 with openfile('msg_26.txt', encoding="utf-8", newline='\n') as fp: 3763 msg = Parser().parse(fp) 3764 eq(len(msg.get_payload()), 2) 3765 part1 = msg.get_payload(0) 3766 eq(part1.get_content_type(), 'text/plain') 3767 eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n') 3768 part2 = msg.get_payload(1) 3769 eq(part2.get_content_type(), 'application/riscos') 3770 3771 def test_crlf_flatten(self): 3772 # Using newline='\n' preserves the crlfs in this input file. 3773 with openfile('msg_26.txt', encoding="utf-8", newline='\n') as fp: 3774 text = fp.read() 3775 msg = email.message_from_string(text) 3776 s = StringIO() 3777 g = Generator(s) 3778 g.flatten(msg, linesep='\r\n') 3779 self.assertEqual(s.getvalue(), text) 3780 3781 maxDiff = None 3782 3783 def test_multipart_digest_with_extra_mime_headers(self): 3784 eq = self.assertEqual 3785 neq = self.ndiffAssertEqual 3786 with openfile('msg_28.txt', encoding="utf-8") as fp: 3787 msg = email.message_from_file(fp) 3788 # Structure is: 3789 # multipart/digest 3790 # message/rfc822 3791 # text/plain 3792 # message/rfc822 3793 # text/plain 3794 eq(msg.is_multipart(), 1) 3795 eq(len(msg.get_payload()), 2) 3796 part1 = msg.get_payload(0) 3797 eq(part1.get_content_type(), 'message/rfc822') 3798 eq(part1.is_multipart(), 1) 3799 eq(len(part1.get_payload()), 1) 3800 part1a = part1.get_payload(0) 3801 eq(part1a.is_multipart(), 0) 3802 eq(part1a.get_content_type(), 'text/plain') 3803 neq(part1a.get_payload(), 'message 1\n') 3804 # next message/rfc822 
3805 part2 = msg.get_payload(1) 3806 eq(part2.get_content_type(), 'message/rfc822') 3807 eq(part2.is_multipart(), 1) 3808 eq(len(part2.get_payload()), 1) 3809 part2a = part2.get_payload(0) 3810 eq(part2a.is_multipart(), 0) 3811 eq(part2a.get_content_type(), 'text/plain') 3812 neq(part2a.get_payload(), 'message 2\n') 3813 3814 def test_three_lines(self): 3815 # A bug report by Andrew McNamara 3816 lines = ['From: Andrew Person <[email protected]', 3817 'Subject: Test', 3818 'Date: Tue, 20 Aug 2002 16:43:45 +1000'] 3819 msg = email.message_from_string(NL.join(lines)) 3820 self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000') 3821 3822 def test_strip_line_feed_and_carriage_return_in_headers(self): 3823 eq = self.assertEqual 3824 # For [ 1002475 ] email message parser doesn't handle \r\n correctly 3825 value1 = 'text' 3826 value2 = 'more text' 3827 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % ( 3828 value1, value2) 3829 msg = email.message_from_string(m) 3830 eq(msg.get('Header'), value1) 3831 eq(msg.get('Next-Header'), value2) 3832 3833 def test_rfc2822_header_syntax(self): 3834 eq = self.assertEqual 3835 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody' 3836 msg = email.message_from_string(m) 3837 eq(len(msg), 3) 3838 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From']) 3839 eq(msg.get_payload(), 'body') 3840 3841 def test_rfc2822_space_not_allowed_in_header(self): 3842 eq = self.assertEqual 3843 m = '>From [email protected] 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody' 3844 msg = email.message_from_string(m) 3845 eq(len(msg.keys()), 0) 3846 3847 def test_rfc2822_one_character_header(self): 3848 eq = self.assertEqual 3849 m = 'A: first header\nB: second header\nCC: third header\n\nbody' 3850 msg = email.message_from_string(m) 3851 headers = msg.keys() 3852 headers.sort() 3853 eq(headers, ['A', 'B', 'CC']) 3854 eq(msg.get_payload(), 'body') 3855 3856 def test_CRLFLF_at_end_of_part(self): 3857 # issue 5610: feedparser should not eat 
two chars from body part ending 3858 # with "\r\n\n". 3859 m = ( 3860 "From: [email protected]\n" 3861 "To: baz\n" 3862 "Mime-Version: 1.0\n" 3863 "Content-Type: multipart/mixed; boundary=BOUNDARY\n" 3864 "\n" 3865 "--BOUNDARY\n" 3866 "Content-Type: text/plain\n" 3867 "\n" 3868 "body ending with CRLF newline\r\n" 3869 "\n" 3870 "--BOUNDARY--\n" 3871 ) 3872 msg = email.message_from_string(m) 3873 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n')) 3874 3875 3876class Test8BitBytesHandling(TestEmailBase): 3877 # In Python3 all input is string, but that doesn't work if the actual input 3878 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we 3879 # decode byte streams using the surrogateescape error handler, and 3880 # reconvert to binary at appropriate places if we detect surrogates. This 3881 # doesn't allow us to transform headers with 8bit bytes (they get munged), 3882 # but it does allow us to parse and preserve them, and to decode body 3883 # parts that use an 8bit CTE. 
3884 3885 bodytest_msg = textwrap.dedent("""\ 3886 From: [email protected] 3887 To: baz 3888 Mime-Version: 1.0 3889 Content-Type: text/plain; charset={charset} 3890 Content-Transfer-Encoding: {cte} 3891 3892 {bodyline} 3893 """) 3894 3895 def test_known_8bit_CTE(self): 3896 m = self.bodytest_msg.format(charset='utf-8', 3897 cte='8bit', 3898 bodyline='pöstal').encode('utf-8') 3899 msg = email.message_from_bytes(m) 3900 self.assertEqual(msg.get_payload(), "pöstal\n") 3901 self.assertEqual(msg.get_payload(decode=True), 3902 "pöstal\n".encode('utf-8')) 3903 3904 def test_unknown_8bit_CTE(self): 3905 m = self.bodytest_msg.format(charset='notavalidcharset', 3906 cte='8bit', 3907 bodyline='pöstal').encode('utf-8') 3908 msg = email.message_from_bytes(m) 3909 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n") 3910 self.assertEqual(msg.get_payload(decode=True), 3911 "pöstal\n".encode('utf-8')) 3912 3913 def test_8bit_in_quopri_body(self): 3914 # This is non-RFC compliant data...without 'decode' the library code 3915 # decodes the body using the charset from the headers, and because the 3916 # source byte really is utf-8 this works. This is likely to fail 3917 # against real dirty data (ie: produce mojibake), but the data is 3918 # invalid anyway so it is as good a guess as any. But this means that 3919 # this test just confirms the current behavior; that behavior is not 3920 # necessarily the best possible behavior. With 'decode' it is 3921 # returning the raw bytes, so that test should be of correct behavior, 3922 # or at least produce the same result that email4 did. 
3923 m = self.bodytest_msg.format(charset='utf-8', 3924 cte='quoted-printable', 3925 bodyline='p=C3=B6stál').encode('utf-8') 3926 msg = email.message_from_bytes(m) 3927 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n') 3928 self.assertEqual(msg.get_payload(decode=True), 3929 'pöstál\n'.encode('utf-8')) 3930 3931 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self): 3932 # This is similar to the previous test, but proves that if the 8bit 3933 # byte is undecodeable in the specified charset, it gets replaced 3934 # by the unicode 'unknown' character. Again, this may or may not 3935 # be the ideal behavior. Note that if decode=False none of the 3936 # decoders will get involved, so this is the only test we need 3937 # for this behavior. 3938 m = self.bodytest_msg.format(charset='ascii', 3939 cte='quoted-printable', 3940 bodyline='p=C3=B6stál').encode('utf-8') 3941 msg = email.message_from_bytes(m) 3942 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n') 3943 self.assertEqual(msg.get_payload(decode=True), 3944 'pöstál\n'.encode('utf-8')) 3945 3946 # test_defect_handling:test_invalid_chars_in_base64_payload 3947 def test_8bit_in_base64_body(self): 3948 # If we get 8bit bytes in a base64 body, we can just ignore them 3949 # as being outside the base64 alphabet and decode anyway. But 3950 # we register a defect. 3951 m = self.bodytest_msg.format(charset='utf-8', 3952 cte='base64', 3953 bodyline='cMO2c3RhbAá=').encode('utf-8') 3954 msg = email.message_from_bytes(m) 3955 self.assertEqual(msg.get_payload(decode=True), 3956 'pöstal'.encode('utf-8')) 3957 self.assertIsInstance(msg.defects[0], 3958 errors.InvalidBase64CharactersDefect) 3959 3960 def test_8bit_in_uuencode_body(self): 3961 # Sticking an 8bit byte in a uuencode block makes it undecodable by 3962 # normal means, so the block is returned undecoded, but as bytes. 
3963 m = self.bodytest_msg.format(charset='utf-8', 3964 cte='uuencode', 3965 bodyline='<,.V<W1A; á ').encode('utf-8') 3966 msg = email.message_from_bytes(m) 3967 self.assertEqual(msg.get_payload(decode=True), 3968 '<,.V<W1A; á \n'.encode('utf-8')) 3969 3970 3971 headertest_headers = ( 3972 ('From: [email protected]', ('From', '[email protected]')), 3973 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')), 3974 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n' 3975 '\tJean de Baddie', 3976 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_' 3977 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n' 3978 ' =?unknown-8bit?q?_Jean_de_Baddie?=')), 3979 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')), 3980 ) 3981 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) + 3982 '\nYes, they are flying.\n').encode('utf-8') 3983 3984 def test_get_8bit_header(self): 3985 msg = email.message_from_bytes(self.headertest_msg) 3986 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz') 3987 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz') 3988 3989 def test_print_8bit_headers(self): 3990 msg = email.message_from_bytes(self.headertest_msg) 3991 self.assertEqual(str(msg), 3992 textwrap.dedent("""\ 3993 From: {} 3994 To: {} 3995 Subject: {} 3996 From: {} 3997 3998 Yes, they are flying. 
3999 """).format(*[expected[1] for (_, expected) in 4000 self.headertest_headers])) 4001 4002 def test_values_with_8bit_headers(self): 4003 msg = email.message_from_bytes(self.headertest_msg) 4004 self.assertListEqual([str(x) for x in msg.values()], 4005 ['[email protected]', 4006 'b\uFFFD\uFFFDz', 4007 'Maintenant je vous pr\uFFFD\uFFFDsente mon ' 4008 'coll\uFFFD\uFFFDgue, le pouf ' 4009 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' 4010 '\tJean de Baddie', 4011 "g\uFFFD\uFFFDst"]) 4012 4013 def test_items_with_8bit_headers(self): 4014 msg = email.message_from_bytes(self.headertest_msg) 4015 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()], 4016 [('From', '[email protected]'), 4017 ('To', 'b\uFFFD\uFFFDz'), 4018 ('Subject', 'Maintenant je vous ' 4019 'pr\uFFFD\uFFFDsente ' 4020 'mon coll\uFFFD\uFFFDgue, le pouf ' 4021 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n' 4022 '\tJean de Baddie'), 4023 ('From', 'g\uFFFD\uFFFDst')]) 4024 4025 def test_get_all_with_8bit_headers(self): 4026 msg = email.message_from_bytes(self.headertest_msg) 4027 self.assertListEqual([str(x) for x in msg.get_all('from')], 4028 ['[email protected]', 4029 'g\uFFFD\uFFFDst']) 4030 4031 def test_get_content_type_with_8bit(self): 4032 msg = email.message_from_bytes(textwrap.dedent("""\ 4033 Content-Type: text/pl\xA7in; charset=utf-8 4034 """).encode('latin-1')) 4035 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin") 4036 self.assertEqual(msg.get_content_maintype(), "text") 4037 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin") 4038 4039 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params 4040 def test_get_params_with_8bit(self): 4041 msg = email.message_from_bytes( 4042 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1')) 4043 self.assertEqual(msg.get_params(header='x-header'), 4044 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')]) 4045 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne') 4046 # XXX: someday you might be able to get 
'b\xa7r', for now you can't. 4047 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None) 4048 4049 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value 4050 def test_get_rfc2231_params_with_8bit(self): 4051 msg = email.message_from_bytes(textwrap.dedent("""\ 4052 Content-Type: text/plain; charset=us-ascii; 4053 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 4054 ).encode('latin-1')) 4055 self.assertEqual(msg.get_param('title'), 4056 ('us-ascii', 'en', 'This is not f\uFFFDn')) 4057 4058 def test_set_rfc2231_params_with_8bit(self): 4059 msg = email.message_from_bytes(textwrap.dedent("""\ 4060 Content-Type: text/plain; charset=us-ascii; 4061 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 4062 ).encode('latin-1')) 4063 msg.set_param('title', 'test') 4064 self.assertEqual(msg.get_param('title'), 'test') 4065 4066 def test_del_rfc2231_params_with_8bit(self): 4067 msg = email.message_from_bytes(textwrap.dedent("""\ 4068 Content-Type: text/plain; charset=us-ascii; 4069 title*=us-ascii'en'This%20is%20not%20f\xa7n""" 4070 ).encode('latin-1')) 4071 msg.del_param('title') 4072 self.assertEqual(msg.get_param('title'), None) 4073 self.assertEqual(msg.get_content_maintype(), 'text') 4074 4075 def test_get_payload_with_8bit_cte_header(self): 4076 msg = email.message_from_bytes(textwrap.dedent("""\ 4077 Content-Transfer-Encoding: b\xa7se64 4078 Content-Type: text/plain; charset=latin-1 4079 4080 payload 4081 """).encode('latin-1')) 4082 self.assertEqual(msg.get_payload(), 'payload\n') 4083 self.assertEqual(msg.get_payload(decode=True), b'payload\n') 4084 4085 non_latin_bin_msg = textwrap.dedent("""\ 4086 From: [email protected] 4087 To: báz 4088 Subject: Maintenant je vous présente mon collègue, le pouf célèbre 4089 \tJean de Baddie 4090 Mime-Version: 1.0 4091 Content-Type: text/plain; charset="utf-8" 4092 Content-Transfer-Encoding: 8bit 4093 4094 Да, они летят. 
4095 """).encode('utf-8') 4096 4097 def test_bytes_generator(self): 4098 msg = email.message_from_bytes(self.non_latin_bin_msg) 4099 out = BytesIO() 4100 email.generator.BytesGenerator(out).flatten(msg) 4101 self.assertEqual(out.getvalue(), self.non_latin_bin_msg) 4102 4103 def test_bytes_generator_handles_None_body(self): 4104 #Issue 11019 4105 msg = email.message.Message() 4106 out = BytesIO() 4107 email.generator.BytesGenerator(out).flatten(msg) 4108 self.assertEqual(out.getvalue(), b"\n") 4109 4110 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\ 4111 From: [email protected] 4112 To: =?unknown-8bit?q?b=C3=A1z?= 4113 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?= 4114 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?= 4115 =?unknown-8bit?q?_Jean_de_Baddie?= 4116 Mime-Version: 1.0 4117 Content-Type: text/plain; charset="utf-8" 4118 Content-Transfer-Encoding: base64 4119 4120 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg== 4121 """) 4122 4123 def test_generator_handles_8bit(self): 4124 msg = email.message_from_bytes(self.non_latin_bin_msg) 4125 out = StringIO() 4126 email.generator.Generator(out).flatten(msg) 4127 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped) 4128 4129 def test_str_generator_should_not_mutate_msg_when_handling_8bit(self): 4130 msg = email.message_from_bytes(self.non_latin_bin_msg) 4131 out = BytesIO() 4132 BytesGenerator(out).flatten(msg) 4133 orig_value = out.getvalue() 4134 Generator(StringIO()).flatten(msg) # Should not mutate msg! 4135 out = BytesIO() 4136 BytesGenerator(out).flatten(msg) 4137 self.assertEqual(out.getvalue(), orig_value) 4138 4139 def test_bytes_generator_with_unix_from(self): 4140 # The unixfrom contains a current date, so we can't check it 4141 # literally. Just make sure the first word is 'From' and the 4142 # rest of the message matches the input. 
4143 msg = email.message_from_bytes(self.non_latin_bin_msg) 4144 out = BytesIO() 4145 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True) 4146 lines = out.getvalue().split(b'\n') 4147 self.assertEqual(lines[0].split()[0], b'From') 4148 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg) 4149 4150 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n') 4151 non_latin_bin_msg_as7bit[2:4] = [ 4152 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_' 4153 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?='] 4154 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit) 4155 4156 def test_message_from_binary_file(self): 4157 fn = 'test.msg' 4158 self.addCleanup(unlink, fn) 4159 with open(fn, 'wb') as testfile: 4160 testfile.write(self.non_latin_bin_msg) 4161 with open(fn, 'rb') as testfile: 4162 m = email.parser.BytesParser().parse(testfile) 4163 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit) 4164 4165 latin_bin_msg = textwrap.dedent("""\ 4166 From: [email protected] 4167 To: Dinsdale 4168 Subject: Nudge nudge, wink, wink 4169 Mime-Version: 1.0 4170 Content-Type: text/plain; charset="latin-1" 4171 Content-Transfer-Encoding: 8bit 4172 4173 oh là là, know what I mean, know what I mean? 4174 """).encode('latin-1') 4175 4176 latin_bin_msg_as7bit = textwrap.dedent("""\ 4177 From: [email protected] 4178 To: Dinsdale 4179 Subject: Nudge nudge, wink, wink 4180 Mime-Version: 1.0 4181 Content-Type: text/plain; charset="iso-8859-1" 4182 Content-Transfer-Encoding: quoted-printable 4183 4184 oh l=E0 l=E0, know what I mean, know what I mean? 
4185 """) 4186 4187 def test_string_generator_reencodes_to_quopri_when_appropriate(self): 4188 m = email.message_from_bytes(self.latin_bin_msg) 4189 self.assertEqual(str(m), self.latin_bin_msg_as7bit) 4190 4191 def test_decoded_generator_emits_unicode_body(self): 4192 m = email.message_from_bytes(self.latin_bin_msg) 4193 out = StringIO() 4194 email.generator.DecodedGenerator(out).flatten(m) 4195 #DecodedHeader output contains an extra blank line compared 4196 #to the input message. RDM: not sure if this is a bug or not, 4197 #but it is not specific to the 8bit->7bit conversion. 4198 self.assertEqual(out.getvalue(), 4199 self.latin_bin_msg.decode('latin-1')+'\n') 4200 4201 def test_bytes_feedparser(self): 4202 bfp = email.feedparser.BytesFeedParser() 4203 for i in range(0, len(self.latin_bin_msg), 10): 4204 bfp.feed(self.latin_bin_msg[i:i+10]) 4205 m = bfp.close() 4206 self.assertEqual(str(m), self.latin_bin_msg_as7bit) 4207 4208 def test_crlf_flatten(self): 4209 with openfile('msg_26.txt', 'rb') as fp: 4210 text = fp.read() 4211 msg = email.message_from_bytes(text) 4212 s = BytesIO() 4213 g = email.generator.BytesGenerator(s) 4214 g.flatten(msg, linesep='\r\n') 4215 self.assertEqual(s.getvalue(), text) 4216 4217 def test_8bit_multipart(self): 4218 # Issue 11605 4219 source = textwrap.dedent("""\ 4220 Date: Fri, 18 Mar 2011 17:15:43 +0100 4221 To: [email protected] 4222 From: foodwatch-Newsletter <[email protected]> 4223 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System 4224 Message-ID: <[email protected]> 4225 MIME-Version: 1.0 4226 Content-Type: multipart/alternative; 4227 boundary="b1_76a486bee62b0d200f33dc2ca08220ad" 4228 4229 --b1_76a486bee62b0d200f33dc2ca08220ad 4230 Content-Type: text/plain; charset="utf-8" 4231 Content-Transfer-Encoding: 8bit 4232 4233 Guten Tag, , 4234 4235 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die 4236 Nachrichten aus Japan. 
4237 4238 4239 --b1_76a486bee62b0d200f33dc2ca08220ad 4240 Content-Type: text/html; charset="utf-8" 4241 Content-Transfer-Encoding: 8bit 4242 4243 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 4244 "http://www.w3.org/TR/html4/loose.dtd"> 4245 <html lang="de"> 4246 <head> 4247 <title>foodwatch - Newsletter</title> 4248 </head> 4249 <body> 4250 <p>mit großer Betroffenheit verfolgen auch wir im foodwatch-Team 4251 die Nachrichten aus Japan.</p> 4252 </body> 4253 </html> 4254 --b1_76a486bee62b0d200f33dc2ca08220ad-- 4255 4256 """).encode('utf-8') 4257 msg = email.message_from_bytes(source) 4258 s = BytesIO() 4259 g = email.generator.BytesGenerator(s) 4260 g.flatten(msg) 4261 self.assertEqual(s.getvalue(), source) 4262 4263 def test_bytes_generator_b_encoding_linesep(self): 4264 # Issue 14062: b encoding was tacking on an extra \n. 4265 m = Message() 4266 # This has enough non-ascii that it should always end up b encoded. 4267 m['Subject'] = Header('žluťoučký kůň') 4268 s = BytesIO() 4269 g = email.generator.BytesGenerator(s) 4270 g.flatten(m, linesep='\r\n') 4271 self.assertEqual( 4272 s.getvalue(), 4273 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n') 4274 4275 def test_generator_b_encoding_linesep(self): 4276 # Since this broke in ByteGenerator, test Generator for completeness. 4277 m = Message() 4278 # This has enough non-ascii that it should always end up b encoded. 
4279 m['Subject'] = Header('žluťoučký kůň') 4280 s = StringIO() 4281 g = email.generator.Generator(s) 4282 g.flatten(m, linesep='\r\n') 4283 self.assertEqual( 4284 s.getvalue(), 4285 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n') 4286 4287 maxDiff = None 4288 4289 4290class BaseTestBytesGeneratorIdempotent: 4291 4292 maxDiff = None 4293 4294 def _msgobj(self, filename): 4295 with openfile(filename, 'rb') as fp: 4296 data = fp.read() 4297 data = self.normalize_linesep_regex.sub(self.blinesep, data) 4298 msg = email.message_from_bytes(data) 4299 return msg, data 4300 4301 def _idempotent(self, msg, data, unixfrom=False): 4302 b = BytesIO() 4303 g = email.generator.BytesGenerator(b, maxheaderlen=0) 4304 g.flatten(msg, unixfrom=unixfrom, linesep=self.linesep) 4305 self.assertEqual(data, b.getvalue()) 4306 4307 4308class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent, 4309 TestIdempotent): 4310 linesep = '\n' 4311 blinesep = b'\n' 4312 normalize_linesep_regex = re.compile(br'\r\n') 4313 4314 4315class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent, 4316 TestIdempotent): 4317 linesep = '\r\n' 4318 blinesep = b'\r\n' 4319 normalize_linesep_regex = re.compile(br'(?<!\r)\n') 4320 4321 4322class TestBase64(unittest.TestCase): 4323 def test_len(self): 4324 eq = self.assertEqual 4325 eq(base64mime.header_length('hello'), 4326 len(base64mime.body_encode(b'hello', eol=''))) 4327 for size in range(15): 4328 if size == 0 : bsize = 0 4329 elif size <= 3 : bsize = 4 4330 elif size <= 6 : bsize = 8 4331 elif size <= 9 : bsize = 12 4332 elif size <= 12: bsize = 16 4333 else : bsize = 20 4334 eq(base64mime.header_length('x' * size), bsize) 4335 4336 def test_decode(self): 4337 eq = self.assertEqual 4338 eq(base64mime.decode(''), b'') 4339 eq(base64mime.decode('aGVsbG8='), b'hello') 4340 4341 def test_encode(self): 4342 eq = self.assertEqual 4343 eq(base64mime.body_encode(b''), '') 4344 eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n') 
4345 # Test the binary flag 4346 eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n') 4347 # Test the maxlinelen arg 4348 eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\ 4349eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg 4350eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg 4351eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg 4352eHh4eCB4eHh4IA== 4353""") 4354 # Test the eol argument 4355 eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'), 4356 """\ 4357eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r 4358eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r 4359eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r 4360eHh4eCB4eHh4IA==\r 4361""") 4362 4363 def test_header_encode(self): 4364 eq = self.assertEqual 4365 he = base64mime.header_encode 4366 eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=') 4367 eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=') 4368 eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=') 4369 # Test the charset option 4370 eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=') 4371 eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=') 4372 4373 4374class TestQuopri(unittest.TestCase): 4375 def setUp(self): 4376 # Set of characters (as byte integers) that don't need to be encoded 4377 # in headers. 4378 self.hlit = list(chain( 4379 range(ord('a'), ord('z') + 1), 4380 range(ord('A'), ord('Z') + 1), 4381 range(ord('0'), ord('9') + 1), 4382 (c for c in b'!*+-/'))) 4383 # Set of characters (as byte integers) that do need to be encoded in 4384 # headers. 4385 self.hnon = [c for c in range(256) if c not in self.hlit] 4386 assert len(self.hlit) + len(self.hnon) == 256 4387 # Set of characters (as byte integers) that don't need to be encoded 4388 # in bodies. 4389 self.blit = list(range(ord(' '), ord('~') + 1)) 4390 self.blit.append(ord('\t')) 4391 self.blit.remove(ord('=')) 4392 # Set of characters (as byte integers) that do need to be encoded in 4393 # bodies. 
4394 self.bnon = [c for c in range(256) if c not in self.blit] 4395 assert len(self.blit) + len(self.bnon) == 256 4396 4397 def test_quopri_header_check(self): 4398 for c in self.hlit: 4399 self.assertFalse(quoprimime.header_check(c), 4400 'Should not be header quopri encoded: %s' % chr(c)) 4401 for c in self.hnon: 4402 self.assertTrue(quoprimime.header_check(c), 4403 'Should be header quopri encoded: %s' % chr(c)) 4404 4405 def test_quopri_body_check(self): 4406 for c in self.blit: 4407 self.assertFalse(quoprimime.body_check(c), 4408 'Should not be body quopri encoded: %s' % chr(c)) 4409 for c in self.bnon: 4410 self.assertTrue(quoprimime.body_check(c), 4411 'Should be body quopri encoded: %s' % chr(c)) 4412 4413 def test_header_quopri_len(self): 4414 eq = self.assertEqual 4415 eq(quoprimime.header_length(b'hello'), 5) 4416 # RFC 2047 chrome is not included in header_length(). 4417 eq(len(quoprimime.header_encode(b'hello', charset='xxx')), 4418 quoprimime.header_length(b'hello') + 4419 # =?xxx?q?...?= means 10 extra characters 4420 10) 4421 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20) 4422 # RFC 2047 chrome is not included in header_length(). 
4423 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')), 4424 quoprimime.header_length(b'h@e@l@l@o@') + 4425 # =?xxx?q?...?= means 10 extra characters 4426 10) 4427 for c in self.hlit: 4428 eq(quoprimime.header_length(bytes([c])), 1, 4429 'expected length 1 for %r' % chr(c)) 4430 for c in self.hnon: 4431 # Space is special; it's encoded to _ 4432 if c == ord(' '): 4433 continue 4434 eq(quoprimime.header_length(bytes([c])), 3, 4435 'expected length 3 for %r' % chr(c)) 4436 eq(quoprimime.header_length(b' '), 1) 4437 4438 def test_body_quopri_len(self): 4439 eq = self.assertEqual 4440 for c in self.blit: 4441 eq(quoprimime.body_length(bytes([c])), 1) 4442 for c in self.bnon: 4443 eq(quoprimime.body_length(bytes([c])), 3) 4444 4445 def test_quote_unquote_idempotent(self): 4446 for x in range(256): 4447 c = chr(x) 4448 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c) 4449 4450 def _test_header_encode(self, header, expected_encoded_header, charset=None): 4451 if charset is None: 4452 encoded_header = quoprimime.header_encode(header) 4453 else: 4454 encoded_header = quoprimime.header_encode(header, charset) 4455 self.assertEqual(encoded_header, expected_encoded_header) 4456 4457 def test_header_encode_null(self): 4458 self._test_header_encode(b'', '') 4459 4460 def test_header_encode_one_word(self): 4461 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=') 4462 4463 def test_header_encode_two_lines(self): 4464 self._test_header_encode(b'hello\nworld', 4465 '=?iso-8859-1?q?hello=0Aworld?=') 4466 4467 def test_header_encode_non_ascii(self): 4468 self._test_header_encode(b'hello\xc7there', 4469 '=?iso-8859-1?q?hello=C7there?=') 4470 4471 def test_header_encode_alt_charset(self): 4472 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=', 4473 charset='iso-8859-2') 4474 4475 def _test_header_decode(self, encoded_header, expected_decoded_header): 4476 decoded_header = quoprimime.header_decode(encoded_header) 4477 
self.assertEqual(decoded_header, expected_decoded_header) 4478 4479 def test_header_decode_null(self): 4480 self._test_header_decode('', '') 4481 4482 def test_header_decode_one_word(self): 4483 self._test_header_decode('hello', 'hello') 4484 4485 def test_header_decode_two_lines(self): 4486 self._test_header_decode('hello=0Aworld', 'hello\nworld') 4487 4488 def test_header_decode_non_ascii(self): 4489 self._test_header_decode('hello=C7there', 'hello\xc7there') 4490 4491 def test_header_decode_re_bug_18380(self): 4492 # Issue 18380: Call re.sub with a positional argument for flags in the wrong position 4493 self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257) 4494 4495 def _test_decode(self, encoded, expected_decoded, eol=None): 4496 if eol is None: 4497 decoded = quoprimime.decode(encoded) 4498 else: 4499 decoded = quoprimime.decode(encoded, eol=eol) 4500 self.assertEqual(decoded, expected_decoded) 4501 4502 def test_decode_null_word(self): 4503 self._test_decode('', '') 4504 4505 def test_decode_null_line_null_word(self): 4506 self._test_decode('\r\n', '\n') 4507 4508 def test_decode_one_word(self): 4509 self._test_decode('hello', 'hello') 4510 4511 def test_decode_one_word_eol(self): 4512 self._test_decode('hello', 'hello', eol='X') 4513 4514 def test_decode_one_line(self): 4515 self._test_decode('hello\r\n', 'hello\n') 4516 4517 def test_decode_one_line_lf(self): 4518 self._test_decode('hello\n', 'hello\n') 4519 4520 def test_decode_one_line_cr(self): 4521 self._test_decode('hello\r', 'hello\n') 4522 4523 def test_decode_one_line_nl(self): 4524 self._test_decode('hello\n', 'helloX', eol='X') 4525 4526 def test_decode_one_line_crnl(self): 4527 self._test_decode('hello\r\n', 'helloX', eol='X') 4528 4529 def test_decode_one_line_one_word(self): 4530 self._test_decode('hello\r\nworld', 'hello\nworld') 4531 4532 def test_decode_one_line_one_word_eol(self): 4533 self._test_decode('hello\r\nworld', 'helloXworld', eol='X') 4534 4535 def 
test_decode_two_lines(self): 4536 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n') 4537 4538 def test_decode_two_lines_eol(self): 4539 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X') 4540 4541 def test_decode_one_long_line(self): 4542 self._test_decode('Spam' * 250, 'Spam' * 250) 4543 4544 def test_decode_one_space(self): 4545 self._test_decode(' ', '') 4546 4547 def test_decode_multiple_spaces(self): 4548 self._test_decode(' ' * 5, '') 4549 4550 def test_decode_one_line_trailing_spaces(self): 4551 self._test_decode('hello \r\n', 'hello\n') 4552 4553 def test_decode_two_lines_trailing_spaces(self): 4554 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n') 4555 4556 def test_decode_quoted_word(self): 4557 self._test_decode('=22quoted=20words=22', '"quoted words"') 4558 4559 def test_decode_uppercase_quoting(self): 4560 self._test_decode('ab=CD=EF', 'ab\xcd\xef') 4561 4562 def test_decode_lowercase_quoting(self): 4563 self._test_decode('ab=cd=ef', 'ab\xcd\xef') 4564 4565 def test_decode_soft_line_break(self): 4566 self._test_decode('soft line=\r\nbreak', 'soft linebreak') 4567 4568 def test_decode_false_quoting(self): 4569 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2') 4570 4571 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None): 4572 kwargs = {} 4573 if maxlinelen is None: 4574 # Use body_encode's default. 4575 maxlinelen = 76 4576 else: 4577 kwargs['maxlinelen'] = maxlinelen 4578 if eol is None: 4579 # Use body_encode's default. 4580 eol = '\n' 4581 else: 4582 kwargs['eol'] = eol 4583 encoded_body = quoprimime.body_encode(body, **kwargs) 4584 self.assertEqual(encoded_body, expected_encoded_body) 4585 if eol == '\n' or eol == '\r\n': 4586 # We know how to split the result back into lines, so maxlinelen 4587 # can be checked. 
4588 for line in encoded_body.splitlines(): 4589 self.assertLessEqual(len(line), maxlinelen) 4590 4591 def test_encode_null(self): 4592 self._test_encode('', '') 4593 4594 def test_encode_null_lines(self): 4595 self._test_encode('\n\n', '\n\n') 4596 4597 def test_encode_one_line(self): 4598 self._test_encode('hello\n', 'hello\n') 4599 4600 def test_encode_one_line_crlf(self): 4601 self._test_encode('hello\r\n', 'hello\n') 4602 4603 def test_encode_one_line_eol(self): 4604 self._test_encode('hello\n', 'hello\r\n', eol='\r\n') 4605 4606 def test_encode_one_line_eol_after_non_ascii(self): 4607 # issue 20206; see changeset 0cf700464177 for why the encode/decode. 4608 self._test_encode('hello\u03c5\n'.encode('utf-8').decode('latin1'), 4609 'hello=CF=85\r\n', eol='\r\n') 4610 4611 def test_encode_one_space(self): 4612 self._test_encode(' ', '=20') 4613 4614 def test_encode_one_line_one_space(self): 4615 self._test_encode(' \n', '=20\n') 4616 4617# XXX: body_encode() expect strings, but uses ord(char) from these strings 4618# to index into a 256-entry list. For code points above 255, this will fail. 4619# Should there be a check for 8-bit only ord() values in body, or at least 4620# a comment about the expected input? 
4621 4622 def test_encode_two_lines_one_space(self): 4623 self._test_encode(' \n \n', '=20\n=20\n') 4624 4625 def test_encode_one_word_trailing_spaces(self): 4626 self._test_encode('hello ', 'hello =20') 4627 4628 def test_encode_one_line_trailing_spaces(self): 4629 self._test_encode('hello \n', 'hello =20\n') 4630 4631 def test_encode_one_word_trailing_tab(self): 4632 self._test_encode('hello \t', 'hello =09') 4633 4634 def test_encode_one_line_trailing_tab(self): 4635 self._test_encode('hello \t\n', 'hello =09\n') 4636 4637 def test_encode_trailing_space_before_maxlinelen(self): 4638 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6) 4639 4640 def test_encode_trailing_space_at_maxlinelen(self): 4641 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5) 4642 4643 def test_encode_trailing_space_beyond_maxlinelen(self): 4644 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4) 4645 4646 def test_encode_whitespace_lines(self): 4647 self._test_encode(' \n' * 5, '=20\n' * 5) 4648 4649 def test_encode_quoted_equals(self): 4650 self._test_encode('a = b', 'a =3D b') 4651 4652 def test_encode_one_long_string(self): 4653 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25) 4654 4655 def test_encode_one_long_line(self): 4656 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n') 4657 4658 def test_encode_one_very_long_line(self): 4659 self._test_encode('x' * 200 + '\n', 4660 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n') 4661 4662 def test_encode_shortest_maxlinelen(self): 4663 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4) 4664 4665 def test_encode_maxlinelen_too_small(self): 4666 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3) 4667 4668 def test_encode(self): 4669 eq = self.assertEqual 4670 eq(quoprimime.body_encode(''), '') 4671 eq(quoprimime.body_encode('hello'), 'hello') 4672 # Test the binary flag 4673 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld') 4674 # Test 
the maxlinelen arg 4675 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\ 4676xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx= 4677 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx= 4678x xxxx xxxx xxxx xxxx=20""") 4679 # Test the eol argument 4680 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), 4681 """\ 4682xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r 4683 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r 4684x xxxx xxxx xxxx xxxx=20""") 4685 eq(quoprimime.body_encode("""\ 4686one line 4687 4688two line"""), """\ 4689one line 4690 4691two line""") 4692 4693 4694 4695# Test the Charset class 4696class TestCharset(unittest.TestCase): 4697 def tearDown(self): 4698 from email import charset as CharsetModule 4699 try: 4700 del CharsetModule.CHARSETS['fake'] 4701 except KeyError: 4702 pass 4703 4704 def test_codec_encodeable(self): 4705 eq = self.assertEqual 4706 # Make sure us-ascii = no Unicode conversion 4707 c = Charset('us-ascii') 4708 eq(c.header_encode('Hello World!'), 'Hello World!') 4709 # Test 8-bit idempotency with us-ascii 4710 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa' 4711 self.assertRaises(UnicodeError, c.header_encode, s) 4712 c = Charset('utf-8') 4713 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=') 4714 4715 def test_body_encode(self): 4716 eq = self.assertEqual 4717 # Try a charset with QP body encoding 4718 c = Charset('iso-8859-1') 4719 eq('hello w=F6rld', c.body_encode('hello w\xf6rld')) 4720 # Try a charset with Base64 body encoding 4721 c = Charset('utf-8') 4722 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world')) 4723 # Try a charset with None body encoding 4724 c = Charset('us-ascii') 4725 eq('hello world', c.body_encode('hello world')) 4726 # Try the convert argument, where input codec != output codec 4727 c = Charset('euc-jp') 4728 # With apologies to Tokio Kikuchi ;) 4729 # XXX FIXME 4730## try: 4731## eq('\x1b$B5FCO;~IW\x1b(B', 4732## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7')) 4733## 
eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', 4734## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False)) 4735## except LookupError: 4736## # We probably don't have the Japanese codecs installed 4737## pass 4738 # Testing SF bug #625509, which we have to fake, since there are no 4739 # built-in encodings where the header encoding is QP but the body 4740 # encoding is not. 4741 from email import charset as CharsetModule 4742 CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8') 4743 c = Charset('fake') 4744 eq('hello world', c.body_encode('hello world')) 4745 4746 def test_unicode_charset_name(self): 4747 charset = Charset('us-ascii') 4748 self.assertEqual(str(charset), 'us-ascii') 4749 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii') 4750 4751 4752 4753# Test multilingual MIME headers. 4754class TestHeader(TestEmailBase): 4755 def test_simple(self): 4756 eq = self.ndiffAssertEqual 4757 h = Header('Hello World!') 4758 eq(h.encode(), 'Hello World!') 4759 h.append(' Goodbye World!') 4760 eq(h.encode(), 'Hello World! Goodbye World!') 4761 4762 def test_simple_surprise(self): 4763 eq = self.ndiffAssertEqual 4764 h = Header('Hello World!') 4765 eq(h.encode(), 'Hello World!') 4766 h.append('Goodbye World!') 4767 eq(h.encode(), 'Hello World! 
Goodbye World!') 4768 4769 def test_header_needs_no_decoding(self): 4770 h = 'no decoding needed' 4771 self.assertEqual(decode_header(h), [(h, None)]) 4772 4773 def test_long(self): 4774 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.", 4775 maxlinelen=76) 4776 for l in h.encode(splitchars=' ').split('\n '): 4777 self.assertLessEqual(len(l), 76) 4778 4779 def test_multilingual(self): 4780 eq = self.ndiffAssertEqual 4781 g = Charset("iso-8859-1") 4782 cz = Charset("iso-8859-2") 4783 utf8 = Charset("utf-8") 4784 g_head = (b'Die Mieter treten hier ein werden mit einem ' 4785 b'Foerderband komfortabel den Korridor entlang, ' 4786 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, ' 4787 b'gegen die rotierenden Klingen bef\xf6rdert. ') 4788 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich ' 4789 b'd\xf9vtipu.. ') 4790 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f' 4791 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00' 4792 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c' 4793 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067' 4794 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das ' 4795 'Nunstuck git und Slotermeyer? Ja! 
Beiherhund das Oder ' 4796 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066' 4797 '\u3044\u307e\u3059\u3002') 4798 h = Header(g_head, g) 4799 h.append(cz_head, cz) 4800 h.append(utf8_head, utf8) 4801 enc = h.encode(maxlinelen=76) 4802 eq(enc, """\ 4803=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?= 4804 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?= 4805 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?= 4806 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?= 4807 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?= 4808 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?= 4809 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?= 4810 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?= 4811 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?= 4812 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?= 4813 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""") 4814 decoded = decode_header(enc) 4815 eq(len(decoded), 3) 4816 eq(decoded[0], (g_head, 'iso-8859-1')) 4817 eq(decoded[1], (cz_head, 'iso-8859-2')) 4818 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8')) 4819 ustr = str(h) 4820 eq(ustr, 4821 (b'Die Mieter treten hier ein werden mit einem Foerderband ' 4822 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen ' 4823 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen ' 4824 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod ' 4825 b'tlakem jejich d\xc5\xafvtipu.. 
\xe6\xad\xa3\xe7\xa2\xba\xe3\x81' 4826 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3' 4827 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3' 4828 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83' 4829 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e' 4830 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3' 4831 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82' 4832 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b' 4833 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git ' 4834 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt ' 4835 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81' 4836 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82' 4837 ).decode('utf-8')) 4838 # Test make_header() 4839 newh = make_header(decode_header(enc)) 4840 eq(newh, h) 4841 4842 def test_empty_header_encode(self): 4843 h = Header() 4844 self.assertEqual(h.encode(), '') 4845 4846 def test_header_ctor_default_args(self): 4847 eq = self.ndiffAssertEqual 4848 h = Header() 4849 eq(h, '') 4850 h.append('foo', Charset('iso-8859-1')) 4851 eq(h, 'foo') 4852 4853 def test_explicit_maxlinelen(self): 4854 eq = self.ndiffAssertEqual 4855 hstr = ('A very long line that must get split to something other ' 4856 'than at the 76th character boundary to test the non-default ' 4857 'behavior') 4858 h = Header(hstr) 4859 eq(h.encode(), '''\ 4860A very long line that must get split to something other than at the 76th 4861 character boundary to test the non-default behavior''') 4862 eq(str(h), hstr) 4863 h = Header(hstr, header_name='Subject') 4864 eq(h.encode(), '''\ 4865A very long line that must get split to something other than at the 4866 76th character boundary to test the non-default behavior''') 4867 eq(str(h), hstr) 4868 h = Header(hstr, maxlinelen=1024, header_name='Subject') 4869 eq(h.encode(), hstr) 4870 eq(str(h), hstr) 4871 4872 
def test_quopri_splittable(self): 4873 eq = self.ndiffAssertEqual 4874 h = Header(charset='iso-8859-1', maxlinelen=20) 4875 x = 'xxxx ' * 20 4876 h.append(x) 4877 s = h.encode() 4878 eq(s, """\ 4879=?iso-8859-1?q?xxx?= 4880 =?iso-8859-1?q?x_?= 4881 =?iso-8859-1?q?xx?= 4882 =?iso-8859-1?q?xx?= 4883 =?iso-8859-1?q?_x?= 4884 =?iso-8859-1?q?xx?= 4885 =?iso-8859-1?q?x_?= 4886 =?iso-8859-1?q?xx?= 4887 =?iso-8859-1?q?xx?= 4888 =?iso-8859-1?q?_x?= 4889 =?iso-8859-1?q?xx?= 4890 =?iso-8859-1?q?x_?= 4891 =?iso-8859-1?q?xx?= 4892 =?iso-8859-1?q?xx?= 4893 =?iso-8859-1?q?_x?= 4894 =?iso-8859-1?q?xx?= 4895 =?iso-8859-1?q?x_?= 4896 =?iso-8859-1?q?xx?= 4897 =?iso-8859-1?q?xx?= 4898 =?iso-8859-1?q?_x?= 4899 =?iso-8859-1?q?xx?= 4900 =?iso-8859-1?q?x_?= 4901 =?iso-8859-1?q?xx?= 4902 =?iso-8859-1?q?xx?= 4903 =?iso-8859-1?q?_x?= 4904 =?iso-8859-1?q?xx?= 4905 =?iso-8859-1?q?x_?= 4906 =?iso-8859-1?q?xx?= 4907 =?iso-8859-1?q?xx?= 4908 =?iso-8859-1?q?_x?= 4909 =?iso-8859-1?q?xx?= 4910 =?iso-8859-1?q?x_?= 4911 =?iso-8859-1?q?xx?= 4912 =?iso-8859-1?q?xx?= 4913 =?iso-8859-1?q?_x?= 4914 =?iso-8859-1?q?xx?= 4915 =?iso-8859-1?q?x_?= 4916 =?iso-8859-1?q?xx?= 4917 =?iso-8859-1?q?xx?= 4918 =?iso-8859-1?q?_x?= 4919 =?iso-8859-1?q?xx?= 4920 =?iso-8859-1?q?x_?= 4921 =?iso-8859-1?q?xx?= 4922 =?iso-8859-1?q?xx?= 4923 =?iso-8859-1?q?_x?= 4924 =?iso-8859-1?q?xx?= 4925 =?iso-8859-1?q?x_?= 4926 =?iso-8859-1?q?xx?= 4927 =?iso-8859-1?q?xx?= 4928 =?iso-8859-1?q?_?=""") 4929 eq(x, str(make_header(decode_header(s)))) 4930 h = Header(charset='iso-8859-1', maxlinelen=40) 4931 h.append('xxxx ' * 20) 4932 s = h.encode() 4933 eq(s, """\ 4934=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?= 4935 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?= 4936 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?= 4937 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?= 4938 =?iso-8859-1?q?_xxxx_xxxx_?=""") 4939 eq(x, str(make_header(decode_header(s)))) 4940 4941 def test_base64_splittable(self): 4942 eq = self.ndiffAssertEqual 4943 h = Header(charset='koi8-r', 
maxlinelen=20) 4944 x = 'xxxx ' * 20 4945 h.append(x) 4946 s = h.encode() 4947 eq(s, """\ 4948=?koi8-r?b?eHh4?= 4949 =?koi8-r?b?eCB4?= 4950 =?koi8-r?b?eHh4?= 4951 =?koi8-r?b?IHh4?= 4952 =?koi8-r?b?eHgg?= 4953 =?koi8-r?b?eHh4?= 4954 =?koi8-r?b?eCB4?= 4955 =?koi8-r?b?eHh4?= 4956 =?koi8-r?b?IHh4?= 4957 =?koi8-r?b?eHgg?= 4958 =?koi8-r?b?eHh4?= 4959 =?koi8-r?b?eCB4?= 4960 =?koi8-r?b?eHh4?= 4961 =?koi8-r?b?IHh4?= 4962 =?koi8-r?b?eHgg?= 4963 =?koi8-r?b?eHh4?= 4964 =?koi8-r?b?eCB4?= 4965 =?koi8-r?b?eHh4?= 4966 =?koi8-r?b?IHh4?= 4967 =?koi8-r?b?eHgg?= 4968 =?koi8-r?b?eHh4?= 4969 =?koi8-r?b?eCB4?= 4970 =?koi8-r?b?eHh4?= 4971 =?koi8-r?b?IHh4?= 4972 =?koi8-r?b?eHgg?= 4973 =?koi8-r?b?eHh4?= 4974 =?koi8-r?b?eCB4?= 4975 =?koi8-r?b?eHh4?= 4976 =?koi8-r?b?IHh4?= 4977 =?koi8-r?b?eHgg?= 4978 =?koi8-r?b?eHh4?= 4979 =?koi8-r?b?eCB4?= 4980 =?koi8-r?b?eHh4?= 4981 =?koi8-r?b?IA==?=""") 4982 eq(x, str(make_header(decode_header(s)))) 4983 h = Header(charset='koi8-r', maxlinelen=40) 4984 h.append(x) 4985 s = h.encode() 4986 eq(s, """\ 4987=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?= 4988 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?= 4989 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?= 4990 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?= 4991 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?= 4992 =?koi8-r?b?eHh4eCB4eHh4IA==?=""") 4993 eq(x, str(make_header(decode_header(s)))) 4994 4995 def test_us_ascii_header(self): 4996 eq = self.assertEqual 4997 s = 'hello' 4998 x = decode_header(s) 4999 eq(x, [('hello', None)]) 5000 h = make_header(x) 5001 eq(s, h.encode()) 5002 5003 def test_string_charset(self): 5004 eq = self.assertEqual 5005 h = Header() 5006 h.append('hello', 'iso-8859-1') 5007 eq(h, 'hello') 5008 5009## def test_unicode_error(self): 5010## raises = self.assertRaises 5011## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii') 5012## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii') 5013## h = Header() 5014## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii') 5015## raises(UnicodeError, h.append, '[P\xf6stal]', 
'us-ascii') 5016## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1') 5017 5018 def test_utf8_shortest(self): 5019 eq = self.assertEqual 5020 h = Header('p\xf6stal', 'utf-8') 5021 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=') 5022 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8') 5023 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=') 5024 5025 def test_bad_8bit_header(self): 5026 raises = self.assertRaises 5027 eq = self.assertEqual 5028 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 5029 raises(UnicodeError, Header, x) 5030 h = Header() 5031 raises(UnicodeError, h.append, x) 5032 e = x.decode('utf-8', 'replace') 5033 eq(str(Header(x, errors='replace')), e) 5034 h.append(x, errors='replace') 5035 eq(str(h), e) 5036 5037 def test_escaped_8bit_header(self): 5038 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 5039 e = x.decode('ascii', 'surrogateescape') 5040 h = Header(e, charset=email.charset.UNKNOWN8BIT) 5041 self.assertEqual(str(h), 5042 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 5043 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) 5044 5045 def test_header_handles_binary_unknown8bit(self): 5046 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 5047 h = Header(x, charset=email.charset.UNKNOWN8BIT) 5048 self.assertEqual(str(h), 5049 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 5050 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) 5051 5052 def test_make_header_handles_binary_unknown8bit(self): 5053 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' 5054 h = Header(x, charset=email.charset.UNKNOWN8BIT) 5055 h2 = email.header.make_header(email.header.decode_header(h)) 5056 self.assertEqual(str(h2), 5057 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') 5058 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')]) 5059 5060 def test_modify_returned_list_does_not_change_header(self): 5061 h = Header('test') 5062 chunks = 
email.header.decode_header(h) 5063 chunks.append(('ascii', 'test2')) 5064 self.assertEqual(str(h), 'test') 5065 5066 def test_encoded_adjacent_nonencoded(self): 5067 eq = self.assertEqual 5068 h = Header() 5069 h.append('hello', 'iso-8859-1') 5070 h.append('world') 5071 s = h.encode() 5072 eq(s, '=?iso-8859-1?q?hello?= world') 5073 h = make_header(decode_header(s)) 5074 eq(h.encode(), s) 5075 5076 def test_whitespace_keeper(self): 5077 eq = self.assertEqual 5078 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.' 5079 parts = decode_header(s) 5080 eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)]) 5081 hdr = make_header(parts) 5082 eq(hdr.encode(), 5083 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.') 5084 5085 def test_broken_base64_header(self): 5086 raises = self.assertRaises 5087 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?=' 5088 raises(errors.HeaderParseError, decode_header, s) 5089 5090 def test_shift_jis_charset(self): 5091 h = Header('文', charset='shift_jis') 5092 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=') 5093 5094 def test_flatten_header_with_no_value(self): 5095 # Issue 11401 (regression from email 4.x) Note that the space after 5096 # the header doesn't reflect the input, but this is also the way 5097 # email 4.x behaved. At some point it would be nice to fix that. 
5098 msg = email.message_from_string("EmptyHeader:") 5099 self.assertEqual(str(msg), "EmptyHeader: \n\n") 5100 5101 def test_encode_preserves_leading_ws_on_value(self): 5102 msg = Message() 5103 msg['SomeHeader'] = ' value with leading ws' 5104 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n") 5105 5106 def test_whitespace_header(self): 5107 self.assertEqual(Header(' ').encode(), ' ') 5108 5109 5110 5111# Test RFC 2231 header parameters (en/de)coding 5112class TestRFC2231(TestEmailBase): 5113 5114 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes 5115 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes 5116 def test_get_param(self): 5117 eq = self.assertEqual 5118 msg = self._msgobj('msg_29.txt') 5119 eq(msg.get_param('title'), 5120 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) 5121 eq(msg.get_param('title', unquote=False), 5122 ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"')) 5123 5124 def test_set_param(self): 5125 eq = self.ndiffAssertEqual 5126 msg = Message() 5127 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5128 charset='us-ascii') 5129 eq(msg.get_param('title'), 5130 ('us-ascii', '', 'This is even more ***fun*** isn\'t it!')) 5131 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5132 charset='us-ascii', language='en') 5133 eq(msg.get_param('title'), 5134 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!')) 5135 msg = self._msgobj('msg_01.txt') 5136 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5137 charset='us-ascii', language='en') 5138 eq(msg.as_string(maxheaderlen=78), """\ 5139Return-Path: <[email protected]> 5140Delivered-To: [email protected] 5141Received: by mail.zzz.org (Postfix, from userid 889) 5142\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) 5143MIME-Version: 1.0 5144Content-Transfer-Encoding: 7bit 5145Message-ID: <[email protected]> 5146From: [email protected] 
(John X. Doe) 5147To: [email protected] 5148Subject: This is a test message 5149Date: Fri, 4 May 2001 14:05:44 -0400 5150Content-Type: text/plain; charset=us-ascii; 5151 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21 5152 5153 5154Hi, 5155 5156Do you like this message? 5157 5158-Me 5159""") 5160 5161 def test_set_param_requote(self): 5162 msg = Message() 5163 msg.set_param('title', 'foo') 5164 self.assertEqual(msg['content-type'], 'text/plain; title="foo"') 5165 msg.set_param('title', 'bar', requote=False) 5166 self.assertEqual(msg['content-type'], 'text/plain; title=bar') 5167 # tspecial is still quoted. 5168 msg.set_param('title', "(bar)bell", requote=False) 5169 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"') 5170 5171 def test_del_param(self): 5172 eq = self.ndiffAssertEqual 5173 msg = self._msgobj('msg_01.txt') 5174 msg.set_param('foo', 'bar', charset='us-ascii', language='en') 5175 msg.set_param('title', 'This is even more ***fun*** isn\'t it!', 5176 charset='us-ascii', language='en') 5177 msg.del_param('foo', header='Content-Type') 5178 eq(msg.as_string(maxheaderlen=78), """\ 5179Return-Path: <[email protected]> 5180Delivered-To: [email protected] 5181Received: by mail.zzz.org (Postfix, from userid 889) 5182\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT) 5183MIME-Version: 1.0 5184Content-Transfer-Encoding: 7bit 5185Message-ID: <[email protected]> 5186From: [email protected] (John X. Doe) 5187To: [email protected] 5188Subject: This is a test message 5189Date: Fri, 4 May 2001 14:05:44 -0400 5190Content-Type: text/plain; charset="us-ascii"; 5191 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21 5192 5193 5194Hi, 5195 5196Do you like this message? 5197 5198-Me 5199""") 5200 5201 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset 5202 # I changed the charset name, though, because the one in the file isn't 5203 # a legal charset name. 
Should add a test for an illegal charset. 5204 def test_rfc2231_get_content_charset(self): 5205 eq = self.assertEqual 5206 msg = self._msgobj('msg_32.txt') 5207 eq(msg.get_content_charset(), 'us-ascii') 5208 5209 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes 5210 def test_rfc2231_parse_rfc_quoting(self): 5211 m = textwrap.dedent('''\ 5212 Content-Disposition: inline; 5213 \tfilename*0*=''This%20is%20even%20more%20; 5214 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20; 5215 \tfilename*2="is it not.pdf" 5216 5217 ''') 5218 msg = email.message_from_string(m) 5219 self.assertEqual(msg.get_filename(), 5220 'This is even more ***fun*** is it not.pdf') 5221 self.assertEqual(m, msg.as_string()) 5222 5223 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes 5224 def test_rfc2231_parse_extra_quoting(self): 5225 m = textwrap.dedent('''\ 5226 Content-Disposition: inline; 5227 \tfilename*0*="''This%20is%20even%20more%20"; 5228 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5229 \tfilename*2="is it not.pdf" 5230 5231 ''') 5232 msg = email.message_from_string(m) 5233 self.assertEqual(msg.get_filename(), 5234 'This is even more ***fun*** is it not.pdf') 5235 self.assertEqual(m, msg.as_string()) 5236 5237 # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset 5238 # but new test uses *0* because otherwise lang/charset is not valid. 
5239 # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values 5240 def test_rfc2231_no_language_or_charset(self): 5241 m = '''\ 5242Content-Transfer-Encoding: 8bit 5243Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm" 5244Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm 5245 5246''' 5247 msg = email.message_from_string(m) 5248 param = msg.get_param('NAME') 5249 self.assertNotIsInstance(param, tuple) 5250 self.assertEqual( 5251 param, 5252 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm') 5253 5254 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset 5255 def test_rfc2231_no_language_or_charset_in_filename(self): 5256 m = '''\ 5257Content-Disposition: inline; 5258\tfilename*0*="''This%20is%20even%20more%20"; 5259\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5260\tfilename*2="is it not.pdf" 5261 5262''' 5263 msg = email.message_from_string(m) 5264 self.assertEqual(msg.get_filename(), 5265 'This is even more ***fun*** is it not.pdf') 5266 5267 # Duplicate of previous test? 5268 def test_rfc2231_no_language_or_charset_in_filename_encoded(self): 5269 m = '''\ 5270Content-Disposition: inline; 5271\tfilename*0*="''This%20is%20even%20more%20"; 5272\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5273\tfilename*2="is it not.pdf" 5274 5275''' 5276 msg = email.message_from_string(m) 5277 self.assertEqual(msg.get_filename(), 5278 'This is even more ***fun*** is it not.pdf') 5279 5280 # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded, 5281 # but the test below is wrong (the first part should be decoded). 
5282 def test_rfc2231_partly_encoded(self): 5283 m = '''\ 5284Content-Disposition: inline; 5285\tfilename*0="''This%20is%20even%20more%20"; 5286\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5287\tfilename*2="is it not.pdf" 5288 5289''' 5290 msg = email.message_from_string(m) 5291 self.assertEqual( 5292 msg.get_filename(), 5293 'This%20is%20even%20more%20***fun*** is it not.pdf') 5294 5295 def test_rfc2231_partly_nonencoded(self): 5296 m = '''\ 5297Content-Disposition: inline; 5298\tfilename*0="This%20is%20even%20more%20"; 5299\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20"; 5300\tfilename*2="is it not.pdf" 5301 5302''' 5303 msg = email.message_from_string(m) 5304 self.assertEqual( 5305 msg.get_filename(), 5306 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf') 5307 5308 def test_rfc2231_no_language_or_charset_in_boundary(self): 5309 m = '''\ 5310Content-Type: multipart/alternative; 5311\tboundary*0*="''This%20is%20even%20more%20"; 5312\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5313\tboundary*2="is it not.pdf" 5314 5315''' 5316 msg = email.message_from_string(m) 5317 self.assertEqual(msg.get_boundary(), 5318 'This is even more ***fun*** is it not.pdf') 5319 5320 def test_rfc2231_no_language_or_charset_in_charset(self): 5321 # This is a nonsensical charset value, but tests the code anyway 5322 m = '''\ 5323Content-Type: text/plain; 5324\tcharset*0*="This%20is%20even%20more%20"; 5325\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5326\tcharset*2="is it not.pdf" 5327 5328''' 5329 msg = email.message_from_string(m) 5330 self.assertEqual(msg.get_content_charset(), 5331 'this is even more ***fun*** is it not.pdf') 5332 5333 # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii 5334 def test_rfc2231_bad_encoding_in_filename(self): 5335 m = '''\ 5336Content-Disposition: inline; 5337\tfilename*0*="bogus'xx'This%20is%20even%20more%20"; 5338\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5339\tfilename*2="is it not.pdf" 5340 5341''' 5342 msg = 
email.message_from_string(m) 5343 self.assertEqual(msg.get_filename(), 5344 'This is even more ***fun*** is it not.pdf') 5345 5346 def test_rfc2231_bad_encoding_in_charset(self): 5347 m = """\ 5348Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D 5349 5350""" 5351 msg = email.message_from_string(m) 5352 # This should return None because non-ascii characters in the charset 5353 # are not allowed. 5354 self.assertEqual(msg.get_content_charset(), None) 5355 5356 def test_rfc2231_bad_character_in_charset(self): 5357 m = """\ 5358Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D 5359 5360""" 5361 msg = email.message_from_string(m) 5362 # This should return None because non-ascii characters in the charset 5363 # are not allowed. 5364 self.assertEqual(msg.get_content_charset(), None) 5365 5366 def test_rfc2231_bad_character_in_filename(self): 5367 m = '''\ 5368Content-Disposition: inline; 5369\tfilename*0*="ascii'xx'This%20is%20even%20more%20"; 5370\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20"; 5371\tfilename*2*="is it not.pdf%E2" 5372 5373''' 5374 msg = email.message_from_string(m) 5375 self.assertEqual(msg.get_filename(), 5376 'This is even more ***fun*** is it not.pdf\ufffd') 5377 5378 def test_rfc2231_unknown_encoding(self): 5379 m = """\ 5380Content-Transfer-Encoding: 8bit 5381Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt 5382 5383""" 5384 msg = email.message_from_string(m) 5385 self.assertEqual(msg.get_filename(), 'myfile.txt') 5386 5387 def test_rfc2231_bad_character_in_encoding(self): 5388 m = """\ 5389Content-Transfer-Encoding: 8bit 5390Content-Disposition: inline; filename*=utf-8\udce2\udc80\udc9d''myfile.txt 5391 5392""" 5393 msg = email.message_from_string(m) 5394 self.assertEqual(msg.get_filename(), 'myfile.txt') 5395 5396 def test_rfc2231_single_tick_in_filename_extended(self): 5397 eq = self.assertEqual 5398 m = """\ 5399Content-Type: application/x-foo; 5400\tname*0*=\"Frank's\"; name*1*=\" Document\" 5401 5402""" 5403 msg = 
email.message_from_string(m) 5404 charset, language, s = msg.get_param('name') 5405 eq(charset, None) 5406 eq(language, None) 5407 eq(s, "Frank's Document") 5408 5409 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes 5410 def test_rfc2231_single_tick_in_filename(self): 5411 m = """\ 5412Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\" 5413 5414""" 5415 msg = email.message_from_string(m) 5416 param = msg.get_param('name') 5417 self.assertNotIsInstance(param, tuple) 5418 self.assertEqual(param, "Frank's Document") 5419 5420 def test_rfc2231_missing_tick(self): 5421 m = '''\ 5422Content-Disposition: inline; 5423\tfilename*0*="'This%20is%20broken"; 5424''' 5425 msg = email.message_from_string(m) 5426 self.assertEqual( 5427 msg.get_filename(), 5428 "'This is broken") 5429 5430 def test_rfc2231_missing_tick_with_encoded_non_ascii(self): 5431 m = '''\ 5432Content-Disposition: inline; 5433\tfilename*0*="'This%20is%E2broken"; 5434''' 5435 msg = email.message_from_string(m) 5436 self.assertEqual( 5437 msg.get_filename(), 5438 "'This is\ufffdbroken") 5439 5440 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang 5441 def test_rfc2231_tick_attack_extended(self): 5442 eq = self.assertEqual 5443 m = """\ 5444Content-Type: application/x-foo; 5445\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\" 5446 5447""" 5448 msg = email.message_from_string(m) 5449 charset, language, s = msg.get_param('name') 5450 eq(charset, 'us-ascii') 5451 eq(language, 'en-us') 5452 eq(s, "Frank's Document") 5453 5454 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value 5455 def test_rfc2231_tick_attack(self): 5456 m = """\ 5457Content-Type: application/x-foo; 5458\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\" 5459 5460""" 5461 msg = email.message_from_string(m) 5462 param = msg.get_param('name') 5463 self.assertNotIsInstance(param, tuple) 5464 
self.assertEqual(param, "us-ascii'en-us'Frank's Document") 5465 5466 # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes 5467 def test_rfc2231_no_extended_values(self): 5468 eq = self.assertEqual 5469 m = """\ 5470Content-Type: application/x-foo; name=\"Frank's Document\" 5471 5472""" 5473 msg = email.message_from_string(m) 5474 eq(msg.get_param('name'), "Frank's Document") 5475 5476 # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments 5477 def test_rfc2231_encoded_then_unencoded_segments(self): 5478 eq = self.assertEqual 5479 m = """\ 5480Content-Type: application/x-foo; 5481\tname*0*=\"us-ascii'en-us'My\"; 5482\tname*1=\" Document\"; 5483\tname*2*=\" For You\" 5484 5485""" 5486 msg = email.message_from_string(m) 5487 charset, language, s = msg.get_param('name') 5488 eq(charset, 'us-ascii') 5489 eq(language, 'en-us') 5490 eq(s, 'My Document For You') 5491 5492 # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments 5493 # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments 5494 def test_rfc2231_unencoded_then_encoded_segments(self): 5495 eq = self.assertEqual 5496 m = """\ 5497Content-Type: application/x-foo; 5498\tname*0=\"us-ascii'en-us'My\"; 5499\tname*1*=\" Document\"; 5500\tname*2*=\" For You\" 5501 5502""" 5503 msg = email.message_from_string(m) 5504 charset, language, s = msg.get_param('name') 5505 eq(charset, 'us-ascii') 5506 eq(language, 'en-us') 5507 eq(s, 'My Document For You') 5508 5509 def test_should_not_hang_on_invalid_ew_messages(self): 5510 messages = ["""From: [email protected] 5511To: [email protected] 5512Bad-Header: 5513 =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?= 5514 =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?= 5515 =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?= 5516 5517Hello! 
5518""", """From: ����� �������� <xxx@xxx> 5519To: "xxx" <xxx@xxx> 5520Subject: ��� ���������� ����� ����� � ��������� �� ���� 5521MIME-Version: 1.0 5522Content-Type: text/plain; charset="windows-1251"; 5523Content-Transfer-Encoding: 8bit 5524 5525�� ����� � ���� ������ ��� �������� 5526"""] 5527 for m in messages: 5528 with self.subTest(m=m): 5529 msg = email.message_from_string(m) 5530 5531 5532# Tests to ensure that signed parts of an email are completely preserved, as 5533# required by RFC1847 section 2.1. Note that these are incomplete, because the 5534# email package does not currently always preserve the body. See issue 1670765. 5535class TestSigned(TestEmailBase): 5536 5537 def _msg_and_obj(self, filename): 5538 with openfile(filename, encoding="utf-8") as fp: 5539 original = fp.read() 5540 msg = email.message_from_string(original) 5541 return original, msg 5542 5543 def _signed_parts_eq(self, original, result): 5544 # Extract the first mime part of each message 5545 import re 5546 repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M) 5547 inpart = repart.search(original).group(2) 5548 outpart = repart.search(result).group(2) 5549 self.assertEqual(outpart, inpart) 5550 5551 def test_long_headers_as_string(self): 5552 original, msg = self._msg_and_obj('msg_45.txt') 5553 result = msg.as_string() 5554 self._signed_parts_eq(original, result) 5555 5556 def test_long_headers_as_string_maxheaderlen(self): 5557 original, msg = self._msg_and_obj('msg_45.txt') 5558 result = msg.as_string(maxheaderlen=60) 5559 self._signed_parts_eq(original, result) 5560 5561 def test_long_headers_flatten(self): 5562 original, msg = self._msg_and_obj('msg_45.txt') 5563 fp = StringIO() 5564 Generator(fp).flatten(msg) 5565 result = fp.getvalue() 5566 self._signed_parts_eq(original, result) 5567 5568class TestHeaderRegistry(TestEmailBase): 5569 # See issue gh-93010. 
5570 def test_HeaderRegistry(self): 5571 reg = HeaderRegistry() 5572 a = reg('Content-Disposition', 'attachment; 0*00="foo"') 5573 self.assertIsInstance(a.defects[0], errors.InvalidHeaderDefect) 5574 5575if __name__ == '__main__': 5576 unittest.main() 5577