@@ -4606,7 +4606,8 @@ def _msgobj_trickle(filename, trickle_size=2, force_linetype="\r\n"):
46064606 # Trickle data into the feed parser, trickle_size characters at a time
46074607 with openfile (filename , encoding = "utf-8" ) as fp :
46084608 file_str = fp .read ()
4609- file_str = file_str .replace ("\r \n " , "\n " ).replace ("\r " , "\n " ).replace ("\n " , force_linetype )
4609+ file_str = file_str .replace ("\r \n " , "\n " ).replace ("\r " , "\n " ) \
4610+ .replace ("\n " , force_linetype )
46104611
46114612 feedparser = FeedParser ()
46124613 for index in range (0 , len (file_str ), trickle_size ):
@@ -4617,22 +4618,15 @@ def _validate_msg10_msgobj(self, msg, line_end):
46174618 if isinstance (line_end , str ):
46184619 line_end = line_end .encode ()
46194620 eq = self .assertEqual
4620- # The outer message is a multipart
46214621 eq (msg .get_payload (decode = True ), None )
4622- # Subpart 1 is 7bit encoded
46234622 eq (msg .get_payload (0 ).get_payload (decode = True ),
46244623 b'This is a 7bit encoded message.' + line_end )
4625- # Subpart 2 is quopri
46264624 eq (msg .get_payload (1 ).get_payload (decode = True ),
46274625 b'\xa1 This is a Quoted Printable encoded message!' + line_end )
4628- # Subpart 3 is base64
46294626 eq (msg .get_payload (2 ).get_payload (decode = True ),
46304627 b'This is a Base64 encoded message.' )
4631- # Subpart 4 is base64 with a trailing newline, which
4632- # used to be stripped (issue 7143).
46334628 eq (msg .get_payload (3 ).get_payload (decode = True ),
46344629 b'This is a Base64 encoded message.\n ' )
4635- # Subpart 5 has no Content-Transfer-Encoding: header.
46364630 eq (msg .get_payload (4 ).get_payload (decode = True ),
46374631 b'This has no Content-Transfer-Encoding: header.' + line_end )
46384632
@@ -4649,8 +4643,6 @@ def test_trickle_1chr_lf(self):
46494643 self ._validate_msg10_msgobj (msg , '\n ' )
46504644
46514645 def test_trickle_2chr_crlf (self ):
4652- # During initial testing, it was realized that an edge case was missed around dangling newlines.
4653- # This helps test that behavior, as it is not otherwise covered by tests.
46544646 msg = self ._msgobj_trickle ('msg_10.txt' , 2 , '\r \n ' )
46554647 self ._validate_msg10_msgobj (msg , '\r \n ' )
46564648
@@ -4675,136 +4667,6 @@ def test_trickle_3chr_lf(self):
46754667 self ._validate_msg10_msgobj (msg , '\n ' )
46764668
46774669
class TestPeakMemoryUsage(unittest.TestCase):
    """Check the peak memory overhead of email.message_from_bytes().

    Each test parses a roughly 10 MiB message while tracemalloc is tracing,
    verifies that the parsed payload is correct, and asserts that the memory
    which was allocated and later discarded during parsing stays below
    1.05x the size of the input message.
    """

    maxDiff = None
    SMALLER_CHUNK_SIZE = 1024

    _LARGE_EMAIL_BYTE_SIZE = 1024 * 1024 * 10  # 10 MiB

    # Two consecutive line breaks, i.e. the blank line that separates the
    # headers from the body.  "\r\n" must be consumed as a single line break:
    # the lookahead stops a lone CRLF from being counted as "\r" followed by
    # "\n", which would otherwise match after the very first header line.
    _BLANK_LINE_RE = re.compile(rb'(?:\r\n|\r(?!\n)|\n){2}')

    _multipart_msg_base = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed; boundary="BOUNDARY"

        --BOUNDARY
        Content-Type: text/plain

        Test message

        --BOUNDARY
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        {}
        --BOUNDARY--
    """)

    def _msg_bytes(self, filename):
        """Return the raw bytes of the test data file *filename*."""
        with openfile(filename, 'rb') as fp:
            return fp.read()

    def _make_plaintext_msg_bytes(self, min_size):
        """Build a plaintext message of at least *min_size* bytes.

        msg_01.txt is used as the baseline.  If it is smaller than
        *min_size* it is repeated and then truncated to exactly *min_size*
        bytes; everything after the first blank line therefore becomes body.

        Returns a ``(msg_bytes, expected_payload)`` tuple where
        *expected_payload* is everything following the first blank line.
        """
        msg_bytes = self._msg_bytes('msg_01.txt')
        if len(msg_bytes) < min_size:
            # Make it bigger, then truncate it to min_size.
            repeats = min_size // len(msg_bytes) + 1
            msg_bytes = (msg_bytes * repeats)[:min_size]
        # A real assertion rather than a bare ``assert`` so the check
        # survives ``python -O`` and reports both values on failure.
        self.assertGreaterEqual(len(msg_bytes), min_size)

        match = self._BLANK_LINE_RE.search(msg_bytes)
        self.assertIsNotNone(match)
        expected_payload = msg_bytes[match.end():]

        return msg_bytes, expected_payload

    def _measure_message_from_bytes(self, msg_bytes):
        """Parse *msg_bytes* with the default policy while tracing memory.

        Returns ``(msgobj, peak_overhead, overhead_ratio, elapsed)``:

        * *msgobj* - the parsed message.
        * *peak_overhead* - peak traced bytes minus the bytes still traced
          once parsing finished, i.e. memory that was allocated and then
          discarded.
        * *overhead_ratio* - *peak_overhead* divided by ``len(msg_bytes)``,
          or None for an empty message.
        * *elapsed* - seconds spent inside message_from_bytes().
        """
        import tracemalloc

        tracemalloc.start()
        try:
            start_time = time.perf_counter()
            msgobj = email.message_from_bytes(msg_bytes,
                                              policy=email.policy.default)
            end_time = time.perf_counter()
            after_bytes, after_peak_bytes = tracemalloc.get_traced_memory()
        finally:
            # Always stop tracing, even if the parser raised, so that later
            # tests do not run with tracemalloc left enabled.
            tracemalloc.stop()

        # "How many bytes did we allocate, that were ultimately discarded?"
        peak_overhead = after_peak_bytes - after_bytes

        # "How large was that overhead, relative to the size of the message?"
        overhead_ratio = peak_overhead / len(msg_bytes) if msg_bytes else None

        return msgobj, peak_overhead, overhead_ratio, end_time - start_time

    def _base64_encode(self, bytes_to_encode, one_line=True):
        """Return *bytes_to_encode* base64-encoded as a str.

        With *one_line* true the line breaks in the output of
        base64mime.body_encode() are removed so the result is one long line.
        """
        base64_str = base64mime.body_encode(bytes_to_encode)
        if one_line:
            base64_str = "".join(base64_str.splitlines())
        return base64_str

    def _make_junk_bytes(self, bytes_length):
        """Return *bytes_length* bytes where byte ``i`` has value ``i % 256``."""
        # Repeating a 256-byte pattern yields the same bytes as filling a
        # bytearray index by index, without a multi-million iteration loop.
        pattern = bytes(range(256))
        repeats, remainder = divmod(bytes_length, len(pattern))
        return pattern * repeats + pattern[:remainder]

    def _make_junk_base64(self, bytes_length, one_line=True):
        """Return ``(base64_text, raw_bytes)`` for *bytes_length* junk bytes."""
        junk_bytes = self._make_junk_bytes(bytes_length)
        return self._base64_encode(junk_bytes, one_line), junk_bytes

    def _check_large_attachment(self, one_line):
        """Parse a multipart message carrying a 10 MiB base64 attachment.

        *one_line* selects whether the base64 text is a single long line or
        wrapped as produced by base64mime.body_encode().  Verifies the
        decoded and raw attachment payloads and the memory overhead bound.
        """
        # Generate a 10 MiB attachment
        attachment_base64, attachment_bytes = self._make_junk_base64(
            self._LARGE_EMAIL_BYTE_SIZE, one_line)
        multipart_msg_bytes = self._multipart_msg_base.format(
            attachment_base64).encode()

        # Parse it, collecting stats
        msgobj, _, overhead_ratio, _ = self._measure_message_from_bytes(
            multipart_msg_bytes)

        # Verify the message payload/content is correct.
        attachment_msg = msgobj.get_payload(1)
        self.assertEqual(attachment_msg.get_content(), attachment_bytes)
        self.assertEqual(attachment_msg.get_payload(decode=False),
                         attachment_base64)

        self.assertLess(overhead_ratio, 1.05)

    def test_message_from_bytes_plaintext(self):
        # Generate a 10MiB plaintext email
        msg_bytes, expected_payload = self._make_plaintext_msg_bytes(
            self._LARGE_EMAIL_BYTE_SIZE)

        # Parse it, collecting stats
        msgobj, _, overhead_ratio, _ = self._measure_message_from_bytes(
            msg_bytes)

        # Verify the message payload/content is correct.
        self.assertEqual(msgobj.get_payload(decode=True), expected_payload)
        self.assertEqual(msgobj.get_content(), expected_payload.decode())

        # overhead_ratio at time of writing: 1.0102445602416992
        self.assertLess(overhead_ratio, 1.05)

    def test_message_from_bytes_large_attachment_body_encoded(self):
        # Base64 text wrapped into lines by body_encode().
        # overhead_ratio at time of writing: 1.0088957315722829 - 85.0565% decrease
        self._check_large_attachment(one_line=False)

    def test_message_from_bytes_large_attachment_one_line(self):
        # Base64 text joined into one single line.
        # overhead_ratio at time of writing: 1.0077472351610626 - 89.2775% decrease
        self._check_large_attachment(one_line=True)
4807-
48084670class TestBytesGeneratorIdempotentNL (BaseTestBytesGeneratorIdempotent ,
48094671 TestIdempotent ):
48104672 linesep = '\n '
0 commit comments