@@ -112,10 +112,10 @@ def _flush_partial(self):
112112 if not line :
113113 pass
114114 elif self ._dump_destination is None :
115- # We're not dumping data. Just flush the partial to lines, as normal
115+ # We're not dumping data. Just flush the partial to lines
116116 self ._lines .append (line )
117117 elif self ._check_eofstack (line ):
118- # We were dumping, but we've now reached the end of the dump. Push our line and stop dumping.
118+ # We were dumping, but we've now reached the end of the dump.
119119 self ._dump_destination = None
120120 self ._lines .append (line )
121121 else :
@@ -130,7 +130,6 @@ def push(self, data):
130130 if not data :
131131 return
132132
133- # If we're dumping, and we don't have anything that will ever tell us to terminate, simply dump everything
134133 if self ._can_dump_data (data ):
135134 self ._dump_destination .append (data )
136135 return
@@ -139,12 +138,11 @@ def push(self, data):
139138
140139 def _can_dump_data (self , data ):
141140 if self ._dump_destination is None :
142- # We're not dumping data
143141 return False
144142
145143 # We're dumping; check for easy optimizations
146144 if not self ._eofstack :
147- # There's nothing that will ever tell us to stop dumping. Go ahead and dump the entire `data` object.
145+ # There's nothing that will ever tell us to stop dumping.
148146 # This does absolute wonders for large non-multipart emails.
149147 assert not self ._lines
150148 assert not self ._dangling_partial
@@ -155,56 +153,44 @@ def _can_dump_data(self, data):
155153 if self ._partial :
156154 return False
157155
158- all_boundary_matches = True
159156 for pred in self ._eofstack :
160157 if not hasattr (pred , 'is_boundary_match' ):
161- all_boundary_matches = False
162- break
163-
164- if all_boundary_matches and '-' not in data :
165- # We eventually need to stop, but we only care about boundary matches, and there's no boundaries
166- # here. Dump the entire `data` object. This does wonders for multipart emails with large parts.
167- assert not self ._lines
168- return True
169-
170- # We're still dumping, but there's a potential boundary marker or EOF or similar issue. Force a proper parse.
171- return False
158+ # We can't blindly dump entire chunks, if we're interested in
159+ # more than just boundaries
160+ return False
161+
162+ # We only care about boundaries; we can dump as long as there's no
163+ # potential boundaries.
164+ return '-' not in data
172165
173166 def _can_dump_partial (self , line , start = 0 , end = sys .maxsize ):
174- # Very similar to _can_dump_data above, except we can make some additional assumptions for partials/lines.
175- # This should only ever be checked when we have a new partial line, in which case we have no partial,
176- # or when checking the partial itself, in which case it'll always be the first part
167+ # Very similar to _can_dump_data above, except we can make some
168+ # additional assumptions for partials/lines.
177169 assert not self ._partial or line is self ._partial [0 ]
178170
179171 if self ._dump_destination is None :
180- # We're not dumping data
181172 return False
182173
183- # We're dumping. There should be absolutely no other pending lines, because those should've been dumped.
174+ # We're dumping. There should be absolutely no other pending lines,
175+ # because those should've been dumped.
184176 assert not self ._lines
185177 if not self ._eofstack :
186- # There's nothing that will ever tell us to stop dumping. Dump away.
178+ # There's nothing that will ever tell us to stop dumping. Dump away
187179 return True
188180
189181 all_boundary_matches = True
190182 for pred in self ._eofstack :
191183 if not hasattr (pred , 'is_boundary_match' ):
192- all_boundary_matches = False
193- break
194-
195- if all_boundary_matches and not line .startswith ("-" , start , end ):
196- # We eventually need to stop, but we only care about boundary matches, and there's no boundaries
197- # here. Dump the entire `data` object. This does wonders for multipart emails with large parts.
198- return True
184+ return False
199185
200- # We're still dumping, but there's a potential boundary marker or EOF or similar issue. Force a proper parse.
201- return False
186+ # We only care about boundaries; we can dump as long as there's no
187+ # potential boundaries.
188+ return not line .startswith ("-" , start , end )
202189
203190 def _is_dump_midline (self ):
204191 if not self ._dump_destination :
205192 return False
206193
207- assert self ._dump_destination [- 1 ] # Never push empty strings to _dump_destination
208194 return self ._dump_destination [- 1 ][- 1 ] not in ('\n ' , '\r ' )
209195
210196 def _push_data (self , data ):
@@ -214,8 +200,9 @@ def _push_data(self, data):
214200 # No new complete lines, wait for more.
215201 # Check to see if we had a previous dangling partial newline
216202 if self ._dangling_partial :
217- # We previously pushed a dangling line expecting a \n to follow, however we received other data instead.
218- # Therefore, that \r does actually terminate a line. Go ahead and push it.
203+ # We previously pushed a dangling line expecting \n to follow,
204+ # however we received other data instead. Therefore, that \r
205+ # does actually terminate a line. Go ahead and push it.
219206 self ._flush_partial ()
220207
221208 # No lines in data to push; wait for more data
@@ -230,24 +217,23 @@ def _push_data(self, data):
230217
231218 # Complete our previous/partial line
232219 if self ._partial :
233- # Check to see if we had any dangling newlines in our partial, and handle if appropriate
234220 if self ._dangling_partial :
235- # We had a previously dangling line; this is either a \n (completion), or some other char (termination)
236221 if data [0 ] != NL :
237- # "\r<whatever>" -- push what we had, as it has been terminated; data_start_index = 0
222+ # "\r<whatever>" -- push what we had, it's been terminated
238223 self ._flush_partial ()
239224 else :
240- # "\r\n" -- append \n and push it; data_start_index = 1
225+ # "\r\n" -- append \n to complete it and push
241226 self ._partial .append (NL )
242227 self ._flush_partial ()
243228 data_start_index = 1
244229
245230 # Find the next newline
246- unl_start_index = BufferedSubFile ._find_unl (data , data_start_index )
247- # Fall through
231+ unl_start_index = BufferedSubFile ._find_unl (
232+ data , data_start_index )
248233 else :
249- # Our partial has no dangling newline; complete our partial with the new line and push it
250- unl_end_index = BufferedSubFile ._find_unl_end (data , unl_start_index )
234+ # Complete our partial with the new line and push it
235+ unl_end_index = BufferedSubFile ._find_unl_end (
236+ data , unl_start_index )
251237 if unl_end_index < 0 :
252238 # The newline is incomplete; append data and return
253239 self ._partial .append (data )
@@ -260,36 +246,37 @@ def _push_data(self, data):
260246 data_start_index = unl_end_index
261247
262248 # Find the next newline
263- unl_start_index = BufferedSubFile ._find_unl (data , data_start_index )
264- # Fall through
249+ unl_start_index = BufferedSubFile ._find_unl (
250+ data , data_start_index )
265251
266252 # _partial is now guaranteed to be empty
267- # data_start_index is an index which points to the start of the next line
268- # unl_start_index is an index which points to the start of the next newline character, if there is one
253+ # data_start_index is the index of the start of the next line
254+ # unl_start_index is the start of the next newline character, or -1
269255 self ._push_data_no_partial (data , data_start_index , unl_start_index )
270256
271257 def _push_data_no_partial (self , data , data_start_index , unl_start_index ):
272- # _partial is now guaranteed to point to be empty
273- # data_start_index is an index which points to the start of the next line
274- # unl_start_index is an index which points to the start of the next newline character, if there is one
275-
276258 # Process any remaining whole lines in data
277259 if unl_start_index < 0 :
278260 # Push right to the partial if there's no lines
279261 if data_start_index < len (data ):
280262 assert data_start_index >= 0
281263 partial_line = data [data_start_index :]
282- if self ._is_dump_midline () or self ._can_dump_partial (partial_line ):
264+ if self ._is_dump_midline () \
265+ or self ._can_dump_partial (partial_line ):
283266 self ._dump_destination .append (partial_line )
284267 else :
285268 self ._partial = [partial_line ]
286269 if data [- 1 ] == '\r ' :
287270 self ._dangling_partial = True
288- elif self ._dump_destination is None and unl_start_index < len (data ) // 2 :
289- # If it looks like we're going to be doing a lot of splits/joins, just go ahead and use StringIO, for speed
290- # If we had some sort of "StringViewIO" to avoid the copy, this would be significantly more efficient
291- # This code block, and the "else" code block below, functionally do the exact same thing, except this path
292- # makes no attempt to handle dumping data
271+ elif self ._dump_destination is None \
272+ and unl_start_index < len (data ) // 2 :
273+ # If it looks like we're going to be doing a lot of splits/joins,
274+ # just go ahead and use StringIO, for speed
275+ # If we had some sort of "StringViewIO" to avoid the copy, this
276+ # would be significantly more efficient
277+ # This code block, and the "else" code block below, functionally do
278+ # the exact same thing, except this path makes no attempt to handle
279+ # dumping data
293280 sio = StringIO (data , '' )
294281 sio .seek (data_start_index )
295282 lines = sio .readlines ()
@@ -301,26 +288,28 @@ def _push_data_no_partial(self, data, data_start_index, unl_start_index):
301288
302289 self .pushlines (lines )
303290 else :
304- # If we're not, let's keep it in Python
305- dump_data_start = None if self . _dump_destination is None else data_start_index
291+ dump_data_start = None if self . _dump_destination is None \
292+ else data_start_index
306293 while unl_start_index >= 0 :
307- unl_end_index = BufferedSubFile ._find_unl_end (data , unl_start_index )
294+ unl_end_index = BufferedSubFile ._find_unl_end (
295+ data , unl_start_index )
308296 if unl_end_index < 0 :
309- # Incomplete line ending; break to update our partial and return
297+ # Incomplete line ending; break to just update our partial
310298 self ._dangling_partial = True
311299 break
312300
313301 # We have an easy line; push it
314302 if self ._dump_destination is not None :
315- # We have a window into a line. Make sure it's not EOF, and continue as long as it's not
316- if self ._check_eofstack (data , data_start_index , unl_end_index ):
317- # This line is "EOF". This is the end of our dump data! Push the dump data.
318- self ._dump_destination .append (data [dump_data_start :data_start_index ])
303+ # We have a window into a line. Make sure it's not EOF
304+ if self ._check_eofstack (
305+ data , data_start_index , unl_end_index ):
306+ # This line is "EOF". This is the end of our dump data
307+ self ._dump_destination .append (
308+ data [dump_data_start :data_start_index ])
319309
320310 # Also push our line, since we already have it
321- self ._lines .append (data [data_start_index :unl_end_index ])
322-
323- # Mark dump complete
311+ self ._lines .append (
312+ data [data_start_index :unl_end_index ])
324313 self ._dump_destination = None
325314 #else: # This line didn't mark the end. Keep going.
326315 else :
@@ -329,21 +318,23 @@ def _push_data_no_partial(self, data, data_start_index, unl_start_index):
329318
330319 # Update our iterators
331320 data_start_index = unl_end_index
332- unl_start_index = BufferedSubFile ._find_unl (data , data_start_index )
321+ unl_start_index = BufferedSubFile ._find_unl (
322+ data , data_start_index )
333323
334- # If we're still dumping, push everything that isn't going into the partial to the dump
335324 if self ._dump_destination is not None :
336- # If we're able to safely flush the partial, go ahead and do that too
337- # We don't care about self._is_dump_midline() here, because data_start_index always represents the
338- # start of a new line, always
325+ # Push everything that isn't going into the partial to the dump
326+ # If we're able to safely flush the partial, do that too
327+ # We don't care about self._is_dump_midline() here, because
328+ # data_start_index always represents the start of a new line
339329 if self ._can_dump_partial (data , data_start_index ):
340330 self ._dump_destination .append (data [dump_data_start :])
341331
342- # We've consumed the partial; flush any partial-related state we may have set
332+ # Flush any partial-related state we may have set
343333 self ._dangling_partial = False
344- return # skip the _partial.append below, because it's already been consumed
334+ return # skip the _partial.append below
345335 else :
346- self ._dump_destination .append (data [dump_data_start :data_start_index ])
336+ self ._dump_destination .append (
337+ data [dump_data_start :data_start_index ])
347338
348339 # If we have any partial data leftover, go ahead and set it
349340 if data_start_index < len (data ):
@@ -381,7 +372,6 @@ def _get_dump(self, start_value:str|None = None):
381372 if needs_more_data :
382373 # Flush our partial, if we can
383374 if self ._partial and self ._can_dump_partial (self ._partial [0 ]):
384- assert self ._partial [0 ] # We shouldn't ever push empty strings to _partial
385375 _dump_destination .extend (self ._partial )
386376 self ._partial .clear ()
387377 self ._dangling_partial = False
@@ -402,8 +392,9 @@ def _pop_dump(self):
402392 @staticmethod
403393 def _find_unl (data , start = 0 ):
404394 # Like str.find(), but for universal newlines
405- # Originally, this iterated over the string, however just calling find() twice is drastically faster
406- # This could be sped up by replacing with a similar function in C, so we don't pass over the string twice.
395+ # This used to iterate over the string; calling find() twice is faster
396+ # This could be sped up by replacing with a similar function in C,
397+ # so we don't pass over the string twice.
407398 cr_index = data .find ('\r ' , start )
408399 if cr_index < 0 :
409400 return data .find (NL , start )
@@ -413,9 +404,8 @@ def _find_unl(data, start=0):
413404
414405 @staticmethod
415406 def _find_unl_end (data , start ):
416- # A helper function which returns the 1-past-the-end index of a universal newline
407+ # Returns the 1-past-the-end index of a universal newline
417408 # This could be sped up by replacing with a similar function in C.
418- #assert data[start] in '\r\n'
419409
420410 # \n is always end of line
421411 if data .startswith (NL , start ):
@@ -425,7 +415,7 @@ def _find_unl_end(data, start):
425415 if data .startswith (NL , start + 1 ):
426416 return start + 2
427417
428- # End of string ; we can't know if a \n follows, so no universal line end
418+ # End of data ; we can't know if a \n follows, so no universal line end
429419 if start + 1 >= len (data ):
430420 return - 1
431421
@@ -461,7 +451,7 @@ def __init__(self, _factory=None, *, policy=compat32):
461451 self ._old_style_factory = True
462452 self ._input = BufferedSubFile ()
463453 self ._msgstack = []
464- self ._parse = self ._parsegen ().__next__ # Interesting trick which replaces yield values with return values
454+ self ._parse = self ._parsegen ().__next__
465455 self ._cur = None
466456 self ._last = None
467457 self ._headersonly = False
@@ -477,7 +467,7 @@ def feed(self, data):
477467
478468 def _call_parse (self ):
479469 try :
480- self ._parse () # Return value is always NeedMoreData or None, but discarded here in either case
470+ self ._parse ()
481471 except StopIteration :
482472 pass
483473
0 commit comments