Skip to content

Commit 13ebb39

Browse files
committed
Comment: 80-rule, remove some comments
1 parent 530f6d4 commit 13ebb39

2 files changed

Lines changed: 77 additions & 86 deletions

File tree

Lib/email/feedparser.py

Lines changed: 74 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,10 @@ def _flush_partial(self):
112112
if not line:
113113
pass
114114
elif self._dump_destination is None:
115-
# We're not dumping data. Just flush the partial to lines, as normal
115+
# We're not dumping data. Just flush the partial to lines
116116
self._lines.append(line)
117117
elif self._check_eofstack(line):
118-
# We were dumping, but we've now reached the end of the dump. Push our line and stop dumping.
118+
# We were dumping, but we've now reached the end of the dump.
119119
self._dump_destination = None
120120
self._lines.append(line)
121121
else:
@@ -130,7 +130,6 @@ def push(self, data):
130130
if not data:
131131
return
132132

133-
# If we're dumping, and we don't have anything that will ever tell us to terminate, simply dump everything
134133
if self._can_dump_data(data):
135134
self._dump_destination.append(data)
136135
return
@@ -139,12 +138,11 @@ def push(self, data):
139138

140139
def _can_dump_data(self, data):
141140
if self._dump_destination is None:
142-
# We're not dumping data
143141
return False
144142

145143
# We're dumping; check for easy optimizations
146144
if not self._eofstack:
147-
# There's nothing that will ever tell us to stop dumping. Go ahead and dump the entire `data` object.
145+
# There's nothing that will ever tell us to stop dumping.
148146
# This does absolute wonders for large non-multipart emails.
149147
assert not self._lines
150148
assert not self._dangling_partial
@@ -155,56 +153,44 @@ def _can_dump_data(self, data):
155153
if self._partial:
156154
return False
157155

158-
all_boundary_matches = True
159156
for pred in self._eofstack:
160157
if not hasattr(pred, 'is_boundary_match'):
161-
all_boundary_matches = False
162-
break
163-
164-
if all_boundary_matches and '-' not in data:
165-
# We eventually need to stop, but we only care about boundary matches, and there's no boundaries
166-
# here. Dump the entire `data` object. This does wonders for multipart emails with large parts.
167-
assert not self._lines
168-
return True
169-
170-
# We're still dumping, but there's a potential boundary marker or EOF or similar issue. Force a proper parse.
171-
return False
158+
# We can't blindly dump entire chunks, if we're interested in
159+
# more than just boundaries
160+
return False
161+
162+
# We only care about boundaries; we can dump as long as there are no
163+
# potential boundaries.
164+
return '-' not in data
172165

173166
def _can_dump_partial(self, line, start=0, end=sys.maxsize):
174-
# Very similar to _can_dump_data above, except we can make some additional assumptions for partials/lines.
175-
# This should only ever be checked when we have a new partial line, in which case we have no partial,
176-
# or when checking the partial itself, in which case it'll always be the first part
167+
# Very similar to _can_dump_data above, except we can make some
168+
# additional assumptions for partials/lines.
177169
assert not self._partial or line is self._partial[0]
178170

179171
if self._dump_destination is None:
180-
# We're not dumping data
181172
return False
182173

183-
# We're dumping. There should be absolutely no other pending lines, because those should've been dumped.
174+
# We're dumping. There should be absolutely no other pending lines,
175+
# because those should've been dumped.
184176
assert not self._lines
185177
if not self._eofstack:
186-
# There's nothing that will ever tell us to stop dumping. Dump away.
178+
# There's nothing that will ever tell us to stop dumping. Dump away
187179
return True
188180

189181
all_boundary_matches = True
190182
for pred in self._eofstack:
191183
if not hasattr(pred, 'is_boundary_match'):
192-
all_boundary_matches = False
193-
break
194-
195-
if all_boundary_matches and not line.startswith("-", start, end):
196-
# We eventually need to stop, but we only care about boundary matches, and there's no boundaries
197-
# here. Dump the entire `data` object. This does wonders for multipart emails with large parts.
198-
return True
184+
return False
199185

200-
# We're still dumping, but there's a potential boundary marker or EOF or similar issue. Force a proper parse.
201-
return False
186+
# We only care about boundaries; we can dump as long as there are no
187+
# potential boundaries.
188+
return not line.startswith("-", start, end)
202189

203190
def _is_dump_midline(self):
204191
if not self._dump_destination:
205192
return False
206193

207-
assert self._dump_destination[-1] # Never push empty strings to _dump_destination
208194
return self._dump_destination[-1][-1] not in ('\n', '\r')
209195

210196
def _push_data(self, data):
@@ -214,8 +200,9 @@ def _push_data(self, data):
214200
# No new complete lines, wait for more.
215201
# Check to see if we had a previous dangling partial newline
216202
if self._dangling_partial:
217-
# We previously pushed a dangling line expecting a \n to follow, however we received other data instead.
218-
# Therefore, that \r does actually terminate a line. Go ahead and push it.
203+
# We previously pushed a dangling line expecting \n to follow,
204+
# however we received other data instead. Therefore, that \r
205+
# does actually terminate a line. Go ahead and push it.
219206
self._flush_partial()
220207

221208
# No lines in data to push; wait for more data
@@ -230,24 +217,23 @@ def _push_data(self, data):
230217

231218
# Complete our previous/partial line
232219
if self._partial:
233-
# Check to see if we had any dangling newlines in our partial, and handle if appropriate
234220
if self._dangling_partial:
235-
# We had a previously dangling line; this is either a \n (completion), or some other char (termination)
236221
if data[0] != NL:
237-
# "\r<whatever>" -- push what we had, as it has been terminated; data_start_index = 0
222+
# "\r<whatever>" -- push what we had, it's been terminated
238223
self._flush_partial()
239224
else:
240-
# "\r\n" -- append \n and push it; data_start_index = 1
225+
# "\r\n" -- append \n to complete it and push
241226
self._partial.append(NL)
242227
self._flush_partial()
243228
data_start_index = 1
244229

245230
# Find the next newline
246-
unl_start_index = BufferedSubFile._find_unl(data, data_start_index)
247-
# Fall through
231+
unl_start_index = BufferedSubFile._find_unl(
232+
data, data_start_index)
248233
else:
249-
# Our partial has no dangling newline; complete our partial with the new line and push it
250-
unl_end_index = BufferedSubFile._find_unl_end(data, unl_start_index)
234+
# Complete our partial with the new line and push it
235+
unl_end_index = BufferedSubFile._find_unl_end(
236+
data, unl_start_index)
251237
if unl_end_index < 0:
252238
# The newline is incomplete; append data and return
253239
self._partial.append(data)
@@ -260,36 +246,37 @@ def _push_data(self, data):
260246
data_start_index = unl_end_index
261247

262248
# Find the next newline
263-
unl_start_index = BufferedSubFile._find_unl(data, data_start_index)
264-
# Fall through
249+
unl_start_index = BufferedSubFile._find_unl(
250+
data, data_start_index)
265251

266252
# _partial is now guaranteed to be empty
267-
# data_start_index is an index which points to the start of the next line
268-
# unl_start_index is an index which points to the start of the next newline character, if there is one
253+
# data_start_index points to the start of the next line
254+
# unl_start_index is the start of the next newline character, or -1
269255
self._push_data_no_partial(data, data_start_index, unl_start_index)
270256

271257
def _push_data_no_partial(self, data, data_start_index, unl_start_index):
272-
# _partial is now guaranteed to point to be empty
273-
# data_start_index is an index which points to the start of the next line
274-
# unl_start_index is an index which points to the start of the next newline character, if there is one
275-
276258
# Process any remaining whole lines in data
277259
if unl_start_index < 0:
278260
# Push right to the partial if there's no lines
279261
if data_start_index < len(data):
280262
assert data_start_index >= 0
281263
partial_line = data[data_start_index:]
282-
if self._is_dump_midline() or self._can_dump_partial(partial_line):
264+
if self._is_dump_midline() \
265+
or self._can_dump_partial(partial_line):
283266
self._dump_destination.append(partial_line)
284267
else:
285268
self._partial = [partial_line]
286269
if data[-1] == '\r':
287270
self._dangling_partial = True
288-
elif self._dump_destination is None and unl_start_index < len(data) // 2:
289-
# If it looks like we're going to be doing a lot of splits/joins, just go ahead and use StringIO, for speed
290-
# If we had some sort of "StringViewIO" to avoid the copy, this would be significantly more efficient
291-
# This code block, and the "else" code block below, functionally do the exact same thing, except this path
292-
# makes no attempt to handle dumping data
271+
elif self._dump_destination is None \
272+
and unl_start_index < len(data) // 2:
273+
# If it looks like we're going to be doing a lot of splits/joins,
274+
# just go ahead and use StringIO, for speed
275+
# If we had some sort of "StringViewIO" to avoid the copy, this
276+
# would be significantly more efficient
277+
# This code block, and the "else" code block below, functionally do
278+
# the exact same thing, except this path makes no attempt to handle
279+
# dumping data
293280
sio = StringIO(data, '')
294281
sio.seek(data_start_index)
295282
lines = sio.readlines()
@@ -301,26 +288,28 @@ def _push_data_no_partial(self, data, data_start_index, unl_start_index):
301288

302289
self.pushlines(lines)
303290
else:
304-
# If we're not, let's keep it in Python
305-
dump_data_start = None if self._dump_destination is None else data_start_index
291+
dump_data_start = None if self._dump_destination is None \
292+
else data_start_index
306293
while unl_start_index >= 0:
307-
unl_end_index = BufferedSubFile._find_unl_end(data, unl_start_index)
294+
unl_end_index = BufferedSubFile._find_unl_end(
295+
data, unl_start_index)
308296
if unl_end_index < 0:
309-
# Incomplete line ending; break to update our partial and return
297+
# Incomplete line ending; break to just update our partial
310298
self._dangling_partial = True
311299
break
312300

313301
# We have an easy line; push it
314302
if self._dump_destination is not None:
315-
# We have a window into a line. Make sure it's not EOF, and continue as long as it's not
316-
if self._check_eofstack(data, data_start_index, unl_end_index):
317-
# This line is "EOF". This is the end of our dump data! Push the dump data.
318-
self._dump_destination.append(data[dump_data_start:data_start_index])
303+
# We have a window into a line. Make sure it's not EOF
304+
if self._check_eofstack(
305+
data, data_start_index, unl_end_index):
306+
# This line is "EOF". This is the end of our dump data
307+
self._dump_destination.append(
308+
data[dump_data_start:data_start_index])
319309

320310
# Also push our line, since we already have it
321-
self._lines.append(data[data_start_index:unl_end_index])
322-
323-
# Mark dump complete
311+
self._lines.append(
312+
data[data_start_index:unl_end_index])
324313
self._dump_destination = None
325314
#else: # This line didn't mark the end. Keep going.
326315
else:
@@ -329,21 +318,23 @@ def _push_data_no_partial(self, data, data_start_index, unl_start_index):
329318

330319
# Update our iterators
331320
data_start_index = unl_end_index
332-
unl_start_index = BufferedSubFile._find_unl(data, data_start_index)
321+
unl_start_index = BufferedSubFile._find_unl(
322+
data, data_start_index)
333323

334-
# If we're still dumping, push everything that isn't going into the partial to the dump
335324
if self._dump_destination is not None:
336-
# If we're able to safely flush the partial, go ahead and do that too
337-
# We don't care about self._is_dump_midline() here, because data_start_index always represents the
338-
# start of a new line, always
325+
# Push everything that isn't going into the partial to the dump
326+
# If we're able to safely flush the partial, do that too
327+
# We don't care about self._is_dump_midline() here, because
328+
# data_start_index always represents the start of a new line
339329
if self._can_dump_partial(data, data_start_index):
340330
self._dump_destination.append(data[dump_data_start:])
341331

342-
# We've consumed the partial; flush any partial-related state we may have set
332+
# Flush any partial-related state we may have set
343333
self._dangling_partial = False
344-
return # skip the _partial.append below, because it's already been consumed
334+
return # skip the _partial.append below
345335
else:
346-
self._dump_destination.append(data[dump_data_start:data_start_index])
336+
self._dump_destination.append(
337+
data[dump_data_start:data_start_index])
347338

348339
# If we have any partial data leftover, go ahead and set it
349340
if data_start_index < len(data):
@@ -381,7 +372,6 @@ def _get_dump(self, start_value:str|None = None):
381372
if needs_more_data:
382373
# Flush our partial, if we can
383374
if self._partial and self._can_dump_partial(self._partial[0]):
384-
assert self._partial[0] # We shouldn't ever push empty strings to _partial
385375
_dump_destination.extend(self._partial)
386376
self._partial.clear()
387377
self._dangling_partial = False
@@ -402,8 +392,9 @@ def _pop_dump(self):
402392
@staticmethod
403393
def _find_unl(data, start=0):
404394
# Like str.find(), but for universal newlines
405-
# Originally, this iterated over the string, however just calling find() twice is drastically faster
406-
# This could be sped up by replacing with a similar function in C, so we don't pass over the string twice.
395+
# This used to iterate over the string; two find() calls are faster
396+
# This could be sped up by replacing with a similar function in C,
397+
# so we don't pass over the string twice.
407398
cr_index = data.find('\r', start)
408399
if cr_index < 0:
409400
return data.find(NL, start)
@@ -413,9 +404,8 @@ def _find_unl(data, start=0):
413404

414405
@staticmethod
415406
def _find_unl_end(data, start):
416-
# A helper function which returns the 1-past-the-end index of a universal newline
407+
# Returns the 1-past-the-end index of a universal newline
417408
# This could be sped up by replacing with a similar function in C.
418-
#assert data[start] in '\r\n'
419409

420410
# \n is always end of line
421411
if data.startswith(NL, start):
@@ -425,7 +415,7 @@ def _find_unl_end(data, start):
425415
if data.startswith(NL, start + 1):
426416
return start + 2
427417

428-
# End of string; we can't know if a \n follows, so no universal line end
418+
# End of data; we can't know if a \n follows, so no universal line end
429419
if start + 1 >= len(data):
430420
return -1
431421

@@ -461,7 +451,7 @@ def __init__(self, _factory=None, *, policy=compat32):
461451
self._old_style_factory = True
462452
self._input = BufferedSubFile()
463453
self._msgstack = []
464-
self._parse = self._parsegen().__next__ # Interesting trick which replaces yield values with return values
454+
self._parse = self._parsegen().__next__
465455
self._cur = None
466456
self._last = None
467457
self._headersonly = False
@@ -477,7 +467,7 @@ def feed(self, data):
477467

478468
def _call_parse(self):
479469
try:
480-
self._parse() # Return value is always NeedMoreData or None, but discarded here in either case
470+
self._parse()
481471
except StopIteration:
482472
pass
483473

Lib/email/parser.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def __init__(self, _class=None, *, policy=compat32):
4343
def _parse_chunks(self, chunk_generator, headersonly=False):
4444
"""Internal method / implementation detail
4545
46-
Parses chunks from a chunk generator into a FeedParser, returning the result
46+
Parses chunks from a chunk generator into a FeedParser
4747
"""
4848
feedparser = FeedParser(self._class, policy=self.policy)
4949
if headersonly:
@@ -135,7 +135,8 @@ def parsebytes(self, text, headersonly=False):
135135
the file.
136136
"""
137137
_chunk_generator = (
138-
text[offset:offset + _FEED_CHUNK_SIZE].decode('ASCII', errors='surrogateescape')
138+
text[offset:offset + _FEED_CHUNK_SIZE].decode(
139+
'ASCII', errors='surrogateescape')
139140
for offset in range(0, len(text), _FEED_CHUNK_SIZE)
140141
)
141142

0 commit comments

Comments
 (0)