Skip to content

Commit 4238f7e

Browse files
committed
Remove EndlessZstdDecompressor remains
1 parent 1a548c0 commit 4238f7e

1 file changed

Lines changed: 23 additions & 131 deletions

File tree

Modules/_zstd/decompressor.c

Lines changed: 23 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -45,19 +45,10 @@ typedef struct {
4545
/* 0 if decompressor has (or may have) unconsumed input data, 0 or 1. */
4646
char needs_input;
4747

48-
/* For decompress(), 0 or 1.
49-
1 when both input and output streams are at a frame edge, means a
50-
frame is completely decoded and fully flushed, or the decompressor
51-
has just been initialized. */
52-
char at_frame_edge;
53-
5448
/* For ZstdDecompressor, 0 or 1.
5549
1 means the end of the first frame has been reached. */
5650
char eof;
5751

58-
/* Used for fast reset above three variables */
59-
char _unused_char_for_align;
60-
6152
/* __init__ has been called, 0 or 1. */
6253
bool initialized;
6354
} ZstdDecompressor;
@@ -258,19 +249,13 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict)
258249
return 0;
259250
}
260251

261-
typedef enum {
262-
TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class
263-
TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function
264-
} decompress_type;
265-
266252
/*
267-
Given the two types of decompressors (defined above),
268-
decompress implementation for <D>, <E>, pseudo code:
253+
Decompress implementation in pseudo code:
269254
270255
initialize_output_buffer
271256
while True:
272257
decompress_data
273-
set_object_flag # .eof for <D>, .at_frame_edge for <E>.
258+
set_object_flag # .eof
274259
275260
if output_buffer_exhausted:
276261
if output_buffer_reached_max_length:
@@ -287,53 +272,17 @@ typedef enum {
287272
flushing to do to complete current frame.
288273
289274
Note, decompressing "an empty input" in any case will make it > 0.
290-
291-
<E> supports multiple frames, has an .at_frame_edge flag, it means both the
292-
input and output streams are at a frame edge. The flag can be set by this
293-
statement:
294-
295-
.at_frame_edge = (zstd_ret == 0) ? 1 : 0
296-
297-
But if decompressing "an empty input" at "a frame edge", zstd_ret will be
298-
non-zero, then .at_frame_edge will be wrongly set to false. To solve this
299-
problem, two AFE checks are needed to ensure that: when at "a frame edge",
300-
empty input will not be decompressed.
301-
302-
// AFE check
303-
if (self->at_frame_edge && in->pos == in->size) {
304-
finish
305-
}
306-
307-
In <E>, if .at_frame_edge is eventually set to true, but input stream has
308-
unconsumed data (in->pos < in->size), then the outer function
309-
stream_decompress() will set .at_frame_edge to false. In this case,
310-
although the output stream is at a frame edge, for the caller, the input
311-
stream is not at a frame edge, see below diagram. This behavior does not
312-
affect the next AFE check, since (in->pos < in->size).
313-
314-
input stream: --------------|---
315-
^
316-
output stream: ====================|
317-
^
318275
*/
319276
static PyObject *
320277
decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
321278
Py_ssize_t max_length,
322-
Py_ssize_t initial_size,
323-
decompress_type type)
279+
Py_ssize_t initial_size)
324280
{
325281
size_t zstd_ret;
326282
ZSTD_outBuffer out;
327283
_BlocksOutputBuffer buffer = {.list = NULL};
328284
PyObject *ret;
329285

330-
/* The first AFE check for setting .at_frame_edge flag */
331-
if (type == TYPE_ENDLESS_DECOMPRESSOR) {
332-
if (self->at_frame_edge && in->pos == in->size) {
333-
return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
334-
}
335-
}
336-
337286
/* Initialize the output buffer */
338287
if (initial_size >= 0) {
339288
if (_OutputBuffer_InitWithSize(&buffer, &out, max_length, initial_size) < 0) {
@@ -362,22 +311,11 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
362311
goto error;
363312
}
364313

365-
/* Set .eof/.at_frame_edge flag */
366-
if (type == TYPE_DECOMPRESSOR) {
367-
/* ZstdDecompressor class stops when a frame is decompressed */
368-
if (zstd_ret == 0) {
369-
self->eof = 1;
370-
break;
371-
}
372-
}
373-
else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
374-
/* decompress() function supports multiple frames */
375-
self->at_frame_edge = (zstd_ret == 0) ? 1 : 0;
376-
377-
/* The second AFE check for setting .at_frame_edge flag */
378-
if (self->at_frame_edge && in->pos == in->size) {
379-
break;
380-
}
314+
/* Set .eof flag */
315+
if (zstd_ret == 0) {
316+
/* Stop when a frame is decompressed */
317+
self->eof = 1;
318+
break;
381319
}
382320

383321
/* Need to check out before in. Maybe zstd's internal buffer still has
@@ -415,8 +353,7 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
415353
}
416354

417355
static void
418-
decompressor_reset_session(ZstdDecompressor *self,
419-
decompress_type type)
356+
decompressor_reset_session(ZstdDecompressor *self)
420357
{
421358
// TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here
422359
// and ensure lock is always held
@@ -425,56 +362,29 @@ decompressor_reset_session(ZstdDecompressor *self,
425362
self->in_begin = 0;
426363
self->in_end = 0;
427364

428-
if (type == TYPE_DECOMPRESSOR) {
429-
Py_CLEAR(self->unused_data);
430-
}
365+
Py_CLEAR(self->unused_data);
431366

432367
/* Reset variables in one operation */
433368
self->needs_input = 1;
434-
self->at_frame_edge = 1;
435369
self->eof = 0;
436-
self->_unused_char_for_align = 0;
437370

438371
/* Resetting the session never fails */
439372
ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only);
440373
}
441374

442375
static PyObject *
443-
stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length,
444-
decompress_type type)
376+
stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length)
445377
{
446378
Py_ssize_t initial_buffer_size = -1;
447379
ZSTD_inBuffer in;
448380
PyObject *ret = NULL;
449381
int use_input_buffer;
450382

451-
if (type == TYPE_DECOMPRESSOR) {
452-
/* Check .eof flag */
453-
if (self->eof) {
454-
PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame.");
455-
assert(ret == NULL);
456-
goto success;
457-
}
458-
}
459-
else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
460-
/* Fast path for the first frame */
461-
if (self->at_frame_edge && self->in_begin == self->in_end) {
462-
/* Read decompressed size */
463-
uint64_t decompressed_size = ZSTD_getFrameContentSize(data->buf, data->len);
464-
465-
/* These two zstd constants always > PY_SSIZE_T_MAX:
466-
ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1)
467-
ZSTD_CONTENTSIZE_ERROR is (0ULL - 2)
468-
469-
Use ZSTD_findFrameCompressedSize() to check complete frame,
470-
prevent allocating too much memory for small input chunk. */
471-
472-
if (decompressed_size <= (uint64_t) PY_SSIZE_T_MAX &&
473-
!ZSTD_isError(ZSTD_findFrameCompressedSize(data->buf, data->len)) )
474-
{
475-
initial_buffer_size = (Py_ssize_t) decompressed_size;
476-
}
477-
}
383+
/* Check .eof flag */
384+
if (self->eof) {
385+
PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame.");
386+
assert(ret == NULL);
387+
goto success;
478388
}
479389

480390
/* Prepare input buffer with or without unconsumed data */
@@ -562,29 +472,18 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
562472

563473
/* Decompress */
564474
ret = decompress_impl(self, &in,
565-
max_length, initial_buffer_size,
566-
type);
475+
max_length, initial_buffer_size);
567476
if (ret == NULL) {
568477
goto error;
569478
}
570479

571480
/* Unconsumed input data */
572481
if (in.pos == in.size) {
573-
if (type == TYPE_DECOMPRESSOR) {
574-
if (Py_SIZE(ret) == max_length || self->eof) {
575-
self->needs_input = 0;
576-
}
577-
else {
578-
self->needs_input = 1;
579-
}
482+
if (Py_SIZE(ret) == max_length || self->eof) {
483+
self->needs_input = 0;
580484
}
581-
else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
582-
if (Py_SIZE(ret) == max_length && !self->at_frame_edge) {
583-
self->needs_input = 0;
584-
}
585-
else {
586-
self->needs_input = 1;
587-
}
485+
else {
486+
self->needs_input = 1;
588487
}
589488

590489
if (use_input_buffer) {
@@ -598,10 +497,6 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
598497

599498
self->needs_input = 0;
600499

601-
if (type == TYPE_ENDLESS_DECOMPRESSOR) {
602-
self->at_frame_edge = 0;
603-
}
604-
605500
if (!use_input_buffer) {
606501
/* Discard buffer if it's too small
607502
(resizing it may needlessly copy the current contents) */
@@ -638,7 +533,7 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
638533

639534
error:
640535
/* Reset decompressor's states/session */
641-
decompressor_reset_session(self, type);
536+
decompressor_reset_session(self);
642537

643538
Py_CLEAR(ret);
644539
success:
@@ -668,9 +563,6 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
668563
/* needs_input flag */
669564
self->needs_input = 1;
670565

671-
/* at_frame_edge flag */
672-
self->at_frame_edge = 1;
673-
674566
/* Decompression context */
675567
self->dctx = ZSTD_createDCtx();
676568
if (self->dctx == NULL) {
@@ -837,7 +729,7 @@ _zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
837729
/* Thread-safe code */
838730
Py_BEGIN_CRITICAL_SECTION(self);
839731

840-
ret = stream_decompress(self, data, max_length, TYPE_DECOMPRESSOR);
732+
ret = stream_decompress(self, data, max_length);
841733
Py_END_CRITICAL_SECTION();
842734
return ret;
843735
}

0 commit comments

Comments
 (0)