Skip to content

Commit f6e2dab

Browse files
gh-141713: Prevent possible memory denial of service when reading
Make read() and similar methods that read a given number of bytes use a progressively growing buffer instead of allocating the maximum size buffer at once. This helps prevent certain kinds of memory denial of service issues when the number of bytes to read within a specific protocol or format is received from an untrusted source.
1 parent bc9e63d commit f6e2dab

15 files changed

Lines changed: 291 additions & 102 deletions

File tree

Lib/_pyio.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727
# when the device block size is available.
2828
DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes
2929

30+
# Data larger than this will be read in chunks, to prevent extreme
31+
# overallocation.
32+
_MIN_READ_BUF_SIZE = 1 << 20
33+
3034
# NOTE: Base classes defined here are registered with the "official" ABCs
3135
# defined in io.py. We don't use real inheritance though, because we don't want
3236
# to inherit the C implementations.
@@ -611,15 +615,29 @@ def read(self, size=-1):
611615
"""
612616
if size is None:
613617
size = -1
618+
size = size.__index__()
614619
if size < 0:
615620
return self.readall()
616-
b = bytearray(size.__index__())
621+
b = bytearray(min(size, _MIN_READ_BUF_SIZE))
617622
n = self.readinto(b)
618623
if n is None:
619624
return None
620-
if n < 0 or n > len(b):
621-
raise ValueError(f"readinto returned {n} outside buffer size {len(b)}")
622-
del b[n:]
625+
written = 0
626+
while True:
627+
if n != len(b) - written:
628+
if n < 0 or n > len(b) - written:
629+
raise ValueError(f"readinto returned {n} outside buffer size {len(b) - written}")
630+
written += n
631+
break
632+
written += n
633+
if written >= size:
634+
break
635+
b.resize(min(2*len(b), size))
636+
with memoryview(b) as m:
637+
n = self.readinto(m[written:])
638+
if n is None:
639+
break
640+
del b[written:]
623641
return b.take_bytes()
624642

625643
def readall(self):

Lib/test/support/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3184,3 +3184,13 @@ def linked_to_musl():
31843184
return _linked_to_musl
31853185
_linked_to_musl = tuple(map(int, version.split('.')))
31863186
return _linked_to_musl
3187+
3188+
3189+
def itersize(start, stop):
3190+
    # Produce a geometrically increasing sequence from start to stop
3191+
# (inclusively) for tests.
3192+
size = start
3193+
while size < stop:
3194+
yield size
3195+
size <<= 1
3196+
yield stop

Lib/test/test_io/test_bufferedio.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,18 @@ def test_read_all(self):
433433

434434
self.assertEqual(b"abcdefg", bufio.read())
435435

436+
def test_large_read_from_small_file(self):
437+
for size in support.itersize(1 << 20, sys.maxsize):
438+
rawio = self.MockRawIO((b'abc',))
439+
bufio = self.tp(rawio)
440+
self.assertEqual(bufio.read(size), b'abc')
441+
442+
def test_large_read1_from_small_file(self):
443+
for size in support.itersize(1 << 20, sys.maxsize):
444+
rawio = self.MockRawIO((b'abc',))
445+
bufio = self.tp(rawio)
446+
self.assertEqual(bufio.read1(size), b'abc')
447+
436448
@threading_helper.requires_working_threading()
437449
@support.requires_resource('cpu')
438450
def test_threads(self):

Lib/test/test_io/test_fileio.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from weakref import proxy
1010
from functools import wraps
1111

12+
from test import support
1213
from test.support import (
1314
cpython_only, swap_attr, gc_collect, is_wasi,
1415
infinite_recursion, strace_helper
@@ -730,6 +731,15 @@ def __setattr__(self, name, value):
730731
self.assertRaises(MyException, MyFileIO, fd)
731732
os.close(fd) # should not raise OSError(EBADF)
732733

734+
def test_large_read_from_small_file(self):
735+
self.addCleanup(os.remove, TESTFN)
736+
data = b'abc'
737+
with self.FileIO(TESTFN, 'wb') as f:
738+
f.write(data)
739+
for size in support.itersize(1 << 20, sys.maxsize):
740+
with self.FileIO(TESTFN, 'rb') as f:
741+
self.assertEqual(f.read(size), data)
742+
733743

734744
class COtherFileTests(OtherFileTests, unittest.TestCase):
735745
FileIO = _io.FileIO

Lib/test/test_io/test_general.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,11 @@ def readinto(self, buf):
628628
rawio = RawIOKeepsReference([b"1234"])
629629
rawio.read(4)
630630

631+
def test_RawIOBase_large_read_from_small_file(self):
632+
for size in support.itersize(1 << 20, sys.maxsize):
633+
rawio = self.MockRawIOWithoutRead((b"abc",))
634+
self.assertEqual(rawio.read(size), b'abc')
635+
631636
def test_types_have_dict(self):
632637
test = (
633638
self.IOBase(),

Lib/test/test_os/test_os.py

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -321,25 +321,15 @@ def test_readinto_badarg(self):
321321
self.assertEqual(s, 4)
322322
self.assertEqual(buffer, b"spam")
323323

324-
@support.cpython_only
325-
# Skip the test on 32-bit platforms: the number of bytes must fit in a
326-
# Py_ssize_t type
327-
@unittest.skipUnless(INT_MAX < PY_SSIZE_T_MAX,
328-
"needs INT_MAX < PY_SSIZE_T_MAX")
329-
@support.bigmemtest(size=INT_MAX + 10, memuse=1, dry_run=False)
330-
def test_large_read(self, size):
324+
def test_large_read_from_small_file(self):
331325
self.addCleanup(os_helper.unlink, os_helper.TESTFN)
332326
create_file(os_helper.TESTFN, b'test')
333327

334328
# Issue #21932: Make sure that os.read() does not raise an
335329
# OverflowError for size larger than INT_MAX
336-
with open(os_helper.TESTFN, "rb") as fp:
337-
data = os.read(fp.fileno(), size)
338-
339-
# The test does not try to read more than 2 GiB at once because the
340-
# operating system is free to return less bytes than requested.
341-
self.assertEqual(data, b'test')
342-
330+
for size in support.itersize(1 << 20, sys.maxsize):
331+
with open(os_helper.TESTFN, "rb") as fp:
332+
self.assertEqual(os.read(fp.fileno(), size), b'test')
343333

344334
@support.cpython_only
345335
# Skip the test on 32-bit platforms: the number of bytes must fit in a

Lib/test/test_os/test_posix.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,20 @@ def test_pread(self):
304304
finally:
305305
os.close(fd)
306306

307+
@unittest.skipUnless(hasattr(posix, 'pread'), "test needs posix.pread()")
308+
def test_large_pread_from_small_file(self):
309+
fd = os.open(os_helper.TESTFN, os.O_WRONLY | os.O_CREAT)
310+
try:
311+
os.write(fd, b'test')
312+
finally:
313+
os.close(fd)
314+
fd = os.open(os_helper.TESTFN, os.O_RDONLY)
315+
try:
316+
for size in support.itersize(1 << 20, sys.maxsize):
317+
self.assertEqual(posix.pread(fd, size, 1), b'est')
318+
finally:
319+
os.close(fd)
320+
307321
@unittest.skipUnless(hasattr(posix, 'preadv'), "test needs posix.preadv()")
308322
def test_preadv(self):
309323
fd = os.open(os_helper.TESTFN, os.O_RDWR | os.O_CREAT)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Fix a potential memory denial of service when reading from a file,
2+
a file descriptor, or a buffered stream a large specified number of bytes.
3+
This affects the :func:`os.read` and :func:`os.pread` functions and
4+
the :meth:`!read` and :meth:`!read1` methods of various :mod:`io` classes.
5+
When the number of bytes to read is received from an untrusted source, it could
6+
cause an arbitrary amount of memory to be allocated.
7+
This could have led to symptoms including a :exc:`MemoryError`, swapping,
8+
out of memory (OOM) killed processes or containers, or even system crashes.

Modules/_io/_iomodule.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ extern int _PyIO_trap_eintr(void);
8080

8181
#define DEFAULT_BUFFER_SIZE (128 * 1024) /* bytes */
8282

83+
// Data larger than this will be read in chunks, to prevent extreme
84+
// overallocation.
85+
#define MIN_READ_BUF_SIZE (1 << 20)
86+
8387
/*
8488
* Offset type for positioning.
8589
*/

Modules/_io/bufferedio.c

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -407,8 +407,8 @@ _enter_buffered_busy(buffered *self)
407407

408408
#define MINUS_LAST_BLOCK(self, size) \
409409
(self->buffer_mask ? \
410-
(size & ~self->buffer_mask) : \
411-
(self->buffer_size * (size / self->buffer_size)))
410+
((size) & ~self->buffer_mask) : \
411+
(self->buffer_size * ((size) / self->buffer_size)))
412412

413413

414414
static int
@@ -1071,6 +1071,7 @@ _io__Buffered_read1_impl(buffered *self, Py_ssize_t n)
10711071
}
10721072
_bufferedreader_reset_buf(self);
10731073

1074+
n = Py_MIN(n, self->buffer_size);
10741075
PyBytesWriter *writer = PyBytesWriter_Create(n);
10751076
if (writer == NULL) {
10761077
return NULL;
@@ -1795,25 +1796,32 @@ _bufferedreader_read_fast(buffered *self, Py_ssize_t n)
17951796
* or until an EOF occurs or until read() would block.
17961797
*/
17971798
static PyObject *
1798-
_bufferedreader_read_generic(buffered *self, Py_ssize_t n)
1799+
_bufferedreader_read_generic(buffered *self, Py_ssize_t size)
17991800
{
1800-
Py_ssize_t current_size, remaining, written;
1801+
Py_ssize_t current_size, written;
18011802

18021803
current_size = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t);
1803-
if (n <= current_size)
1804-
return _bufferedreader_read_fast(self, n);
1804+
if (size <= current_size)
1805+
return _bufferedreader_read_fast(self, size);
18051806

1806-
PyBytesWriter *writer = PyBytesWriter_Create(n);
1807+
Py_ssize_t chunksize = self->buffer_size;
1808+
if (chunksize < MIN_READ_BUF_SIZE) {
1809+
chunksize = MINUS_LAST_BLOCK(self, MIN_READ_BUF_SIZE);
1810+
}
1811+
Py_ssize_t allocated = size, resize_after = size;
1812+
if (size - current_size > chunksize) {
1813+
allocated = current_size + chunksize;
1814+
resize_after = allocated - Py_MAX(self->buffer_size, chunksize/4);
1815+
}
1816+
PyBytesWriter *writer = PyBytesWriter_Create(allocated);
18071817
if (writer == NULL) {
18081818
goto error;
18091819
}
18101820
char *out = PyBytesWriter_GetData(writer);
18111821

1812-
remaining = n;
18131822
written = 0;
18141823
if (current_size > 0) {
18151824
memcpy(out, self->buffer + self->pos, current_size);
1816-
remaining -= current_size;
18171825
written += current_size;
18181826
self->pos += current_size;
18191827
}
@@ -1825,12 +1833,27 @@ _bufferedreader_read_generic(buffered *self, Py_ssize_t n)
18251833
Py_DECREF(r);
18261834
}
18271835
_bufferedreader_reset_buf(self);
1828-
while (remaining > 0) {
1836+
while (written < size) {
18291837
/* We want to read a whole block at the end into buffer.
1830-
If we had readv() we could do this in one pass. */
1831-
Py_ssize_t r = MINUS_LAST_BLOCK(self, remaining);
1832-
if (r == 0)
1838+
           If we had readv() we could do this in one pass for the last chunk. */
1839+
if (written > resize_after) {
1840+
if (size - allocated > chunksize) {
1841+
allocated += chunksize;
1842+
resize_after = allocated - Py_MAX(self->buffer_size, chunksize/4);
1843+
chunksize += Py_MIN(chunksize, size - allocated - chunksize);
1844+
}
1845+
else {
1846+
resize_after = allocated = size;
1847+
}
1848+
if (PyBytesWriter_Resize(writer, allocated) < 0) {
1849+
PyBytesWriter_Discard(writer);
1850+
goto error;
1851+
}
1852+
}
1853+
Py_ssize_t r = MINUS_LAST_BLOCK(self, allocated - written);
1854+
if (r == 0) {
18331855
break;
1856+
}
18341857
r = _bufferedreader_raw_read(self, out + written, r);
18351858
if (r == -1)
18361859
goto error;
@@ -1842,13 +1865,19 @@ _bufferedreader_read_generic(buffered *self, Py_ssize_t n)
18421865
PyBytesWriter_Discard(writer);
18431866
Py_RETURN_NONE;
18441867
}
1845-
remaining -= r;
18461868
written += r;
18471869
}
1870+
Py_ssize_t remaining = size - written;
18481871
assert(remaining <= self->buffer_size);
18491872
self->pos = 0;
18501873
self->raw_pos = 0;
18511874
self->read_end = 0;
1875+
if (allocated < size) {
1876+
if (PyBytesWriter_Resize(writer, size) < 0) {
1877+
PyBytesWriter_Discard(writer);
1878+
goto error;
1879+
}
1880+
}
18521881
/* NOTE: when the read is satisfied, we avoid issuing any additional
18531882
reads, which could block indefinitely (e.g. on a socket).
18541883
See issue #9550. */

0 commit comments

Comments
 (0)