https://github.com/python/cpython/commit/72e5b25efb580fb1f0fdfade516be90d90822164 commit: 72e5b25efb580fb1f0fdfade516be90d90822164 branch: main author: Chris Markiewicz <markiew...@stanford.edu> committer: zware <zachary.w...@gmail.com> date: 2025-03-07T21:04:45-06:00 summary:
gh-128646: Implement GzipFile.readinto[1]() methods (GH-128647) The new methods simply delegate to the underlying buffer, much like the existing GzipFile.read[1] methods. This avoids extra allocations caused by the BufferedIOBase.readinto implementation previously used. This commit also factors out a common readability check rather than copying it an additional two times. files: A Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst M Lib/gzip.py M Lib/test/test_gzip.py diff --git a/Lib/gzip.py b/Lib/gzip.py index 7e384f8a568c1c..d681ef6b488dad 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -325,11 +325,15 @@ def _write_raw(self, data): return length - def read(self, size=-1): - self._check_not_closed() + def _check_read(self, caller): if self.mode != READ: import errno - raise OSError(errno.EBADF, "read() on write-only GzipFile object") + msg = f"{caller}() on write-only GzipFile object" + raise OSError(errno.EBADF, msg) + + def read(self, size=-1): + self._check_not_closed() + self._check_read("read") return self._buffer.read(size) def read1(self, size=-1): @@ -337,19 +341,25 @@ def read1(self, size=-1): Reads up to a buffer's worth of data if size is negative.""" self._check_not_closed() - if self.mode != READ: - import errno - raise OSError(errno.EBADF, "read1() on write-only GzipFile object") + self._check_read("read1") if size < 0: size = io.DEFAULT_BUFFER_SIZE return self._buffer.read1(size) + def readinto(self, b): + self._check_not_closed() + self._check_read("readinto") + return self._buffer.readinto(b) + + def readinto1(self, b): + self._check_not_closed() + self._check_read("readinto1") + return self._buffer.readinto1(b) + def peek(self, n): self._check_not_closed() - if self.mode != READ: - import errno - raise OSError(errno.EBADF, "peek() on write-only GzipFile object") + self._check_read("peek") return self._buffer.peek(n) @property diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index 0940bb114df625..260fae5ae1b368 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -143,6 +143,38 @@ def test_read1(self): self.assertEqual(f.tell(), nread) self.assertEqual(b''.join(blocks), data1 * 50) + def test_readinto(self): + # 10MB of uncompressible data to ensure multiple reads + large_data = os.urandom(10 * 2**20) + with gzip.GzipFile(self.filename, 'wb') as f: + f.write(large_data) + + buf = bytearray(len(large_data)) + with gzip.GzipFile(self.filename, 'r') as f: + nbytes = f.readinto(buf) + self.assertEqual(nbytes, len(large_data)) + self.assertEqual(buf, large_data) + + def test_readinto1(self): + # 10MB of uncompressible data to ensure multiple reads + large_data = os.urandom(10 * 2**20) + with gzip.GzipFile(self.filename, 'wb') as f: + f.write(large_data) + + nread = 0 + buf = bytearray(len(large_data)) + memview = memoryview(buf) # Simplifies slicing + with gzip.GzipFile(self.filename, 'r') as f: + for count in range(200): + nbytes = f.readinto1(memview[nread:]) + if not nbytes: + break + nread += nbytes + self.assertEqual(f.tell(), nread) + self.assertEqual(buf, large_data) + # readinto1() should require multiple loops + self.assertGreater(count, 1) + @bigmemtest(size=_4G, memuse=1) def test_read_large(self, size): # Read chunk size over UINT_MAX should be supported, despite zlib's diff --git a/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst b/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst new file mode 100644 index 00000000000000..034a66b704dea8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst @@ -0,0 +1,4 @@ +Eagerly write to buffers passed to :class:`gzip.GzipFile`'s +:meth:`~io.BufferedIOBase.readinto` and +:meth:`~io.BufferedIOBase.readinto1` implementations, +avoiding unnecessary allocations. Patch by Chris Markiewicz. _______________________________________________ Python-checkins mailing list -- python-checkins@python.org To unsubscribe send an email to python-checkins-le...@python.org https://mail.python.org/mailman3/lists/python-checkins.python.org/ Member address: arch...@mail-archive.com