Skip to content

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit aba3d5c

Browse files
CPython Developers authored and coolreader18 committed
Update gzip,test_gzip from CPython 3.13
1 parent 8c5602f commit aba3d5c
Copy full SHA for aba3d5c

File tree

2 files changed

+325
-75
lines changed
Filter options

2 files changed

+325
-75
lines changed

‎Lib/gzip.py

Copy file name to clipboard | Expand all lines: Lib/gzip.py
+68 -62 | Lines changed: 68 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,16 @@
1515

1616
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
1717

18-
READ, WRITE = 1, 2
18+
READ = 'rb'
19+
WRITE = 'wb'
1920

2021
_COMPRESS_LEVEL_FAST = 1
2122
_COMPRESS_LEVEL_TRADEOFF = 6
2223
_COMPRESS_LEVEL_BEST = 9
2324

25+
READ_BUFFER_SIZE = 128 * 1024
26+
_WRITE_BUFFER_SIZE = 4 * io.DEFAULT_BUFFER_SIZE
27+
2428

2529
def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST,
2630
encoding=None, errors=None, newline=None):
@@ -118,6 +122,21 @@ class BadGzipFile(OSError):
118122
"""Exception raised in some cases for invalid gzip files."""
119123

120124

125+
class _WriteBufferStream(io.RawIOBase):
126+
"""Minimal object to pass WriteBuffer flushes into GzipFile"""
127+
def __init__(self, gzip_file):
128+
self.gzip_file = gzip_file
129+
130+
def write(self, data):
131+
return self.gzip_file._write_raw(data)
132+
133+
def seekable(self):
134+
return False
135+
136+
def writable(self):
137+
return True
138+
139+
121140
class GzipFile(_compression.BaseStream):
122141
"""The GzipFile class simulates most of the methods of a file object with
123142
the exception of the truncate() method.
@@ -160,9 +179,10 @@ def __init__(self, filename=None, mode=None,
160179
and 9 is slowest and produces the most compression. 0 is no compression
161180
at all. The default is 9.
162181
163-
The mtime argument is an optional numeric timestamp to be written
164-
to the last modification time field in the stream when compressing.
165-
If omitted or None, the current time is used.
182+
The optional mtime argument is the timestamp requested by gzip. The time
183+
is in Unix format, i.e., seconds since 00:00:00 UTC, January 1, 1970.
184+
If mtime is omitted or None, the current time is used. Use mtime = 0
185+
to generate a compressed stream that does not depend on creation time.
166186
167187
"""
168188

@@ -182,6 +202,7 @@ def __init__(self, filename=None, mode=None,
182202
if mode is None:
183203
mode = getattr(fileobj, 'mode', 'rb')
184204

205+
185206
if mode.startswith('r'):
186207
self.mode = READ
187208
raw = _GzipReader(fileobj)
@@ -204,6 +225,9 @@ def __init__(self, filename=None, mode=None,
204225
zlib.DEF_MEM_LEVEL,
205226
0)
206227
self._write_mtime = mtime
228+
self._buffer_size = _WRITE_BUFFER_SIZE
229+
self._buffer = io.BufferedWriter(_WriteBufferStream(self),
230+
buffer_size=self._buffer_size)
207231
else:
208232
raise ValueError("Invalid mode: {!r}".format(mode))
209233

@@ -212,14 +236,6 @@ def __init__(self, filename=None, mode=None,
212236
if self.mode == WRITE:
213237
self._write_gzip_header(compresslevel)
214238

215-
@property
216-
def filename(self):
217-
import warnings
218-
warnings.warn("use the name attribute", DeprecationWarning, 2)
219-
if self.mode == WRITE and self.name[-3:] != ".gz":
220-
return self.name + ".gz"
221-
return self.name
222-
223239
@property
224240
def mtime(self):
225241
"""Last modification time read from stream, or None"""
@@ -237,6 +253,11 @@ def _init_write(self, filename):
237253
self.bufsize = 0
238254
self.offset = 0 # Current file offset for seek(), tell(), etc
239255

256+
def tell(self):
257+
self._check_not_closed()
258+
self._buffer.flush()
259+
return super().tell()
260+
240261
def _write_gzip_header(self, compresslevel):
241262
self.fileobj.write(b'\037\213') # magic header
242263
self.fileobj.write(b'\010') # compression method
@@ -278,6 +299,10 @@ def write(self,data):
278299
if self.fileobj is None:
279300
raise ValueError("write() on closed GzipFile object")
280301

302+
return self._buffer.write(data)
303+
304+
def _write_raw(self, data):
305+
# Called by our self._buffer underlying WriteBufferStream.
281306
if isinstance(data, (bytes, bytearray)):
282307
length = len(data)
283308
else:
@@ -326,18 +351,19 @@ def closed(self):
326351

327352
def close(self):
328353
fileobj = self.fileobj
329-
if fileobj is None:
354+
if fileobj is None or self._buffer.closed:
330355
return
331-
self.fileobj = None
332356
try:
333357
if self.mode == WRITE:
358+
self._buffer.flush()
334359
fileobj.write(self.compress.flush())
335360
write32u(fileobj, self.crc)
336361
# self.size may exceed 2 GiB, or even 4 GiB
337362
write32u(fileobj, self.size & 0xffffffff)
338363
elif self.mode == READ:
339364
self._buffer.close()
340365
finally:
366+
self.fileobj = None
341367
myfileobj = self.myfileobj
342368
if myfileobj:
343369
self.myfileobj = None
@@ -346,6 +372,7 @@ def close(self):
346372
def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
347373
self._check_not_closed()
348374
if self.mode == WRITE:
375+
self._buffer.flush()
349376
# Ensure the compressor's buffer is flushed
350377
self.fileobj.write(self.compress.flush(zlib_mode))
351378
self.fileobj.flush()
@@ -376,6 +403,9 @@ def seekable(self):
376403

377404
def seek(self, offset, whence=io.SEEK_SET):
378405
if self.mode == WRITE:
406+
self._check_not_closed()
407+
# Flush buffer to ensure validity of self.offset
408+
self._buffer.flush()
379409
if whence != io.SEEK_SET:
380410
if whence == io.SEEK_CUR:
381411
offset = self.offset + offset
@@ -384,10 +414,10 @@ def seek(self, offset, whence=io.SEEK_SET):
384414
if offset < self.offset:
385415
raise OSError('Negative seek in write mode')
386416
count = offset - self.offset
387-
chunk = b'\0' * 1024
388-
for i in range(count // 1024):
417+
chunk = b'\0' * self._buffer_size
418+
for i in range(count // self._buffer_size):
389419
self.write(chunk)
390-
self.write(b'\0' * (count % 1024))
420+
self.write(b'\0' * (count % self._buffer_size))
391421
elif self.mode == READ:
392422
self._check_not_closed()
393423
return self._buffer.seek(offset, whence)
@@ -454,7 +484,7 @@ def _read_gzip_header(fp):
454484

455485
class _GzipReader(_compression.DecompressReader):
456486
def __init__(self, fp):
457-
super().__init__(_PaddedFile(fp), zlib.decompressobj,
487+
super().__init__(_PaddedFile(fp), zlib._ZlibDecompressor,
458488
wbits=-zlib.MAX_WBITS)
459489
# Set flag indicating start of a new member
460490
self._new_member = True
@@ -502,12 +532,13 @@ def read(self, size=-1):
502532
self._new_member = False
503533

504534
# Read a chunk of data from the file
505-
buf = self._fp.read(io.DEFAULT_BUFFER_SIZE)
535+
if self._decompressor.needs_input:
536+
buf = self._fp.read(READ_BUFFER_SIZE)
537+
uncompress = self._decompressor.decompress(buf, size)
538+
else:
539+
uncompress = self._decompressor.decompress(b"", size)
506540

507-
uncompress = self._decompressor.decompress(buf, size)
508-
if self._decompressor.unconsumed_tail != b"":
509-
self._fp.prepend(self._decompressor.unconsumed_tail)
510-
elif self._decompressor.unused_data != b"":
541+
if self._decompressor.unused_data != b"":
511542
# Prepend the already read bytes to the fileobj so they can
512543
# be seen by _read_eof() and _read_gzip_header()
513544
self._fp.prepend(self._decompressor.unused_data)
@@ -518,14 +549,11 @@ def read(self, size=-1):
518549
raise EOFError("Compressed file ended before the "
519550
"end-of-stream marker was reached")
520551

521-
self._add_read_data( uncompress )
552+
self._crc = zlib.crc32(uncompress, self._crc)
553+
self._stream_size += len(uncompress)
522554
self._pos += len(uncompress)
523555
return uncompress
524556

525-
def _add_read_data(self, data):
526-
self._crc = zlib.crc32(data, self._crc)
527-
self._stream_size = self._stream_size + len(data)
528-
529557
def _read_eof(self):
530558
# We've read to the end of the file
531559
# We check that the computed CRC and size of the
@@ -552,43 +580,21 @@ def _rewind(self):
552580
self._new_member = True
553581

554582

555-
def _create_simple_gzip_header(compresslevel: int,
556-
mtime = None) -> bytes:
557-
"""
558-
Write a simple gzip header with no extra fields.
559-
:param compresslevel: Compresslevel used to determine the xfl bytes.
560-
:param mtime: The mtime (must support conversion to a 32-bit integer).
561-
:return: A bytes object representing the gzip header.
562-
"""
563-
if mtime is None:
564-
mtime = time.time()
565-
if compresslevel == _COMPRESS_LEVEL_BEST:
566-
xfl = 2
567-
elif compresslevel == _COMPRESS_LEVEL_FAST:
568-
xfl = 4
569-
else:
570-
xfl = 0
571-
# Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
572-
# fields added to header), mtime, xfl and os (255 for unknown OS).
573-
return struct.pack("<BBBBLBB", 0x1f, 0x8b, 8, 0, int(mtime), xfl, 255)
574-
575-
576-
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
583+
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=0):
577584
"""Compress data in one shot and return the compressed string.
578585
579586
compresslevel sets the compression level in range of 0-9.
580-
mtime can be used to set the modification time. The modification time is
581-
set to the current time by default.
587+
mtime can be used to set the modification time.
588+
The modification time is set to 0 by default, for reproducibility.
582589
"""
583-
if mtime == 0:
584-
# Use zlib as it creates the header with 0 mtime by default.
585-
# This is faster and with less overhead.
586-
return zlib.compress(data, level=compresslevel, wbits=31)
587-
header = _create_simple_gzip_header(compresslevel, mtime)
588-
trailer = struct.pack("<LL", zlib.crc32(data), (len(data) & 0xffffffff))
589-
# Wbits=-15 creates a raw deflate block.
590-
return (header + zlib.compress(data, level=compresslevel, wbits=-15) +
591-
trailer)
590+
# Wbits=31 automatically includes a gzip header and trailer.
591+
gzip_data = zlib.compress(data, level=compresslevel, wbits=31)
592+
if mtime is None:
593+
mtime = time.time()
594+
# Reuse gzip header created by zlib, replace mtime and OS byte for
595+
# consistency.
596+
header = struct.pack("<4sLBB", gzip_data, int(mtime), gzip_data[8], 255)
597+
return header + gzip_data[10:]
592598

593599

594600
def decompress(data):
@@ -655,7 +661,7 @@ def main():
655661
f = builtins.open(arg, "rb")
656662
g = open(arg + ".gz", "wb")
657663
while True:
658-
chunk = f.read(io.DEFAULT_BUFFER_SIZE)
664+
chunk = f.read(READ_BUFFER_SIZE)
659665
if not chunk:
660666
break
661667
g.write(chunk)

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.