# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Bzrlib specific gzip tunings. We plan to feed these to the upstream gzip."""

from __future__ import absolute_import

from cStringIO import StringIO

# make GzipFile faster:
import gzip
from gzip import FEXTRA, FCOMMENT, FNAME, FHCRC
import sys
import struct
import zlib

# we want a \n preserved, break on \n only splitlines.
from bzrlib import symbol_versioning

__all__ = ["GzipFile", "bytes_to_gzip"]


def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    If it's >= 2GB when viewed as a 32-bit unsigned int, return a long.
    """
    if i < 0:
        i += 1L << 32
    return i


def LOWU32(i):
    """Return the low-order 32 bits of an int, as a non-negative int."""
    return i & 0xFFFFFFFFL
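
# Illustrative sanity check (not part of the original module): with the
# definitions above,
#   U32(-1) == 4294967295L      # a negative 32-bit value seen as unsigned
#   LOWU32(2 ** 32 + 5) == 5    # only the low 32 bits survive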


def bytes_to_gzip(bytes, factory=zlib.compressobj,
    level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
    width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,
    crc32=zlib.crc32):
    """Create a gzip file containing bytes and return its content."""
    return chunks_to_gzip([bytes])


def chunks_to_gzip(chunks, factory=zlib.compressobj,
    level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
    width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,
    crc32=zlib.crc32):
    """Create a gzip file containing chunks and return its content.

    :param chunks: An iterable of strings. Each string can have arbitrary
        layout.
    """
    result = [
        '\037\213'  # self.fileobj.write('\037\213')  # magic header
        '\010'      # self.fileobj.write('\010')      # compression method
                    # fname = self.filename[:-3]
                    # flags = 0
                    # if fname:
                    #     flags = FNAME
        '\x00'      # self.fileobj.write(chr(flags))
        '\0\0\0\0'  # write32u(self.fileobj, long(time.time()))
        '\002'      # self.fileobj.write('\002')
        '\377'      # self.fileobj.write('\377')
                    # if fname:
        ''          # self.fileobj.write(fname + '\000')
        ]
    # using a compressobj avoids a small header and trailer that the compress()
    # utility function adds.
    compress = factory(level, method, width, mem, 0)
    crc = 0
    total_len = 0
    for chunk in chunks:
        crc = crc32(chunk, crc)
        total_len += len(chunk)
        zbytes = compress.compress(chunk)
        if zbytes:
            result.append(zbytes)
    result.append(compress.flush())
    # size may exceed 2GB, or even 4GB
    result.append(struct.pack("<LL", LOWU32(crc), LOWU32(total_len)))
    return ''.join(result)
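
# Illustrative round-trip check (not part of the original module): the stream
# built above is a regular gzip file, so the standard library can read it back:
#   data = bytes_to_gzip('hello world')
#   assert gzip.GzipFile(fileobj=StringIO(data)).read() == 'hello world'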


class GzipFile(gzip.GzipFile):
    """Knit tuned version of GzipFile.

    Yes, it's only 1.6 seconds, but they add up.
    """

    def __init__(self, *args, **kwargs):
        symbol_versioning.warn(
            symbol_versioning.deprecated_in((2, 3, 0))
            % 'bzrlib.tuned_gzip.GzipFile',
            DeprecationWarning, stacklevel=2)
        gzip.GzipFile.__init__(self, *args, **kwargs)

    def _add_read_data(self, data):
        # 4169 calls in 183
        # temp var for len(data) and switch to +='s.
        len_data = len(data)
        self.crc = zlib.crc32(data, self.crc)
        self.extrabuf += data
        self.extrasize += len_data
        self.size += len_data

    def _read_eof(self):
        """tuned to reduce function calls and eliminate file seeking:

        reduces lsprof count from 800 to 288

        avoid U32 call by using struct format L
        """
        # We've read to the end of the file, so we should have 8 bytes of
        # unused data in the decompressor. If we don't, there is a corrupt file.
        # We use these 8 bytes to calculate the CRC and the recorded file size.
        # We then check that the computed CRC and size of the uncompressed
        # data match the stored values. Note that the size stored is the true
        # file size mod 2**32.
        if len(self._gzip_tail) != 8:
            raise AssertionError("gzip trailer is incorrect length.")
        crc32, isize = struct.unpack("<LL", self._gzip_tail)
        # note that isize is unsigned - it can exceed 2GB
        if crc32 != U32(self.crc):
            raise IOError("CRC check failed %d %d" % (crc32, U32(self.crc)))
        elif isize != LOWU32(self.size):
            raise IOError("Incorrect length of data produced")
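
    # Worked example (illustrative, not from the original source): for an
    # 11-byte payload such as 'hello world' the 8-byte gzip trailer is
    # struct.pack('<LL', zlib.crc32('hello world') & 0xFFFFFFFFL, 11), which
    # the struct.unpack('<LL', ...) call above recovers as (crc32, isize).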

    def _unread(self, buf, len_buf=None):
        """tuned to remove unneeded len calls.

        because this is such an inner routine in readline, and readline is
        in many inner loops, this has been inlined into readline().

        The len_buf parameter combined with the reduction in len calls dropped
        the lsprof ms count for this routine on my test data from 800 to 200 -
        a 75% saving.
        """
        if len_buf is None:
            len_buf = len(buf)
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len_buf + self.extrasize
        self.offset -= len_buf

    def write(self, data):
        if self.mode != gzip.WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")
        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")
        data_len = len(data)
        if data_len > 0:
            self.size = self.size + data_len
            self.crc = zlib.crc32(data, self.crc)
            self.fileobj.write(self.compress.compress(data))
            self.offset += data_len

    def writelines(self, lines):
        # profiling indicated a significant overhead
        # calling write for each line.
        # this batch call is a lot faster :).
        # (4 seconds to 1 second for the sample upgrades I was testing).
        self.write(''.join(lines))
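
    # e.g. (illustrative) writelines(['a\n', 'b\n']) results in a single
    # write('a\nb\n') rather than one write() call per line.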

    if sys.version_info > (2, 7):
        # As of Python 2.7 the crc32 must be positive when close is called
        def close(self):
            if self.fileobj is None:
                return
            if self.mode == gzip.WRITE:
                self.crc &= 0xFFFFFFFFL
            gzip.GzipFile.close(self)
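
    # Illustrative note (not from the original source): on Python 2,
    # zlib.crc32() can return a negative int, while the gzip trailer must be
    # written as an unsigned 32-bit value, so the running CRC is masked
    # before delegating to gzip.GzipFile.close(), e.g.
    #   self.crc & 0xFFFFFFFFL   # always >= 0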