# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Bzrlib specific gzip tunings. We plan to feed these to the upstream gzip."""
from __future__ import absolute_import

from cStringIO import StringIO

# make GzipFile faster:
import gzip
from gzip import FEXTRA, FCOMMENT, FNAME, FHCRC
import sys
import struct
import zlib

# we want a \n preserved, break on \n only splitlines.
from bzrlib import symbol_versioning

__all__ = ["GzipFile", "bytes_to_gzip"]

def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    If it's >= 2GB when viewed as a 32-bit unsigned int, return a long.
    """
    if i < 0:
        i += 1L << 32
    return i


def LOWU32(i):
    """Return the low-order 32 bits of an int, as a non-negative int."""
    return i & 0xFFFFFFFFL

def bytes_to_gzip(bytes, factory=zlib.compressobj,
    level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
    width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,
    crc32=zlib.crc32):
    """Create a gzip file containing bytes and return its content."""
    return chunks_to_gzip([bytes])

def chunks_to_gzip(chunks, factory=zlib.compressobj,
    level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
    width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,
    crc32=zlib.crc32):
    """Create a gzip file containing chunks and return its content.

    :param chunks: An iterable of strings. Each string can have arbitrary
        layout.
    """
    result = [
        '\037\213'  # self.fileobj.write('\037\213')  # magic header
        '\010'      # self.fileobj.write('\010')      # compression method
                    # fname = self.filename[:-3]
        '\x00'      # self.fileobj.write(chr(flags))
        '\0\0\0\0'  # write32u(self.fileobj, long(time.time()))
        '\002'      # self.fileobj.write('\002')
        '\377'      # self.fileobj.write('\377')
        ''          # self.fileobj.write(fname + '\000')
        ]
    # using a compressobj avoids a small header and trailer that the compress()
    # utility function adds.
    compress = factory(level, method, width, mem, 0)
    crc = 0
    total_len = 0
    for chunk in chunks:
        crc = crc32(chunk, crc)
        total_len += len(chunk)
        zbytes = compress.compress(chunk)
        if zbytes:
            result.append(zbytes)
    result.append(compress.flush())
    # size may exceed 2GB, or even 4GB
    result.append(struct.pack("<LL", LOWU32(crc), LOWU32(total_len)))
    return ''.join(result)
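
def _example_gzip_roundtrip():
    """Illustrative sketch only; this helper is not part of the original API.

    It shows that the stream assembled above (fixed header, raw deflate body,
    CRC/length trailer) is a complete gzip file that the stock gzip module can
    read back.
    """
    blob = chunks_to_gzip(['hello ', 'world'])
    restored = gzip.GzipFile(fileobj=StringIO(blob), mode='rb').read()
    return restored == 'hello world'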

class GzipFile(gzip.GzipFile):
    """Knit tuned version of GzipFile.

    Yes, it's only 1.6 seconds, but they add up.
    """

    def __init__(self, *args, **kwargs):
        symbol_versioning.warn(
            symbol_versioning.deprecated_in((2, 3, 0))
            % 'bzrlib.tuned_gzip.GzipFile',
            DeprecationWarning, stacklevel=2)
        gzip.GzipFile.__init__(self, *args, **kwargs)

    def _add_read_data(self, data):
        # 4169 calls in 183
        # temp var for len(data) and switch to +='s.
        len_data = len(data)
        self.crc = zlib.crc32(data, self.crc)
        self.extrabuf += data
        self.extrasize += len_data
        self.size += len_data

    def _read_eof(self):
        """Tuned to reduce function calls and eliminate file seeking:

        reduces lsprof count from 800 to 288
        avoid U32 call by using struct format L
        """
        # We've read to the end of the file, so we should have 8 bytes of
        # unused data in the decompressor. If we don't, there is a corrupt file.
        # We use these 8 bytes to calculate the CRC and the recorded file size.
        # We then check that the computed CRC and size of the
        # uncompressed data match the stored values. Note that the size
        # stored is the true file size mod 2**32.
        if len(self._gzip_tail) != 8:
            raise AssertionError("gzip trailer is incorrect length.")
        crc32, isize = struct.unpack("<LL", self._gzip_tail)
        # note that isize is unsigned - it can exceed 2GB
        if crc32 != U32(self.crc):
            raise IOError("CRC check failed %d %d" % (crc32, U32(self.crc)))
        elif isize != LOWU32(self.size):
            raise IOError("Incorrect length of data produced")
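
    # The 8-byte trailer _read_eof checks has this layout (illustrative
    # sketch, matching what chunks_to_gzip() writes at the end of its
    # streams):
    #
    #   tail = struct.pack("<LL", LOWU32(crc), LOWU32(total_len))
    #   crc32, isize = struct.unpack("<LL", tail)
    #
    # i.e. the CRC32 of the uncompressed data followed by its length mod
    # 2**32, both as little-endian unsigned 32-bit values.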

    def _unread(self, buf, len_buf=None):
        """Tuned to remove unneeded len calls.

        Because this is such an inner routine in readline, and readline is
        in many inner loops, this has been inlined into readline().

        The len_buf parameter combined with the reduction in len calls dropped
        the lsprof ms count for this routine on my test data from 800 to 200 -
        a 75% saving.
        """
        if len_buf is None:
            len_buf = len(buf)
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len_buf + self.extrasize
        self.offset -= len_buf

    def writelines(self, lines):
        # profiling indicated a significant overhead in calling write for
        # each line; this batch call is a lot faster :).
        # (4 seconds to 1 second for the sample upgrades I was testing).
        self.write(''.join(lines))
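
    # For comparison, the slower per-line form the comment above refers to
    # would be (illustrative only):
    #
    #     for line in lines:
    #         self.write(line)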

    if sys.version_info > (2, 7):
        # As of Python 2.7 the crc32 must be positive when close is called
        def close(self):
            if self.fileobj is None:
                return
            if self.mode == gzip.WRITE:
                self.crc &= 0xFFFFFFFFL
            gzip.GzipFile.close(self)