1
# Copyright (C) 2005, 2006 Canonical Ltd
1
# Copyright (C) 2005, 2006 by Canonical Ltd
2
2
# Written by Robert Collins <robert.collins@canonical.com>
4
4
# This program is free software; you can redistribute it and/or modify
18
18
"""Bzrlib specific gzip tunings. We plan to feed these to the upstream gzip."""
20
from cStringIO import StringIO
22
20
# make GzipFile faster:
24
22
from gzip import U32, LOWU32, FEXTRA, FCOMMENT, FNAME, FHCRC
29
27
# we want each line's trailing \n preserved, so we use a splitlines that breaks on \n only.
32
__all__ = ["GzipFile", "bytes_to_gzip"]
35
def bytes_to_gzip(bytes, factory=zlib.compressobj,
36
level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
37
width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,
39
"""Create a gzip file containing bytes and return its content."""
41
'\037\213' # self.fileobj.write('\037\213') # magic header
42
'\010' # self.fileobj.write('\010') # compression method
43
# fname = self.filename[:-3]
47
'\x00' # self.fileobj.write(chr(flags))
48
'\0\0\0\0' # write32u(self.fileobj, long(time.time()))
49
'\002' # self.fileobj.write('\002')
50
'\377' # self.fileobj.write('\377')
52
'' # self.fileobj.write(fname + '\000')
54
# using a compressobj avoids a small header and trailer that the compress()
55
# utility function adds.
56
compress = factory(level, method, width, mem, 0)
57
result.append(compress.compress(bytes))
58
result.append(compress.flush())
59
result.append(struct.pack("<L", LOWU32(crc32(bytes))))
60
# size may exceed 2GB, or even 4GB
61
result.append(struct.pack("<L", LOWU32(len(bytes))))
62
return ''.join(result)
30
__all__ = ["GzipFile"]
65
33
class GzipFile(gzip.GzipFile):
95
63
self.extrasize += len_data
96
64
self.size += len_data
98
def _write_gzip_header(self):
99
"""A tuned version of gzip._write_gzip_header
101
We have some extra constraints that plain Gzip does not have.
102
1) We want to write the whole blob at once, rather than multiple
103
calls to fileobj.write().
104
2) We never have a filename
105
3) We don't care about the time
108
'\037\213' # self.fileobj.write('\037\213') # magic header
109
'\010' # self.fileobj.write('\010') # compression method
110
# fname = self.filename[:-3]
114
'\x00' # self.fileobj.write(chr(flags))
115
'\0\0\0\0' # write32u(self.fileobj, long(time.time()))
116
'\002' # self.fileobj.write('\002')
117
'\377' # self.fileobj.write('\377')
119
'' # self.fileobj.write(fname + '\000')
122
66
def _read(self, size=1024):
123
67
# various optimisations:
124
68
# reduces lsprof count from 2500 to
312
256
# 4168 calls in 417.
313
257
# Negative numbers result in reading all the lines
315
# python's gzip routine uses sizehint. This is a more efficient way
316
# than python uses to honor it. But it is even more efficient to
317
# just read the entire thing and use cStringIO to split into lines.
320
# content = self.read(sizehint)
321
# return bzrlib.osutils.split_lines(content)
322
content = StringIO(self.read(-1))
323
return content.readlines()
260
content = self.read(sizehint)
261
return bzrlib.osutils.split_lines(content)
325
263
def _unread(self, buf, len_buf=None):
326
264
"""tuned to remove unneeded len calls.