5590.1.2
by John Arbash Meinel
Change tuned_gzip.GzipFile to be deprecated |
1 |
# Copyright (C) 2006-2011 Canonical Ltd
|
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
2 |
# Written by Robert Collins <robert.collins@canonical.com>
|
3 |
#
|
|
4 |
# This program is free software; you can redistribute it and/or modify
|
|
5 |
# it under the terms of the GNU General Public License as published by
|
|
6 |
# the Free Software Foundation; either version 2 of the License, or
|
|
7 |
# (at your option) any later version.
|
|
8 |
#
|
|
9 |
# This program is distributed in the hope that it will be useful,
|
|
10 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12 |
# GNU General Public License for more details.
|
|
13 |
#
|
|
14 |
# You should have received a copy of the GNU General Public License
|
|
15 |
# along with this program; if not, write to the Free Software
|
|
4183.7.1
by Sabin Iacob
update FSF mailing address |
16 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
17 |
|
6379.6.7
by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear. |
18 |
"""Bzrlib specific gzip tunings. We plan to feed these to the upstream gzip."""
|
19 |
||
6379.6.1
by Jelmer Vernooij
Import absolute_import in a few places. |
20 |
from __future__ import absolute_import |
21 |
||
1908.4.12
by John Arbash Meinel
Minor change to tuned_gzip. |
22 |
from cStringIO import StringIO |
1908.4.5
by John Arbash Meinel
Some small tweaks to knit and tuned_gzip to shave off another couple seconds |
23 |
|
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
24 |
# make GzipFile faster:
|
25 |
import gzip |
|
3734.2.1
by Vincent Ladeuil
Fix U32, LOWU32 disapearance in python-2.6. |
26 |
from gzip import FEXTRA, FCOMMENT, FNAME, FHCRC |
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
27 |
import sys |
28 |
import struct |
|
29 |
import zlib |
|
30 |
||
1666.1.6
by Robert Collins
Make knit the default format. |
31 |
# we want a \n preserved, break on \n only splitlines.
|
5590.1.2
by John Arbash Meinel
Change tuned_gzip.GzipFile to be deprecated |
32 |
from bzrlib import symbol_versioning |
1666.1.6
by Robert Collins
Make knit the default format. |
33 |
|
2817.3.1
by Robert Collins
* New helper ``bzrlib.tuned_gzip.bytes_to_gzip`` which takes a byte string |
34 |
__all__ = ["GzipFile", "bytes_to_gzip"] |
35 |
||
36 |
||
3734.2.1
by Vincent Ladeuil
Fix U32, LOWU32 disapearance in python-2.6. |
37 |
def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    A negative ``i`` is taken to be the signed view of a 32-bit quantity and
    is moved into the non-negative range by adding 2**32.  Non-negative
    values are returned unchanged.

    :param i: An integer.
    :return: ``i`` if it is >= 0, otherwise ``i + 2**32``.
    """
    if i < 0:
        # No ``L`` suffix: plain integers promote to arbitrary precision
        # automatically, and the suffix is a syntax error on Python 3.
        i += 1 << 32
    return i
|
45 |
||
46 |
||
47 |
def LOWU32(i):
    """Return the low-order 32 bits of an int, as a non-negative int.

    :param i: An integer of any sign or magnitude.
    :return: ``i`` masked with 0xFFFFFFFF.
    """
    # The mask is written without the Python-2-only ``L`` suffix; the value
    # of the result is the same.
    return i & 0xFFFFFFFF
|
50 |
||
51 |
||
2817.3.1
by Robert Collins
* New helper ``bzrlib.tuned_gzip.bytes_to_gzip`` which takes a byte string |
52 |
def bytes_to_gzip(bytes, factory=zlib.compressobj,
    level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
    width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,
    crc32=zlib.crc32):
    """Create a gzip file containing bytes and return its content.

    This is a convenience wrapper that hands ``bytes`` to chunks_to_gzip as
    a single chunk.

    :param bytes: The byte string to compress.
    :param factory: Callable used to build the compression object.
    :param level: Compression level passed to ``factory``.
    :param method: Compression method passed to ``factory``.
    :param width: Window size passed to ``factory``.
    :param mem: Memory level passed to ``factory``.
    :param crc32: Callable used to compute the running checksum.
    :return: The complete gzip stream as a byte string.
    """
    # Forward every tuning argument: previously they were accepted here but
    # dropped, so callers always got the defaults.
    return chunks_to_gzip([bytes], factory=factory, level=level,
                          method=method, width=width, mem=mem, crc32=crc32)
58 |
||
59 |
||
60 |
def chunks_to_gzip(chunks, factory=zlib.compressobj,
    level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
    width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,
    crc32=zlib.crc32):
    """Create a gzip file containing chunks and return its content.

    :param chunks: An iterable of strings. Each string can have arbitrary
        layout.
    :param factory: Callable used to build the compression object; it is
        called as ``factory(level, method, width, mem, 0)``.
    :param level: Compression level passed to ``factory``.
    :param method: Compression method passed to ``factory``.
    :param width: Window size passed to ``factory``.
    :param mem: Memory level passed to ``factory``.
    :param crc32: Callable ``crc32(data, running_crc)`` used for the
        trailer checksum.
    :return: The complete gzip stream (header, deflate data, trailer) as a
        single byte string.
    """
    # The fixed 10-byte header, written as one literal.  Byte-string
    # literals are used so the pieces join cleanly with the compressor
    # output on Python 3 as well as on Python 2.6+.
    result = [
        b'\037\213'  # magic header
        b'\010'      # compression method
        b'\x00'      # flags: none set, in particular no filename
        b'\0\0\0\0'  # modification time: not recorded
        b'\002'      # extra flags byte
        b'\377'      # OS byte
        # no filename field follows because the FNAME flag is not set
    ]
    # using a compressobj avoids a small header and trailer that the compress()
    # utility function adds.
    compress = factory(level, method, width, mem, 0)
    crc = 0
    total_len = 0
    for chunk in chunks:
        crc = crc32(chunk, crc)
        total_len += len(chunk)
        zbytes = compress.compress(chunk)
        if zbytes:
            # the compressor may buffer and return nothing for a chunk
            result.append(zbytes)
    result.append(compress.flush())
    # size may exceed 2GB, or even 4GB, and crc32 may be negative on some
    # Python versions, so both trailer fields are reduced to 32 unsigned bits.
    result.append(struct.pack("<LL", crc & 0xFFFFFFFF,
                              total_len & 0xFFFFFFFF))
    return b''.join(result)
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
98 |
|
99 |
||
100 |
class GzipFile(gzip.GzipFile):
    """Knit tuned version of GzipFile.

    This class is deprecated: constructing it emits a DeprecationWarning.

    This is based on the following lsprof stats:
    python 2.4 stock GzipFile write:
    58971 0 5644.3090 2721.4730 gzip:193(write)
    +58971 0 1159.5530 1159.5530 +<built-in method compress>
    +176913 0 987.0320 987.0320 +<len>
    +58971 0 423.1450 423.1450 +<zlib.crc32>
    +58971 0 353.1060 353.1060 +<method 'write' of 'cStringIO.
                                 StringO' objects>
    tuned GzipFile write:
    58971 0 4477.2590 2103.1120 bzrlib.knit:1250(write)
    +58971 0 1297.7620 1297.7620 +<built-in method compress>
    +58971 0 406.2160 406.2160 +<zlib.crc32>
    +58971 0 341.9020 341.9020 +<method 'write' of 'cStringIO.
                                 StringO' objects>
    +58971 0 328.2670 328.2670 +<len>


    Yes, it's only 1.6 seconds, but they add up.
    """

    def __init__(self, *args, **kwargs):
        """Warn that this class is deprecated, then defer to gzip.GzipFile.

        All arguments are passed through unchanged.
        """
        symbol_versioning.warn(
            symbol_versioning.deprecated_in((2, 3, 0))
            % 'bzrlib.tuned_gzip.GzipFile',
            DeprecationWarning, stacklevel=2)
        gzip.GzipFile.__init__(self, *args, **kwargs)

    # Two variants of _add_read_data are kept because the read-buffer
    # bookkeeping differs: the first also maintains self.extrastart, the
    # second only appends to self.extrabuf.
    if sys.version_info >= (2, 7, 4):
        def _add_read_data(self, data):
            """Account for freshly decompressed data in the read buffer."""
            # 4169 calls in 183
            # temp var for len(data) and switch to +='s.
            # 4169 in 139
            len_data = len(data)
            self.crc = zlib.crc32(data, self.crc) & 0xffffffff
            # drop the part of the buffer that lies before the current offset
            offset = self.offset - self.extrastart
            self.extrabuf = self.extrabuf[offset:] + data
            self.extrasize = self.extrasize + len_data
            self.extrastart = self.offset
            self.size = self.size + len_data
    else:
        def _add_read_data(self, data):
            """Account for freshly decompressed data in the read buffer."""
            # 4169 calls in 183
            # temp var for len(data) and switch to +='s.
            # 4169 in 139
            len_data = len(data)
            self.crc = zlib.crc32(data, self.crc)
            self.extrabuf += data
            self.extrasize += len_data
            self.size += len_data

    def _write_gzip_header(self):
        """A tuned version of gzip._write_gzip_header

        We have some extra constraints that plain Gzip does not.
        1) We want to write the whole blob at once, rather than multiple
           calls to fileobj.write().
        2) We never have a filename
        3) We don't care about the time
        """
        self.fileobj.write(
            '\037\213'  # magic header
            '\010'      # compression method
            '\x00'      # flags: none set, in particular no filename
            '\0\0\0\0'  # modification time: not recorded
            '\002'      # extra flags byte
            '\377'      # OS byte
            ''          # no filename field
            )

    # _read and _read_eof work as a pair (they communicate through
    # self._gzip_tail), so both are only installed for the older gzip
    # implementation.
    if sys.version_info < (2, 7, 4):
        def _read(self, size=1024):
            """Read and decompress up to ``size`` compressed bytes.

            The decompressed data is handed to _add_read_data.  The 8-byte
            member trailer is captured in self._gzip_tail so that _read_eof
            does not need to seek.

            :raises EOFError: when there is no more data to read.
            """
            # various optimisations:
            # reduces lsprof count from 2500 to
            # 8337 calls in 1272, 365 internal
            if self.fileobj is None:
                raise EOFError("Reached EOF")

            if self._new_member:
                # If the _new_member flag is set, we have to
                # jump to the next member, if there is one.
                #
                # First, check if we're at the end of the file;
                # if so, it's time to stop; no more members to read.
                next_header_bytes = self.fileobj.read(10)
                if next_header_bytes == '':
                    raise EOFError("Reached EOF")

                self._init_read()
                self._read_gzip_header(next_header_bytes)
                self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
                self._new_member = False

            # Read a chunk of data from the file
            buf = self.fileobj.read(size)

            # If the EOF has been reached, flush the decompression object
            # and mark this object as finished.

            if buf == "":
                self._add_read_data(self.decompress.flush())
                if len(self.decompress.unused_data) < 8:
                    raise AssertionError("what does flush do?")
                self._gzip_tail = self.decompress.unused_data[0:8]
                self._read_eof()
                # tell the driving read() call we have stuffed all the data
                # in self.extrabuf
                raise EOFError('Reached EOF')

            self._add_read_data(self.decompress.decompress(buf))

            if self.decompress.unused_data != "":
                # Ending case: we've come to the end of a member in the file,
                # so seek back to the start of the data for the next member
                # which is the length of the decompress objects unused data -
                # the first 8 bytes for the end crc and size records.
                #
                # so seek back to the start of the unused data, finish up
                # this member, and read a new gzip header.
                # (The number of bytes to seek back is the length of the unused
                # data, minus 8 because those 8 bytes are part of this member.
                seek_length = len(self.decompress.unused_data) - 8
                if seek_length > 0:
                    # we read too much data
                    self.fileobj.seek(-seek_length, 1)
                    self._gzip_tail = self.decompress.unused_data[0:8]
                elif seek_length < 0:
                    # we haven't read enough to check the checksum.
                    if not (-8 < seek_length):
                        raise AssertionError("too great a seek")
                    buf = self.fileobj.read(-seek_length)
                    self._gzip_tail = self.decompress.unused_data + buf
                else:
                    self._gzip_tail = self.decompress.unused_data

                # Check the CRC and file size, and set the flag so we read
                # a new member on the next call
                self._read_eof()
                self._new_member = True

        def _read_eof(self):
            """tuned to reduce function calls and eliminate file seeking:
            pass 1:
            reduces lsprof count from 800 to 288
            4168 in 296
            avoid U32 call by using struct format L
            4168 in 200

            :raises IOError: if the stored CRC or length does not match the
                data that was decompressed.
            """
            # We've read to the end of the file, so we should have 8 bytes of
            # unused data in the decompressor. If we don't, there is a corrupt
            # file.  We use these 8 bytes to calculate the CRC and the recorded
            # file size.  We then check that the computed CRC and size of
            # the uncompressed data matches the stored values.  Note that the
            # size stored is the true file size mod 2**32.
            if not (len(self._gzip_tail) == 8):
                raise AssertionError("gzip trailer is incorrect length.")
            crc32, isize = struct.unpack("<LL", self._gzip_tail)
            # note that isize is unsigned - it can exceed 2GB
            if crc32 != U32(self.crc):
                raise IOError("CRC check failed %d %d"
                              % (crc32, U32(self.crc)))
            elif isize != LOWU32(self.size):
                raise IOError("Incorrect length of data produced")

    def _read_gzip_header(self, bytes=None):
        """Supply bytes if the minimum header size is already read.

        :param bytes: 10 bytes of header data.
        :raises IOError: if the magic number or compression method is not
            the expected one.
        """
        # starting cost: 300 in 3998
        # 15998 reads from 3998 calls
        # final cost 168
        if bytes is None:
            bytes = self.fileobj.read(10)
        magic = bytes[0:2]
        if magic != '\037\213':
            raise IOError('Not a gzipped file')
        method = ord(bytes[2:3])
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord(bytes[3:4])
        # modtime = self.fileobj.read(4) (bytes [4:8])
        # extraflag = self.fileobj.read(1) (bytes[8:9])
        # os = self.fileobj.read(1) (bytes[9:10])
        # self.fileobj.read(6)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256*ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s == '\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s == '\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def readline(self, size=-1):
        """Tuned to remove buffer length calls in _unread and...

        also removes multiple len(c) calls, inlines _unread,
        total savings - lsprof 5800 to 5300
        phase 2:
        4168 calls in 2233
        8176 calls to read() in 1684
        changing the min chunk size to 200 halved all the cache misses
        leading to a drop to:
        4168 calls in 1977
        4168 call to read() in 1646
        - i.e. just reduced the function call overhead. May be worth
          keeping.

        :param size: Maximum number of bytes to return; a negative value
            means no limit.
        :return: The next line, including its trailing newline if one was
            found before ``size`` bytes or EOF.
        """
        if size < 0:
            size = sys.maxint
        bufs = []
        readsize = min(200, size)    # Read from the file in small chunks
        while True:
            if size == 0:
                return "".join(bufs)  # Return resulting line

            # c is the chunk
            c = self.read(readsize)
            # number of bytes read
            len_c = len(c)
            i = c.find('\n')
            # We set i=size-1 to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if i == -1 and len_c > size:
                i = size - 1
            elif size <= i:
                i = size - 1

            if i >= 0 or c == '':
                # if i >= 0 we have a newline or have triggered the size
                # limit above.
                # if c == '' its EOF.
                bufs.append(c[:i+1])    # Add portion of last chunk
                # -- inlined self._unread --
                # Push back the rest of the chunk.  The pushed-back piece is
                # c[i+1:], whose length is len_c - i - 1; the counters must
                # move by exactly that amount or extrasize and offset drift
                # by one byte per line read.
                rest = c[i+1:]
                len_rest = len_c - i - 1
                self.extrabuf = rest + self.extrabuf
                self.extrasize = len_rest + self.extrasize
                self.offset -= len_rest
                # -- end inlined self._unread --
                return ''.join(bufs)    # Return resulting line

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len_c
            readsize = min(size, readsize * 2)

    def readlines(self, sizehint=0):
        """Return all remaining lines as a list; ``sizehint`` is ignored."""
        # optimise to avoid all the buffer manipulation
        # lsprof changed from:
        # 4168 calls in 5472 with 32000 calls to readline()
        # to :
        # 4168 calls in 417.
        # Negative numbers result in reading all the lines

        # python's gzip routine uses sizehint. This is a more efficient way
        # than python uses to honor it. But it is even more efficient to
        # just read the entire thing and use cStringIO to split into lines.
        # if sizehint <= 0:
        #     sizehint = -1
        # content = self.read(sizehint)
        # return bzrlib.osutils.split_lines(content)
        content = StringIO(self.read(-1))
        return content.readlines()

    def _unread(self, buf, len_buf=None):
        """tuned to remove unneeded len calls.

        because this is such an inner routine in readline, and readline is
        in many inner loops, this has been inlined into readline().

        The len_buf parameter combined with the reduction in len calls dropped
        the lsprof ms count for this routine on my test data from 800 to 200 -
        a 75% saving.

        :param buf: Data to push back onto the front of the read buffer.
        :param len_buf: ``len(buf)`` if the caller already knows it.
        """
        if len_buf is None:
            len_buf = len(buf)
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len_buf + self.extrasize
        self.offset -= len_buf

    def write(self, data):
        """Compress ``data`` and write it to the underlying file object.

        :raises IOError: if the file was not opened for writing.
        :raises ValueError: if the file has been closed.
        """
        if self.mode != gzip.WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")
        data_len = len(data)
        if data_len > 0:
            self.size = self.size + data_len
            self.crc = zlib.crc32(data, self.crc)
            self.fileobj.write(self.compress.compress(data))
            self.offset += data_len

    def writelines(self, lines):
        """Write all of ``lines`` with a single call to write()."""
        # profiling indicated a significant overhead
        # calling write for each line.
        # this batch call is a lot faster :).
        # (4 seconds to 1 seconds for the sample upgrades I was testing).
        self.write(''.join(lines))

    if sys.version_info > (2, 7):
        # As of Python 2.7 the crc32 must be positive when close is called
        def close(self):
            """Close the file, first forcing the write CRC to be unsigned."""
            if self.fileobj is None:
                return
            if self.mode == gzip.WRITE:
                self.crc &= 0xFFFFFFFF
            gzip.GzipFile.close(self)
|
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
429 |