5590.1.2
by John Arbash Meinel
Change tuned_gzip.GzipFile to be deprecated |
1 |
# Copyright (C) 2006-2011 Canonical Ltd
|
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
2 |
# Written by Robert Collins <robert.collins@canonical.com>
|
3 |
#
|
|
4 |
# This program is free software; you can redistribute it and/or modify
|
|
5 |
# it under the terms of the GNU General Public License as published by
|
|
6 |
# the Free Software Foundation; either version 2 of the License, or
|
|
7 |
# (at your option) any later version.
|
|
8 |
#
|
|
9 |
# This program is distributed in the hope that it will be useful,
|
|
10 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12 |
# GNU General Public License for more details.
|
|
13 |
#
|
|
14 |
# You should have received a copy of the GNU General Public License
|
|
15 |
# along with this program; if not, write to the Free Software
|
|
4183.7.1
by Sabin Iacob
update FSF mailing address |
16 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
17 |
|
6379.6.7
by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear. |
18 |
"""Bzrlib specific gzip tunings. We plan to feed these to the upstream gzip."""
|
19 |
||
6379.6.1
by Jelmer Vernooij
Import absolute_import in a few places. |
20 |
from __future__ import absolute_import |
21 |
||
1908.4.12
by John Arbash Meinel
Minor change to tuned_gzip. |
22 |
from cStringIO import StringIO |
1908.4.5
by John Arbash Meinel
Some small tweaks to knit and tuned_gzip to shave off another couple seconds |
23 |
|
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
24 |
# make GzipFile faster:
|
25 |
import gzip |
|
3734.2.1
by Vincent Ladeuil
Fix U32, LOWU32 disapearance in python-2.6. |
26 |
from gzip import FEXTRA, FCOMMENT, FNAME, FHCRC |
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
27 |
import sys |
28 |
import struct |
|
29 |
import zlib |
|
30 |
||
1666.1.6
by Robert Collins
Make knit the default format. |
31 |
# we want a \n preserved, break on \n only splitlines.
|
5590.1.2
by John Arbash Meinel
Change tuned_gzip.GzipFile to be deprecated |
32 |
from bzrlib import symbol_versioning |
1666.1.6
by Robert Collins
Make knit the default format. |
33 |
|
2817.3.1
by Robert Collins
* New helper ``bzrlib.tuned_gzip.bytes_to_gzip`` which takes a byte string |
34 |
# chunks_to_gzip is public API alongside bytes_to_gzip (which delegates to
# it); export it explicitly as well.
__all__ = ["GzipFile", "bytes_to_gzip", "chunks_to_gzip"]
35 |
||
36 |
||
3734.2.1
by Vincent Ladeuil
Fix U32, LOWU32 disapearance in python-2.6. |
37 |
def U32(i):
    """Return i as an unsigned integer, assuming it fits in 32 bits.

    If it's >= 2GB when viewed as a 32-bit unsigned int, return a long.

    :param i: A (possibly negative) integer holding a 32-bit value.
    :return: The same 32-bit pattern interpreted as non-negative.
    """
    if i < 0:
        # 1 << 32 auto-promotes to long on Python 2, so the old '1L'
        # literal suffix is unnecessary (and this spelling also parses
        # on Python 3).
        i += 1 << 32
    return i
|
45 |
||
46 |
||
47 |
def LOWU32(i):
    """Return the low-order 32 bits of an int, as a non-negative int."""
    # 0xFFFFFFFF auto-promotes to long where needed on Python 2; the old
    # 'L' suffix is unnecessary (and this spelling also parses on Python 3).
    return i & 0xFFFFFFFF
|
50 |
||
51 |
||
2817.3.1
by Robert Collins
* New helper ``bzrlib.tuned_gzip.bytes_to_gzip`` which takes a byte string |
52 |
def bytes_to_gzip(bytes, factory=zlib.compressobj,
    level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
    width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,
    crc32=zlib.crc32):
    """Create a gzip file containing bytes and return its content.

    :param bytes: A single string to compress.
    :param factory: Callable that builds the zlib compressor object.
    :param level: Compression level passed to ``factory``.
    :param method: Compression method passed to ``factory``.
    :param width: Window size (negative for raw deflate) passed to ``factory``.
    :param mem: Memory level passed to ``factory``.
    :param crc32: CRC function used for the gzip trailer.
    :return: The complete gzip stream as a single string.
    """
    # Forward every tuning parameter to chunks_to_gzip; previously they
    # were accepted but silently ignored by the bare delegation
    # chunks_to_gzip([bytes]).
    return chunks_to_gzip([bytes], factory=factory, level=level,
        method=method, width=width, mem=mem, crc32=crc32)
58 |
||
59 |
||
60 |
def chunks_to_gzip(chunks, factory=zlib.compressobj,
    level=zlib.Z_DEFAULT_COMPRESSION, method=zlib.DEFLATED,
    width=-zlib.MAX_WBITS, mem=zlib.DEF_MEM_LEVEL,
    crc32=zlib.crc32):
    """Create a gzip file containing chunks and return its content.

    :param chunks: An iterable of strings. Each string can have arbitrary
        layout.
    """
    # Fixed gzip header: magic bytes, deflate method, no flags, zero
    # mtime, XFL byte, OS byte, and no filename field.
    result = [
        '\037\213'  # magic header
        '\010'      # compression method (deflate)
        '\x00'      # flags byte: FNAME not set
        '\0\0\0\0'  # modification time, fixed at zero
        '\002'      # extra flags
        '\377'      # OS byte: unknown
        ''          # empty filename field
        ]
    # A compressobj avoids the small header and trailer that the
    # zlib.compress() utility function would add.
    compressor = factory(level, method, width, mem, 0)
    checksum = 0
    uncompressed_len = 0
    for chunk in chunks:
        checksum = crc32(chunk, checksum)
        uncompressed_len += len(chunk)
        compressed = compressor.compress(chunk)
        if compressed:
            result.append(compressed)
    result.append(compressor.flush())
    # Trailer holds CRC and length mod 2**32; the true size may exceed
    # 2GB or even 4GB, hence the LOWU32 masking.
    result.append(struct.pack("<LL", LOWU32(checksum),
                              LOWU32(uncompressed_len)))
    return ''.join(result)
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
98 |
|
99 |
||
100 |
class GzipFile(gzip.GzipFile):
    """Knit tuned version of GzipFile.

    This is based on the following lsprof stats:
    python 2.4 stock GzipFile write:
    58971      0      5644.3090   2721.4730   gzip:193(write)
    +58971     0      1159.5530   1159.5530   +<built-in method compress>
    +176913    0       987.0320    987.0320   +<len>
    +58971     0       423.1450    423.1450   +<zlib.crc32>
    +58971     0       353.1060    353.1060   +<method 'write' of 'cStringIO.
                                                StringO' objects>
    tuned GzipFile write:
    58971      0      4477.2590   2103.1120   bzrlib.knit:1250(write)
    +58971     0      1297.7620   1297.7620   +<built-in method compress>
    +58971     0       406.2160    406.2160   +<zlib.crc32>
    +58971     0       341.9020    341.9020   +<method 'write' of 'cStringIO.
                                                StringO' objects>
    +58971     0       328.2670    328.2670   +<len>

    Yes, its only 1.6 seconds, but they add up.
    """

    def __init__(self, *args, **kwargs):
        # This whole class is deprecated (since bzr 2.3.0); warn once per
        # call site, then fall through to the stock implementation.
        symbol_versioning.warn(
            symbol_versioning.deprecated_in((2, 3, 0))
            % 'bzrlib.tuned_gzip.GzipFile',
            DeprecationWarning, stacklevel=2)
        gzip.GzipFile.__init__(self, *args, **kwargs)

    def _add_read_data(self, data):
        """Account for decompressed data: update CRC, buffer and sizes.

        Tuned: a temp var for len(data) and += assignments took the
        profile from 4169 calls in 183 down to 4169 in 139.
        """
        len_data = len(data)
        self.crc = zlib.crc32(data, self.crc)
        self.extrabuf += data
        self.extrasize += len_data
        self.size += len_data

    def _write_gzip_header(self):
        """A tuned version of gzip._write_gzip_header

        We have some extra constrains that plain Gzip does not.
        1) We want to write the whole blob at once. rather than multiple
           calls to fileobj.write().
        2) We never have a filename
        3) We don't care about the time
        """
        # The commented-out fragments show the stock gzip code each
        # constant byte replaces.
        self.fileobj.write(
           '\037\213'   # self.fileobj.write('\037\213')  # magic header
            '\010'      # self.fileobj.write('\010')      # compression method
                        # fname = self.filename[:-3]
                        # flags = 0
                        # if fname:
                        #     flags = FNAME
            '\x00'      # self.fileobj.write(chr(flags))
            '\0\0\0\0'  # write32u(self.fileobj, long(time.time()))
            '\002'      # self.fileobj.write('\002')
            '\377'      # self.fileobj.write('\377')
                        # if fname:
            ''          # self.fileobj.write(fname + '\000')
            )

    def _read(self, size=1024):
        """Decompress one chunk from the underlying file into extrabuf.

        Raises EOFError when the stream is exhausted (this tells the
        driving gzip.GzipFile.read() loop to stop).
        """
        # various optimisations:
        # reduces lsprof count from 2500 to
        # 8337 calls in 1272, 365 internal
        if self.fileobj is None:
            raise EOFError, "Reached EOF"

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            next_header_bytes = self.fileobj.read(10)
            if next_header_bytes == '':
                raise EOFError, "Reached EOF"

            self._init_read()
            self._read_gzip_header(next_header_bytes)
            # raw deflate stream (negative wbits): the gzip framing is
            # handled by this class, not by zlib.
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == "":
            self._add_read_data(self.decompress.flush())
            # flush() must leave at least the 8-byte CRC/size trailer
            # in unused_data; anything less means a corrupt stream.
            if len(self.decompress.unused_data) < 8:
                raise AssertionError("what does flush do?")
            self._gzip_tail = self.decompress.unused_data[0:8]
            self._read_eof()
            # tell the driving read() call we have stuffed all the data
            # in self.extrabuf
            raise EOFError, 'Reached EOF'

        self._add_read_data(self.decompress.decompress(buf))

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the data for the next member which
            # is the length of the decompress objects unused data - the first
            # 8 bytes for the end crc and size records.
            #
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because those 8 bytes are part of this member.
            seek_length = len (self.decompress.unused_data) - 8
            if seek_length > 0:
                # we read too much data
                self.fileobj.seek(-seek_length, 1)
                self._gzip_tail = self.decompress.unused_data[0:8]
            elif seek_length < 0:
                # we haven't read enough to check the checksum.
                if not (-8 < seek_length):
                    raise AssertionError("too great a seek")
                buf = self.fileobj.read(-seek_length)
                self._gzip_tail = self.decompress.unused_data + buf
            else:
                self._gzip_tail = self.decompress.unused_data

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _read_eof(self):
        """tuned to reduce function calls and eliminate file seeking:
        pass 1:
        reduces lsprof count from 800 to 288
        4168 in 296
        avoid U32 call by using struct format L
        4168 in 200
        """
        # We've read to the end of the file, so we should have 8 bytes of
        # unused data in the decompressor. If we don't, there is a corrupt
        # file.  We use these 8 bytes to calculate the CRC and the recorded
        # file size.  We then check the that the computed CRC and size of
        # the uncompressed data matches the stored values.  Note that the
        # size stored is the true file size mod 2**32.
        if not (len(self._gzip_tail) == 8):
            raise AssertionError("gzip trailer is incorrect length.")
        crc32, isize = struct.unpack("<LL", self._gzip_tail)
        # note that isize is unsigned - it can exceed 2GB
        if crc32 != U32(self.crc):
            raise IOError, "CRC check failed %d %d" % (crc32, U32(self.crc))
        elif isize != LOWU32(self.size):
            raise IOError, "Incorrect length of data produced"

    def _read_gzip_header(self, bytes=None):
        """Supply bytes if the minimum header size is already read.

        :param bytes: 10 bytes of header data.

        Tuning history:
        starting cost: 300 in 3998
        15998 reads from 3998 calls
        final cost 168
        """
        if bytes is None:
            bytes = self.fileobj.read(10)
        magic = bytes[0:2]
        if magic != '\037\213':
            raise IOError, 'Not a gzipped file'
        method = ord(bytes[2:3])
        if method != 8:
            raise IOError, 'Unknown compression method'
        flag = ord(bytes[3:4])
        # The remaining fixed header fields are ignored:
        # modtime = self.fileobj.read(4) (bytes [4:8])
        # extraflag = self.fileobj.read(1) (bytes[8:9])
        # os = self.fileobj.read(1) (bytes[9:10])
        # self.fileobj.read(6)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256*ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s=='\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s=='\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def readline(self, size=-1):
        """Tuned to remove buffer length calls in _unread and...

        also removes multiple len(c) calls, inlines _unread,
        total savings - lsprof 5800 to 5300
        phase 2:
        4168 calls in 2233
        8176 calls to read() in 1684
        changing the min chunk size to 200 halved all the cache misses
        leading to a drop to:
        4168 calls in 1977
        4168 call to read() in 1646
        - i.e. just reduced the function call overhead. May be worth
          keeping.
        """
        if size < 0: size = sys.maxint
        bufs = []
        readsize = min(200, size)    # Read from the file in small chunks
        while True:
            if size == 0:
                return "".join(bufs) # Return resulting line

            # c is the chunk
            c = self.read(readsize)
            # number of bytes read
            len_c = len(c)
            i = c.find('\n')
            if size is not None:
                # We set i=size to break out of the loop under two
                # conditions: 1) there's no newline, and the chunk is
                # larger than size, or 2) there is a newline, but the
                # resulting line would be longer than 'size'.
                if i==-1 and len_c > size: i=size-1
                elif size <= i: i = size -1

            if i >= 0 or c == '':
                # if i>= 0 we have a newline or have triggered the above
                # if size is not None condition.
                # if c == '' its EOF.
                bufs.append(c[:i+1])    # Add portion of last chunk
                # -- inlined self._unread --
                ## self._unread(c[i+1:], len_c - i) # Push back rest of chunk
                self.extrabuf = c[i+1:] + self.extrabuf
                self.extrasize = len_c - i + self.extrasize
                self.offset -= len_c - i
                # -- end inlined self._unread --
                return ''.join(bufs)    # Return resulting line

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len_c
            readsize = min(size, readsize * 2)

    def readlines(self, sizehint=0):
        # optimise to avoid all the buffer manipulation
        # lsprof changed from:
        # 4168 calls in 5472 with 32000 calls to readline()
        # to :
        # 4168 calls in 417.
        # Negative numbers result in reading all the lines

        # python's gzip routine uses sizehint. This is a more efficient way
        # than python uses to honor it. But it is even more efficient to
        # just read the entire thing and use cStringIO to split into lines.
        # if sizehint <= 0:
        #     sizehint = -1
        # content = self.read(sizehint)
        # return bzrlib.osutils.split_lines(content)
        content = StringIO(self.read(-1))
        return content.readlines()

    def _unread(self, buf, len_buf=None):
        """tuned to remove unneeded len calls.

        because this is such an inner routine in readline, and readline is
        in many inner loops, this has been inlined into readline().

        The len_buf parameter combined with the reduction in len calls dropped
        the lsprof ms count for this routine on my test data from 800 to 200 -
        a 75% saving.
        """
        if len_buf is None:
            len_buf = len(buf)
        self.extrabuf = buf + self.extrabuf
        self.extrasize = len_buf + self.extrasize
        self.offset -= len_buf

    def write(self, data):
        """Compress data and update CRC/size accounting in one pass."""
        if self.mode != gzip.WRITE:
            import errno
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError, "write() on closed GzipFile object"
        data_len = len(data)
        if data_len > 0:
            self.size = self.size + data_len
            self.crc = zlib.crc32(data, self.crc)
            self.fileobj.write( self.compress.compress(data) )
        self.offset += data_len

    def writelines(self, lines):
        # profiling indicated a significant overhead
        # calling write for each line.
        # this batch call is a lot faster :).
        # (4 seconds to 1 seconds for the sample upgrades I was testing).
        self.write(''.join(lines))

    if sys.version_info > (2, 7):
        # As of Python 2.7 the crc32 must be positive when close is called
        def close(self):
            if self.fileobj is None:
                return
            if self.mode == gzip.WRITE:
                # mask to the unsigned 32-bit value Python 2.7's
                # gzip trailer writer requires
                self.crc &= 0xFFFFFFFFL
            gzip.GzipFile.close(self)
|
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
415 |