2052.3.2
by John Arbash Meinel
Change Copyright .. by Canonical to Copyright ... Canonical |
1 |
# Copyright (C) 2005, 2006 Canonical Ltd
|
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
2 |
# Written by Robert Collins <robert.collins@canonical.com>
|
3 |
#
|
|
4 |
# This program is free software; you can redistribute it and/or modify
|
|
5 |
# it under the terms of the GNU General Public License as published by
|
|
6 |
# the Free Software Foundation; either version 2 of the License, or
|
|
7 |
# (at your option) any later version.
|
|
8 |
#
|
|
9 |
# This program is distributed in the hope that it will be useful,
|
|
10 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12 |
# GNU General Public License for more details.
|
|
13 |
#
|
|
14 |
# You should have received a copy of the GNU General Public License
|
|
15 |
# along with this program; if not, write to the Free Software
|
|
16 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
17 |
||
18 |
"""Bzrlib specific gzip tunings. We plan to feed these to the upstream gzip."""
|
|
19 |
||
1908.4.12
by John Arbash Meinel
Minor change to tuned_gzip. |
20 |
from cStringIO import StringIO |
1908.4.5
by John Arbash Meinel
Some small tweaks to knit and tuned_gzip to shave off another couple seconds |
21 |
|
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
22 |
# make GzipFile faster:
|
23 |
import gzip |
|
24 |
from gzip import U32, LOWU32, FEXTRA, FCOMMENT, FNAME, FHCRC |
|
25 |
import sys |
|
26 |
import struct |
|
27 |
import zlib |
|
28 |
||
1666.1.6
by Robert Collins
Make knit the default format. |
29 |
# we want a \n preserved, break on \n only splitlines.
|
30 |
import bzrlib |
|
31 |
||
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
32 |
__all__ = ["GzipFile"] |
33 |
||
34 |
||
35 |
class GzipFile(gzip.GzipFile):
    """Knit tuned version of GzipFile.

    This is based on the following lsprof stats:
    python 2.4 stock GzipFile write:
    58971      0      5644.3090   2721.4730   gzip:193(write)
    +58971     0      1159.5530   1159.5530   +<built-in method compress>
    +176913    0       987.0320    987.0320   +<len>
    +58971     0       423.1450    423.1450   +<zlib.crc32>
    +58971     0       353.1060    353.1060   +<method 'write' of 'cStringIO.
                                                StringO' objects>
    tuned GzipFile write:
    58971      0      4477.2590   2103.1120   bzrlib.knit:1250(write)
    +58971     0      1297.7620   1297.7620   +<built-in method compress>
    +58971     0       406.2160    406.2160   +<zlib.crc32>
    +58971     0       341.9020    341.9020   +<method 'write' of 'cStringIO.
                                                StringO' objects>
    +58971     0       328.2670    328.2670   +<len>

    Yes, its only 1.6 seconds, but they add up.
    """
|
|
57 |
||
58 |
def _add_read_data(self, data): |
|
59 |
# 4169 calls in 183
|
|
60 |
# temp var for len(data) and switch to +='s.
|
|
61 |
# 4169 in 139
|
|
62 |
len_data = len(data) |
|
63 |
self.crc = zlib.crc32(data, self.crc) |
|
64 |
self.extrabuf += data |
|
65 |
self.extrasize += len_data |
|
66 |
self.size += len_data |
|
67 |
||
1908.4.3
by John Arbash Meinel
Shave another second off of _record_to_data time, by optimizing single write versus multiple writes |
68 |
def _write_gzip_header(self): |
69 |
"""A tuned version of gzip._write_gzip_header
|
|
70 |
||
71 |
We have some extra constrains that plain Gzip does not.
|
|
1908.4.10
by John Arbash Meinel
Small cleanups |
72 |
1) We want to write the whole blob at once. rather than multiple
|
73 |
calls to fileobj.write().
|
|
1908.4.3
by John Arbash Meinel
Shave another second off of _record_to_data time, by optimizing single write versus multiple writes |
74 |
2) We never have a filename
|
75 |
3) We don't care about the time
|
|
76 |
"""
|
|
77 |
self.fileobj.write( |
|
78 |
'\037\213' # self.fileobj.write('\037\213') # magic header |
|
79 |
'\010' # self.fileobj.write('\010') # compression method |
|
80 |
# fname = self.filename[:-3]
|
|
81 |
# flags = 0
|
|
82 |
# if fname:
|
|
83 |
# flags = FNAME
|
|
84 |
'\x00' # self.fileobj.write(chr(flags)) |
|
85 |
'\0\0\0\0' # write32u(self.fileobj, long(time.time())) |
|
86 |
'\002' # self.fileobj.write('\002') |
|
87 |
'\377' # self.fileobj.write('\377') |
|
88 |
# if fname:
|
|
89 |
'' # self.fileobj.write(fname + '\000') |
|
90 |
)
|
|
91 |
||
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
92 |
    def _read(self, size=1024):
        """Decompress one chunk of at most ``size`` compressed bytes.

        Feeds the decompressed output into self.extrabuf via
        _add_read_data; raises EOFError when no more data can be
        produced (the driving read() call catches it).  Handles the
        member trailer even when the underlying read stopped short of
        the full 8 trailer bytes.

        NOTE(review): self._gzip_tail is consumed by _read_eof; it must
        always end up holding exactly the 8 trailer bytes.
        """
        # various optimisations:
        # reduces lsprof count from 2500 to
        # 8337 calls in 1272, 365 internal
        if self.fileobj is None:
            raise EOFError, "Reached EOF"

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            #
            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            next_header_bytes = self.fileobj.read(10)
            if next_header_bytes == '':
                raise EOFError, "Reached EOF"

            self._init_read()
            # pass the 10 bytes already read so the header parse does
            # not have to re-read them from the file.
            self._read_gzip_header(next_header_bytes)
            # negative wbits: raw deflate stream, no zlib header/trailer
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == "":
            self._add_read_data(self.decompress.flush())
            assert len(self.decompress.unused_data) >= 8, "what does flush do?"
            self._gzip_tail = self.decompress.unused_data[0:8]
            self._read_eof()
            # tell the driving read() call we have stuffed all the data
            # in self.extrabuf
            raise EOFError, 'Reached EOF'

        self._add_read_data(self.decompress.decompress(buf))

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the data for the next member which
            # is the length of the decompress objects unused data - the first
            # 8 bytes for the end crc and size records.
            #
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because those 8 bytes are part of this member.
            seek_length = len (self.decompress.unused_data) - 8
            if seek_length > 0:
                # we read too much data
                self.fileobj.seek(-seek_length, 1)
                self._gzip_tail = self.decompress.unused_data[0:8]
            elif seek_length < 0:
                # we haven't read enough to check the checksum.
                assert -8 < seek_length, "too great a seek."
                # top up the partial trailer from the file so _read_eof
                # sees all 8 bytes.
                buf = self.fileobj.read(-seek_length)
                self._gzip_tail = self.decompress.unused_data + buf
            else:
                self._gzip_tail = self.decompress.unused_data

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True
|
158 |
||
159 |
def _read_eof(self): |
|
160 |
"""tuned to reduce function calls and eliminate file seeking:
|
|
161 |
pass 1:
|
|
162 |
reduces lsprof count from 800 to 288
|
|
163 |
4168 in 296
|
|
164 |
avoid U32 call by using struct format L
|
|
165 |
4168 in 200
|
|
166 |
"""
|
|
167 |
# We've read to the end of the file, so we should have 8 bytes of
|
|
1759.2.2
by Jelmer Vernooij
Revert some of my spelling fixes and fix some typos after review by Aaron. |
168 |
# unused data in the decompressor. If we don't, there is a corrupt file.
|
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
169 |
# We use these 8 bytes to calculate the CRC and the recorded file size.
|
170 |
# We then check the that the computed CRC and size of the
|
|
171 |
# uncompressed data matches the stored values. Note that the size
|
|
172 |
# stored is the true file size mod 2**32.
|
|
1666.1.11
by Robert Collins
Really fix short-read support in tuned_gzip. The python zlib module behaved differently than thought. |
173 |
assert len(self._gzip_tail) == 8, "gzip trailer is incorrect length." |
174 |
crc32, isize = struct.unpack("<LL", self._gzip_tail) |
|
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
175 |
# note that isize is unsigned - it can exceed 2GB
|
176 |
if crc32 != U32(self.crc): |
|
1666.1.2
by Robert Collins
Fix race condition between end of stream and end of file with tuned_gzip. |
177 |
raise IOError, "CRC check failed %d %d" % (crc32, U32(self.crc)) |
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
178 |
elif isize != LOWU32(self.size): |
179 |
raise IOError, "Incorrect length of data produced" |
|
180 |
||
181 |
def _read_gzip_header(self, bytes=None): |
|
182 |
"""Supply bytes if the minimum header size is already read.
|
|
183 |
|
|
184 |
:param bytes: 10 bytes of header data.
|
|
185 |
"""
|
|
186 |
"""starting cost: 300 in 3998
|
|
187 |
15998 reads from 3998 calls
|
|
188 |
final cost 168
|
|
189 |
"""
|
|
190 |
if bytes is None: |
|
191 |
bytes = self.fileobj.read(10) |
|
192 |
magic = bytes[0:2] |
|
193 |
if magic != '\037\213': |
|
194 |
raise IOError, 'Not a gzipped file' |
|
195 |
method = ord(bytes[2:3]) |
|
196 |
if method != 8: |
|
197 |
raise IOError, 'Unknown compression method' |
|
198 |
flag = ord(bytes[3:4]) |
|
199 |
# modtime = self.fileobj.read(4) (bytes [4:8])
|
|
200 |
# extraflag = self.fileobj.read(1) (bytes[8:9])
|
|
201 |
# os = self.fileobj.read(1) (bytes[9:10])
|
|
202 |
# self.fileobj.read(6)
|
|
203 |
||
204 |
if flag & FEXTRA: |
|
205 |
# Read & discard the extra field, if present
|
|
206 |
xlen = ord(self.fileobj.read(1)) |
|
207 |
xlen = xlen + 256*ord(self.fileobj.read(1)) |
|
208 |
self.fileobj.read(xlen) |
|
209 |
if flag & FNAME: |
|
210 |
# Read and discard a null-terminated string containing the filename
|
|
211 |
while True: |
|
212 |
s = self.fileobj.read(1) |
|
213 |
if not s or s=='\000': |
|
214 |
break
|
|
215 |
if flag & FCOMMENT: |
|
216 |
# Read and discard a null-terminated string containing a comment
|
|
217 |
while True: |
|
218 |
s = self.fileobj.read(1) |
|
219 |
if not s or s=='\000': |
|
220 |
break
|
|
221 |
if flag & FHCRC: |
|
222 |
self.fileobj.read(2) # Read & discard the 16-bit header CRC |
|
223 |
||
224 |
def readline(self, size=-1): |
|
225 |
"""Tuned to remove buffer length calls in _unread and...
|
|
226 |
|
|
227 |
also removes multiple len(c) calls, inlines _unread,
|
|
228 |
total savings - lsprof 5800 to 5300
|
|
229 |
phase 2:
|
|
230 |
4168 calls in 2233
|
|
231 |
8176 calls to read() in 1684
|
|
232 |
changing the min chunk size to 200 halved all the cache misses
|
|
233 |
leading to a drop to:
|
|
234 |
4168 calls in 1977
|
|
235 |
4168 call to read() in 1646
|
|
236 |
- i.e. just reduced the function call overhead. May be worth
|
|
237 |
keeping.
|
|
238 |
"""
|
|
239 |
if size < 0: size = sys.maxint |
|
240 |
bufs = [] |
|
241 |
readsize = min(200, size) # Read from the file in small chunks |
|
242 |
while True: |
|
243 |
if size == 0: |
|
244 |
return "".join(bufs) # Return resulting line |
|
245 |
||
246 |
# c is the chunk
|
|
247 |
c = self.read(readsize) |
|
248 |
# number of bytes read
|
|
249 |
len_c = len(c) |
|
250 |
i = c.find('\n') |
|
251 |
if size is not None: |
|
252 |
# We set i=size to break out of the loop under two
|
|
253 |
# conditions: 1) there's no newline, and the chunk is
|
|
254 |
# larger than size, or 2) there is a newline, but the
|
|
255 |
# resulting line would be longer than 'size'.
|
|
256 |
if i==-1 and len_c > size: i=size-1 |
|
257 |
elif size <= i: i = size -1 |
|
258 |
||
259 |
if i >= 0 or c == '': |
|
260 |
# if i>= 0 we have a newline or have triggered the above
|
|
261 |
# if size is not None condition.
|
|
262 |
# if c == '' its EOF.
|
|
263 |
bufs.append(c[:i+1]) # Add portion of last chunk |
|
264 |
# -- inlined self._unread --
|
|
265 |
## self._unread(c[i+1:], len_c - i) # Push back rest of chunk
|
|
266 |
self.extrabuf = c[i+1:] + self.extrabuf |
|
267 |
self.extrasize = len_c - i + self.extrasize |
|
268 |
self.offset -= len_c - i |
|
269 |
# -- end inlined self._unread --
|
|
270 |
return ''.join(bufs) # Return resulting line |
|
271 |
||
272 |
# Append chunk to list, decrease 'size',
|
|
273 |
bufs.append(c) |
|
274 |
size = size - len_c |
|
275 |
readsize = min(size, readsize * 2) |
|
276 |
||
277 |
def readlines(self, sizehint=0): |
|
278 |
# optimise to avoid all the buffer manipulation
|
|
279 |
# lsprof changed from:
|
|
280 |
# 4168 calls in 5472 with 32000 calls to readline()
|
|
281 |
# to :
|
|
282 |
# 4168 calls in 417.
|
|
283 |
# Negative numbers result in reading all the lines
|
|
1908.4.15
by John Arbash Meinel
comment on tuned_gzip.readlines() functionality. |
284 |
|
285 |
# python's gzip routine uses sizehint. This is a more efficient way
|
|
286 |
# than python uses to honor it. But it is even more efficient to
|
|
287 |
# just read the entire thing and use cStringIO to split into lines.
|
|
288 |
# if sizehint <= 0:
|
|
289 |
# sizehint = -1
|
|
290 |
# content = self.read(sizehint)
|
|
291 |
# return bzrlib.osutils.split_lines(content)
|
|
1908.4.12
by John Arbash Meinel
Minor change to tuned_gzip. |
292 |
content = StringIO(self.read(-1)) |
1908.4.5
by John Arbash Meinel
Some small tweaks to knit and tuned_gzip to shave off another couple seconds |
293 |
return content.readlines() |
1641.1.1
by Robert Collins
* Various microoptimisations to knit and gzip - reducing function call |
294 |
|
295 |
def _unread(self, buf, len_buf=None): |
|
296 |
"""tuned to remove unneeded len calls.
|
|
297 |
|
|
298 |
because this is such an inner routine in readline, and readline is
|
|
299 |
in many inner loops, this has been inlined into readline().
|
|
300 |
||
301 |
The len_buf parameter combined with the reduction in len calls dropped
|
|
302 |
the lsprof ms count for this routine on my test data from 800 to 200 -
|
|
303 |
a 75% saving.
|
|
304 |
"""
|
|
305 |
if len_buf is None: |
|
306 |
len_buf = len(buf) |
|
307 |
self.extrabuf = buf + self.extrabuf |
|
308 |
self.extrasize = len_buf + self.extrasize |
|
309 |
self.offset -= len_buf |
|
310 |
||
311 |
def write(self, data): |
|
312 |
if self.mode != gzip.WRITE: |
|
313 |
import errno |
|
314 |
raise IOError(errno.EBADF, "write() on read-only GzipFile object") |
|
315 |
||
316 |
if self.fileobj is None: |
|
317 |
raise ValueError, "write() on closed GzipFile object" |
|
318 |
data_len = len(data) |
|
319 |
if data_len > 0: |
|
320 |
self.size = self.size + data_len |
|
321 |
self.crc = zlib.crc32(data, self.crc) |
|
322 |
self.fileobj.write( self.compress.compress(data) ) |
|
323 |
self.offset += data_len |
|
324 |
||
325 |
def writelines(self, lines): |
|
326 |
# profiling indicated a significant overhead
|
|
327 |
# calling write for each line.
|
|
328 |
# this batch call is a lot faster :).
|
|
329 |
# (4 seconds to 1 seconds for the sample upgrades I was testing).
|
|
330 |
self.write(''.join(lines)) |
|
331 |
||
332 |