    Yes, it's only 1.6 seconds, but they add up.
    """

    def __init__(self, *args, **kwargs):
        symbol_versioning.warn(
            symbol_versioning.deprecated_in((2, 3, 0))
            % 'bzrlib.tuned_gzip.GzipFile',
            DeprecationWarning, stacklevel=2)
        gzip.GzipFile.__init__(self, *args, **kwargs)

    if sys.version_info >= (2, 7, 4):
        def _add_read_data(self, data):
            # temp var for len(data) and switch to +='s.
            len_data = len(data)
            self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
            offset = self.offset - self.extrastart
            self.extrabuf = self.extrabuf[offset:] + data
            self.extrasize = self.extrasize + len_data
            self.extrastart = self.offset
            self.size = self.size + len_data
    else:
        def _add_read_data(self, data):
            # temp var for len(data) and switch to +='s.
            len_data = len(data)
            self.crc = zlib.crc32(data, self.crc)
            self.extrabuf += data
            self.extrasize += len_data
            self.size += len_data
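
    # Added note (not in the original source): the & 0xffffffffL mask above
    # matters because Python 2's zlib.crc32 returns a *signed* 32-bit int.
    # A minimal sketch of the pattern, using only stdlib zlib:
    #
    #     import zlib
    #     crc = zlib.crc32('')
    #     for chunk in ['some', 'data']:
    #         # the mask keeps the running crc in the unsigned range that
    #         # Python >= 2.7.4's gzip internals expect
    #         crc = zlib.crc32(chunk, crc) & 0xffffffffL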

    def _write_gzip_header(self):
        """A tuned version of gzip._write_gzip_header

        We have some extra constraints that plain gzip does not:
        we want to write the whole header in one fileobj.write() call,
        we never have a filename, and we don't care about the time.
        """
        self.fileobj.write(
            '\037\213'  # magic header
            '\010'      # compression method
            '\x00'      # flags: no FNAME, since we never have a filename
            '\0\0\0\0'  # packed mtime: we don't care about the time
            '\002'      # extra flags
            '\377'      # OS: unknown
            ''          # self.fileobj.write(fname + '\000')
            )
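
    # Hedged usage sketch (added; the StringIO buffer and 'payload' literal
    # are illustrative, not from bzrlib): the ten bytes written above form a
    # complete fixed gzip header, so the output stays readable by the stock
    # gzip module:
    #
    #     from cStringIO import StringIO
    #     import gzip as _stock_gzip
    #     sio = StringIO()
    #     f = GzipFile(mode='wb', fileobj=sio)  # emits the header above
    #     f.write('payload')
    #     f.close()                             # appends crc32/size trailer
    #     restored = _stock_gzip.GzipFile(
    #         fileobj=StringIO(sio.getvalue())).read()
    #     assert restored == 'payload'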

    if sys.version_info < (2, 7, 4):
        def _read(self, size=1024):
            # various optimisations:
            # reduces lsprof count from 2500 to
            # 8337 calls in 1272, 365 internal
            if self.fileobj is None:
                raise EOFError, "Reached EOF"

            if self._new_member:
                # If the _new_member flag is set, we have to
                # jump to the next member, if there is one.
                #
                # First, check if we're at the end of the file;
                # if so, it's time to stop; no more members to read.
                next_header_bytes = self.fileobj.read(10)
                if next_header_bytes == '':
                    raise EOFError, "Reached EOF"

                self._init_read()
                self._read_gzip_header(next_header_bytes)
                self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
                self._new_member = False

            # Read a chunk of data from the file
            buf = self.fileobj.read(size)

            # If the EOF has been reached, flush the decompression object
            # and mark this object as finished.
            if buf == "":
                self._add_read_data(self.decompress.flush())
                if len(self.decompress.unused_data) < 8:
                    raise AssertionError("what does flush do?")
                self._gzip_tail = self.decompress.unused_data[0:8]
                self._read_eof()
                # tell the driving read() call we have stuffed all the data
                # in self.extrabuf
                raise EOFError, 'Reached EOF'
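
            # Added commentary (not in the original): decompressobj.flush()
            # returns any remaining uncompressed bytes and leaves whatever
            # followed the deflate stream - at least the 8-byte crc/size
            # trailer - in .unused_data, which is why fewer than 8 unused
            # bytes above indicates a truncated or corrupt member.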

            self._add_read_data(self.decompress.decompress(buf))

            if self.decompress.unused_data != "":
                # Ending case: we've come to the end of a member in the
                # file, so seek back to the start of the data for the next
                # member, which is the length of the decompress object's
                # unused data minus the first 8 bytes for the end crc and
                # size records.
                #
                # so seek back to the start of the unused data, finish up
                # this member, and read a new gzip header.
                # (The number of bytes to seek back is the length of the
                # unused data, minus 8 because those 8 bytes are part of
                # this member.)
                seek_length = len(self.decompress.unused_data) - 8
                if seek_length > 0:
                    # we read too much data
                    self.fileobj.seek(-seek_length, 1)
                    self._gzip_tail = self.decompress.unused_data[0:8]
                elif seek_length < 0:
                    # we haven't read enough to check the checksum.
                    if not (-8 < seek_length):
                        raise AssertionError("too great a seek")
                    buf = self.fileobj.read(-seek_length)
                    self._gzip_tail = self.decompress.unused_data + buf
                else:
                    self._gzip_tail = self.decompress.unused_data

                # Check the CRC and file size, and set the flag so we read
                # a new member on the next call
                self._read_eof()
                self._new_member = True
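
            # Worked example (added; the byte counts are illustrative): with
            # 13 bytes of unused_data, the first 8 are this member's crc/size
            # trailer and the last 5 belong to the next member, so
            # seek_length = 13 - 8 = 5 and we seek back 5 bytes. With only
            # 3 unused bytes, seek_length = 3 - 8 = -5, so we read 5 more
            # bytes to complete the 8-byte tail before checking it.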

        def _read_eof(self):
            """Tuned to reduce function calls and eliminate file seeking:
            reduces lsprof count from 800 to 288;
            avoid U32 call by using struct format L.
            """
            # We've read to the end of the file, so we should have 8 bytes
            # of unused data in the decompressor. If we don't, there is a
            # corrupt file. We use these 8 bytes to calculate the CRC and
            # the recorded file size. We then check that the computed CRC
            # and size of the uncompressed data match the stored values.
            # Note that the size stored is the true file size mod 2**32.
            if not (len(self._gzip_tail) == 8):
                raise AssertionError("gzip trailer is incorrect length.")
            crc32, isize = struct.unpack("<LL", self._gzip_tail)
            # note that isize is unsigned - it can exceed 2GB
            if crc32 != U32(self.crc):
                raise IOError, "CRC check failed %d %d" % (crc32, U32(self.crc))
            elif isize != LOWU32(self.size):
                raise IOError, "Incorrect length of data produced"

    def _read_gzip_header(self, bytes=None):
        """Supply bytes if the minimum header size is already read.