'' # self.fileobj.write(fname + '\000')
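
    # NOTE: the standard library gzip internals this class relies on changed
    # in Python 2.7.4, hence the interpreter-version guard on the tuned
    # methods below.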
    if sys.version_info < (2, 7, 4):
        def _read(self, size=1024):
            # various optimisations:
            # reduces lsprof count from 2500 to
            # 8337 calls in 1272, 365 internal
            if self.fileobj is None:
                raise EOFError, "Reached EOF"

            if self._new_member:
                # If the _new_member flag is set, we have to
                # jump to the next member, if there is one.
                #
                # First, check if we're at the end of the file;
                # if so, it's time to stop; no more members to read.
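                # (A gzip member starts with a fixed 10-byte header: 2 magic
                # bytes, compression method, flags, a 4-byte mtime, extra
                # flags and an OS byte - see RFC 1952.)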
                next_header_bytes = self.fileobj.read(10)
                if next_header_bytes == '':
                    raise EOFError, "Reached EOF"

                self._init_read()
                self._read_gzip_header(next_header_bytes)
                self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
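                # (-zlib.MAX_WBITS asks zlib for a raw deflate stream with no
                # zlib header or checksum; the gzip framing is handled by this
                # class instead.)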
                self._new_member = False

            # Read a chunk of data from the file
            buf = self.fileobj.read(size)

            # If the EOF has been reached, flush the decompression object
            # and mark this object as finished.
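            # (Everything the decompressor has seen past the end of the
            # deflate stream - at least the 8-byte crc/size trailer - is left
            # in decompress.unused_data once it is flushed.)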
            if buf == "":
                self._add_read_data(self.decompress.flush())
                if len(self.decompress.unused_data) < 8:
                    raise AssertionError("what does flush do?")
                self._gzip_tail = self.decompress.unused_data[0:8]
                self._read_eof()
                # tell the driving read() call we have stuffed all the data
                # in self.extrabuf
                raise EOFError, 'Reached EOF'

            self._add_read_data(self.decompress.decompress(buf))

            if self.decompress.unused_data != "":
                # Ending case: we've come to the end of a member in the file,
                # so seek back to the start of the data for the next member,
                # which is the length of the decompress object's unused data
                # minus the first 8 bytes for the end crc and size records.
                #
                # so seek back to the start of the unused data, finish up
                # this member, and read a new gzip header.
                # (The number of bytes to seek back is the length of the
                # unused data, minus 8 because those 8 bytes are part of this
                # member.)
                seek_length = len(self.decompress.unused_data) - 8
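                # For example: with 20 bytes of unused data, 8 are this
                # member's trailer and 12 were read from the next member, so
                # we seek back 12; with only 5 bytes unused, seek_length is -3
                # and 3 more trailer bytes still have to be read.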
                if seek_length > 0:
                    # we read too much data
                    self.fileobj.seek(-seek_length, 1)
                    self._gzip_tail = self.decompress.unused_data[0:8]
                elif seek_length < 0:
                    # we haven't read enough to check the checksum.
                    if not (-8 < seek_length):
                        raise AssertionError("too great a seek")
                    buf = self.fileobj.read(-seek_length)
                    self._gzip_tail = self.decompress.unused_data + buf
                else:
                    self._gzip_tail = self.decompress.unused_data

                # Check the CRC and file size, and set the flag so we read
                # a new member on the next call
                self._read_eof()
                self._new_member = True

        def _read_eof(self):
            """tuned to reduce function calls and eliminate file seeking:
            reduces lsprof count from 800 to 288
            avoid U32 call by using struct format L
            """
            # We've read to the end of the file, so we should have 8 bytes of
            # unused data in the decompressor. If we don't, there is a corrupt
            # file. We use these 8 bytes to calculate the CRC and the recorded
            # file size. We then check that the computed CRC and size of the
            # uncompressed data match the stored values. Note that the size
            # stored is the true file size mod 2**32.
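            # (The trailer of a gzip member is CRC32 followed by ISIZE, both
            # 32-bit little-endian values - hence the "<LL" format below.)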
            if not (len(self._gzip_tail) == 8):
                raise AssertionError("gzip trailer is incorrect length.")
            crc32, isize = struct.unpack("<LL", self._gzip_tail)
            # note that isize is unsigned - it can exceed 2GB
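            # (U32 and LOWU32 reduce the running crc and size to their
            # unsigned 32-bit values so they can be compared with the stored
            # trailer fields.)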
            if crc32 != U32(self.crc):
                raise IOError, "CRC check failed %d %d" % (crc32, U32(self.crc))
            elif isize != LOWU32(self.size):
                raise IOError, "Incorrect length of data produced"

    def _read_gzip_header(self, bytes=None):
        """Supply bytes if the minimum header size is already read.