        self.fileobj.write('')   # self.fileobj.write(fname + '\000')
    if sys.version_info < (2, 7, 4):
        def _read(self, size=1024):
            # various optimisations:
            # reduces lsprof count from 2500 to
            # 8337 calls in 1272, 365 internal
            if self.fileobj is None:
                raise EOFError("Reached EOF")

            if self._new_member:
                # If the _new_member flag is set, we have to
                # jump to the next member, if there is one.
                #
                # First, check if we're at the end of the file;
                # if so, it's time to stop; no more members to read.
                next_header_bytes = self.fileobj.read(10)
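                # (A gzip member header is at least 10 bytes: 2-byte magic,
                # 1-byte compression method, 1-byte flags, 4-byte mtime,
                # 1-byte extra flags and a 1-byte OS field; see RFC 1952.)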
                if next_header_bytes == '':
                    raise EOFError("Reached EOF")

                self._init_read()
                self._read_gzip_header(next_header_bytes)
                self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
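                # The negative wbits value tells zlib to expect a raw
                # DEFLATE stream with no zlib header or checksum, since the
                # gzip header has already been parsed separately above.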
                self._new_member = False

            # Read a chunk of data from the file
            buf = self.fileobj.read(size)

            # If the EOF has been reached, flush the decompression object
            # and mark this object as finished.
            if buf == "":
                self._add_read_data(self.decompress.flush())
                if len(self.decompress.unused_data) < 8:
                    raise AssertionError("what does flush do?")
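                # flush() returns any decompressed data still buffered in the
                # decompressor, while unused_data holds the bytes that follow
                # the compressed stream; at EOF these must include the 8-byte
                # gzip trailer.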
                self._gzip_tail = self.decompress.unused_data[0:8]
                self._read_eof()
                # tell the driving read() call we have stuffed all the data
                # in self.extrabuf
                raise EOFError('Reached EOF')

            self._add_read_data(self.decompress.decompress(buf))

            if self.decompress.unused_data != "":
                # Ending case: we've come to the end of a member in the file,
                # so seek back to the start of the data for the next member
                # which is the length of the decompress object's unused data -
                # the first 8 bytes for the end crc and size records.
                #
                # so seek back to the start of the unused data, finish up
                # this member, and read a new gzip header.
                # (The number of bytes to seek back is the length of the
                # unused data, minus 8 because those 8 bytes are part of this
                # member.)
                seek_length = len(self.decompress.unused_data) - 8
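                # For example, with 20 bytes of unused data the last 8 are
                # this member's trailer and the other 12 were read past the
                # member, so we seek back 12 bytes; with only 5 bytes of
                # unused data, seek_length is -3 and 3 more bytes must be
                # read to complete the trailer.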
                if seek_length > 0:
                    # we read too much data
                    self.fileobj.seek(-seek_length, 1)
                    self._gzip_tail = self.decompress.unused_data[0:8]
                elif seek_length < 0:
                    # we haven't read enough to check the checksum.
                    if not (-8 < seek_length):
                        raise AssertionError("too great a seek")
                    buf = self.fileobj.read(-seek_length)
                    self._gzip_tail = self.decompress.unused_data + buf
                else:
                    self._gzip_tail = self.decompress.unused_data

                # Check the CRC and file size, and set the flag so we read
                # a new member on the next call
                self._read_eof()
                self._new_member = True

        def _read_eof(self):
            """tuned to reduce function calls and eliminate file seeking:
            reduces lsprof count from 800 to 288
            avoid U32 call by using struct format L
            """
            # We've read to the end of the file, so we should have 8 bytes of
            # unused data in the decompressor. If we don't, there is a corrupt
            # file. We use these 8 bytes to calculate the CRC and the recorded
            # file size. We then check that the computed CRC and size of the
            # uncompressed data match the stored values. Note that the size
            # stored is the true file size mod 2**32.
            if not (len(self._gzip_tail) == 8):
                raise AssertionError("gzip trailer is incorrect length.")
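            # The trailer is two little-endian 32-bit fields: the CRC32 of
            # the uncompressed data followed by ISIZE, its length mod 2**32
            # (RFC 1952), hence the "<LL" struct format.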
            crc32, isize = struct.unpack("<LL", self._gzip_tail)
            # note that isize is unsigned - it can exceed 2GB
            if crc32 != U32(self.crc):
                raise IOError("CRC check failed %d %d" % (crc32, U32(self.crc)))
            elif isize != LOWU32(self.size):
                raise IOError("Incorrect length of data produced")

    def _read_gzip_header(self, bytes=None):
        """Supply bytes if the minimum header size is already read.