~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tuned_gzip.py

Committer: Patch Queue Manager
Date: 2013-07-13 19:33:29 UTC
mfrom: (6579.1.4 1116079-gzip-compat)
Revision ID: pqm@pqm.ubuntu.com-20130713193329-toxd5u7s4tk19fb0

(vila) Fix test failure for tuned_gzip. (Vincent Ladeuil)

files modified:
bzrlib/tests/test_tuned_gzip.py

bzrlib/tuned_gzip.py

doc/en/release-notes/bzr-2.6.txt

Show diffs side-by-side

added added

removed removed

bzrlib/tuned_gzip.py

127

DeprecationWarning, stacklevel=2)

128

gzip.GzipFile.__init__(self, *args, **kwargs)

129

130

def _add_read_data(self, data):

131

# 4169 calls in 183

132

# temp var for len(data) and switch to +='s.

133

# 4169 in 139

134

len_data = len(data)

135

self.crc = zlib.crc32(data, self.crc)

136

self.extrabuf += data

137

self.extrasize += len_data

138

self.size += len_data

130

if sys.version_info >= (2, 7, 4):

131

def _add_read_data(self, data):

132

# 4169 calls in 183

133

# temp var for len(data) and switch to +='s.

134

# 4169 in 139

135

len_data = len(data)

136

self.crc = zlib.crc32(data, self.crc) & 0xffffffffL

137

offset = self.offset - self.extrastart

138

self.extrabuf = self.extrabuf[offset:] + data

139

self.extrasize = self.extrasize + len_data

140

self.extrastart = self.offset

141

self.size = self.size + len_data

142

else:

143

def _add_read_data(self, data):

144

# 4169 calls in 183

145

# temp var for len(data) and switch to +='s.

146

# 4169 in 139

147

len_data = len(data)

148

self.crc = zlib.crc32(data, self.crc)

149

self.extrabuf += data

150

self.extrasize += len_data

151

self.size += len_data

139

152

140

153

def _write_gzip_header(self):

141

154

"""A tuned version of gzip._write_gzip_header

161

174

'' # self.fileobj.write(fname + '\000')

162

175

)

163

176

164

def _read(self, size=1024):

165

# various optimisations:

166

# reduces lsprof count from 2500 to

167

# 8337 calls in 1272, 365 internal

168

if self.fileobj is None:

169

raise EOFError, "Reached EOF"

170

171

if self._new_member:

172

# If the _new_member flag is set, we have to

173

# jump to the next member, if there is one.

174

175

# First, check if we're at the end of the file;

176

# if so, it's time to stop; no more members to read.

177

next_header_bytes = self.fileobj.read(10)

178

if next_header_bytes == '':

177

if sys.version_info < (2, 7, 4):

178

def _read(self, size=1024):

179

# various optimisations:

180

# reduces lsprof count from 2500 to

181

# 8337 calls in 1272, 365 internal

182

if self.fileobj is None:

179

183

raise EOFError, "Reached EOF"

180

184

181

self._init_read()

182

self._read_gzip_header(next_header_bytes)

183

self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)

184

self._new_member = False

185

186

# Read a chunk of data from the file

187

buf = self.fileobj.read(size)

188

189

# If the EOF has been reached, flush the decompression object

190

# and mark this object as finished.

191

192

if buf == "":

193

self._add_read_data(self.decompress.flush())

194

if len(self.decompress.unused_data) < 8:

195

raise AssertionError("what does flush do?")

196

self._gzip_tail = self.decompress.unused_data[0:8]

197

self._read_eof()

198

# tell the driving read() call we have stuffed all the data

199

# in self.extrabuf

200

raise EOFError, 'Reached EOF'

201

202

self._add_read_data(self.decompress.decompress(buf))

203

204

if self.decompress.unused_data != "":

205

# Ending case: we've come to the end of a member in the file,

206

# so seek back to the start of the data for the next member which

207

# is the length of the decompress object's unused data - the first

208

# 8 bytes for the end crc and size records.

209

210

# so seek back to the start of the unused data, finish up

211

# this member, and read a new gzip header.

212

# (The number of bytes to seek back is the length of the unused

213

# data, minus 8 because those 8 bytes are part of this member.)

214

seek_length = len (self.decompress.unused_data) - 8

215

if seek_length > 0:

216

# we read too much data

217

self.fileobj.seek(-seek_length, 1)

185

if self._new_member:

186

# If the _new_member flag is set, we have to

187

# jump to the next member, if there is one.

188

189

# First, check if we're at the end of the file;

190

# if so, it's time to stop; no more members to read.

191

next_header_bytes = self.fileobj.read(10)

192

if next_header_bytes == '':

193

raise EOFError, "Reached EOF"

194

195

self._init_read()

196

self._read_gzip_header(next_header_bytes)

197

self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)

198

self._new_member = False

199

200

# Read a chunk of data from the file

201

buf = self.fileobj.read(size)

202

203

# If the EOF has been reached, flush the decompression object

204

# and mark this object as finished.

205

206

if buf == "":

207

self._add_read_data(self.decompress.flush())

208

if len(self.decompress.unused_data) < 8:

209

raise AssertionError("what does flush do?")

218

210

self._gzip_tail = self.decompress.unused_data[0:8]

219

elif seek_length < 0:

220

# we haven't read enough to check the checksum.

221

if not (-8 < seek_length):

222

raise AssertionError("too great a seek")

223

buf = self.fileobj.read(-seek_length)

224

self._gzip_tail = self.decompress.unused_data + buf

225

else:

226

self._gzip_tail = self.decompress.unused_data

227

228

# Check the CRC and file size, and set the flag so we read

229

# a new member on the next call

230

self._read_eof()

231

self._new_member = True

232

233

def _read_eof(self):

234

"""tuned to reduce function calls and eliminate file seeking:

235

pass 1:

236

reduces lsprof count from 800 to 288

237

4168 in 296

238

avoid U32 call by using struct format L

239

4168 in 200

240

"""

241

# We've read to the end of the file, so we should have 8 bytes of

242

# unused data in the decompressor. If we don't, there is a corrupt file.

243

# We use these 8 bytes to calculate the CRC and the recorded file size.

244

# We then check that the computed CRC and size of the

245

# uncompressed data matches the stored values. Note that the size

246

# stored is the true file size mod 2**32.

247

if not (len(self._gzip_tail) == 8):

248

raise AssertionError("gzip trailer is incorrect length.")

249

crc32, isize = struct.unpack("<LL", self._gzip_tail)

250

# note that isize is unsigned - it can exceed 2GB

251

if crc32 != U32(self.crc):

252

raise IOError, "CRC check failed %d %d" % (crc32, U32(self.crc))

253

elif isize != LOWU32(self.size):

254

raise IOError, "Incorrect length of data produced"

211

self._read_eof()

212

# tell the driving read() call we have stuffed all the data

213

# in self.extrabuf

214

raise EOFError, 'Reached EOF'

215

216

self._add_read_data(self.decompress.decompress(buf))

217

218

if self.decompress.unused_data != "":

219

# Ending case: we've come to the end of a member in the file,

220

# so seek back to the start of the data for the next member

221

# which is the length of the decompress object's unused data -

222

# the first 8 bytes for the end crc and size records.

223

224

# so seek back to the start of the unused data, finish up

225

# this member, and read a new gzip header.

226

# (The number of bytes to seek back is the length of the unused

227

# data, minus 8 because those 8 bytes are part of this member.)

228

seek_length = len (self.decompress.unused_data) - 8

229

if seek_length > 0:

230

# we read too much data

231

self.fileobj.seek(-seek_length, 1)

232

self._gzip_tail = self.decompress.unused_data[0:8]

233

elif seek_length < 0:

234

# we haven't read enough to check the checksum.

235

if not (-8 < seek_length):

236

raise AssertionError("too great a seek")

237

buf = self.fileobj.read(-seek_length)

238

self._gzip_tail = self.decompress.unused_data + buf

239

else:

240

self._gzip_tail = self.decompress.unused_data

241

242

# Check the CRC and file size, and set the flag so we read

243

# a new member on the next call

244

self._read_eof()

245

self._new_member = True

246

247

def _read_eof(self):

248

"""tuned to reduce function calls and eliminate file seeking:

249

pass 1:

250

reduces lsprof count from 800 to 288

251

4168 in 296

252

avoid U32 call by using struct format L

253

4168 in 200

254

"""

255

# We've read to the end of the file, so we should have 8 bytes of

256

# unused data in the decompressor. If we don't, there is a corrupt

257

# file. We use these 8 bytes to calculate the CRC and the recorded

258

# file size. We then check that the computed CRC and size of

259

# the uncompressed data matches the stored values. Note that the

260

# size stored is the true file size mod 2**32.

261

if not (len(self._gzip_tail) == 8):

262

raise AssertionError("gzip trailer is incorrect length.")

263

crc32, isize = struct.unpack("<LL", self._gzip_tail)

264

# note that isize is unsigned - it can exceed 2GB

265

if crc32 != U32(self.crc):

266

raise IOError, "CRC check failed %d %d" % (crc32, U32(self.crc))

267

elif isize != LOWU32(self.size):

268

raise IOError, "Incorrect length of data produced"

255

269

256

270

def _read_gzip_header(self, bytes=None):

257

271

"""Supply bytes if the minimum header size is already read.

Older »