~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to groupcompress.py

  • Committer: John Arbash Meinel
  • Date: 2009-03-04 18:31:31 UTC
  • mto: (0.17.34 trunk)
  • mto: This revision was merged to the branch mainline in revision 4280.
  • Revision ID: john@arbash-meinel.com-20090304183131-p433dz5coqrmv8pw
Now using a zlib compressed format.
We encode the length of the compressed and uncompressed content,
and then compress the actual content.
We still need to do some testing with real data to see whether this format
is efficient, or whether another structure would be better.

Show diffs side-by-side

added added

removed removed

Lines of Context:
152
152
    This tracks the meta info (start of text, length, type, etc.)
153
153
    """
154
154
 
155
 
    # Group Compress Block v1 Plain
156
 
    GCB_HEADER = 'gcb1p\n'
 
155
    # Group Compress Block v1 Zlib
 
156
    GCB_HEADER = 'gcb1z\n'
157
157
 
158
158
    def __init__(self):
159
159
        # map by key? or just order in file?
163
163
        """Parse the meta-info from the stream."""
164
164
 
165
165
    @classmethod
166
 
    def from_zlib_bytes(cls, bytes):
167
 
        """Get the info about this block from the compressed bytes.
168
 
 
169
 
        :return: A new GroupCompressBlock
170
 
        """
171
 
        return cls()
172
 
 
173
 
    @classmethod
174
166
    def from_bytes(cls, bytes):
175
167
        out = cls()
176
168
        if bytes[:6] != cls.GCB_HEADER:
177
169
            raise gc_errors.InvalidGroupCompressBlock(
178
170
                'bytes did not start with %r' % (cls.GCB_HEADER,))
179
171
        pos = bytes.index('\n', 6)
180
 
        total_header_length = int(bytes[6:pos])
181
 
        if total_header_length == 0:
 
172
        z_header_length = int(bytes[6:pos])
 
173
        pos += 1
 
174
        pos2 = bytes.index('\n', pos)
 
175
        header_length = int(bytes[pos:pos2])
 
176
        if z_header_length == 0:
 
177
            assert header_length == 0
182
178
            return out
183
 
        pos += 1
184
 
        header_bytes = bytes[pos:total_header_length+pos]
 
179
        pos = pos2 + 1
 
180
        pos2 = pos + z_header_length
 
181
        z_header_bytes = bytes[pos:pos2]
 
182
        assert len(z_header_bytes) == z_header_length
 
183
        header_bytes = zlib.decompress(z_header_bytes)
 
184
        assert len(header_bytes) == header_length
 
185
        del z_header_bytes
185
186
        lines = header_bytes.split('\n')
 
187
        del header_bytes
186
188
        info_dict = {}
187
189
        for line in lines:
188
190
            if not line: #End of record
240
242
                          entry.length,
241
243
                          )
242
244
            chunks.append(chunk)
243
 
        info_len = sum(map(len, chunks))
244
 
        chunks = [self.GCB_HEADER, '%d\n' % (info_len,)] + chunks
 
245
        bytes = ''.join(chunks)
 
246
        info_len = len(bytes)
 
247
        z_bytes = zlib.compress(bytes)
 
248
        del bytes
 
249
        z_len = len(z_bytes)
 
250
        chunks = [self.GCB_HEADER, '%d\n' % (z_len,), '%d\n' % (info_len,),
 
251
                  z_bytes]
245
252
        return ''.join(chunks)
246
253
 
247
254