~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to groupcompress.py

  • Committer: John Arbash Meinel
  • Date: 2009-03-04 18:31:31 UTC
  • mto: (0.17.34 trunk)
  • mto: This revision was merged to the branch mainline in revision 4280.
  • Revision ID: john@arbash-meinel.com-20090304183131-p433dz5coqrmv8pw
Now using a zlib compressed format.
We encode the length of the compressed and uncompressed content,
and then compress the actual content.
We still need to do some testing with real data to see whether this format
is efficient, or whether another structure would be better.

Show diffs side-by-side

added added

removed removed

Lines of Context:
152
152
    This tracks the meta info (start of text, length, type, etc.)
153
153
    """
154
154
 
155
 
    # Group Compress Block v1 Plain
156
 
    GCB_HEADER = 'gcb1p\n'
 
155
    # Group Compress Block v1 Zlib
 
156
    GCB_HEADER = 'gcb1z\n'
157
157
 
158
158
    def __init__(self):
159
159
        # map by key? or just order in file?
163
163
        """Parse the meta-info from the stream."""
164
164
 
165
165
    @classmethod
166
 
    def from_zlib_bytes(cls, bytes):
167
 
        """Get the info about this block from the compressed bytes.
168
 
 
169
 
        :return: A new GroupCompressBlock
170
 
        """
171
 
        return cls()
172
 
 
173
 
    @classmethod
174
166
    def from_bytes(cls, bytes):
175
167
        out = cls()
176
168
        if bytes[:6] != cls.GCB_HEADER:
177
169
            raise gc_errors.InvalidGroupCompressBlock(
178
170
                'bytes did not start with %r' % (cls.GCB_HEADER,))
179
171
        pos = bytes.index('\n', 6)
180
 
        total_header_length = int(bytes[6:pos])
181
 
        if total_header_length == 0:
 
172
        z_header_length = int(bytes[6:pos])
 
173
        pos += 1
 
174
        pos2 = bytes.index('\n', pos)
 
175
        header_length = int(bytes[pos:pos2])
 
176
        if z_header_length == 0:
 
177
            assert header_length == 0
182
178
            return out
183
 
        pos += 1
184
 
        header_bytes = bytes[pos:total_header_length+pos]
 
179
        pos = pos2 + 1
 
180
        pos2 = pos + z_header_length
 
181
        z_header_bytes = bytes[pos:pos2]
 
182
        assert len(z_header_bytes) == z_header_length
 
183
        header_bytes = zlib.decompress(z_header_bytes)
 
184
        assert len(header_bytes) == header_length
 
185
        del z_header_bytes
185
186
        lines = header_bytes.split('\n')
 
187
        del header_bytes
186
188
        info_dict = {}
187
189
        for line in lines:
188
190
            if not line: #End of record
240
242
                          entry.length,
241
243
                          )
242
244
            chunks.append(chunk)
243
 
        info_len = sum(map(len, chunks))
244
 
        chunks = [self.GCB_HEADER, '%d\n' % (info_len,)] + chunks
 
245
        bytes = ''.join(chunks)
 
246
        info_len = len(bytes)
 
247
        z_bytes = zlib.compress(bytes)
 
248
        del bytes
 
249
        z_len = len(z_bytes)
 
250
        chunks = [self.GCB_HEADER, '%d\n' % (z_len,), '%d\n' % (info_len,),
 
251
                  z_bytes]
245
252
        return ''.join(chunks)
246
253
 
247
254