# index2, a bzr plugin providing experimental index types.
# Copyright (C) 2008 Canonical Limited.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#

"""ChunkWriter: write compressed data out with a fixed upper bound."""

import zlib
from zlib import Z_FINISH, Z_SYNC_FLUSH


class ChunkWriter(object):
    """ChunkWriter allows writing of compressed data with a fixed size.

    If less data is supplied than fills a chunk, the chunk is padded with
    NULL bytes. If more data is supplied, then the writer packs as much
    in as it can, but never splits any item it was given.

    The algorithm for packing is open to improvement! Currently it is:
     - write the bytes given
     - if the total seen bytes so far exceeds the chunk size, flush.

    :cvar _max_repack: How many times the chunk may be recompressed from
        scratch (to obtain a tighter packing) before further unreserved
        writes that would need another repack are refused outright.
    """

    # NOTE(review): the assignment of _max_repack was lost from the garbled
    # source this class was reconstructed from; 2 is an assumed value -
    # confirm against the canonical file.
    _max_repack = 2

    def __init__(self, chunk_size, reserved=0):
        """Create a ChunkWriter to write chunk_size chunks.

        :param chunk_size: The total byte count to emit at the end of the
            chunk.
        :param reserved: How many bytes to allow for reserved data. reserved
            data space can only be written to via the write_reserved method.
        """
        self.chunk_size = chunk_size
        self.compressor = zlib.compressobj()
        # Every item accepted so far, uncompressed. Kept so that the chunk
        # can be recompressed from scratch by _recompress_all_bytes_in.
        self.bytes_in = []
        # The compressed output produced so far.
        self.bytes_list = []
        self.compressed = None
        # Total uncompressed length of the items accepted so far.
        self.seen_bytes = 0
        # How many times the chunk has been recompressed from scratch.
        self.num_repack = 0
        # The item that was refused after a failed repack, if any.
        self.unused_bytes = None
        self.reserved_size = reserved

    # NOTE(review): the "def" line of this method was lost from the garbled
    # source; the name "finish" is reconstructed - confirm against callers.
    def finish(self):
        """Finish the chunk.

        This returns the final compressed chunk, and either None, or the
        bytes that did not fit in the chunk.

        :return: A tuple (bytes_list, unused_bytes, nulls_needed).
            bytes_list is the list of byte strings making up the chunk (its
            total length is exactly chunk_size), unused_bytes is the item
            that was refused (or None) and nulls_needed is the number of
            padding NULL bytes that were appended.
        :raises AssertionError: If the compressed data is larger than
            chunk_size.
        """
        self.bytes_in = None # Free the data cached so far, we don't need it
        self.bytes_list.append(self.compressor.flush(Z_FINISH))
        total_len = sum(len(b) for b in self.bytes_list)
        if total_len > self.chunk_size:
            raise AssertionError('Somehow we ended up with too much'
                                 ' compressed data, %d > %d'
                                 % (total_len, self.chunk_size))
        # total_len <= chunk_size is guaranteed here, so a plain subtraction
        # is the padding needed. (A modulo at this point would turn an
        # exactly-full chunk into chunk_size bytes of padding, doubling the
        # size of the emitted chunk.)
        nulls_needed = self.chunk_size - total_len
        if nulls_needed:
            # A bytes literal, so the padding has the same type as the
            # compressed data it is stored next to.
            self.bytes_list.append(b"\x00" * nulls_needed)
        return self.bytes_list, self.unused_bytes, nulls_needed

    def _recompress_all_bytes_in(self, extra_bytes=None):
        """Recompress everything accepted so far with a fresh compressor.

        :param extra_bytes: Optional additional item to compress after the
            accepted ones. When it is given the compressor is flushed with
            Z_SYNC_FLUSH afterwards so that the length of the returned
            output reflects everything that was fed in.
        :return: A tuple (bytes_out, compressor): the list of compressed
            byte strings produced, and the compressor that produced them.
        """
        compressor = zlib.compressobj()
        bytes_out = []
        append = bytes_out.append
        for accepted_bytes in self.bytes_in:
            out = compressor.compress(accepted_bytes)
            if out:
                append(out)
        if extra_bytes:
            out = compressor.compress(extra_bytes)
            if out:
                append(out)
            out = compressor.flush(Z_SYNC_FLUSH)
            if out:
                append(out)
        return bytes_out, compressor

    def write(self, bytes):
        """Write some bytes to the chunk.

        If the bytes fit, False is returned. Otherwise True is returned
        and the bytes have not been added to the chunk.
        """
        return self._write(bytes, False)

    def write_reserved(self, bytes):
        """Write some bytes to the chunk bypassing the reserved check.

        If the bytes fit, False is returned. Otherwise True is returned
        and the bytes have not been added to the chunk.
        """
        return self._write(bytes, True)

    def _write(self, bytes, reserved):
        """Try to add one item to the chunk.

        :param bytes: The item to add.
        :param reserved: If True the reserved space may be used as well and
            the repack limit is not applied.
        :return: False if the item was added, True if it was refused.
        """
        if reserved:
            capacity = self.chunk_size
        else:
            capacity = self.chunk_size - self.reserved_size
        # Check quickly to see if this is likely to put us outside of our
        # budget:
        next_seen_size = self.seen_bytes + len(bytes)
        if (next_seen_size < 1.8 * capacity):
            # No need, we assume this will "just fit"
            out = self.compressor.compress(bytes)
            self.bytes_in.append(bytes)
            self.seen_bytes = next_seen_size
            if out:
                self.bytes_list.append(out)
            return False
        if not reserved and self.num_repack >= self._max_repack:
            # We have packed too many times already.
            return True
        # This may or may not fit, try to add it with Z_SYNC_FLUSH
        out = self.compressor.compress(bytes)
        if out:
            self.bytes_list.append(out)
        out = self.compressor.flush(Z_SYNC_FLUSH)
        if out:
            self.bytes_list.append(out)
        total_len = sum(len(b) for b in self.bytes_list)
        # Give us some extra room for a final Z_FINISH call.
        if total_len + 10 > capacity:
            # We are over budget, try to squeeze this in without any
            # Z_SYNC_FLUSH calls
            self.num_repack += 1
            bytes_out, compressor = self._recompress_all_bytes_in(bytes)
            this_len = sum(len(b) for b in bytes_out)
            if this_len + 10 > capacity:
                # No way we can add anymore, we need to re-pack because our
                # compressor is now out of sync
                bytes_out, compressor = self._recompress_all_bytes_in()
                self.compressor = compressor
                self.bytes_list = bytes_out
                self.unused_bytes = bytes
                return True
            # This fits when we pack it tighter, so use the new packing
            self.compressor = compressor
            self.bytes_in.append(bytes)
            self.bytes_list = bytes_out
            # Keep the size estimate in step with what was accepted, as the
            # other accepting branches do; otherwise a later small write
            # could take the unchecked fast path above.
            self.seen_bytes = next_seen_size
        else:
            # It fit, so mark it added
            self.bytes_in.append(bytes)
            self.seen_bytes = next_seen_size
        return False