# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#

"""ChunkWriter: write compressed data out with a fixed upper bound."""

import zlib
from zlib import Z_FINISH, Z_SYNC_FLUSH


class ChunkWriter(object):
    """ChunkWriter allows writing of compressed data with a fixed size.

    If less data is supplied than fills a chunk, the chunk is padded with
    NULL bytes. If more data is supplied, then the writer packs as much
    in as it can, but never splits any item it was given.

    The algorithm for packing is open to improvement! Currently it is:
     - write the bytes given
     - if the total seen bytes so far exceeds the chunk size, flush.

    :cvar _max_repack: To fit the maximum number of entries into a node, we
        will sometimes start over and compress the whole list to get tighter
        packing. We get diminishing returns after a while, so this limits the
        number of times we will try.
        The default is to try to avoid recompressing entirely, but setting
        this to something like 20 will give maximum compression.

    :cvar _max_zsync: Another tunable knob. If _max_repack is set to 0, then
        you can limit the number of times we will try to pack more data into
        a node. This allows us to do a single compression pass, rather than
        trying until we overflow, and then recompressing again.
    """
    #    In testing, some values for bzr.dev::
    #        repack  time  MB   max   full
    #         1       7.5  4.6  1140  0
    #         2       8.4  4.2  1036  1          6.8
    #         3       9.8  4.1  1012  278
    #         4      10.8  4.1  728   945
    #        20      11.1  4.1  0     1012
    #        repack = 0
    #        zsync   time  MB    repack  max_z   time w/ add_node
    #         0       6.7  24.7  0       6270    5.0
    #         1       6.5  13.2  0       3342    4.3
    #         2       6.6   9.6  0       2414    4.9
    #         5       6.5   6.2  0       1549    4.8
    #         6       6.5   5.8  1       1435    4.8
    #         7       6.6   5.5  19      1337    4.8
    #         8       6.7   5.3  81      1220    4.4
    #        10       6.8   5.0  260     967     5.3
    #        11       6.8   4.9  366     839     5.3
    #        12       6.9   4.8  454     731     5.1
    #        15       7.2   4.7  704     450     5.8
    #        20       7.7   4.6  1133    7       5.8

    #    In testing, some values for mysql-unpacked::
    #                next_bytes estim
    #        repack  time  MB    hit_max full
    #         1      51.7  15.4  3913  0
    #         2      54.4  13.7  3467  0         35.4
    #        20      67.0  13.4  0     3380      46.7
    #        repack=0
    #        zsync                               time w/ add_node
    #         0      47.7 116.5  0       29782   29.5
    #         1      48.5  60.2  0       15356   27.8
    #         2      48.1  42.4  0       10822   27.8
    #         5      48.3  25.5  0       6491    26.8
    #         6      48.0  23.2  13      5896    27.3
    #         7      48.1  21.6  29      5451    27.5
    #         8      48.1  20.3  52      5108    27.1
    #        10      46.9  18.6  195     4526    29.4
    #        11      48.8  18.0  421     4143    29.2
    #        12      47.4  17.5  702     3738    28.0
    #        15      49.6  16.5  1223    2969    28.9
    #        20      48.9  15.7  2182    1810    29.6
    #        30            15.4  3891    23      31.4

    _max_repack = 0
    _max_zsync = 8

    def __init__(self, chunk_size, reserved=0):
        """Create a ChunkWriter to write chunk_size chunks.

        :param chunk_size: The total byte count to emit at the end of the
            chunk.
        :param reserved: How many bytes to allow for reserved data. Reserved
            data space can only be written to via write(..., reserved=True).
        """
        self.chunk_size = chunk_size
        self.compressor = zlib.compressobj()
        self.bytes_in = []
        self.bytes_list = []
        self.bytes_out_len = 0
        # Bytes that have been seen, but not included in a flush to out yet.
        self.unflushed_in_bytes = 0
        self.num_repack = 0
        self.num_zsync = 0
        self.unused_bytes = None
        self.reserved_size = reserved

    def finish(self):
        """Finish the chunk.

        This returns the final compressed chunk, and either None, or the
        bytes that did not fit in the chunk.

        :return: (compressed_bytes, unused_bytes, num_nulls_needed)
            compressed_bytes    a list of bytes that were output from the
                                compressor. If the compressed length was not
                                exactly chunk_size, the final string will be
                                a string of all null bytes to pad this to
                                chunk_size.
            unused_bytes        None, or the last bytes that were added,
                                which we could not fit.
            num_nulls_needed    How many nulls are padded at the end.
        """
        self.bytes_in = None # Free the data cached so far, we don't need it
        out = self.compressor.flush(Z_FINISH)
        self.bytes_list.append(out)
        self.bytes_out_len += len(out)

        if self.bytes_out_len > self.chunk_size:
            raise AssertionError('Somehow we ended up with too much'
                                 ' compressed data, %d > %d'
                                 % (self.bytes_out_len, self.chunk_size))
        nulls_needed = self.chunk_size - self.bytes_out_len
        if nulls_needed:
            self.bytes_list.append("\x00" * nulls_needed)
        return self.bytes_list, self.unused_bytes, nulls_needed

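    # A worked example of the finish() contract (numbers are illustrative,
    # not from the source): with chunk_size=4096, if the compressed output
    # came to 4000 bytes, finish() returns (byte_list, unused, 96), where
    # ''.join(byte_list) is exactly 4096 bytes, the last 96 all NULs, and
    # 'unused' is the last rejected write, or None if everything fit.
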
    def _recompress_all_bytes_in(self, extra_bytes=None):
        """Recompress the current bytes_in, and optionally more.

        :param extra_bytes: Optional, if supplied we will add it with
            Z_SYNC_FLUSH.
        :return: (bytes_out, bytes_out_len, compressor)
            bytes_out      is the compressed bytes returned from the
                           compressor
            bytes_out_len  the length of the compressed output
            compressor     An object with everything packed in so far, and
                           Z_SYNC_FLUSH called.
        """
        compressor = zlib.compressobj()
        bytes_out = []
        append = bytes_out.append
        compress = compressor.compress
        for accepted_bytes in self.bytes_in:
            out = compress(accepted_bytes)
            if out:
                append(out)
        if extra_bytes:
            out = compress(extra_bytes)
            out += compressor.flush(Z_SYNC_FLUSH)
            append(out)
        bytes_out_len = sum(map(len, bytes_out))
        return bytes_out, bytes_out_len, compressor

    def write(self, bytes, reserved=False):
        """Write some bytes to the chunk.

        If the bytes fit, False is returned. Otherwise True is returned
        and the bytes have not been added to the chunk.

        :param bytes: The bytes to include
        :param reserved: If True, we can use the space reserved in the
            constructor.
        """
        if self.num_repack > self._max_repack and not reserved:
            self.unused_bytes = bytes
            return True
        if reserved:
            capacity = self.chunk_size
        else:
            capacity = self.chunk_size - self.reserved_size
        comp = self.compressor

        # Check to see if the currently unflushed bytes would fit with a bit
        # of room to spare, assuming no compression.
        next_unflushed = self.unflushed_in_bytes + len(bytes)
        remaining_capacity = capacity - self.bytes_out_len - 10
        if next_unflushed < remaining_capacity:
            # Looks like it will fit
            out = comp.compress(bytes)
            if out:
                self.bytes_list.append(out)
                self.bytes_out_len += len(out)
            self.bytes_in.append(bytes)
            self.unflushed_in_bytes += len(bytes)
        else:
            # This may or may not fit; try to add it with Z_SYNC_FLUSH.
            # Note: It is tempting to do this as a look-ahead pass, and to
            #       'copy()' the compressor before flushing. However, it
            #       seems that doing so is the same thing as increasing
            #       repack: similar cost, same benefit. And this way we still
            #       have the 'repack' knob that can be adjusted, and do not
            #       depend on a platform-specific 'copy()' function.
            self.num_zsync += 1
            if self._max_repack == 0 and self.num_zsync > self._max_zsync:
                self.num_repack += 1
                self.unused_bytes = bytes
                return True
            out = comp.compress(bytes)
            out += comp.flush(Z_SYNC_FLUSH)
            self.unflushed_in_bytes = 0
            if out:
                self.bytes_list.append(out)
                self.bytes_out_len += len(out)

            # We are a bit extra conservative, because it seems that you
            # *can* get better compression with Z_SYNC_FLUSH than a full
            # compress. It is probably very rare, but we were able to
            # trigger it.
            if self.num_repack == 0:
                safety_margin = 100
            else:
                safety_margin = 10
            if self.bytes_out_len + safety_margin <= capacity:
                # It fit, so mark it added
                self.bytes_in.append(bytes)
            else:
                # We are over budget, try to squeeze this in without any
                # Z_SYNC_FLUSH calls
                self.num_repack += 1
                (bytes_out, this_len,
                 compressor) = self._recompress_all_bytes_in(bytes)
                if self.num_repack >= self._max_repack:
                    # When we get *to* _max_repack, bump over so that the
                    # earlier > _max_repack will be triggered.
                    self.num_repack += 1
                if this_len + 10 > capacity:
                    (bytes_out, this_len,
                     compressor) = self._recompress_all_bytes_in()
                    self.compressor = compressor
                    # Force us to not allow more data
                    self.num_repack = self._max_repack + 1
                    self.bytes_list = bytes_out
                    self.bytes_out_len = this_len
                    self.unused_bytes = bytes
                    return True
                else:
                    # This fits when we pack it tighter, so use the new
                    # packing
                    self.compressor = compressor
                    self.bytes_in.append(bytes)
                    self.bytes_list = bytes_out
                    self.bytes_out_len = this_len
        return False
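

def _demo():
    # A minimal usage sketch, not part of the original bzrlib API. It shows
    # the write()/finish() protocol with a hypothetical 4096-byte chunk; the
    # helper name '_demo' and the sample data are illustrative only.
    writer = ChunkWriter(4096)
    lines = ['line %d: some moderately repetitive text\n' % i
             for i in range(10000)]
    accepted = 0
    for line in lines:
        if writer.write(line):
            # True means this line did NOT fit and was not added.
            break
        accepted += 1
    byte_list, unused, padding = writer.finish()
    chunk = ''.join(byte_list)
    # finish() always pads the compressed output to exactly chunk_size.
    assert len(chunk) == 4096
    print('packed %d lines into %d bytes (%d bytes of padding)'
          % (accepted, len(chunk), padding))


if __name__ == '__main__':
    _demo()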