~bzr-pqm/bzr/bzr.dev : contents of bzrlib/chunk

~bzr-pqm/bzr/bzr.dev : (revision 4443.2.1)

# Copyright (C) 2008 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#

"""ChunkWriter: write compressed data out with a fixed upper bound."""

import zlib
from zlib import Z_FINISH, Z_SYNC_FLUSH


class ChunkWriter(object):
    """ChunkWriter allows writing of compressed data with a fixed size.

    If less data is supplied than fills a chunk, the chunk is padded with
    NULL bytes. If more data is supplied, then the writer packs as much
    in as it can, but never splits any item it was given.

    The algorithm for packing is open to improvement! Currently it is:
     - write the bytes given
     - if the total seen bytes so far exceeds the chunk size, flush.

    :cvar _max_repack: To fit the maximum number of entries into a node, we
        will sometimes start over and compress the whole list to get tighter
        packing. We get diminishing returns after a while, so this limits the
        number of times we will try.
        The default is to try to avoid recompressing entirely, but setting this
        to something like 20 will give maximum compression.

    :cvar _max_zsync: Another tunable knob. If _max_repack is set to 0, then
        you can limit the number of times we will try to pack more data into a
        node. This allows us to do a single compression pass, rather than
        trying until we overflow, and then recompressing again.
    """
    #    In testing, some values for bzr.dev::
    #        repack  time  MB   max   full
    #         1       7.5  4.6  1140  0
    #         2       8.4  4.2  1036  1
    #         3       9.8  4.1  1012  278
    #         4      10.8  4.1  728   945
    #        20      11.1  4.1  0     1012
    #        repack = 0
    #        zsync   time  MB    repack  stop_for_z
    #         0       5.0  24.7  0       6270
    #         1       4.3  13.2  0       3342
    #         2       4.9   9.6  0       2414
    #         5       4.8   6.2  0       1549
    #         6       4.8   5.8  1       1435
    #         7       4.8   5.5  19      1337
    #         8       4.4   5.3  81      1220
    #        10       5.3   5.0  260     967
    #        11       5.3   4.9  366     839
    #        12       5.1   4.8  454     731
    #        15       5.8   4.7  704     450
    #        20       5.8   4.6  1133    7

    #    In testing, some values for mysql-unpacked::
    #                next_bytes estim
    #        repack  time  MB    full    stop_for_repack
    #         1            15.4  0       3913
    #         2      35.4  13.7  0       346
    #        20      46.7  13.4  3380    0
    #        repack=0
    #        zsync                       stop_for_z
    #         0      29.5 116.5  0       29782
    #         1      27.8  60.2  0       15356
    #         2      27.8  42.4  0       10822
    #         5      26.8  25.5  0       6491
    #         6      27.3  23.2  13      5896
    #         7      27.5  21.6  29      5451
    #         8      27.1  20.3  52      5108
    #        10      29.4  18.6  195     4526
    #        11      29.2  18.0  421     4143
    #        12      28.0  17.5  702     3738
    #        15      28.9  16.5  1223    2969
    #        20      29.6  15.7  2182    1810
    #        30      31.4  15.4  3891    23

    # Tuple of (num_repack_attempts, num_zsync_attempts)
    # num_zsync_attempts only has meaning if num_repack_attempts is 0.
    _repack_opts_for_speed = (0, 8)
    _repack_opts_for_size = (20, 0)

    def __init__(self, chunk_size, reserved=0, optimize_for_size=False):
        """Create a ChunkWriter to write chunk_size chunks.

        :param chunk_size: The total byte count to emit at the end of the
            chunk.
        :param reserved: How many bytes to allow for reserved data. reserved
            data space can only be written to via the write(..., reserved=True).
        :param optimize_for_size: If True, use the repack settings that give
            the tightest packing; otherwise use the settings that favour
            write speed. See set_optimize().
        """
        self.chunk_size = chunk_size
        self.compressor = zlib.compressobj()
        # Every item accepted so far, kept so the chunk can be recompressed
        # from scratch when a tighter packing is attempted.
        self.bytes_in = []
        # The compressed output produced so far, and its total length.
        self.bytes_list = []
        self.bytes_out_len = 0
        # bytes that have been seen, but not included in a flush to out yet
        self.unflushed_in_bytes = 0
        self.num_repack = 0
        self.num_zsync = 0
        # The item that was refused by write(), if any.
        self.unused_bytes = None
        self.reserved_size = reserved
        # Default is to make building fast rather than compact
        self.set_optimize(for_size=optimize_for_size)

    def finish(self):
        """Finish the chunk.

        This returns the final compressed chunk, and either None, or the
        bytes that did not fit in the chunk.

        :return: (compressed_bytes, unused_bytes, num_nulls_needed)
            compressed_bytes    a list of bytes that were output from the
                                compressor. If the compressed length was not
                                exactly chunk_size, the final string will be a
                                string of all null bytes to pad this to
                                chunk_size
            unused_bytes        None, or the last bytes that were added, which
                                we could not fit.
            num_nulls_needed    How many nulls are padded at the end
        :raises AssertionError: If the compressed output ended up larger than
            chunk_size.
        """
        self.bytes_in = None # Free the data cached so far, we don't need it
        out = self.compressor.flush(Z_FINISH)
        self.bytes_list.append(out)
        self.bytes_out_len += len(out)

        if self.bytes_out_len > self.chunk_size:
            raise AssertionError('Somehow we ended up with too much'
                                 ' compressed data, %d > %d'
                                 % (self.bytes_out_len, self.chunk_size))
        nulls_needed = self.chunk_size - self.bytes_out_len
        if nulls_needed:
            # The padding must be a byte string like the compressor output, so
            # that callers can join the list or write it to a binary file.
            self.bytes_list.append(b"\x00" * nulls_needed)
        return self.bytes_list, self.unused_bytes, nulls_needed

    def set_optimize(self, for_size=True):
        """Change how we optimize our writes.

        :param for_size: If True, optimize for minimum space usage, otherwise
            optimize for fastest writing speed.
        :return: None
        """
        if for_size:
            opts = ChunkWriter._repack_opts_for_size
        else:
            opts = ChunkWriter._repack_opts_for_speed
        self._max_repack, self._max_zsync = opts

    def _recompress_all_bytes_in(self, extra_bytes=None):
        """Recompress the current bytes_in, and optionally more.

        This does not modify the writer; the caller decides whether to adopt
        the returned state.

        :param extra_bytes: Optional, if supplied we will add it with
            Z_SYNC_FLUSH
        :return: (bytes_out, bytes_out_len, compressor)
            bytes_out   is the compressed bytes returned from the compressor
            bytes_out_len the length of the compressed output
            compressor  A new compressor object with everything packed in so
                        far. Z_SYNC_FLUSH has been called on it only when
                        extra_bytes was supplied.
        """
        compressor = zlib.compressobj()
        bytes_out = []
        append = bytes_out.append
        compress = compressor.compress
        for accepted_bytes in self.bytes_in:
            out = compress(accepted_bytes)
            if out:
                append(out)
        if extra_bytes:
            out = compress(extra_bytes)
            out += compressor.flush(Z_SYNC_FLUSH)
            append(out)
        bytes_out_len = sum(map(len, bytes_out))
        return bytes_out, bytes_out_len, compressor

    def write(self, bytes, reserved=False):
        """Write some bytes to the chunk.

        If the bytes fit, False is returned. Otherwise True is returned
        and the bytes have not been added to the chunk; they are remembered
        in self.unused_bytes and handed back by finish().

        :param bytes: The bytes to include
        :param reserved: If True, we can use the space reserved in the
            constructor.
        :return: False if the bytes were added, True if they did not fit.
        """
        if self.num_repack > self._max_repack and not reserved:
            # We already gave up on packing more in; refuse without trying.
            self.unused_bytes = bytes
            return True
        if reserved:
            capacity = self.chunk_size
        else:
            capacity = self.chunk_size - self.reserved_size
        comp = self.compressor

        # Check to see if the currently unflushed bytes would fit with a bit of
        # room to spare, assuming no compression.
        next_unflushed = self.unflushed_in_bytes + len(bytes)
        remaining_capacity = capacity - self.bytes_out_len - 10
        if (next_unflushed < remaining_capacity):
            # looks like it will fit
            out = comp.compress(bytes)
            if out:
                self.bytes_list.append(out)
                self.bytes_out_len += len(out)
            self.bytes_in.append(bytes)
            self.unflushed_in_bytes += len(bytes)
        else:
            # This may or may not fit, try to add it with Z_SYNC_FLUSH
            # Note: It is tempting to do this as a look-ahead pass, and to
            #       'copy()' the compressor before flushing. However, it seems
            #       that this is the same thing as increasing repack: similar
            #       cost, same benefit. And this way we still have the
            #       'repack' knob that can be adjusted, and not depend on a
            #       platform-specific 'copy()' function.
            self.num_zsync += 1
            if self._max_repack == 0 and self.num_zsync > self._max_zsync:
                # Out of sync-flush attempts and repacking is disabled: stop
                # here. Bumping num_repack makes later writes bail out early.
                self.num_repack += 1
                self.unused_bytes = bytes
                return True
            out = comp.compress(bytes)
            out += comp.flush(Z_SYNC_FLUSH)
            self.unflushed_in_bytes = 0
            if out:
                self.bytes_list.append(out)
                self.bytes_out_len += len(out)

            # We are a bit extra conservative, because it seems that you *can*
            # get better compression with Z_SYNC_FLUSH than a full compress. It
            # is probably very rare, but we were able to trigger it.
            if self.num_repack == 0:
                safety_margin = 100
            else:
                safety_margin = 10
            if self.bytes_out_len + safety_margin <= capacity:
                # It fit, so mark it added
                self.bytes_in.append(bytes)
            else:
                # We are over budget, try to squeeze this in without any
                # Z_SYNC_FLUSH calls
                self.num_repack += 1
                (bytes_out, this_len,
                 compressor) = self._recompress_all_bytes_in(bytes)
                if self.num_repack >= self._max_repack:
                    # When we get *to* _max_repack, bump over so that the
                    # earlier > _max_repack will be triggered.
                    self.num_repack += 1
                if this_len + 10 > capacity:
                    # Still too big: rebuild the compressor state from only
                    # the previously accepted items and reject this one.
                    (bytes_out, this_len,
                     compressor) = self._recompress_all_bytes_in()
                    self.compressor = compressor
                    # Force us to not allow more data
                    self.num_repack = self._max_repack + 1
                    self.bytes_list = bytes_out
                    self.bytes_out_len = this_len
                    self.unused_bytes = bytes
                    return True
                else:
                    # This fits when we pack it tighter, so use the new packing
                    self.compressor = compressor
                    self.bytes_in.append(bytes)
                    self.bytes_list = bytes_out
                    self.bytes_out_len = this_len
        return False


3641.3.29 by John Arbash Meinel Cleanup the copyright headers	1	# Copyright (C) 2008 Canonical Ltd
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	2	#
	3	# This program is free software; you can redistribute it and/or modify
3641.3.29 by John Arbash Meinel Cleanup the copyright headers	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob update FSF mailing address	15	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	16	#
	17
	18	"""ChunkWriter: write compressed data out with a fixed upper bound."""
	19
	20	import zlib
	21	from zlib import Z_FINISH, Z_SYNC_FLUSH
	22
	23
	24	class ChunkWriter(object):
	25	"""ChunkWriter allows writing of compressed data with a fixed size.
	26
	27	If less data is supplied than fills a chunk, the chunk is padded with
	28	NULL bytes. If more data is supplied, then the writer packs as much
	29	in as it can, but never splits any item it was given.
	30
	31	The algorithm for packing is open to improvement! Currently it is:
	32	- write the bytes given
	33	- if the total seen bytes so far exceeds the chunk size, flush.
3641.3.4 by John Arbash Meinel Tweak some 'sum' lines.	34
	35	:cvar _max_repack: To fit the maximum number of entries into a node, we
	36	will sometimes start over and compress the whole list to get tighter
	37	packing. We get diminishing returns after a while, so this limits the
	38	number of times we will try.
3641.5.18 by John Arbash Meinel Clean out the global state, good for prototyping and tuning, bad for production code.	39	The default is to try to avoid recompressing entirely, but setting this
	40	to something like 20 will give maximum compression.
	41
	42	:cvar _max_zsync: Another tunable knob. If _max_repack is set to 0, then you
	43	can limit the number of times we will try to pack more data into a
	44	node. This allows us to do a single compression pass, rather than
	45	trying until we overflow, and then recompressing again.
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	46	"""
3641.5.18 by John Arbash Meinel Clean out the global state, good for prototyping and tuning, bad for production code.	47	# In testing, some values for bzr.dev::
	48	# repack time MB max full
	49	# 1 7.5 4.6 1140 0
3777.5.1 by John Arbash Meinel Add ChunkWriter.optimize(for_size=True)	50	# 2 8.4 4.2 1036 1
3641.5.18 by John Arbash Meinel Clean out the global state, good for prototyping and tuning, bad for production code.	51	# 3 9.8 4.1 1012 278
	52	# 4 10.8 4.1 728 945
	53	# 20 11.1 4.1 0 1012
	54	# repack = 0
3777.5.1 by John Arbash Meinel Add ChunkWriter.optimize(for_size=True)	55	# zsync time MB repack stop_for_z
	56	# 0 5.0 24.7 0 6270
	57	# 1 4.3 13.2 0 3342
	58	# 2 4.9 9.6 0 2414
	59	# 5 4.8 6.2 0 1549
	60	# 6 4.8 5.8 1 1435
	61	# 7 4.8 5.5 19 1337
	62	# 8 4.4 5.3 81 1220
	63	# 10 5.3 5.0 260 967
	64	# 11 5.3 4.9 366 839
	65	# 12 5.1 4.8 454 731
	66	# 15 5.8 4.7 704 450
	67	# 20 5.8 4.6 1133 7
3641.5.18 by John Arbash Meinel Clean out the global state, good for prototyping and tuning, bad for production code.	68
	69	# In testing, some values for mysql-unpacked::
	70	# next_bytes estim
3777.5.1 by John Arbash Meinel Add ChunkWriter.optimize(for_size=True)	71	# repack time MB full stop_for_repack
	72	# 1 15.4 0 3913
	73	# 2 35.4 13.7 0 346
	74	# 20 46.7 13.4 3380 0
3641.5.18 by John Arbash Meinel Clean out the global state, good for prototyping and tuning, bad for production code.	75	# repack=0
3777.5.1 by John Arbash Meinel Add ChunkWriter.optimize(for_size=True)	76	# zsync stop_for_z
	77	# 0 29.5 116.5 0 29782
	78	# 1 27.8 60.2 0 15356
	79	# 2 27.8 42.4 0 10822
	80	# 5 26.8 25.5 0 6491
	81	# 6 27.3 23.2 13 5896
	82	# 7 27.5 21.6 29 5451
	83	# 8 27.1 20.3 52 5108
	84	# 10 29.4 18.6 195 4526
	85	# 11 29.2 18.0 421 4143
	86	# 12 28.0 17.5 702 3738
	87	# 15 28.9 16.5 1223 2969
	88	# 20 29.6 15.7 2182 1810
	89	# 30 31.4 15.4 3891 23
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	90
3777.5.1 by John Arbash Meinel Add ChunkWriter.optimize(for_size=True)	91	# Tuple of (num_repack_attempts, num_zsync_attempts)
	92	# num_zsync_attempts only has meaning if num_repack_attempts is 0.
	93	_repack_opts_for_speed = (0, 8)
	94	_repack_opts_for_size = (20, 0)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	95
3777.5.2 by John Arbash Meinel Change the name to ChunkWriter.set_optimize()	96	def __init__(self, chunk_size, reserved=0, optimize_for_size=False):
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	97	"""Create a ChunkWriter to write chunk_size chunks.
	98
	99	:param chunk_size: The total byte count to emit at the end of the
	100	chunk.
	101	:param reserved: How many bytes to allow for reserved data. reserved
3641.5.19 by John Arbash Meinel Documentation cleanup pass.	102	data space can only be written to via the write(..., reserved=True).
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	103	"""
	104	self.chunk_size = chunk_size
	105	self.compressor = zlib.compressobj()
	106	self.bytes_in = []
	107	self.bytes_list = []
3641.3.16 by John Arbash Meinel Somewhat surprisingly, tracking bytes_out_len makes a	108	self.bytes_out_len = 0
3641.5.2 by John Arbash Meinel (broken, but hopeful) Change the compact logic.	109	# bytes that have been seen, but not included in a flush to out yet
	110	self.unflushed_in_bytes = 0
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	111	self.num_repack = 0
3641.5.12 by John Arbash Meinel Play around with max_repack=0 and limiting work done based on	112	self.num_zsync = 0
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	113	self.unused_bytes = None
	114	self.reserved_size = reserved
3777.5.1 by John Arbash Meinel Add ChunkWriter.optimize(for_size=True)	115	# Default is to make building fast rather than compact
3777.5.2 by John Arbash Meinel Change the name to ChunkWriter.set_optimize()	116	self.set_optimize(for_size=optimize_for_size)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	117
	118	def finish(self):
	119	"""Finish the chunk.
	120
	121	This returns the final compressed chunk, and either None, or the
	122	bytes that did not fit in the chunk.
3641.5.19 by John Arbash Meinel Documentation cleanup pass.	123
	124	:return: (compressed_bytes, unused_bytes, num_nulls_needed)
	125	compressed_bytes a list of bytes that were output from the
	126	compressor. If the compressed length was not
	127	exactly chunk_size, the final string will be a
	128	string of all null bytes to pad this to
	129	chunk_size
	130	unused_bytes None, or the last bytes that were added, which
	131	we could not fit.
	132	num_nulls_needed How many nulls are padded at the end
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	133	"""
	134	self.bytes_in = None # Free the data cached so far, we don't need it
3641.3.16 by John Arbash Meinel Somewhat surprisingly, tracking bytes_out_len makes a	135	out = self.compressor.flush(Z_FINISH)
	136	self.bytes_list.append(out)
	137	self.bytes_out_len += len(out)
3641.5.12 by John Arbash Meinel Play around with max_repack=0 and limiting work done based on	138
3641.3.16 by John Arbash Meinel Somewhat surprisingly, tracking bytes_out_len makes a	139	if self.bytes_out_len > self.chunk_size:
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	140	raise AssertionError('Somehow we ended up with too much'
	141	' compressed data, %d > %d'
3641.3.16 by John Arbash Meinel Somewhat surprisingly, tracking bytes_out_len makes a	142	% (self.bytes_out_len, self.chunk_size))
3644.2.3 by John Arbash Meinel Do a bit more work to get all the tests to pass.	143	nulls_needed = self.chunk_size - self.bytes_out_len
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	144	if nulls_needed:
	145	self.bytes_list.append("\x00" * nulls_needed)
	146	return self.bytes_list, self.unused_bytes, nulls_needed
	147
3777.5.2 by John Arbash Meinel Change the name to ChunkWriter.set_optimize()	148	def set_optimize(self, for_size=True):
	149	"""Change how we optimize our writes.
	150
	151	:param for_size: If True, optimize for minimum space usage, otherwise
	152	optimize for fastest writing speed.
	153	:return: None
	154	"""
	155	if for_size:
	156	opts = ChunkWriter._repack_opts_for_size
	157	else:
	158	opts = ChunkWriter._repack_opts_for_speed
	159	self._max_repack, self._max_zsync = opts
	160
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	161	def _recompress_all_bytes_in(self, extra_bytes=None):
3641.3.12 by John Arbash Meinel Collect some info on the space/time tradeoff for _max_repack.	162	"""Recompress the current bytes_in, and optionally more.
	163
3641.5.19 by John Arbash Meinel Documentation cleanup pass.	164	:param extra_bytes: Optional, if supplied we will add it with
3641.3.12 by John Arbash Meinel Collect some info on the space/time tradeoff for _max_repack.	165	Z_SYNC_FLUSH
3641.5.19 by John Arbash Meinel Documentation cleanup pass.	166	:return: (bytes_out, bytes_out_len, alt_compressed)
3641.3.12 by John Arbash Meinel Collect some info on the space/time tradeoff for _max_repack.	167	bytes_out is the compressed bytes returned from the compressor
3641.5.19 by John Arbash Meinel Documentation cleanup pass.	168	bytes_out_len the length of the compressed output
3641.3.12 by John Arbash Meinel Collect some info on the space/time tradeoff for _max_repack.	169	compressor An object with everything packed in so far, and
	170	Z_SYNC_FLUSH called.
	171	"""
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	172	compressor = zlib.compressobj()
	173	bytes_out = []
3641.3.5 by John Arbash Meinel For iter_all and three_level tests adjust spill-at.	174	append = bytes_out.append
	175	compress = compressor.compress
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	176	for accepted_bytes in self.bytes_in:
3641.3.5 by John Arbash Meinel For iter_all and three_level tests adjust spill-at.	177	out = compress(accepted_bytes)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	178	if out:
3641.3.5 by John Arbash Meinel For iter_all and three_level tests adjust spill-at.	179	append(out)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	180	if extra_bytes:
3641.3.5 by John Arbash Meinel For iter_all and three_level tests adjust spill-at.	181	out = compress(extra_bytes)
3641.5.10 by John Arbash Meinel Only Z_SYNC_FLUSH when we have extra bytes.	182	out += compressor.flush(Z_SYNC_FLUSH)
	183	append(out)
3641.3.27 by John Arbash Meinel Bringing reserved in as a keyword to write() also saves some time.	184	bytes_out_len = sum(map(len, bytes_out))
	185	return bytes_out, bytes_out_len, compressor
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	186
3641.3.27 by John Arbash Meinel Bringing reserved in as a keyword to write() also saves some time.	187	def write(self, bytes, reserved=False):
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	188	"""Write some bytes to the chunk.
	189
	190	If the bytes fit, False is returned. Otherwise True is returned
	191	and the bytes have not been added to the chunk.
3641.5.19 by John Arbash Meinel Documentation cleanup pass.	192
	193	:param bytes: The bytes to include
	194	:param reserved: If True, we can use the space reserved in the
	195	constructor.
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	196	"""
3641.5.2 by John Arbash Meinel (broken, but hopeful) Change the compact logic.	197	if self.num_repack > self._max_repack and not reserved:
	198	self.unused_bytes = bytes
	199	return True
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	200	if reserved:
	201	capacity = self.chunk_size
	202	else:
	203	capacity = self.chunk_size - self.reserved_size
3641.3.27 by John Arbash Meinel Bringing reserved in as a keyword to write() also saves some time.	204	comp = self.compressor
3641.5.19 by John Arbash Meinel Documentation cleanup pass.	205
3641.5.2 by John Arbash Meinel (broken, but hopeful) Change the compact logic.	206	# Check to see if the currently unflushed bytes would fit with a bit of
	207	# room to spare, assuming no compression.
	208	next_unflushed = self.unflushed_in_bytes + len(bytes)
	209	remaining_capacity = capacity - self.bytes_out_len - 10
	210	if (next_unflushed < remaining_capacity):
3641.5.19 by John Arbash Meinel Documentation cleanup pass.	211	# looks like it will fit
3641.3.27 by John Arbash Meinel Bringing reserved in as a keyword to write() also saves some time.	212	out = comp.compress(bytes)
3641.3.11 by John Arbash Meinel Start working on an alternate way to track compressed_chunk state.	213	if out:
	214	self.bytes_list.append(out)
3641.3.16 by John Arbash Meinel Somewhat surprisingly, tracking bytes_out_len makes a	215	self.bytes_out_len += len(out)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	216	self.bytes_in.append(bytes)
3641.5.2 by John Arbash Meinel (broken, but hopeful) Change the compact logic.	217	self.unflushed_in_bytes += len(bytes)
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	218	else:
	219	# This may or may not fit, try to add it with Z_SYNC_FLUSH
3641.5.5 by John Arbash Meinel Document my attempt to use copy() as a look-ahead.	220	# Note: It is tempting to do this as a look-ahead pass, and to
3641.5.19 by John Arbash Meinel Documentation cleanup pass.	221	# 'copy()' the compressor before flushing. However, it seems
	222	# that this is the same thing as increasing
	223	# repack, similar cost, same benefit. And this way we still
	224	# have the 'repack' knob that can be adjusted, and not depend
	225	# on a platform-specific 'copy()' function.
3641.5.12 by John Arbash Meinel Play around with max_repack=0 and limiting work done based on	226	self.num_zsync += 1
	227	if self._max_repack == 0 and self.num_zsync > self._max_zsync:
	228	self.num_repack += 1
3641.5.19 by John Arbash Meinel Documentation cleanup pass.	229	self.unused_bytes = bytes
3641.5.12 by John Arbash Meinel Play around with max_repack=0 and limiting work done based on	230	return True
3641.3.27 by John Arbash Meinel Bringing reserved in as a keyword to write() also saves some time.	231	out = comp.compress(bytes)
	232	out += comp.flush(Z_SYNC_FLUSH)
3641.5.2 by John Arbash Meinel (broken, but hopeful) Change the compact logic.	233	self.unflushed_in_bytes = 0
3641.3.15 by John Arbash Meinel Now that we have real data, remove the copy() code.	234	if out:
	235	self.bytes_list.append(out)
3641.3.16 by John Arbash Meinel Somewhat surprisingly, tracking bytes_out_len makes a	236	self.bytes_out_len += len(out)
3641.5.3 by John Arbash Meinel If we repack earlier, it catches this case.	237
	238	# We are a bit extra conservative, because it seems that you can
	239	# get better compression with Z_SYNC_FLUSH than a full compress. It
	240	# is probably very rare, but we were able to trigger it.
3641.5.4 by John Arbash Meinel Using a different safety margin for the first repack,	241	if self.num_repack == 0:
	242	safety_margin = 100
	243	else:
	244	safety_margin = 10
	245	if self.bytes_out_len + safety_margin <= capacity:
3641.5.1 by John Arbash Meinel Update the stats for the current code layout.	246	# It fit, so mark it added
	247	self.bytes_in.append(bytes)
	248	else:
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	249	# We are over budget, try to squeeze this in without any
	250	# Z_SYNC_FLUSH calls
	251	self.num_repack += 1
3641.5.1 by John Arbash Meinel Update the stats for the current code layout.	252	(bytes_out, this_len,
	253	compressor) = self._recompress_all_bytes_in(bytes)
	254	if self.num_repack >= self._max_repack:
	255	# When we get to _max_repack, bump over so that the
	256	# earlier > _max_repack will be triggered.
	257	self.num_repack += 1
3641.3.16 by John Arbash Meinel Somewhat surprisingly, tracking bytes_out_len makes a	258	if this_len + 10 > capacity:
3641.5.1 by John Arbash Meinel Update the stats for the current code layout.	259	(bytes_out, this_len,
	260	compressor) = self._recompress_all_bytes_in()
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	261	self.compressor = compressor
3641.5.2 by John Arbash Meinel (broken, but hopeful) Change the compact logic.	262	# Force us to not allow more data
	263	self.num_repack = self._max_repack + 1
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	264	self.bytes_list = bytes_out
3641.3.27 by John Arbash Meinel Bringing reserved in as a keyword to write() also saves some time.	265	self.bytes_out_len = this_len
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	266	self.unused_bytes = bytes
	267	return True
	268	else:
	269	# This fits when we pack it tighter, so use the new packing
	270	self.compressor = compressor
	271	self.bytes_in.append(bytes)
	272	self.bytes_list = bytes_out
3641.3.16 by John Arbash Meinel Somewhat surprisingly, tracking bytes_out_len makes a	273	self.bytes_out_len = this_len
3641.3.1 by John Arbash Meinel Bring in the btree_index and chunk_writer code and their tests.	274	return False
	275