~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/chunk_writer.py

  • Committer: John Arbash Meinel
  • Author(s): Mark Hammond
  • Date: 2008-09-09 17:02:21 UTC
  • mto: This revision was merged to the branch mainline in revision 3697.
  • Revision ID: john@arbash-meinel.com-20080909170221-svim3jw2mrz0amp3
An updated transparent icon for bzr.

Show diffs side-by-side

added

removed

Lines of Context:
12
12
#
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
16
#
17
17
 
18
18
"""ChunkWriter: write compressed data out with a fixed upper bound."""
19
19
 
20
 
from __future__ import absolute_import
21
 
 
22
20
import zlib
23
21
from zlib import Z_FINISH, Z_SYNC_FLUSH
24
22
 
49
47
    #    In testing, some values for bzr.dev::
50
48
    #        repack  time  MB   max   full
51
49
    #         1       7.5  4.6  1140  0
52
 
    #         2       8.4  4.2  1036  1
 
50
    #         2       8.4  4.2  1036  1          6.8
53
51
    #         3       9.8  4.1  1012  278
54
52
    #         4      10.8  4.1  728   945
55
53
    #        20      11.1  4.1  0     1012
56
54
    #        repack = 0
57
 
    #        zsync   time  MB    repack  stop_for_z
58
 
    #         0       5.0  24.7  0       6270
59
 
    #         1       4.3  13.2  0       3342
60
 
    #         2       4.9   9.6  0       2414
61
 
    #         5       4.8   6.2  0       1549
62
 
    #         6       4.8   5.8  1       1435
63
 
    #         7       4.8   5.5  19      1337
64
 
    #         8       4.4   5.3  81      1220
65
 
    #        10       5.3   5.0  260     967
66
 
    #        11       5.3   4.9  366     839
67
 
    #        12       5.1   4.8  454     731
68
 
    #        15       5.8   4.7  704     450
69
 
    #        20       5.8   4.6  1133    7
 
55
    #        zsync   time  MB    repack  max_z   time w/ add_node
 
56
    #         0       6.7  24.7  0       6270    5.0
 
57
    #         1       6.5  13.2  0       3342    4.3
 
58
    #         2       6.6   9.6  0       2414    4.9
 
59
    #         5       6.5   6.2  0       1549    4.8
 
60
    #         6       6.5   5.8  1       1435    4.8
 
61
    #         7       6.6   5.5  19      1337    4.8
 
62
    #         8       6.7   5.3  81      1220    4.4
 
63
    #        10       6.8   5.0  260     967     5.3
 
64
    #        11       6.8   4.9  366     839     5.3
 
65
    #        12       6.9   4.8  454     731     5.1
 
66
    #        15       7.2   4.7  704     450     5.8
 
67
    #        20       7.7   4.6  1133    7       5.8
70
68
 
71
69
    #    In testing, some values for mysql-unpacked::
72
70
    #                next_bytes estim
73
 
    #        repack  time  MB    full    stop_for_repack
74
 
    #         1            15.4  0       3913
75
 
    #         2      35.4  13.7  0       346
76
 
    #        20      46.7  13.4  3380    0
 
71
    #        repack  time  MB    hit_max full
 
72
    #         1      51.7  15.4  3913  0
 
73
    #         2      54.4  13.7  3467  0         35.4
 
74
    #        20      67.0  13.4  0     3380      46.7
77
75
    #        repack=0
78
 
    #        zsync                       stop_for_z
79
 
    #         0      29.5 116.5  0       29782
80
 
    #         1      27.8  60.2  0       15356
81
 
    #         2      27.8  42.4  0       10822
82
 
    #         5      26.8  25.5  0       6491
83
 
    #         6      27.3  23.2  13      5896
84
 
    #         7      27.5  21.6  29      5451
85
 
    #         8      27.1  20.3  52      5108
86
 
    #        10      29.4  18.6  195     4526
87
 
    #        11      29.2  18.0  421     4143
88
 
    #        12      28.0  17.5  702     3738
89
 
    #        15      28.9  16.5  1223    2969
90
 
    #        20      29.6  15.7  2182    1810
91
 
    #        30      31.4  15.4  3891    23
92
 
 
93
 
    # Tuple of (num_repack_attempts, num_zsync_attempts)
94
 
    # num_zsync_attempts only has meaning if num_repack_attempts is 0.
95
 
    _repack_opts_for_speed = (0, 8)
96
 
    _repack_opts_for_size = (20, 0)
97
 
 
98
 
    def __init__(self, chunk_size, reserved=0, optimize_for_size=False):
 
76
    #        zsync                               time w/ add_node
 
77
    #         0      47.7 116.5  0       29782   29.5
 
78
    #         1      48.5  60.2  0       15356   27.8
 
79
    #         2      48.1  42.4  0       10822   27.8
 
80
    #         5      48.3  25.5  0       6491    26.8
 
81
    #         6      48.0  23.2  13      5896    27.3
 
82
    #         7      48.1  21.6  29      5451    27.5
 
83
    #         8      48.1  20.3  52      5108    27.1
 
84
    #        10      46.9  18.6  195     4526    29.4
 
85
    #        11      48.8  18.0  421     4143    29.2
 
86
    #        12      47.4  17.5  702     3738    28.0
 
87
    #        15      49.6  16.5  1223    2969    28.9
 
88
    #        20      48.9  15.7  2182    1810    29.6
 
89
    #        30            15.4  3891    23      31.4
 
90
 
 
91
    _max_repack = 0
 
92
    _max_zsync = 8
 
93
 
 
94
    def __init__(self, chunk_size, reserved=0):
99
95
        """Create a ChunkWriter to write chunk_size chunks.
100
96
 
101
97
        :param chunk_size: The total byte count to emit at the end of the
114
110
        self.num_zsync = 0
115
111
        self.unused_bytes = None
116
112
        self.reserved_size = reserved
117
 
        # Default is to make building fast rather than compact
118
 
        self.set_optimize(for_size=optimize_for_size)
119
113
 
120
114
    def finish(self):
121
115
        """Finish the chunk.
124
118
        bytes that did not fit in the chunk.
125
119
 
126
120
        :return: (compressed_bytes, unused_bytes, num_nulls_needed)
127
 
 
128
 
            * compressed_bytes: a list of bytes that were output from the
129
 
              compressor. If the compressed length was not exactly chunk_size,
130
 
              the final string will be a string of all null bytes to pad this
131
 
              to chunk_size
132
 
            * unused_bytes: None, or the last bytes that were added, which we
133
 
              could not fit.
134
 
            * num_nulls_needed: How many nulls are padded at the end
 
121
            compressed_bytes    a list of bytes that were output from the
 
122
                                compressor. If the compressed length was not
 
123
                                exactly chunk_size, the final string will be a
 
124
                                string of all null bytes to pad this to
 
125
                                chunk_size
 
126
            unused_bytes        None, or the last bytes that were added, which
 
127
                                we could not fit.
 
128
            num_nulls_needed    How many nulls are padded at the end
135
129
        """
136
130
        self.bytes_in = None # Free the data cached so far, we don't need it
137
131
        out = self.compressor.flush(Z_FINISH)
147
141
            self.bytes_list.append("\x00" * nulls_needed)
148
142
        return self.bytes_list, self.unused_bytes, nulls_needed
149
143
 
150
 
    def set_optimize(self, for_size=True):
151
 
        """Change how we optimize our writes.
152
 
 
153
 
        :param for_size: If True, optimize for minimum space usage, otherwise
154
 
            optimize for fastest writing speed.
155
 
        :return: None
156
 
        """
157
 
        if for_size:
158
 
            opts = ChunkWriter._repack_opts_for_size
159
 
        else:
160
 
            opts = ChunkWriter._repack_opts_for_speed
161
 
        self._max_repack, self._max_zsync = opts
162
 
 
163
144
    def _recompress_all_bytes_in(self, extra_bytes=None):
164
145
        """Recompress the current bytes_in, and optionally more.
165
146
 
166
147
        :param extra_bytes: Optional, if supplied we will add it with
167
148
            Z_SYNC_FLUSH
168
149
        :return: (bytes_out, bytes_out_len, alt_compressed)
169
 
 
170
 
            * bytes_out: is the compressed bytes returned from the compressor
171
 
            * bytes_out_len: the length of the compressed output
172
 
            * compressor: An object with everything packed in so far, and
173
 
              Z_SYNC_FLUSH called.
 
150
            bytes_out   is the compressed bytes returned from the compressor
 
151
            bytes_out_len the length of the compressed output
 
152
            compressor  An object with everything packed in so far, and
 
153
                        Z_SYNC_FLUSH called.
174
154
        """
175
155
        compressor = zlib.compressobj()
176
156
        bytes_out = []