~bzr-pqm/bzr/bzr.dev

Viewing changes to tests/test_groupcompress.py

  • Committer: John Arbash Meinel
  • Date: 2009-03-04 21:06:22 UTC
  • mto: (0.17.34 trunk)
  • mto: This revision was merged to the branch mainline in revision 4280.
  • Revision ID: john@arbash-meinel.com-20090304210622-ur7wz2dz0w4lhzn3
(tests broken) implement the basic ability to have a separate header.
This puts the labels/sha1/etc. together in one section, and then has the actual
content deltas combined later on.
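
The layout this refers to can be seen in the test expectations further down in this diff (the 'gcb1z' block with key/sha1/type/start/length entries). As a rough illustration only — a minimal sketch in the Python 2 idiom of the surrounding code, where build_header and its entries argument are hypothetical names rather than anything from this revision — the separate header section might be assembled like this:

# Illustrative sketch only (not code from this revision): how a separate
# header section could be assembled, assuming each entry carries
# (key, sha1, type, start, length) as the test expectations below suggest.
import zlib

def build_header(entries):
    """Serialize per-record metadata into one compressed header section."""
    lines = []
    for key, sha1, kind, start, length in entries:
        lines.append('key:%s\n' % '\x00'.join(key))  # tuple keys are NUL-joined
        lines.append('sha1:%s\n' % sha1)
        lines.append('type:%s\n' % kind)
        lines.append('start:%d\n' % start)
        lines.append('length:%d\n' % length)
        lines.append('\n')  # blank line terminates each entry
    header = ''.join(lines)
    z_header = zlib.compress(header)
    # 'gcb1z' marker, compressed header length, uncompressed header length,
    # then the compressed header; the combined content deltas follow separately.
    return 'gcb1z\n%d\n%d\n%s' % (len(z_header), len(header), z_header)

For example, build_header([(('foo', 'bar'), 'abcd'*10, 'fulltext', 0, 100)]) would produce the kind of bytes the from_bytes tests below expect for a single fulltext entry.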

1
 
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
2
 
#
 
1
# groupcompress, a bzr plugin providing new compression logic.
 
2
# Copyright (C) 2008 Canonical Limited.
 
3
3
4
# This program is free software; you can redistribute it and/or modify
4
 
# it under the terms of the GNU General Public License as published by
5
 
# the Free Software Foundation; either version 2 of the License, or
6
 
# (at your option) any later version.
7
 
#
 
5
# it under the terms of the GNU General Public License version 2 as published
 
6
# by the Free Software Foundation.
 
7
8
8
# This program is distributed in the hope that it will be useful,
9
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
11
# GNU General Public License for more details.
12
 
#
 
12
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 
16
16
17
 
17
18
"""Tests for group compression."""
18
19
 
19
20
import zlib
20
21
 
21
 
from bzrlib import (
22
 
    btree_index,
23
 
    groupcompress,
24
 
    errors,
25
 
    index as _mod_index,
26
 
    osutils,
27
 
    tests,
28
 
    trace,
29
 
    versionedfile,
 
22
from bzrlib import tests
 
23
from bzrlib.osutils import sha_string
 
24
from bzrlib.plugins.groupcompress import errors, groupcompress
 
25
from bzrlib.tests import (
 
26
    TestCaseWithTransport,
 
27
    TestScenarioApplier,
 
28
    adapt_tests,
30
29
    )
31
 
from bzrlib.osutils import sha_string
32
 
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
 
30
from bzrlib.transport import get_transport
33
31
 
34
32
 
35
33
def load_tests(standard_tests, module, loader):
36
 
    """Parameterize tests for all versions of groupcompress."""
37
 
    to_adapt, result = tests.split_suite_by_condition(
38
 
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
39
 
    scenarios = [
40
 
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
41
 
        ]
42
 
    if compiled_groupcompress_feature.available():
43
 
        scenarios.append(('C',
44
 
            {'compressor': groupcompress.PyrexGroupCompressor}))
45
 
    return tests.multiply_tests(to_adapt, scenarios, result)
 
34
    from bzrlib.tests.test_versionedfile import TestVersionedFiles
 
35
    vf_interface_tests = loader.loadTestsFromTestCase(TestVersionedFiles)
 
36
    cleanup_pack_group = groupcompress.cleanup_pack_group
 
37
    make_pack_factory = groupcompress.make_pack_factory
 
38
    group_scenario = ('groupcompressrabin-nograph', {
 
39
            'cleanup':cleanup_pack_group,
 
40
            'factory':make_pack_factory(False, False, 1),
 
41
            'graph': False,
 
42
            'key_length':1,
 
43
            'support_partial_insertion':False,
 
44
            }
 
45
        )
 
46
    applier = TestScenarioApplier()
 
47
    applier.scenarios = [group_scenario]
 
48
    adapt_tests(vf_interface_tests, applier, standard_tests)
 
49
    return standard_tests
46
50
 
47
51
 
48
52
class TestGroupCompressor(tests.TestCase):
 
53
    """Tests for GroupCompressor"""
 
54
 
 
55
    def test_empty_delta(self):
 
56
        compressor = groupcompress.GroupCompressor(True)
 
57
        self.assertEqual([], compressor.lines)
 
58
 
 
59
    def test_one_nosha_delta(self):
 
60
        # diff against NULL
 
61
        compressor = groupcompress.GroupCompressor(True)
 
62
        sha1, end_point = compressor.compress(('label',),
 
63
            'strange\ncommon\n', None)
 
64
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
 
65
        expected_lines = [
 
66
            'fulltext\n',
 
67
            'label:label\nsha1:%s\n' % sha1,
 
68
            'len:15\n',
 
69
            'strange\ncommon\n',
 
70
            ]
 
71
        self.assertEqual(expected_lines, compressor.lines)
 
72
        self.assertEqual(sum(map(len, expected_lines)), end_point)
49
73
 
50
74
    def _chunks_to_repr_lines(self, chunks):
51
75
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))
62
86
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
63
87
                             self._chunks_to_repr_lines(actual))
64
88
 
65
 
 
66
 
class TestAllGroupCompressors(TestGroupCompressor):
67
 
    """Tests for GroupCompressor"""
68
 
 
69
 
    compressor = None # Set by multiply_tests
70
 
 
71
 
    def test_empty_delta(self):
72
 
        compressor = self.compressor()
73
 
        self.assertEqual([], compressor.chunks)
74
 
 
75
 
    def test_one_nosha_delta(self):
76
 
        # diff against NULL
77
 
        compressor = self.compressor()
78
 
        sha1, start_point, end_point, _ = compressor.compress(('label',),
79
 
            'strange\ncommon\n', None)
80
 
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
81
 
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
82
 
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
83
 
        self.assertEqual(0, start_point)
84
 
        self.assertEqual(sum(map(len, expected_lines)), end_point)
85
 
 
86
 
    def test_empty_content(self):
87
 
        compressor = self.compressor()
88
 
        # Adding empty bytes should return the 'null' record
89
 
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
90
 
                                                                 '', None)
91
 
        self.assertEqual(0, start_point)
92
 
        self.assertEqual(0, end_point)
93
 
        self.assertEqual('fulltext', kind)
94
 
        self.assertEqual(groupcompress._null_sha1, sha1)
95
 
        self.assertEqual(0, compressor.endpoint)
96
 
        self.assertEqual([], compressor.chunks)
97
 
        # Even after adding some content
98
 
        compressor.compress(('content',), 'some\nbytes\n', None)
99
 
        self.assertTrue(compressor.endpoint > 0)
100
 
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
101
 
                                                                 '', None)
102
 
        self.assertEqual(0, start_point)
103
 
        self.assertEqual(0, end_point)
104
 
        self.assertEqual('fulltext', kind)
105
 
        self.assertEqual(groupcompress._null_sha1, sha1)
 
89
    def test_two_nosha_delta(self):
 
90
        compressor = groupcompress.GroupCompressor(True)
 
91
        sha1_1, _ = compressor.compress(('label',),
 
92
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
 
93
        expected_lines = list(compressor.lines)
 
94
        sha1_2, end_point = compressor.compress(('newlabel',),
 
95
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
 
96
        self.assertEqual(sha_string('common long line\n'
 
97
                                    'that needs a 16 byte match\n'
 
98
                                    'different\n'), sha1_2)
 
99
        expected_lines.extend([
 
100
            'delta\n'
 
101
            'label:newlabel\n',
 
102
            'sha1:%s\n' % sha1_2,
 
103
            'len:16\n',
 
104
            # source and target length
 
105
            '\x7e\x36',
 
106
            # copy the line common
 
107
            '\x91\x52\x2c', #copy, offset 0x52, len 0x2c
 
108
            # add the line different, and the trailing newline
 
109
            '\x0adifferent\n', # insert 10 bytes
 
110
            ])
 
111
        self.assertEqualDiffEncoded(expected_lines, compressor.lines)
 
112
        self.assertEqual(sum(map(len, expected_lines)), end_point)
 
113
 
 
114
    def test_three_nosha_delta(self):
 
115
        # The first interesting test: make a change that should use lines from
 
116
        # both parents.
 
117
        compressor = groupcompress.GroupCompressor(True)
 
118
        sha1_1, end_point = compressor.compress(('label',),
 
119
            'strange\ncommon very very long line\nwith some extra text\n', None)
 
120
        sha1_2, _ = compressor.compress(('newlabel',),
 
121
            'different\nmoredifferent\nand then some more\n', None)
 
122
        expected_lines = list(compressor.lines)
 
123
        sha1_3, end_point = compressor.compress(('label3',),
 
124
            'new\ncommon very very long line\nwith some extra text\n'
 
125
            'different\nmoredifferent\nand then some more\n',
 
126
            None)
 
127
        self.assertEqual(
 
128
            sha_string('new\ncommon very very long line\nwith some extra text\n'
 
129
                       'different\nmoredifferent\nand then some more\n'),
 
130
            sha1_3)
 
131
        expected_lines.extend([
 
132
            'delta\n',
 
133
            'label:label3\n',
 
134
            'sha1:%s\n' % sha1_3,
 
135
            'len:13\n',
 
136
            '\xfa\x01\x5f' # source and target length
 
137
            # insert new
 
138
            '\x03new',
 
139
            # Copy of first parent 'common' range
 
140
            '\x91\x51\x31' # copy, offset 0x51, 0x31 bytes
 
141
            # Copy of second parent 'different' range
 
142
            '\x91\xcf\x2b' # copy, offset 0xcf, 0x2b bytes
 
143
            ])
 
144
        self.assertEqualDiffEncoded(expected_lines, compressor.lines)
 
145
        self.assertEqual(sum(map(len, expected_lines)), end_point)
 
146
 
 
147
    def test_stats(self):
 
148
        compressor = groupcompress.GroupCompressor(True)
 
149
        compressor.compress(('label',), 'strange\ncommon\n', None)
 
150
        compressor.compress(('newlabel',),
 
151
                            'common\ndifferent\nmoredifferent\n', None)
 
152
        compressor.compress(('label3',),
 
153
                            'new\ncommon\ndifferent\nmoredifferent\n', None)
 
154
        self.assertAlmostEqual(0.3, compressor.ratio(), 1)
106
155
 
107
156
    def test_extract_from_compressor(self):
108
157
        # Knit fetching will try to reconstruct texts locally which results in
109
158
        # reading something that is in the compressor stream already.
110
 
        compressor = self.compressor()
111
 
        sha1_1, _, _, _ = compressor.compress(('label',),
 
159
        compressor = groupcompress.GroupCompressor(True)
 
160
        sha1_1, _ = compressor.compress(('label',),
112
161
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
113
 
        expected_lines = list(compressor.chunks)
114
 
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
 
162
        expected_lines = list(compressor.lines)
 
163
        sha1_2, end_point = compressor.compress(('newlabel',),
115
164
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
116
165
        # get the first out
117
 
        self.assertEqual(('strange\ncommon long line\n'
118
 
                          'that needs a 16 byte match\n', sha1_1),
119
 
                         compressor.extract(('label',)))
 
166
        self.assertEqual(('strange\ncommon\n', sha1_1),
 
167
            compressor.extract(('label',)))
120
168
        # and the second
121
169
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
122
170
                          'different\n', sha1_2),
123
171
                         compressor.extract(('newlabel',)))
124
172
 
125
 
    def test_pop_last(self):
126
 
        compressor = self.compressor()
127
 
        _, _, _, _ = compressor.compress(('key1',),
128
 
            'some text\nfor the first entry\n', None)
129
 
        expected_lines = list(compressor.chunks)
130
 
        _, _, _, _ = compressor.compress(('key2',),
131
 
            'some text\nfor the second entry\n', None)
132
 
        compressor.pop_last()
133
 
        self.assertEqual(expected_lines, compressor.chunks)
134
 
 
135
 
 
136
 
class TestPyrexGroupCompressor(TestGroupCompressor):
137
 
 
138
 
    _test_needs_features = [compiled_groupcompress_feature]
139
 
    compressor = groupcompress.PyrexGroupCompressor
140
 
 
141
 
    def test_stats(self):
142
 
        compressor = self.compressor()
143
 
        compressor.compress(('label',),
144
 
                            'strange\n'
145
 
                            'common very very long line\n'
146
 
                            'plus more text\n', None)
147
 
        compressor.compress(('newlabel',),
148
 
                            'common very very long line\n'
149
 
                            'plus more text\n'
150
 
                            'different\n'
151
 
                            'moredifferent\n', None)
152
 
        compressor.compress(('label3',),
153
 
                            'new\n'
154
 
                            'common very very long line\n'
155
 
                            'plus more text\n'
156
 
                            'different\n'
157
 
                            'moredifferent\n', None)
158
 
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
159
 
 
160
 
    def test_two_nosha_delta(self):
161
 
        compressor = self.compressor()
162
 
        sha1_1, _, _, _ = compressor.compress(('label',),
163
 
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
164
 
        expected_lines = list(compressor.chunks)
165
 
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
166
 
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
167
 
        self.assertEqual(sha_string('common long line\n'
168
 
                                    'that needs a 16 byte match\n'
169
 
                                    'different\n'), sha1_2)
170
 
        expected_lines.extend([
171
 
            # 'delta', delta length
172
 
            'd\x0f',
173
 
            # source and target length
174
 
            '\x36',
175
 
            # copy the line common
176
 
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
177
 
            # add the line different, and the trailing newline
178
 
            '\x0adifferent\n', # insert 10 bytes
179
 
            ])
180
 
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
181
 
        self.assertEqual(sum(map(len, expected_lines)), end_point)
182
 
 
183
 
    def test_three_nosha_delta(self):
184
 
        # The first interesting test: make a change that should use lines from
185
 
        # both parents.
186
 
        compressor = self.compressor()
187
 
        sha1_1, _, _, _ = compressor.compress(('label',),
188
 
            'strange\ncommon very very long line\nwith some extra text\n', None)
189
 
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
190
 
            'different\nmoredifferent\nand then some more\n', None)
191
 
        expected_lines = list(compressor.chunks)
192
 
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
193
 
            'new\ncommon very very long line\nwith some extra text\n'
194
 
            'different\nmoredifferent\nand then some more\n',
195
 
            None)
196
 
        self.assertEqual(
197
 
            sha_string('new\ncommon very very long line\nwith some extra text\n'
198
 
                       'different\nmoredifferent\nand then some more\n'),
199
 
            sha1_3)
200
 
        expected_lines.extend([
201
 
            # 'delta', delta length
202
 
            'd\x0b',
203
 
            # source and target length
204
 
            '\x5f'
205
 
            # insert new
206
 
            '\x03new',
207
 
            # Copy of first parent 'common' range
208
 
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
209
 
            # Copy of second parent 'different' range
210
 
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
211
 
            ])
212
 
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
213
 
        self.assertEqual(sum(map(len, expected_lines)), end_point)
214
 
 
215
 
 
216
 
class TestPythonGroupCompressor(TestGroupCompressor):
217
 
 
218
 
    compressor = groupcompress.PythonGroupCompressor
219
 
 
220
 
    def test_stats(self):
221
 
        compressor = self.compressor()
222
 
        compressor.compress(('label',),
223
 
                            'strange\n'
224
 
                            'common very very long line\n'
225
 
                            'plus more text\n', None)
226
 
        compressor.compress(('newlabel',),
227
 
                            'common very very long line\n'
228
 
                            'plus more text\n'
229
 
                            'different\n'
230
 
                            'moredifferent\n', None)
231
 
        compressor.compress(('label3',),
232
 
                            'new\n'
233
 
                            'common very very long line\n'
234
 
                            'plus more text\n'
235
 
                            'different\n'
236
 
                            'moredifferent\n', None)
237
 
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)
238
 
 
239
 
    def test_two_nosha_delta(self):
240
 
        compressor = self.compressor()
241
 
        sha1_1, _, _, _ = compressor.compress(('label',),
242
 
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
243
 
        expected_lines = list(compressor.chunks)
244
 
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
245
 
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
246
 
        self.assertEqual(sha_string('common long line\n'
247
 
                                    'that needs a 16 byte match\n'
248
 
                                    'different\n'), sha1_2)
249
 
        expected_lines.extend([
250
 
            # 'delta', delta length
251
 
            'd\x0f',
252
 
            # target length
253
 
            '\x36',
254
 
            # copy the line common
255
 
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
256
 
            # add the line different, and the trailing newline
257
 
            '\x0adifferent\n', # insert 10 bytes
258
 
            ])
259
 
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
260
 
        self.assertEqual(sum(map(len, expected_lines)), end_point)
261
 
 
262
 
    def test_three_nosha_delta(self):
263
 
        # The first interesting test: make a change that should use lines from
264
 
        # both parents.
265
 
        compressor = self.compressor()
266
 
        sha1_1, _, _, _ = compressor.compress(('label',),
267
 
            'strange\ncommon very very long line\nwith some extra text\n', None)
268
 
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
269
 
            'different\nmoredifferent\nand then some more\n', None)
270
 
        expected_lines = list(compressor.chunks)
271
 
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
272
 
            'new\ncommon very very long line\nwith some extra text\n'
273
 
            'different\nmoredifferent\nand then some more\n',
274
 
            None)
275
 
        self.assertEqual(
276
 
            sha_string('new\ncommon very very long line\nwith some extra text\n'
277
 
                       'different\nmoredifferent\nand then some more\n'),
278
 
            sha1_3)
279
 
        expected_lines.extend([
280
 
            # 'delta', delta length
281
 
            'd\x0c',
282
 
            # target length
283
 
            '\x5f'
284
 
            # insert new
285
 
            '\x04new\n',
286
 
            # Copy of first parent 'common' range
287
 
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
288
 
            # Copy of second parent 'different' range
289
 
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
290
 
            ])
291
 
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
292
 
        self.assertEqual(sum(map(len, expected_lines)), end_point)
 
173
 
 
174
class TestBase128Int(tests.TestCase):
 
175
 
 
176
    def assertEqualEncode(self, bytes, val):
 
177
        self.assertEqual(bytes, groupcompress.encode_base128_int(val))
 
178
 
 
179
    def assertEqualDecode(self, val, num_decode, bytes):
 
180
        self.assertEqual((val, num_decode),
 
181
                         groupcompress.decode_base128_int(bytes))
 
182
 
 
183
    def test_encode(self):
 
184
        self.assertEqualEncode('\x01', 1)
 
185
        self.assertEqualEncode('\x02', 2)
 
186
        self.assertEqualEncode('\x7f', 127)
 
187
        self.assertEqualEncode('\x80\x01', 128)
 
188
        self.assertEqualEncode('\xff\x01', 255)
 
189
        self.assertEqualEncode('\x80\x02', 256)
 
190
        self.assertEqualEncode('\xff\xff\xff\xff\x0f', 0xFFFFFFFF)
 
191
 
 
192
    def test_decode(self):
 
193
        self.assertEqualDecode(1, 1, '\x01')
 
194
        self.assertEqualDecode(2, 1, '\x02')
 
195
        self.assertEqualDecode(127, 1, '\x7f')
 
196
        self.assertEqualDecode(128, 2, '\x80\x01')
 
197
        self.assertEqualDecode(255, 2, '\xff\x01')
 
198
        self.assertEqualDecode(256, 2, '\x80\x02')
 
199
        self.assertEqualDecode(0xFFFFFFFF, 5, '\xff\xff\xff\xff\x0f')
 
200
 
 
201
    def test_decode_with_trailing_bytes(self):
 
202
        self.assertEqualDecode(1, 1, '\x01abcdef')
 
203
        self.assertEqualDecode(127, 1, '\x7f\x01')
 
204
        self.assertEqualDecode(128, 2, '\x80\x01abcdef')
 
205
        self.assertEqualDecode(255, 2, '\xff\x01\xff')
293
206
 
294
207
 
295
208
class TestGroupCompressBlock(tests.TestCase):
296
209
 
297
 
    def make_block(self, key_to_text):
298
 
        """Create a GroupCompressBlock, filling it with the given texts."""
299
 
        compressor = groupcompress.GroupCompressor()
300
 
        start = 0
301
 
        for key in sorted(key_to_text):
302
 
            compressor.compress(key, key_to_text[key], None)
303
 
        locs = dict((key, (start, end)) for key, (start, _, end, _)
304
 
                    in compressor.labels_deltas.iteritems())
305
 
        block = compressor.flush()
306
 
        raw_bytes = block.to_bytes()
307
 
        # Go through from_bytes(to_bytes()) so that we start with a compressed
308
 
        # content object
309
 
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
310
 
 
311
210
    def test_from_empty_bytes(self):
312
 
        self.assertRaises(ValueError,
 
211
        self.assertRaises(errors.InvalidGroupCompressBlock,
313
212
                          groupcompress.GroupCompressBlock.from_bytes, '')
314
213
 
315
214
    def test_from_minimal_bytes(self):
316
 
        block = groupcompress.GroupCompressBlock.from_bytes(
317
 
            'gcb1z\n0\n0\n')
 
215
        block = groupcompress.GroupCompressBlock.from_bytes('gcb1z\n0\n0\n')
318
216
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
319
 
        self.assertIs(None, block._content)
320
 
        self.assertEqual('', block._z_content)
321
 
        block._ensure_content()
322
 
        self.assertEqual('', block._content)
323
 
        self.assertEqual('', block._z_content)
324
 
        block._ensure_content() # Ensure content is safe to call 2x
325
 
 
326
 
    def test_from_invalid(self):
327
 
        self.assertRaises(ValueError,
328
 
                          groupcompress.GroupCompressBlock.from_bytes,
329
 
                          'this is not a valid header')
 
217
        self.assertEqual({}, block._entries)
330
218
 
331
219
    def test_from_bytes(self):
332
 
        content = ('a tiny bit of content\n')
333
 
        z_content = zlib.compress(content)
334
 
        z_bytes = (
 
220
        z_header_bytes = (
335
221
            'gcb1z\n' # group compress block v1 plain
336
 
            '%d\n' # Length of compressed content
337
 
            '%d\n' # Length of uncompressed content
338
 
            '%s'   # Compressed content
339
 
            ) % (len(z_content), len(content), z_content)
 
222
            '76\n' # Length of zlib bytes
 
223
            '183\n' # Length of all meta-info
 
224
            + zlib.compress(
 
225
            'key:bing\n'
 
226
            'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
 
227
            'type:fulltext\n'
 
228
            'start:100\n'
 
229
            'length:100\n'
 
230
            '\n'
 
231
            'key:foo\x00bar\n'
 
232
            'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
 
233
            'type:fulltext\n'
 
234
            'start:0\n'
 
235
            'length:100\n'
 
236
            '\n'))
340
237
        block = groupcompress.GroupCompressBlock.from_bytes(
341
 
            z_bytes)
342
 
        self.assertEqual(z_content, block._z_content)
 
238
            z_header_bytes)
343
239
        self.assertIs(None, block._content)
344
 
        self.assertEqual(len(z_content), block._z_content_length)
345
 
        self.assertEqual(len(content), block._content_length)
346
 
        block._ensure_content()
347
 
        self.assertEqual(z_content, block._z_content)
348
 
        self.assertEqual(content, block._content)
 
240
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
 
241
        self.assertEqual([('bing',), ('foo', 'bar')], sorted(block._entries))
 
242
        bing = block._entries[('bing',)]
 
243
        self.assertEqual(('bing',), bing.key)
 
244
        self.assertEqual('fulltext', bing.type)
 
245
        self.assertEqual('abcd'*10, bing.sha1)
 
246
        self.assertEqual(100, bing.start)
 
247
        self.assertEqual(100, bing.length)
 
248
        foobar = block._entries[('foo', 'bar')]
 
249
        self.assertEqual(('foo', 'bar'), foobar.key)
 
250
        self.assertEqual('fulltext', foobar.type)
 
251
        self.assertEqual('abcd'*10, foobar.sha1)
 
252
        self.assertEqual(0, foobar.start)
 
253
        self.assertEqual(100, foobar.length)
 
254
 
 
255
    def test_add_entry(self):
 
256
        gcb = groupcompress.GroupCompressBlock()
 
257
        e = gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
 
258
        self.assertIsInstance(e, groupcompress.GroupCompressBlockEntry)
 
259
        self.assertEqual(('foo', 'bar'), e.key)
 
260
        self.assertEqual('fulltext', e.type)
 
261
        self.assertEqual('abcd'*10, e.sha1)
 
262
        self.assertEqual(0, e.start)
 
263
        self.assertEqual(100, e.length)
349
264
 
350
265
    def test_to_bytes(self):
351
 
        content = ('this is some content\n'
352
 
                   'this content will be compressed\n')
353
266
        gcb = groupcompress.GroupCompressBlock()
354
 
        gcb.set_content(content)
 
267
        gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
 
268
        gcb.add_entry(('bing',), 'fulltext', 'abcd'*10, 100, 100)
355
269
        bytes = gcb.to_bytes()
356
 
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
357
 
        self.assertEqual(gcb._content_length, len(content))
358
 
        expected_header =('gcb1z\n' # group compress block v1 zlib
359
 
                          '%d\n' # Length of compressed content
360
 
                          '%d\n' # Length of uncompressed content
361
 
                         ) % (gcb._z_content_length, gcb._content_length)
362
 
        self.assertStartsWith(bytes, expected_header)
363
 
        remaining_bytes = bytes[len(expected_header):]
 
270
        self.assertStartsWith(bytes,
 
271
                              'gcb1z\n' # group compress block v1 zlib
 
272
                              '76\n' # Length of compressed bytes
 
273
                              '183\n' # Length of all meta-info
 
274
                             )
 
275
        remaining_bytes = bytes[13:]
364
276
        raw_bytes = zlib.decompress(remaining_bytes)
365
 
        self.assertEqual(content, raw_bytes)
366
 
 
367
 
        # we should get the same results if using the chunked version
368
 
        gcb = groupcompress.GroupCompressBlock()
369
 
        gcb.set_chunked_content(['this is some content\n'
370
 
                                 'this content will be compressed\n'],
371
 
                                 len(content))
372
 
        old_bytes = bytes
373
 
        bytes = gcb.to_bytes()
374
 
        self.assertEqual(old_bytes, bytes)
375
 
 
376
 
    def test_partial_decomp(self):
377
 
        content_chunks = []
378
 
        # We need a sufficient amount of data so that zlib.decompress has
379
 
        # partial decompression to work with. Most auto-generated data
380
 
        # compresses a bit too well, we want a combination, so we combine a sha
381
 
        # hash with compressible data.
382
 
        for i in xrange(2048):
383
 
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
384
 
            content_chunks.append(next_content)
385
 
            next_sha1 = osutils.sha_string(next_content)
386
 
            content_chunks.append(next_sha1 + '\n')
387
 
        content = ''.join(content_chunks)
388
 
        self.assertEqual(158634, len(content))
389
 
        z_content = zlib.compress(content)
390
 
        self.assertEqual(57182, len(z_content))
391
 
        block = groupcompress.GroupCompressBlock()
392
 
        block._z_content = z_content
393
 
        block._z_content_length = len(z_content)
394
 
        block._compressor_name = 'zlib'
395
 
        block._content_length = 158634
396
 
        self.assertIs(None, block._content)
397
 
        block._ensure_content(100)
398
 
        self.assertIsNot(None, block._content)
399
 
        # We have decompressed at least 100 bytes
400
 
        self.assertTrue(len(block._content) >= 100)
401
 
        # We have not decompressed the whole content
402
 
        self.assertTrue(len(block._content) < 158634)
403
 
        self.assertEqualDiff(content[:len(block._content)], block._content)
404
 
        # ensuring content that we already have shouldn't cause any more data
405
 
        # to be extracted
406
 
        cur_len = len(block._content)
407
 
        block._ensure_content(cur_len - 10)
408
 
        self.assertEqual(cur_len, len(block._content))
409
 
        # Now we want a bit more content
410
 
        cur_len += 10
411
 
        block._ensure_content(cur_len)
412
 
        self.assertTrue(len(block._content) >= cur_len)
413
 
        self.assertTrue(len(block._content) < 158634)
414
 
        self.assertEqualDiff(content[:len(block._content)], block._content)
415
 
        # And now lets finish
416
 
        block._ensure_content(158634)
417
 
        self.assertEqualDiff(content, block._content)
418
 
        # And the decompressor is finalized
419
 
        self.assertIs(None, block._z_content_decompressor)
420
 
 
421
 
    def test__ensure_all_content(self):
422
 
        content_chunks = []
423
 
        # We need a sufficient amount of data so that zlib.decompress has
424
 
        # partial decompression to work with. Most auto-generated data
425
 
        # compresses a bit too well, we want a combination, so we combine a sha
426
 
        # hash with compressible data.
427
 
        for i in xrange(2048):
428
 
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
429
 
            content_chunks.append(next_content)
430
 
            next_sha1 = osutils.sha_string(next_content)
431
 
            content_chunks.append(next_sha1 + '\n')
432
 
        content = ''.join(content_chunks)
433
 
        self.assertEqual(158634, len(content))
434
 
        z_content = zlib.compress(content)
435
 
        self.assertEqual(57182, len(z_content))
436
 
        block = groupcompress.GroupCompressBlock()
437
 
        block._z_content = z_content
438
 
        block._z_content_length = len(z_content)
439
 
        block._compressor_name = 'zlib'
440
 
        block._content_length = 158634
441
 
        self.assertIs(None, block._content)
442
 
        # The first _ensure_content got all of the required data
443
 
        block._ensure_content(158634)
444
 
        self.assertEqualDiff(content, block._content)
445
 
        # And we should have released the _z_content_decompressor since it was
446
 
        # fully consumed
447
 
        self.assertIs(None, block._z_content_decompressor)
448
 
 
449
 
    def test__dump(self):
450
 
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
451
 
        key_to_text = {('1',): dup_content + '1 unique\n',
452
 
                       ('2',): dup_content + '2 extra special\n'}
453
 
        locs, block = self.make_block(key_to_text)
454
 
        self.assertEqual([('f', len(key_to_text[('1',)])),
455
 
                          ('d', 21, len(key_to_text[('2',)]),
456
 
                           [('c', 2, len(dup_content)),
457
 
                            ('i', len('2 extra special\n'), '')
458
 
                           ]),
459
 
                         ], block._dump())
460
 
 
461
 
 
462
 
class TestCaseWithGroupCompressVersionedFiles(
463
 
        tests.TestCaseWithMemoryTransport):
464
 
 
465
 
    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
466
 
                     dir='.', inconsistency_fatal=True):
467
 
        t = self.get_transport(dir)
468
 
        t.ensure_base()
469
 
        vf = groupcompress.make_pack_factory(graph=create_graph,
470
 
            delta=False, keylength=keylength,
471
 
            inconsistency_fatal=inconsistency_fatal)(t)
472
 
        if do_cleanup:
473
 
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
474
 
        return vf
475
 
 
476
 
 
477
 
class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):
478
 
 
479
 
    def make_g_index(self, name, ref_lists=0, nodes=[]):
480
 
        builder = btree_index.BTreeBuilder(ref_lists)
481
 
        for node, references, value in nodes:
482
 
            builder.add_node(node, references, value)
483
 
        stream = builder.finish()
484
 
        trans = self.get_transport()
485
 
        size = trans.put_file(name, stream)
486
 
        return btree_index.BTreeGraphIndex(trans, name, size)
487
 
 
488
 
    def make_g_index_missing_parent(self):
489
 
        graph_index = self.make_g_index('missing_parent', 1,
490
 
            [(('parent', ), '2 78 2 10', ([],)),
491
 
             (('tip', ), '2 78 2 10',
492
 
              ([('parent', ), ('missing-parent', )],)),
493
 
              ])
494
 
        return graph_index
495
 
 
496
 
    def test_get_record_stream_as_requested(self):
497
 
        # Consider promoting 'as-requested' to general availability, and
498
 
        # make this a VF interface test
499
 
        vf = self.make_test_vf(False, dir='source')
500
 
        vf.add_lines(('a',), (), ['lines\n'])
501
 
        vf.add_lines(('b',), (), ['lines\n'])
502
 
        vf.add_lines(('c',), (), ['lines\n'])
503
 
        vf.add_lines(('d',), (), ['lines\n'])
504
 
        vf.writer.end()
505
 
        keys = [record.key for record in vf.get_record_stream(
506
 
                    [('a',), ('b',), ('c',), ('d',)],
507
 
                    'as-requested', False)]
508
 
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
509
 
        keys = [record.key for record in vf.get_record_stream(
510
 
                    [('b',), ('a',), ('d',), ('c',)],
511
 
                    'as-requested', False)]
512
 
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
513
 
 
514
 
        # It should work even after being repacked into another VF
515
 
        vf2 = self.make_test_vf(False, dir='target')
516
 
        vf2.insert_record_stream(vf.get_record_stream(
517
 
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
518
 
        vf2.writer.end()
519
 
 
520
 
        keys = [record.key for record in vf2.get_record_stream(
521
 
                    [('a',), ('b',), ('c',), ('d',)],
522
 
                    'as-requested', False)]
523
 
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
524
 
        keys = [record.key for record in vf2.get_record_stream(
525
 
                    [('b',), ('a',), ('d',), ('c',)],
526
 
                    'as-requested', False)]
527
 
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)
528
 
 
529
 
    def test_insert_record_stream_reuses_blocks(self):
530
 
        vf = self.make_test_vf(True, dir='source')
531
 
        def grouped_stream(revision_ids, first_parents=()):
532
 
            parents = first_parents
533
 
            for revision_id in revision_ids:
534
 
                key = (revision_id,)
535
 
                record = versionedfile.FulltextContentFactory(
536
 
                    key, parents, None,
537
 
                    'some content that is\n'
538
 
                    'identical except for\n'
539
 
                    'revision_id:%s\n' % (revision_id,))
540
 
                yield record
541
 
                parents = (key,)
542
 
        # One group, a-d
543
 
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
544
 
        # Second group, e-h
545
 
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
546
 
                                               first_parents=(('d',),)))
547
 
        block_bytes = {}
548
 
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
549
 
                                      'unordered', False)
550
 
        num_records = 0
551
 
        for record in stream:
552
 
            if record.key in [('a',), ('e',)]:
553
 
                self.assertEqual('groupcompress-block', record.storage_kind)
554
 
            else:
555
 
                self.assertEqual('groupcompress-block-ref',
556
 
                                 record.storage_kind)
557
 
            block_bytes[record.key] = record._manager._block._z_content
558
 
            num_records += 1
559
 
        self.assertEqual(8, num_records)
560
 
        for r in 'abcd':
561
 
            key = (r,)
562
 
            self.assertIs(block_bytes[key], block_bytes[('a',)])
563
 
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
564
 
        for r in 'efgh':
565
 
            key = (r,)
566
 
            self.assertIs(block_bytes[key], block_bytes[('e',)])
567
 
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
568
 
        # Now copy the blocks into another vf, and ensure that the blocks are
569
 
        # preserved without creating new entries
570
 
        vf2 = self.make_test_vf(True, dir='target')
571
 
        # ordering in 'groupcompress' order, should actually swap the groups in
572
 
        # the target vf, but the groups themselves should not be disturbed.
573
 
        def small_size_stream():
574
 
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
575
 
                                               'groupcompress', False):
576
 
                record._manager._full_enough_block_size = \
577
 
                    record._manager._block._content_length
578
 
                yield record
579
 
                        
580
 
        vf2.insert_record_stream(small_size_stream())
581
 
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
582
 
                                       'groupcompress', False)
583
 
        vf2.writer.end()
584
 
        num_records = 0
585
 
        for record in stream:
586
 
            num_records += 1
587
 
            self.assertEqual(block_bytes[record.key],
588
 
                             record._manager._block._z_content)
589
 
        self.assertEqual(8, num_records)
590
 
 
591
 
    def test_insert_record_stream_packs_on_the_fly(self):
592
 
        vf = self.make_test_vf(True, dir='source')
593
 
        def grouped_stream(revision_ids, first_parents=()):
594
 
            parents = first_parents
595
 
            for revision_id in revision_ids:
596
 
                key = (revision_id,)
597
 
                record = versionedfile.FulltextContentFactory(
598
 
                    key, parents, None,
599
 
                    'some content that is\n'
600
 
                    'identical except for\n'
601
 
                    'revision_id:%s\n' % (revision_id,))
602
 
                yield record
603
 
                parents = (key,)
604
 
        # One group, a-d
605
 
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
606
 
        # Second group, e-h
607
 
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
608
 
                                               first_parents=(('d',),)))
609
 
        # Now copy the blocks into another vf, and see that the
610
 
        # insert_record_stream rebuilt a new block on-the-fly because of
611
 
        # under-utilization
612
 
        vf2 = self.make_test_vf(True, dir='target')
613
 
        vf2.insert_record_stream(vf.get_record_stream(
614
 
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
615
 
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
616
 
                                       'groupcompress', False)
617
 
        vf2.writer.end()
618
 
        num_records = 0
619
 
        # All of the records should be recombined into a single block
620
 
        block = None
621
 
        for record in stream:
622
 
            num_records += 1
623
 
            if block is None:
624
 
                block = record._manager._block
625
 
            else:
626
 
                self.assertIs(block, record._manager._block)
627
 
        self.assertEqual(8, num_records)
628
 
 
629
 
    def test__insert_record_stream_no_reuse_block(self):
630
 
        vf = self.make_test_vf(True, dir='source')
631
 
        def grouped_stream(revision_ids, first_parents=()):
632
 
            parents = first_parents
633
 
            for revision_id in revision_ids:
634
 
                key = (revision_id,)
635
 
                record = versionedfile.FulltextContentFactory(
636
 
                    key, parents, None,
637
 
                    'some content that is\n'
638
 
                    'identical except for\n'
639
 
                    'revision_id:%s\n' % (revision_id,))
640
 
                yield record
641
 
                parents = (key,)
642
 
        # One group, a-d
643
 
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
644
 
        # Second group, e-h
645
 
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
646
 
                                               first_parents=(('d',),)))
647
 
        vf.writer.end()
648
 
        self.assertEqual(8, len(list(vf.get_record_stream(
649
 
                                        [(r,) for r in 'abcdefgh'],
650
 
                                        'unordered', False))))
651
 
        # Now copy the blocks into another vf, and ensure that the blocks are
652
 
        # preserved without creating new entries
653
 
        vf2 = self.make_test_vf(True, dir='target')
654
 
        # ordering in 'groupcompress' order, should actually swap the groups in
655
 
        # the target vf, but the groups themselves should not be disturbed.
656
 
        list(vf2._insert_record_stream(vf.get_record_stream(
657
 
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
658
 
            reuse_blocks=False))
659
 
        vf2.writer.end()
660
 
        # After inserting with reuse_blocks=False, we should have everything in
661
 
        # a single new block.
662
 
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
663
 
                                       'groupcompress', False)
664
 
        block = None
665
 
        for record in stream:
666
 
            if block is None:
667
 
                block = record._manager._block
668
 
            else:
669
 
                self.assertIs(block, record._manager._block)
670
 
 
671
 
    def test_add_missing_noncompression_parent_unvalidated_index(self):
672
 
        unvalidated = self.make_g_index_missing_parent()
673
 
        combined = _mod_index.CombinedGraphIndex([unvalidated])
674
 
        index = groupcompress._GCGraphIndex(combined,
675
 
            is_locked=lambda: True, parents=True,
676
 
            track_external_parent_refs=True)
677
 
        index.scan_unvalidated_index(unvalidated)
678
 
        self.assertEqual(
679
 
            frozenset([('missing-parent',)]), index.get_missing_parents())
680
 
 
681
 
    def test_track_external_parent_refs(self):
682
 
        g_index = self.make_g_index('empty', 1, [])
683
 
        mod_index = btree_index.BTreeBuilder(1, 1)
684
 
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
685
 
        index = groupcompress._GCGraphIndex(combined,
686
 
            is_locked=lambda: True, parents=True,
687
 
            add_callback=mod_index.add_nodes,
688
 
            track_external_parent_refs=True)
689
 
        index.add_records([
690
 
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
691
 
        self.assertEqual(
692
 
            frozenset([('parent-1',), ('parent-2',)]),
693
 
            index.get_missing_parents())
694
 
 
695
 
    def make_source_with_b(self, a_parent, path):
696
 
        source = self.make_test_vf(True, dir=path)
697
 
        source.add_lines(('a',), (), ['lines\n'])
698
 
        if a_parent:
699
 
            b_parents = (('a',),)
700
 
        else:
701
 
            b_parents = ()
702
 
        source.add_lines(('b',), b_parents, ['lines\n'])
703
 
        return source
704
 
 
705
 
    def do_inconsistent_inserts(self, inconsistency_fatal):
706
 
        target = self.make_test_vf(True, dir='target',
707
 
                                   inconsistency_fatal=inconsistency_fatal)
708
 
        for x in range(2):
709
 
            source = self.make_source_with_b(x==1, 'source%s' % x)
710
 
            target.insert_record_stream(source.get_record_stream(
711
 
                [('b',)], 'unordered', False))
712
 
 
713
 
    def test_inconsistent_redundant_inserts_warn(self):
714
 
        """Should not insert a record that is already present."""
715
 
        warnings = []
716
 
        def warning(template, args):
717
 
            warnings.append(template % args)
718
 
        _trace_warning = trace.warning
719
 
        trace.warning = warning
720
 
        try:
721
 
            self.do_inconsistent_inserts(inconsistency_fatal=False)
722
 
        finally:
723
 
            trace.warning = _trace_warning
724
 
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
725
 
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
726
 
                         warnings)
727
 
 
728
 
    def test_inconsistent_redundant_inserts_raises(self):
729
 
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
730
 
                              inconsistency_fatal=True)
731
 
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
732
 
                              " in add_records:"
733
 
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
734
 
                              " 0 8', \(\(\('a',\),\),\)\)")
735
 
 
736
 
    def test_clear_cache(self):
737
 
        vf = self.make_source_with_b(True, 'source')
738
 
        vf.writer.end()
739
 
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
740
 
                                           True):
741
 
            pass
742
 
        self.assertTrue(len(vf._group_cache) > 0)
743
 
        vf.clear_cache()
744
 
        self.assertEqual(0, len(vf._group_cache))
745
 
 
746
 
 
747
 
 
748
 
class StubGCVF(object):
749
 
    def __init__(self, canned_get_blocks=None):
750
 
        self._group_cache = {}
751
 
        self._canned_get_blocks = canned_get_blocks or []
752
 
    def _get_blocks(self, read_memos):
753
 
        return iter(self._canned_get_blocks)
754
 
    
755
 
 
756
 
class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
757
 
    """Simple whitebox unit tests for _BatchingBlockFetcher."""
758
 
    
759
 
    def test_add_key_new_read_memo(self):
760
 
        """Adding a key with an uncached read_memo new to this batch adds that
761
 
        read_memo to the list of memos to fetch.
762
 
        """
763
 
        # locations are: index_memo, ignored, parents, ignored
764
 
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
765
 
        # and (idx, offset, size) is known as the 'read_memo', identifying the
766
 
        # raw bytes needed.
767
 
        read_memo = ('fake index', 100, 50)
768
 
        locations = {
769
 
            ('key',): (read_memo + (None, None), None, None, None)}
770
 
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
771
 
        total_size = batcher.add_key(('key',))
772
 
        self.assertEqual(50, total_size)
773
 
        self.assertEqual([('key',)], batcher.keys)
774
 
        self.assertEqual([read_memo], batcher.memos_to_get)
775
 
 
776
 
    def test_add_key_duplicate_read_memo(self):
777
 
        """read_memos that occur multiple times in a batch will only be fetched
778
 
        once.
779
 
        """
780
 
        read_memo = ('fake index', 100, 50)
781
 
        # Two keys, both sharing the same read memo (but different overall
782
 
        # index_memos).
783
 
        locations = {
784
 
            ('key1',): (read_memo + (0, 1), None, None, None),
785
 
            ('key2',): (read_memo + (1, 2), None, None, None)}
786
 
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
787
 
        total_size = batcher.add_key(('key1',))
788
 
        total_size = batcher.add_key(('key2',))
789
 
        self.assertEqual(50, total_size)
790
 
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
791
 
        self.assertEqual([read_memo], batcher.memos_to_get)
792
 
 
793
 
    def test_add_key_cached_read_memo(self):
794
 
        """Adding a key with a cached read_memo will not cause that read_memo
795
 
        to be added to the list to fetch.
796
 
        """
797
 
        read_memo = ('fake index', 100, 50)
798
 
        gcvf = StubGCVF()
799
 
        gcvf._group_cache[read_memo] = 'fake block'
800
 
        locations = {
801
 
            ('key',): (read_memo + (None, None), None, None, None)}
802
 
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
803
 
        total_size = batcher.add_key(('key',))
804
 
        self.assertEqual(0, total_size)
805
 
        self.assertEqual([('key',)], batcher.keys)
806
 
        self.assertEqual([], batcher.memos_to_get)
807
 
 
808
 
    def test_yield_factories_empty(self):
809
 
        """An empty batch yields no factories."""
810
 
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
811
 
        self.assertEqual([], list(batcher.yield_factories()))
812
 
 
813
 
    def test_yield_factories_calls_get_blocks(self):
814
 
        """Uncached memos are retrieved via get_blocks."""
815
 
        read_memo1 = ('fake index', 100, 50)
816
 
        read_memo2 = ('fake index', 150, 40)
817
 
        gcvf = StubGCVF(
818
 
            canned_get_blocks=[
819
 
                (read_memo1, groupcompress.GroupCompressBlock()),
820
 
                (read_memo2, groupcompress.GroupCompressBlock())])
821
 
        locations = {
822
 
            ('key1',): (read_memo1 + (None, None), None, None, None),
823
 
            ('key2',): (read_memo2 + (None, None), None, None, None)}
824
 
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
825
 
        batcher.add_key(('key1',))
826
 
        batcher.add_key(('key2',))
827
 
        factories = list(batcher.yield_factories(full_flush=True))
828
 
        self.assertLength(2, factories)
829
 
        keys = [f.key for f in factories]
830
 
        kinds = [f.storage_kind for f in factories]
831
 
        self.assertEqual([('key1',), ('key2',)], keys)
832
 
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)
833
 
 
834
 
    def test_yield_factories_flushing(self):
835
 
        """yield_factories holds back on yielding results from the final block
836
 
        unless passed full_flush=True.
837
 
        """
838
 
        fake_block = groupcompress.GroupCompressBlock()
839
 
        read_memo = ('fake index', 100, 50)
840
 
        gcvf = StubGCVF()
841
 
        gcvf._group_cache[read_memo] = fake_block
842
 
        locations = {
843
 
            ('key',): (read_memo + (None, None), None, None, None)}
844
 
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
845
 
        batcher.add_key(('key',))
846
 
        self.assertEqual([], list(batcher.yield_factories()))
847
 
        factories = list(batcher.yield_factories(full_flush=True))
848
 
        self.assertLength(1, factories)
849
 
        self.assertEqual(('key',), factories[0].key)
850
 
        self.assertEqual('groupcompress-block', factories[0].storage_kind)
851
 
 
852
 
 
853
 
class TestLazyGroupCompress(tests.TestCaseWithTransport):
854
 
 
855
 
    _texts = {
856
 
        ('key1',): "this is a text\n"
857
 
                   "with a reasonable amount of compressible bytes\n"
858
 
                   "which can be shared between various other texts\n",
859
 
        ('key2',): "another text\n"
860
 
                   "with a reasonable amount of compressible bytes\n"
861
 
                   "which can be shared between various other texts\n",
862
 
        ('key3',): "yet another text which won't be extracted\n"
863
 
                   "with a reasonable amount of compressible bytes\n"
864
 
                   "which can be shared between various other texts\n",
865
 
        ('key4',): "this will be extracted\n"
866
 
                   "but references most of its bytes from\n"
867
 
                   "yet another text which won't be extracted\n"
868
 
                   "with a reasonable amount of compressible bytes\n"
869
 
                   "which can be shared between various other texts\n",
870
 
    }
871
 
    def make_block(self, key_to_text):
872
 
        """Create a GroupCompressBlock, filling it with the given texts."""
873
 
        compressor = groupcompress.GroupCompressor()
874
 
        start = 0
875
 
        for key in sorted(key_to_text):
876
 
            compressor.compress(key, key_to_text[key], None)
877
 
        locs = dict((key, (start, end)) for key, (start, _, end, _)
878
 
                    in compressor.labels_deltas.iteritems())
879
 
        block = compressor.flush()
880
 
        raw_bytes = block.to_bytes()
881
 
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)
882
 
 
883
 
    def add_key_to_manager(self, key, locations, block, manager):
884
 
        start, end = locations[key]
885
 
        manager.add_factory(key, (), start, end)
886
 
 
887
 
    def make_block_and_full_manager(self, texts):
888
 
        locations, block = self.make_block(texts)
889
 
        manager = groupcompress._LazyGroupContentManager(block)
890
 
        for key in sorted(texts):
891
 
            self.add_key_to_manager(key, locations, block, manager)
892
 
        return block, manager
893
 
 
894
 
    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zheader
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                            header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)
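
    def _parse_wire_bytes_sketch(self, wire_bytes):
        # Not called by the tests; just an illustrative sketch of how the
        # 'groupcompress-block' wire payload decomposes, mirroring the
        # assertions in test__wire_bytes above (the helper name and return
        # shape are our own, not part of bzrlib's API).
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        # header_len and block_len describe the decompressed header and the
        # trailing block bytes respectively.
        header = zlib.decompress(rest[:z_header_len])
        # Each factory appears in the header as four lines:
        # key, parents (blank if none), start offset, end offset.
        block_bytes = rest[z_header_len:]
        return storage_kind, header, block_bytes
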
    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)
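
    # The next two tests cover _check_rebuild_block() when only part of a
    # block is requested: the manager replaces the block with a smaller one,
    # either by stripping unused content or by rebuilding it around the texts
    # that were actually requested (the inline comments below name which
    # action each case is expected to take).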

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'complete'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key4, then we have enough, as we only require 75%
        # consumption
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())
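

# check_is_well_utilized() above asks whether the requested texts cover enough
# of the block to be worth keeping as-is.  The helper below is only a rough
# illustration of that idea; its name, signature and the 75% figure echo the
# comment above and are not bzrlib's actual implementation.
def _covers_enough_of_block(requested_ranges, total_content_length,
                            threshold=0.75):
    """Return True if (start, end) ranges cover >= threshold of the content."""
    used = sum(end - start for start, end in requested_ranges)
    return used >= threshold * total_content_length
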
 
class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))
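
    def _positional_access_sketch(self):
        # Not called by the tests; a sketch of the tuple-compatible access
        # that test_acts_like_tuple checks for, so callers written against the
        # old 4-tuple of build details keep working.  The local names here are
        # descriptive only, not taken from bzrlib.
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        index_memo = bd[0]           # where/how to read the raw bytes
        compression_parent = bd[1]   # always None for groupcompress
        parents = bd[2]
        record_details = bd[3]       # e.g. ('group', None)
        return index_memo, compression_parent, parents, record_details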